From 5c784aa37512515ee67069f2456dff10138587eb Mon Sep 17 00:00:00 2001
From: Kevin Wheatley <kevin.wheatley@framestore.com>
Date: Mon, 23 Dec 2024 15:12:00 +0000
Subject: [PATCH 01/70] Extend ocioperf to take config file parameter on CLI

Signed-off-by: Kevin Wheatley <kevin.wheatley@framestore.com>
---
 src/apps/ocioperf/main.cpp | 71 +++++++++++++++++++-------------------
 1 file changed, 35 insertions(+), 36 deletions(-)

diff --git a/src/apps/ocioperf/main.cpp b/src/apps/ocioperf/main.cpp
index a3af75a587..4c7837494f 100644
--- a/src/apps/ocioperf/main.cpp
+++ b/src/apps/ocioperf/main.cpp
@@ -24,7 +24,6 @@ class CustomMeasure
 
     explicit CustomMeasure(const char * explanation)
         :   m_explanations(explanation)
-        ,   m_iterations(1)
     {
         resume();
     }
@@ -183,6 +182,10 @@ int main(int argc, const char **argv)
     bool useDisplayview = false;
     bool useInvertview  = false;
 
+    std::string inputconfig;
+    OCIO::ConstConfigRcPtr srcConfig;
+
+
     ArgParse ap;
     ap.options("ocioperf -- apply and measure a color transformation processing\n\n"
                "usage: ocioperf [options] --transform /path/to/file.clf\n\n",
@@ -206,6 +209,8 @@ int main(int argc, const char **argv)
                                             "(Deprecated) Provide the input and (display, view) pair to apply on the image",
                "--invertview %s %s %s",     &display, &view, &outColorSpace,
                                             "Provide the (display, view) pair and output color space to apply on the image",
+               "--iconfig %s",              &inputconfig,
+                                            "Input .ocio configuration file (default: $OCIO)",
                "--iter %d",                 &iterations, "Provide the number of iterations on the processing. Default is 50",
                "--bitdepths %s %s",         &inBitDepthStr, &outBitDepthStr,
                                             "Provide input and output bit-depths (i.e. ui16, f32). Default is f32",
@@ -232,25 +237,6 @@ int main(int argc, const char **argv)
     {
         std::cout << std::endl;
         std::cout << "OCIO Version: " << OCIO::GetVersion() << std::endl;
-        const char * env = OCIO::GetEnvVariable("OCIO");
-        if(env && *env)
-        {
-            try
-            {
-                std::cout << std::endl;
-                std::cout << "OCIO Config. file:    '" << env << "'" << std::endl;
-                OCIO::ConstConfigRcPtr config = OCIO::GetCurrentConfig();
-                std::cout << "OCIO Config. version: " << config->getMajorVersion() << "." 
-                                                      << config->getMinorVersion() << std::endl;
-                std::cout << "OCIO search_path:     " << config->getSearchPath() << std::endl;
-            }
-            catch(...)
-            {
-
-                std::cerr << "ERROR: Error loading the config file: '" << env << "'";
-                return 1;
-            }
-        }
     }
 
     if (!transformFile.empty())
@@ -258,7 +244,7 @@ int main(int argc, const char **argv)
         std::cout << std::endl;
         std::cout << "Processing using '" << transformFile << "'" << std::endl << std::endl;
     }
-
+    
     std::cout << std::endl << std::endl;
     std::cout << "Processing statistics:" << std::endl << std::endl;
 
@@ -296,25 +282,38 @@ int main(int argc, const char **argv)
         // Checking for an input colorspace or input (display, view) pair.
         else if (!inColorSpace.empty() || (!display.empty() && !view.empty()))
         {
+            if(!inputconfig.empty())
+            {
+                std::cout << std::endl;
+                std::cout << "Loading " << inputconfig << std::endl;
+                srcConfig = OCIO::Config::CreateFromFile(inputconfig.c_str());
+            }
+            else if(OCIO::GetEnvVariable("OCIO"))
+            {
+                std::cout << std::endl;
+                std::cout << "Loading $OCIO " << OCIO::GetEnvVariable("OCIO") << std::endl;
+                srcConfig = OCIO::Config::CreateFromEnv();
+            }
+            else
+            {
+                throw OCIO::Exception("You must specify an input OCIO configuration (either with --iconfig or $OCIO).\n");
+            }
+
             if (verbose)
             {
-                const char * env = OCIO::GetEnvVariable("OCIO");
-                if (env && *env)
-                {
-                    std::cout << std::endl;
-                    const std::string inputStr = !inColorSpace.empty() ?  inColorSpace : "(" + display + ", " + view + ")";
-                    const std::string outputStr = !outColorSpace.empty() ?  outColorSpace : "(" + display + ", " + view + ")";
-                    std::cout << "Processing from '" 
-                              << inputStr << "' to '"
-                              << outputStr << "'" << std::endl;
-                }
-                else
-                {
-                    throw OCIO::Exception("Missing the ${OCIO} env. variable.");
-                }
+                std::cout << std::endl;
+                std::cout << "OCIO Config. version: " << srcConfig->getMajorVersion() << "." 
+                                                      << srcConfig->getMinorVersion() << std::endl;
+                std::cout << "OCIO search_path:     " << srcConfig->getSearchPath() << std::endl;
+                std::cout << std::endl;
+                const std::string inputStr = !inColorSpace.empty() ?  inColorSpace : "(" + display + ", " + view + ")";
+                const std::string outputStr = !outColorSpace.empty() ?  outColorSpace : "(" + display + ", " + view + ")";
+                std::cout << "Processing from '" 
+                          << inputStr << "' to '"
+                          << outputStr << "'" << std::endl;
             }
 
-            OCIO::ConfigRcPtr config  = OCIO::Config::CreateFromEnv()->createEditableCopy();
+            OCIO::ConfigRcPtr config  = srcConfig->createEditableCopy();
             config->setProcessorCacheFlags(nocache ? OCIO::PROCESSOR_CACHE_OFF 
                                                    : OCIO::PROCESSOR_CACHE_DEFAULT);
 

From da0ab980a8d78050f97a2dc5b6e4a80cb8edb396 Mon Sep 17 00:00:00 2001
From: Kevin Wheatley <kevin.wheatley@framestore.com>
Date: Tue, 24 Dec 2024 12:06:39 +0000
Subject: [PATCH 02/70] Extend ocioconvert to take config on command line

Signed-off-by: Kevin Wheatley <kevin.wheatley@framestore.com>
---
 src/apps/ocioconvert/main.cpp | 68 +++++++++++++++++++++--------------
 1 file changed, 41 insertions(+), 27 deletions(-)

diff --git a/src/apps/ocioconvert/main.cpp b/src/apps/ocioconvert/main.cpp
index 60305e06ec..e8071c76fa 100644
--- a/src/apps/ocioconvert/main.cpp
+++ b/src/apps/ocioconvert/main.cpp
@@ -50,6 +50,7 @@ int main(int argc, const char **argv)
     std::vector<std::string> stringAttrs;
 
     std::string outputDepth;
+    std::string inputconfig;
 
     bool usegpu                 = false;
     bool usegpuLegacy           = false;
@@ -85,6 +86,7 @@ int main(int argc, const char **argv)
                "--h",                   &help,                  "Display the help and exit",
                "--help",                &help,                  "Display the help and exit",
                "-v" ,                   &verbose,               "Display general information",
+              "--iconfig %s",           &inputconfig,           "Input .ocio configuration file (default: $OCIO)",
                "<SEPARATOR>", "\nOpenImageIO or OpenEXR options:",
                "--bitdepth %s",         &outputDepth,  "Output image bitdepth",
                "--float-attribute %L",  &floatAttrs,   "\"name=float\" pair defining OIIO float attribute "
@@ -266,34 +268,51 @@ int main(int argc, const char **argv)
         outputimage     = args[2].c_str();
     }
 
+    // Load the current config.
+    OCIO::ConstConfigRcPtr config;
+    
+    try
+    {
+        if (useLut)
+        {
+            config = OCIO::Config::CreateRaw();
+        }
+        else if (!inputconfig.empty() )
+        {
+            config = OCIO::Config::CreateFromFile(inputconfig.c_str());
+        }
+        else
+        {
+            const char * env = OCIO::GetEnvVariable("OCIO");
+            if (env && *env)
+                inputconfig = env;
+            config = OCIO::GetCurrentConfig();
+        }
+    }
+    catch (const OCIO::Exception & e)
+    {
+        std::cout << "ERROR loading config file: " << e.what() << std::endl;
+        exit(1);
+    }
+    catch (...)
+    {
+
+        std::cerr << "ERROR loading config file: '" << inputconfig << "'" << std::endl;
+        exit(1);
+    }
+
     if (verbose)
     {
         std::cout << std::endl;
         std::cout << OCIO::ImageIO::GetVersion() << std::endl;
         std::cout << "OCIO Version: " << OCIO::GetVersion() << std::endl;
-        const char * env = OCIO::GetEnvVariable("OCIO");
-        if (env && *env && !useLut)
+        if (!useLut)
         {
-            try
-            {
-                std::cout << std::endl;
-                std::cout << "OCIO Config. file:    '" << env << "'" << std::endl;
-                OCIO::ConstConfigRcPtr config = OCIO::GetCurrentConfig();
-                std::cout << "OCIO Config. version: " << config->getMajorVersion() << "." 
-                                                      << config->getMinorVersion() << std::endl;
-                std::cout << "OCIO search_path:     " << config->getSearchPath() << std::endl;
-            }
-            catch (const OCIO::Exception & e)
-            {
-                std::cout << "ERROR loading config file: " << e.what() << std::endl;
-                exit(1);
-            }
-            catch (...)
-            {
-
-                std::cerr << "ERROR loading config file: '" << env << "'" << std::endl;
-                exit(1);
-            }
+            std::cout << std::endl;
+            std::cout << "OCIO Config. file:    '" << inputconfig << "'" << std::endl;
+            std::cout << "OCIO Config. version: " << config->getMajorVersion() << "." 
+                                                  << config->getMinorVersion() << std::endl;
+            std::cout << "OCIO search_path:     " << config->getSearchPath() << std::endl;
         }
     }
 
@@ -388,10 +407,6 @@ int main(int argc, const char **argv)
     // Process the image.
     try
     {
-        // Load the current config.
-        OCIO::ConstConfigRcPtr config
-            = useLut ? OCIO::Config::CreateRaw() : OCIO::GetCurrentConfig();
-
         // Get the processor.
         OCIO::ConstProcessorRcPtr processor;
 
@@ -637,7 +652,6 @@ int main(int argc, const char **argv)
     {
         if (useDisplayView)
         {
-            OCIO::ConstConfigRcPtr config = OCIO::GetCurrentConfig();
             outputcolorspace = config->getDisplayViewColorSpaceName(display, view);
         }
 

From b5ef4f21bfac6080b92ea9982f203bcfc3f53a42 Mon Sep 17 00:00:00 2001
From: Kevin Wheatley <kevin.wheatley@framestore.com>
Date: Tue, 24 Dec 2024 15:18:14 +0000
Subject: [PATCH 03/70] Extract tonescale_fwd function

Signed-off-by: Kevin Wheatley <kevin.wheatley@framestore.com>
---
 .../ops/fixedfunction/ACES2/Transform.cpp        | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
index 80b8ba5c72..3871dad06e 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
@@ -246,12 +246,8 @@ float toe_inv( float x, float limit, float k1_in, float k2_in)
     return (x * x + k1 * x) / (k3 * (x + k2));
 }
 
-f3 tonescale_chroma_compress_fwd(const f3 &JMh, const JMhParams &p, const ToneScaleParams &pt, const ChromaCompressParams &pc)
+float tonescale_fwd(const float J, const JMhParams &p, const ToneScaleParams &pt)
 {
-    const float J = JMh[0];
-    const float M = JMh[1];
-    const float h = JMh[2];
-
     // Tonescale applied in Y (convert to and from J)
     const float A = p.A_w_J * powf(std::abs(J) / 100.f, 1.f / (surround[1] * p.z));
     const float Y = std::copysign(1.f, J) * 100.f / p.F_L * powf((27.13f * A) / (400.f - A), 1.f / 0.42f) / 100.f;
@@ -261,6 +257,16 @@ f3 tonescale_chroma_compress_fwd(const f3 &JMh, const JMhParams &p, const ToneSc
 
     const float F_L_Y = powf(p.F_L * std::abs(Y_ts) / 100.f, 0.42f);
     const float J_ts = std::copysign(1.f, Y_ts) * 100.f * powf(((400.f * F_L_Y) / (27.13f + F_L_Y)) / p.A_w_J, surround[1] * p.z);
+    return J_ts;
+}
+
+f3 tonescale_chroma_compress_fwd(const f3 &JMh, const JMhParams &p, const ToneScaleParams &pt, const ChromaCompressParams &pc)
+{
+    const float J = JMh[0];
+    const float M = JMh[1];
+    const float h = JMh[2];
+
+    const float J_ts = tonescale_fwd(J, p, pt);
 
     // ChromaCompress
     float M_cp = M;

From 14244a8ce5432e3eeffbc528203f974f51ebdf74 Mon Sep 17 00:00:00 2001
From: Kevin Wheatley <kevin.wheatley@framestore.com>
Date: Tue, 24 Dec 2024 18:08:07 +0000
Subject: [PATCH 04/70] Extract inverse tonescale function

Signed-off-by: Kevin Wheatley <kevin.wheatley@framestore.com>
---
 .../ops/fixedfunction/ACES2/Transform.cpp     | 79 ++++++++++++++-----
 1 file changed, 58 insertions(+), 21 deletions(-)

diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
index 3871dad06e..2176ae51de 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
@@ -111,6 +111,30 @@ float hue_dependent_upper_hull_gamma(float h, const ACES2::Table1D &gt)
 // CAM
 //
 
+inline float _post_adaptation_cone_response_compression_fwd(float Rc, const float F_L)
+{
+    const float F_L_Y = powf(Rc * F_L / reference_luminance, 0.42f); // Rc is expected to be >= 0: powf of a negative base with exponent 0.42 is NaN
+    const float Ra    = (400.f * F_L_Y) / (27.13f + F_L_Y); // for F_L_Y >= 0 this ratio stays below 400
+    return Ra;
+}
+
+inline float _post_adaptation_cone_response_compression_inv(float Ra, const float F_L)
+{
+    const float F_L_Y = (27.13f * Ra) / (400.f - Ra); // TODO: guard Ra >= 400.f (Ra == 400 divides by zero; Ra > 400 makes F_L_Y negative, so the powf below yields NaN)
+    const float Rc    = reference_luminance / F_L * powf(F_L_Y, 1.f / 0.42f); // undoes the 0.42 power and the F_L / reference_luminance scaling of the _fwd counterpart
+    return Rc;
+}
+
+inline float J_from_Achromatic(float A, const JMhParams &p)
+{
+    return 100.f * powf(A / p.A_w_J, surround[1] * p.z);
+}
+
+inline float Achromatic_from_J(float J, const JMhParams &p)
+{
+    return  p.A_w_J * powf(J / 100.f, 1.f / (surround[1] * p.z));  //TODO
+}
+
 // Post adaptation non linear response compression
 float panlrc_forward(float v, float F_L)
 {
@@ -230,7 +254,10 @@ float toe_fwd( float x, float limit, float k1_in, float k2_in)
     const float k2 = std::max(k2_in, 0.001f);
     const float k1 = sqrt(k1_in * k1_in + k2 * k2);
     const float k3 = (limit + k1) / (limit + k2);
-    return 0.5f * (k3 * x - k1 + sqrt((k3 * x - k1) * (k3 * x - k1) + 4.f * k2 * k3 * x));
+
+    const float minus_b = k3 * x - k1;
+    const float minus_ac = k2 * k3 * x; // a is 1.0
+    return 0.5f * (minus_b + sqrt(minus_b * minus_b + 4.f * minus_ac)); // a is 1.0, minus_b squared == b^2
 }
 
 float toe_inv( float x, float limit, float k1_in, float k2_in)
@@ -249,15 +276,34 @@ float toe_inv( float x, float limit, float k1_in, float k2_in)
 float tonescale_fwd(const float J, const JMhParams &p, const ToneScaleParams &pt)
 {
     // Tonescale applied in Y (convert to and from J)
-    const float A = p.A_w_J * powf(std::abs(J) / 100.f, 1.f / (surround[1] * p.z));
-    const float Y = std::copysign(1.f, J) * 100.f / p.F_L * powf((27.13f * A) / (400.f - A), 1.f / 0.42f) / 100.f;
+    const float J_abs = std::abs(J);
+    const float Ra    = Achromatic_from_J(J_abs, p);
+    const float Y     = _post_adaptation_cone_response_compression_inv(Ra, p.F_L);
+    
+    const float Y_norm = Y / 100.f; //TODO
+    const float f    = pt.m_2 * powf(Y_norm / (Y_norm + pt.s_2), pt.g);
+    const float Y_ts = std::max(0.f, f * f / (f + pt.t_1)) * pt.n_r;  // max prevents -ve values being output also handles division by zero possibility
+
+    const float Ra_ts = _post_adaptation_cone_response_compression_fwd(Y_ts, p.F_L);
+    const float J_ts  = std::copysign(J_from_Achromatic(Ra_ts, p), Y_ts);
+    return J_ts;
+}
 
-    const float f = pt.m_2 * powf(std::max(0.f, Y) / (Y + pt.s_2), pt.g);
-    const float Y_ts = std::max(0.f, f * f / (f + pt.t_1)) * pt.n_r;
+float tonescale_inv(const float J_ts, const JMhParams &p, const ToneScaleParams &pt)
+{
+    // Inverse Tonescale applied in Y (convert to and from J)
+    const float J_abs = std::abs(J_ts);
+    const float Ra = Achromatic_from_J(J_abs, p);
+    const float Y_ts = _post_adaptation_cone_response_compression_inv(Ra, p.F_L);
 
-    const float F_L_Y = powf(p.F_L * std::abs(Y_ts) / 100.f, 0.42f);
-    const float J_ts = std::copysign(1.f, Y_ts) * 100.f * powf(((400.f * F_L_Y) / (27.13f + F_L_Y)) / p.A_w_J, surround[1] * p.z);
-    return J_ts;
+    const float Y_ts_norm = Y_ts / 100.0f; // TODO
+    const float Z = std::max(0.f, std::min(pt.n / (pt.u_2 * pt.n_r), Y_ts_norm));  //TODO
+    const float f = (Z + sqrt(Z * (4.f * pt.t_1 + Z))) / 2.f;
+    const float Y = pt.s_2 / (powf((pt.m_2 / f), (1.f / pt.g)) - 1.f) * pt.n_r;
+
+    const float Ra_ts = _post_adaptation_cone_response_compression_fwd(Y, p.F_L);
+    const float J  = std::copysign(J_from_Achromatic(Ra_ts, p), Y_ts);
+    return J;
 }
 
 f3 tonescale_chroma_compress_fwd(const f3 &JMh, const JMhParams &p, const ToneScaleParams &pt, const ChromaCompressParams &pc)
@@ -294,16 +340,7 @@ f3 tonescale_chroma_compress_inv(const f3 &JMh, const JMhParams &p, const ToneSc
     const float M_cp = JMh[1];
     const float h    = JMh[2];
 
-    // Inverse Tonescale applied in Y (convert to and from J)
-    const float A = p.A_w_J * powf(std::abs(J_ts) / 100.f, 1.f / (surround[1] * p.z));
-    const float Y_ts = std::copysign(1.f, J_ts) * 100.f / p.F_L * powf((27.13f * A) / (400.f - A), 1.f / 0.42f) / 100.f;
-
-    const float Z = std::max(0.f, std::min(pt.n / (pt.u_2 * pt.n_r), Y_ts));
-    const float ht = (Z + sqrt(Z * (4.f * pt.t_1 + Z))) / 2.f;
-    const float Y = pt.s_2 / (powf((pt.m_2 / ht), (1.f / pt.g)) - 1.f) * pt.n_r;
-
-    const float F_L_Y = powf(p.F_L * std::abs(Y) / 100.f, 0.42f);
-    const float J = std::copysign(1.f, Y) * 100.f * powf(((400.f * F_L_Y) / (27.13f + F_L_Y)) / p.A_w_J, surround[1] * p.z);
+    const float J = tonescale_inv(J_ts, p, pt);
 
     // Inverse ChromaCompress
     float M = M_cp;
@@ -325,12 +362,12 @@ f3 tonescale_chroma_compress_inv(const f3 &JMh, const JMhParams &p, const ToneSc
     return {J, M, h};
 }
 
-JMhParams init_JMhParams(const Primaries &P)
+JMhParams init_JMhParams(const Primaries &prims)
 {
     JMhParams p;
 
     const m33f MATRIX_16 = XYZtoRGB_f33(CAM16::primaries);
-    const m33f RGB_to_XYZ = RGBtoXYZ_f33(P);
+    const m33f RGB_to_XYZ = RGBtoXYZ_f33(prims);
     const f3 XYZ_w = mult_f3_f33(f3_from_f(reference_luminance), RGB_to_XYZ);
 
     const float Y_W = XYZ_w[1];
@@ -374,7 +411,7 @@ JMhParams init_JMhParams(const Primaries &P)
     p.A_w = A_w;
     p.A_w_J = A_w_J;
 
-    p.MATRIX_RGB_to_CAM16 = mult_f33_f33(RGBtoRGB_f33(P, CAM16::primaries), scale_f33(Identity_M33, f3_from_f(100.f)));
+    p.MATRIX_RGB_to_CAM16 = mult_f33_f33(RGBtoRGB_f33(prims, CAM16::primaries), scale_f33(Identity_M33, f3_from_f(100.f)));
     p.MATRIX_CAM16_to_RGB = invert_f33(p.MATRIX_RGB_to_CAM16);
 
     return p;

From 37336984e492d1f97cb1dd7e63ac73aab41124fc Mon Sep 17 00:00:00 2001
From: Kevin Wheatley <kevin.wheatley@framestore.com>
Date: Mon, 6 Jan 2025 10:45:04 +0000
Subject: [PATCH 05/70] Combine c and Z variables in J calculation exponent;
 replace 100.0 entries when referring to the scale of J; extract calculation of
 nonlinear compression into functions

Signed-off-by: Kevin Wheatley <kevin.wheatley@framestore.com>
---
 .../ops/fixedfunction/ACES2/Common.h          |  3 +-
 .../ops/fixedfunction/ACES2/Transform.cpp     | 43 +++++++++++--------
 .../ops/fixedfunction/FixedFunctionOpGPU.cpp  | 12 +++---
 3 files changed, 32 insertions(+), 26 deletions(-)

diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h b/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
index aeb1524071..3c852274a0 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
@@ -38,7 +38,7 @@ struct Table1D
 struct JMhParams
 {
     float F_L;
-    float z;
+    float cz;
     float A_w;
     float A_w_J;
     f3 XYZ_w;
@@ -91,6 +91,7 @@ constexpr float ac_resp = 1.f;
 constexpr float ra = 2.f * ac_resp;
 constexpr float ba = 0.05f + (2.f - ra);
 constexpr f3 surround = {0.9f, 0.59f, 0.9f}; // Dim surround
+constexpr float J_scale = 100.0f;
 
 // Chroma compression
 constexpr float chroma_compress = 2.4f;
diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
index 2176ae51de..f6ef3ee59f 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
@@ -125,35 +125,40 @@ inline float _post_adaptation_cone_response_compression_inv(float Ra, const floa
     return Rc;
 }
 
-inline float J_from_Achromatic(float A, const JMhParams &p)
+inline float J_from_Achromatic(float A, const JMhParams &p, const float cz)
 {
-    return 100.f * powf(A / p.A_w_J, surround[1] * p.z);
+    return J_scale * powf(A / p.A_w_J, cz);
 }
 
-inline float Achromatic_from_J(float J, const JMhParams &p)
+inline float Achromatic_from_J(float J, const JMhParams &p, const float cz)
 {
-    return  p.A_w_J * powf(J / 100.f, 1.f / (surround[1] * p.z));  //TODO
+    return  p.A_w_J * powf(J / J_scale, 1.f / cz);  //TODO
 }
 
 // Post adaptation non linear response compression
 float panlrc_forward(float v, float F_L)
 {
-    const float F_L_v = powf(F_L * std::abs(v) / reference_luminance, 0.42f);
+    const float abs_v = std::abs(v);
+    const float Ra =  _post_adaptation_cone_response_compression_fwd(abs_v, F_L);
     // Note that std::copysign(1.f, 0.f) returns 1 but the CTL copysign(1.,0.) returns 0.
     // TODO: Should we change the behaviour?
-    return (400.f * std::copysign(1.f, v) * F_L_v) / (27.13f + F_L_v);
+    return std::copysign(Ra, v);
 }
 
 float panlrc_inverse(float v, float F_L)
 {
-    return std::copysign(1.f, v) * reference_luminance / F_L * powf((27.13f * std::abs(v) / (400.f - std::abs(v))), 1.f / 0.42f);
+    const float abs_v = std::abs(v);
+    const float Rc    = _post_adaptation_cone_response_compression_inv(abs_v, F_L);
+    return std::copysign(Rc, v);
 }
 
 // Optimization used during initialization
 float Y_to_J(float Y, const JMhParams &params)
 {
-    float F_L_Y = powf(params.F_L * std::abs(Y) / reference_luminance, 0.42f);
-    return std::copysign(1.f, Y) * reference_luminance * powf(((400.f * F_L_Y) / (27.13f + F_L_Y)) / params.A_w_J, surround[1] * params.z);
+    const float abs_Y = std::abs(Y);
+    const float Ra    = _post_adaptation_cone_response_compression_fwd(abs_Y, params.F_L);
+    const float J     = J_scale * powf(Ra / params.A_w_J, params.cz);
+    return std::copysign(J, Y);
 }
 
 f3 RGB_to_JMh(const f3 &RGB, const JMhParams &p)
@@ -174,7 +179,7 @@ f3 RGB_to_JMh(const f3 &RGB, const JMhParams &p)
     const float a = red_a - 12.f * grn_a / 11.f + blu_a / 11.f;
     const float b = (red_a + grn_a - 2.f * blu_a) / 9.f;
 
-    const float J = 100.f * powf(A / p.A_w, surround[1] * p.z);
+    const float J = J_scale * powf(A / p.A_w, p.cz);
 
     const float M = J == 0.f ? 0.f : 43.f * surround[2] * sqrt(a * a + b * b);
 
@@ -199,7 +204,7 @@ f3 JMh_to_RGB(const f3 &JMh, const JMhParams &p)
     const float h_rad = h * PI / 180.f;
 
     const float scale = M / (43.f * surround[2]);
-    const float A = p.A_w * powf(J / 100.f, 1.f / (surround[1] * p.z));
+    const float A = p.A_w * powf(J / J_scale, 1.f / p.cz); // TODO
     const float a = scale * cos(h_rad);
     const float b = scale * sin(h_rad);
 
@@ -277,7 +282,7 @@ float tonescale_fwd(const float J, const JMhParams &p, const ToneScaleParams &pt
 {
     // Tonescale applied in Y (convert to and from J)
     const float J_abs = std::abs(J);
-    const float Ra    = Achromatic_from_J(J_abs, p);
+    const float Ra    = Achromatic_from_J(J_abs, p, p.cz);
     const float Y     = _post_adaptation_cone_response_compression_inv(Ra, p.F_L);
     
     const float Y_norm = Y / 100.f; //TODO
@@ -285,16 +290,16 @@ float tonescale_fwd(const float J, const JMhParams &p, const ToneScaleParams &pt
     const float Y_ts = std::max(0.f, f * f / (f + pt.t_1)) * pt.n_r;  // max prevents -ve values being output also handles division by zero possibility
 
     const float Ra_ts = _post_adaptation_cone_response_compression_fwd(Y_ts, p.F_L);
-    const float J_ts  = std::copysign(J_from_Achromatic(Ra_ts, p), Y_ts);
-    return J_ts;
+    const float J_ts  = J_from_Achromatic(Ra_ts, p, p.cz);
+    return std::copysign(J_ts, Y_ts);
 }
 
 float tonescale_inv(const float J_ts, const JMhParams &p, const ToneScaleParams &pt)
 {
     // Inverse Tonescale applied in Y (convert to and from J)
     const float J_abs = std::abs(J_ts);
-    const float Ra = Achromatic_from_J(J_abs, p);
-    const float Y_ts = _post_adaptation_cone_response_compression_inv(Ra, p.F_L);
+    const float Ra    = Achromatic_from_J(J_abs, p, p.cz);
+    const float Y_ts  = _post_adaptation_cone_response_compression_inv(Ra, p.F_L);
 
     const float Y_ts_norm = Y_ts / 100.0f; // TODO
     const float Z = std::max(0.f, std::min(pt.n / (pt.u_2 * pt.n_r), Y_ts_norm));  //TODO
@@ -302,8 +307,8 @@ float tonescale_inv(const float J_ts, const JMhParams &p, const ToneScaleParams
     const float Y = pt.s_2 / (powf((pt.m_2 / f), (1.f / pt.g)) - 1.f) * pt.n_r;
 
     const float Ra_ts = _post_adaptation_cone_response_compression_fwd(Y, p.F_L);
-    const float J  = std::copysign(J_from_Achromatic(Ra_ts, p), Y_ts);
-    return J;
+    const float J     = J_from_Achromatic(Ra_ts, p, p.cz);
+    return std::copysign(J, Y_ts);
 }
 
 f3 tonescale_chroma_compress_fwd(const f3 &JMh, const JMhParams &p, const ToneScaleParams &pt, const ChromaCompressParams &pc)
@@ -406,7 +411,7 @@ JMhParams init_JMhParams(const Primaries &prims)
 
     p.XYZ_w = XYZ_w;
     p.F_L = F_L;
-    p.z = z;
+    p.cz = surround[1] * z;
     p.D_RGB = D_RGB;
     p.A_w = A_w;
     p.A_w_J = A_w_J;
diff --git a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
index 1b4ba870dd..5b65944101 100644
--- a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
@@ -366,7 +366,7 @@ void _Add_RGB_to_JMh_Shader(
     ss.newLine() << ss.floatDecl("a") << " = rgb_a.r - 12.0 * rgb_a.g / 11.0 + rgb_a.b / 11.0;";
     ss.newLine() << ss.floatDecl("b") << " = (rgb_a.r + rgb_a.g - 2.0 * rgb_a.b) / 9.0;";
 
-    ss.newLine() << ss.floatDecl("J") << " = 100.0 * pow(A / " << p.A_w << ", " << ACES2::surround[1] << " * " << p.z << ");";
+    ss.newLine() << ss.floatDecl("J") << " = 100.0 * pow(A / " << p.A_w << ", " << p.cz << ");";
 
     ss.newLine() << ss.floatDecl("M") << " = (J == 0.0) ? 0.0 : 43.0 * " << ACES2::surround[2] << " * sqrt(a * a + b * b);";
 
@@ -387,7 +387,7 @@ void _Add_JMh_to_RGB_Shader(
     ss.newLine() << ss.floatDecl("h") << " = " << pxl << ".b * 3.14159265358979 / 180.0;";
 
     ss.newLine() << ss.floatDecl("scale") << " = " << pxl << ".g / (43.0 * " << ACES2::surround[2] << ");";
-    ss.newLine() << ss.floatDecl("A") << " = " << p.A_w << " * pow(" << pxl << ".r / 100.0, 1.0 / (" << ACES2::surround[1] << " * " << p.z << "));";
+    ss.newLine() << ss.floatDecl("A") << " = " << p.A_w << " * pow(" << pxl << ".r / 100.0, 1.0 / (" << p.cz << "));";
     ss.newLine() << ss.floatDecl("a") << " = scale * cos(h);";
     ss.newLine() << ss.floatDecl("b") << " = scale * sin(h);";
 
@@ -550,14 +550,14 @@ void _Add_Tonescale_Compress_Fwd_Shader(
     ss.newLine() << ss.floatDecl("h") << " = " << pxl << ".b;";
 
     // Tonescale applied in Y (convert to and from J)
-    ss.newLine() << ss.floatDecl("A") << " = " << p.A_w_J << " * pow(abs(J) / 100.0, 1.0 / (" << ACES2::surround[1] << " * " << p.z << "));";
+    ss.newLine() << ss.floatDecl("A") << " = " << p.A_w_J << " * pow(abs(J) / 100.0, 1.0 / (" << p.cz << "));";
     ss.newLine() << ss.floatDecl("Y") << " = sign(J) * 100.0 / " << p.F_L << " * pow((27.13 * A) / (400.0 - A), 1.0 / 0.42) / 100.0;";
 
     ss.newLine() << ss.floatDecl("f") << " = " << t.m_2  << " * pow(max(0.0, Y) / (Y + " << t.s_2 << "), " << t.g << ");";
     ss.newLine() << ss.floatDecl("Y_ts") << " = max(0.0, f * f / (f + " << t.t_1 << ")) * " << t.n_r << ";";
 
     ss.newLine() << ss.floatDecl("F_L_Y") << " = pow(" << p.F_L << " * abs(Y_ts) / 100.0, 0.42);";
-    ss.newLine() << ss.floatDecl("J_ts") << " = sign(Y_ts) * 100.0 * pow(((400.0 * F_L_Y) / (27.13 + F_L_Y)) / " << p.A_w_J << ", " << ACES2::surround[1] << " * " << p.z << ");";
+    ss.newLine() << ss.floatDecl("J_ts") << " = sign(Y_ts) * 100.0 * pow(((400.0 * F_L_Y) / (27.13 + F_L_Y)) / " << p.A_w_J << ", " << p.cz << ");";
 
     // ChromaCompress
     ss.newLine() << ss.floatDecl("M_cp") << " = M;";
@@ -621,7 +621,7 @@ void _Add_Tonescale_Compress_Inv_Shader(
     ss.newLine() << ss.floatDecl("h") << " = " << pxl << ".b;";
 
     // Inverse Tonescale applied in Y (convert to and from J)
-    ss.newLine() << ss.floatDecl("A") << " = " << p.A_w_J << " * pow(abs(J_ts) / 100.0, 1.0 / (" << ACES2::surround[1] << " * " << p.z << "));";
+    ss.newLine() << ss.floatDecl("A") << " = " << p.A_w_J << " * pow(abs(J_ts) / 100.0, 1.0 / (" << p.cz << "));";
     ss.newLine() << ss.floatDecl("Y_ts") << " = sign(J_ts) * 100.0 / " << p.F_L << " * pow((27.13 * A) / (400.0 - A), 1.0 / 0.42) / 100.0;";
 
     ss.newLine() << ss.floatDecl("Z") << " = max(0.0, min(" << t.n << " / (" << t.u_2 * t.n_r << "), Y_ts));";
@@ -629,7 +629,7 @@ void _Add_Tonescale_Compress_Inv_Shader(
     ss.newLine() << ss.floatDecl("Y") << " = " << t.s_2 << " / (pow((" << t.m_2 << " / ht), (1.0 / " << t.g << ")) - 1.0);";
 
     ss.newLine() << ss.floatDecl("F_L_Y") << " = pow(" << p.F_L << " * abs(Y * 100.0) / 100.0, 0.42);";
-    ss.newLine() << ss.floatDecl("J") << " = sign(Y) * 100.0 * pow(((400.0 * F_L_Y) / (27.13 + F_L_Y)) / " << p.A_w_J << ", " << ACES2::surround[1] << " * " << p.z << ");";
+    ss.newLine() << ss.floatDecl("J") << " = sign(Y) * 100.0 * pow(((400.0 * F_L_Y) / (27.13 + F_L_Y)) / " << p.A_w_J << ", " << p.cz << ");";
 
     // ChromaCompress
     ss.newLine() << ss.floatDecl("M") << " = M_cp;";

From 2286c571beebf45e09d5c62ed89de7ef2a743dee Mon Sep 17 00:00:00 2001
From: Kevin Wheatley <kevin.wheatley@framestore.com>
Date: Mon, 6 Jan 2025 11:37:54 +0000
Subject: [PATCH 06/70] Split RGB<->JMh function into two parts to expose
 opponent intermediate values

Signed-off-by: Kevin Wheatley <kevin.wheatley@framestore.com>
---
 .../ops/fixedfunction/ACES2/Transform.cpp     | 38 +++++++++++++++----
 1 file changed, 30 insertions(+), 8 deletions(-)

diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
index f6ef3ee59f..16c292e86b 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
@@ -161,7 +161,7 @@ float Y_to_J(float Y, const JMhParams &params)
     return std::copysign(J, Y);
 }
 
-f3 RGB_to_JMh(const f3 &RGB, const JMhParams &p)
+inline f3 RGB_to_Aab(const f3 &RGB, const JMhParams &p)
 {
     const float red = RGB[0];
     const float grn = RGB[1];
@@ -178,13 +178,17 @@ f3 RGB_to_JMh(const f3 &RGB, const JMhParams &p)
     const float A = 2.f * red_a + grn_a + 0.05f * blu_a;
     const float a = red_a - 12.f * grn_a / 11.f + blu_a / 11.f;
     const float b = (red_a + grn_a - 2.f * blu_a) / 9.f;
+    return {A, a, b};
+}
 
-    const float J = J_scale * powf(A / p.A_w, p.cz);
+inline f3 Aab_to_JMh(const f3 &Aab, const JMhParams &p)
+{
+    const float J = J_scale * powf(Aab[0] / p.A_w, p.cz); //TODO
 
-    const float M = J == 0.f ? 0.f : 43.f * surround[2] * sqrt(a * a + b * b);
+    const float M = J == 0.f ? 0.f : 43.f * surround[2] * sqrt(Aab[1] * Aab[1] + Aab[2] * Aab[2]);
 
     const float PI = 3.14159265358979f;
-    const float h_rad = std::atan2(b, a);
+    const float h_rad = std::atan2(Aab[2], Aab[1]);
     float h = std::fmod(h_rad * 180.f / PI, 360.f);
     if (h < 0.f)
     {
@@ -194,7 +198,14 @@ f3 RGB_to_JMh(const f3 &RGB, const JMhParams &p)
     return {J, M, h};
 }
 
-f3 JMh_to_RGB(const f3 &JMh, const JMhParams &p)
+f3 RGB_to_JMh(const f3 &RGB, const JMhParams &p)
+{
+    const f3 Aab = RGB_to_Aab(RGB, p);
+    const f3 JMh = Aab_to_JMh(Aab, p);
+    return JMh;
+}
+
+inline f3 JMh_to_Aab(const f3 &JMh, const JMhParams &p)
 {
     const float J = JMh[0];
     const float M = JMh[1];
@@ -207,10 +218,14 @@ f3 JMh_to_RGB(const f3 &JMh, const JMhParams &p)
     const float A = p.A_w * powf(J / J_scale, 1.f / p.cz); // TODO
     const float a = scale * cos(h_rad);
     const float b = scale * sin(h_rad);
+    return {A, a, b};
+}
 
-    const float red_a = (460.f * A + 451.f * a + 288.f * b) / 1403.f;
-    const float grn_a = (460.f * A - 891.f * a - 261.f * b) / 1403.f;
-    const float blu_a = (460.f * A - 220.f * a - 6300.f * b) / 1403.f;
+inline f3 Aab_to_RGB(const f3 &Aab, const JMhParams &p)
+{
+    const float red_a = (460.f * Aab[0] + 451.f * Aab[1] + 288.f * Aab[2]) / 1403.f;
+    const float grn_a = (460.f * Aab[0] - 891.f * Aab[1] - 261.f * Aab[2]) / 1403.f;
+    const float blu_a = (460.f * Aab[0] - 220.f * Aab[1] - 6300.f * Aab[2]) / 1403.f;
 
     float red_m = panlrc_inverse(red_a, p.F_L) / p.D_RGB[0];
     float grn_m = panlrc_inverse(grn_a, p.F_L) / p.D_RGB[1];
@@ -223,6 +238,13 @@ f3 JMh_to_RGB(const f3 &JMh, const JMhParams &p)
     return {red, grn, blu};
 }
 
+f3 JMh_to_RGB(const f3 &JMh, const JMhParams &p)
+{
+    const f3 Aab = JMh_to_Aab(JMh, p);
+    const f3 rgb = Aab_to_RGB(Aab, p);
+    return rgb;
+}
+
 //
 // Tonescale / Chroma compress
 //

From a399575a6442e0f03508031d9c9d69d40c9ba907 Mon Sep 17 00:00:00 2001
From: Kevin Wheatley <kevin.wheatley@framestore.com>
Date: Mon, 6 Jan 2025 11:52:27 +0000
Subject: [PATCH 07/70] Use mult_f3_f33 to compute the matrix multiply for LMS
 calculations

Signed-off-by: Kevin Wheatley <kevin.wheatley@framestore.com>
---
 .../ops/fixedfunction/ACES2/Transform.cpp     | 37 ++++++++-----------
 1 file changed, 16 insertions(+), 21 deletions(-)

diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
index 16c292e86b..c7317faef1 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
@@ -163,21 +163,17 @@ float Y_to_J(float Y, const JMhParams &params)
 
 inline f3 RGB_to_Aab(const f3 &RGB, const JMhParams &p)
 {
-    const float red = RGB[0];
-    const float grn = RGB[1];
-    const float blu = RGB[2];
+    const f3 rgb_m = mult_f3_f33(RGB, p.MATRIX_RGB_to_CAM16);
 
-    const float red_m = red * p.MATRIX_RGB_to_CAM16[0] + grn * p.MATRIX_RGB_to_CAM16[1] + blu * p.MATRIX_RGB_to_CAM16[2];
-    const float grn_m = red * p.MATRIX_RGB_to_CAM16[3] + grn * p.MATRIX_RGB_to_CAM16[4] + blu * p.MATRIX_RGB_to_CAM16[5];
-    const float blu_m = red * p.MATRIX_RGB_to_CAM16[6] + grn * p.MATRIX_RGB_to_CAM16[7] + blu * p.MATRIX_RGB_to_CAM16[8];
-
-    const float red_a =  panlrc_forward(red_m * p.D_RGB[0], p.F_L);
-    const float grn_a =  panlrc_forward(grn_m * p.D_RGB[1], p.F_L);
-    const float blu_a =  panlrc_forward(blu_m * p.D_RGB[2], p.F_L);
+    const f3 rgb_a = {
+        panlrc_forward(rgb_m[0] * p.D_RGB[0], p.F_L),
+        panlrc_forward(rgb_m[1] * p.D_RGB[1], p.F_L),
+        panlrc_forward(rgb_m[2] * p.D_RGB[2], p.F_L)
+    };
 
-    const float A = 2.f * red_a + grn_a + 0.05f * blu_a;
-    const float a = red_a - 12.f * grn_a / 11.f + blu_a / 11.f;
-    const float b = (red_a + grn_a - 2.f * blu_a) / 9.f;
+    const float A = 2.f * rgb_a[0] + rgb_a[1] + 0.05f * rgb_a[2];
+    const float a = rgb_a[0] - 12.f * rgb_a[1] / 11.f + rgb_a[2] / 11.f;
+    const float b = (rgb_a[0] + rgb_a[1] - 2.f * rgb_a[2]) / 9.f;
     return {A, a, b};
 }
 
@@ -227,15 +223,14 @@ inline f3 Aab_to_RGB(const f3 &Aab, const JMhParams &p)
     const float grn_a = (460.f * Aab[0] - 891.f * Aab[1] - 261.f * Aab[2]) / 1403.f;
     const float blu_a = (460.f * Aab[0] - 220.f * Aab[1] - 6300.f * Aab[2]) / 1403.f;
 
-    float red_m = panlrc_inverse(red_a, p.F_L) / p.D_RGB[0];
-    float grn_m = panlrc_inverse(grn_a, p.F_L) / p.D_RGB[1];
-    float blu_m = panlrc_inverse(blu_a, p.F_L) / p.D_RGB[2];
-
-    const float red = red_m * p.MATRIX_CAM16_to_RGB[0] + grn_m * p.MATRIX_CAM16_to_RGB[1] + blu_m * p.MATRIX_CAM16_to_RGB[2];
-    const float grn = red_m * p.MATRIX_CAM16_to_RGB[3] + grn_m * p.MATRIX_CAM16_to_RGB[4] + blu_m * p.MATRIX_CAM16_to_RGB[5];
-    const float blu = red_m * p.MATRIX_CAM16_to_RGB[6] + grn_m * p.MATRIX_CAM16_to_RGB[7] + blu_m * p.MATRIX_CAM16_to_RGB[8];
+    const f3 rgb_m = {
+        panlrc_inverse(red_a, p.F_L) / p.D_RGB[0],
+        panlrc_inverse(grn_a, p.F_L) / p.D_RGB[1],
+        panlrc_inverse(blu_a, p.F_L) / p.D_RGB[2]
+    };
 
-    return {red, grn, blu};
+    const f3 rgb = mult_f3_f33(rgb_m, p.MATRIX_CAM16_to_RGB);
+    return rgb;
 }
 
 f3 JMh_to_RGB(const f3 &JMh, const JMhParams &p)

From dab7c7aa96e3c1defeeaee10d1fd9b8907ffae84 Mon Sep 17 00:00:00 2001
From: Kevin Wheatley <kevin.wheatley@framestore.com>
Date: Thu, 2 Jan 2025 16:25:48 +0000
Subject: [PATCH 08/70] Remove unused member variable from JMhParams structure

Signed-off-by: Kevin Wheatley <kevin.wheatley@framestore.com>
---
 src/OpenColorIO/ops/fixedfunction/ACES2/Common.h      | 1 -
 src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp | 1 -
 2 files changed, 2 deletions(-)

diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h b/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
index 3c852274a0..59009c339f 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
@@ -41,7 +41,6 @@ struct JMhParams
     float cz;
     float A_w;
     float A_w_J;
-    f3 XYZ_w;
     f3 D_RGB;
     m33f MATRIX_RGB_to_CAM16;
     m33f MATRIX_CAM16_to_RGB;
diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
index c7317faef1..666db40aa7 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
@@ -426,7 +426,6 @@ JMhParams init_JMhParams(const Primaries &prims)
     const float F_L_W = powf(F_L, 0.42f);
     const float A_w_J   = (400.f * F_L_W) / (27.13f + F_L_W);
 
-    p.XYZ_w = XYZ_w;
     p.F_L = F_L;
     p.cz = surround[1] * z;
     p.D_RGB = D_RGB;

From bb80581c008ca351f2666a76873a5f40c5842e6d Mon Sep 17 00:00:00 2001
From: Kevin Wheatley <kevin.wheatley@framestore.com>
Date: Mon, 6 Jan 2025 12:18:45 +0000
Subject: [PATCH 09/70] Combine chromatic adaptation weights into LMS matrix
 (and inverse) - CHANGES PIXEL OUTPUT

Signed-off-by: Kevin Wheatley <kevin.wheatley@framestore.com>
---
 .../ops/fixedfunction/ACES2/Common.h          |  5 ++--
 .../ops/fixedfunction/ACES2/Transform.cpp     | 23 ++++++++++---------
 .../ops/fixedfunction/FixedFunctionOpGPU.cpp  |  6 ++---
 3 files changed, 16 insertions(+), 18 deletions(-)

diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h b/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
index 59009c339f..1ec71fe391 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
@@ -41,9 +41,8 @@ struct JMhParams
     float cz;
     float A_w;
     float A_w_J;
-    f3 D_RGB;
-    m33f MATRIX_RGB_to_CAM16;
-    m33f MATRIX_CAM16_to_RGB;
+    m33f MATRIX_RGB_to_CAM16_c;
+    m33f MATRIX_CAM16_c_to_RGB;
 };
 
 struct ToneScaleParams
diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
index 666db40aa7..d582ee40f1 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
@@ -163,12 +163,12 @@ float Y_to_J(float Y, const JMhParams &params)
 
 inline f3 RGB_to_Aab(const f3 &RGB, const JMhParams &p)
 {
-    const f3 rgb_m = mult_f3_f33(RGB, p.MATRIX_RGB_to_CAM16);
+    const f3 rgb_m = mult_f3_f33(RGB, p.MATRIX_RGB_to_CAM16_c);
 
     const f3 rgb_a = {
-        panlrc_forward(rgb_m[0] * p.D_RGB[0], p.F_L),
-        panlrc_forward(rgb_m[1] * p.D_RGB[1], p.F_L),
-        panlrc_forward(rgb_m[2] * p.D_RGB[2], p.F_L)
+        panlrc_forward(rgb_m[0], p.F_L),
+        panlrc_forward(rgb_m[1], p.F_L),
+        panlrc_forward(rgb_m[2], p.F_L)
     };
 
     const float A = 2.f * rgb_a[0] + rgb_a[1] + 0.05f * rgb_a[2];
@@ -224,12 +224,12 @@ inline f3 Aab_to_RGB(const f3 &Aab, const JMhParams &p)
     const float blu_a = (460.f * Aab[0] - 220.f * Aab[1] - 6300.f * Aab[2]) / 1403.f;
 
     const f3 rgb_m = {
-        panlrc_inverse(red_a, p.F_L) / p.D_RGB[0],
-        panlrc_inverse(grn_a, p.F_L) / p.D_RGB[1],
-        panlrc_inverse(blu_a, p.F_L) / p.D_RGB[2]
+        panlrc_inverse(red_a, p.F_L),
+        panlrc_inverse(grn_a, p.F_L),
+        panlrc_inverse(blu_a, p.F_L)
     };
 
-    const f3 rgb = mult_f3_f33(rgb_m, p.MATRIX_CAM16_to_RGB);
+    const f3 rgb = mult_f3_f33(rgb_m, p.MATRIX_CAM16_c_to_RGB);
     return rgb;
 }
 
@@ -428,12 +428,13 @@ JMhParams init_JMhParams(const Primaries &prims)
 
     p.F_L = F_L;
     p.cz = surround[1] * z;
-    p.D_RGB = D_RGB;
     p.A_w = A_w;
     p.A_w_J = A_w_J;
 
-    p.MATRIX_RGB_to_CAM16 = mult_f33_f33(RGBtoRGB_f33(prims, CAM16::primaries), scale_f33(Identity_M33, f3_from_f(100.f)));
-    p.MATRIX_CAM16_to_RGB = invert_f33(p.MATRIX_RGB_to_CAM16);
+    // Note we are prescaling the CAM16 LMS responses to directly provide for chromatic adaptation.
+    const m33f MATRIX_RGB_to_CAM16 = mult_f33_f33(RGBtoRGB_f33(prims, CAM16::primaries), scale_f33(Identity_M33, f3_from_f(100.f)));
+    p.MATRIX_RGB_to_CAM16_c = mult_f33_f33(scale_f33(Identity_M33, D_RGB), MATRIX_RGB_to_CAM16);
+    p.MATRIX_CAM16_c_to_RGB = invert_f33(p.MATRIX_RGB_to_CAM16_c);
 
     return p;
 }
diff --git a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
index 5b65944101..668bcd728c 100644
--- a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
@@ -356,8 +356,7 @@ void _Add_RGB_to_JMh_Shader(
 {
     const std::string pxl(shaderCreator->getPixelName());
 
-    ss.newLine() << ss.float3Decl("lms") << " = " << ss.mat3fMul(&p.MATRIX_RGB_to_CAM16[0], pxl + ".rgb") << ";";
-    ss.newLine() << "lms = " << "lms * " << ss.float3Const(p.D_RGB[0], p.D_RGB[1], p.D_RGB[2]) << ";";
+    ss.newLine() << ss.float3Decl("lms") << " = " << ss.mat3fMul(&p.MATRIX_RGB_to_CAM16_c[0], pxl + ".rgb") << ";";
 
     ss.newLine() << ss.float3Decl("F_L_v") << " = pow(" << p.F_L << " * abs(lms) / 100.0, " << ss.float3Const(0.42f) << ");";
     ss.newLine() << ss.float3Decl("rgb_a") << " = (400.0 * sign(lms) * F_L_v) / (27.13 + F_L_v);";
@@ -397,9 +396,8 @@ void _Add_JMh_to_RGB_Shader(
     ss.newLine() << "rgb_a.b = (460.0 * A - 220.0 * a - 6300.0 *b) / 1403.0;";
 
     ss.newLine() << ss.float3Decl("lms") << " = sign(rgb_a) * 100.0 / " << p.F_L << " * pow(27.13 * abs(rgb_a) / (400.0 - abs(rgb_a)), " << ss.float3Const(1.f / 0.42f) << ");";
-    ss.newLine() << "lms = " << "lms / " << ss.float3Const(p.D_RGB[0], p.D_RGB[1], p.D_RGB[2]) << ";";
 
-    ss.newLine() << pxl << ".rgb = " << ss.mat3fMul(&p.MATRIX_CAM16_to_RGB[0], "lms") << ";";
+    ss.newLine() << pxl << ".rgb = " << ss.mat3fMul(&p.MATRIX_CAM16_c_to_RGB[0], "lms") << ";";
 }
 
 std::string _Add_Reach_table(

From 2fb497b198d833930272729d6ceb1fe327d0afcf Mon Sep 17 00:00:00 2001
From: Kevin Wheatley <kevin.wheatley@framestore.com>
Date: Thu, 2 Jan 2025 18:18:37 +0000
Subject: [PATCH 10/70] Use matrix form for transforming cone responses to Aab

Signed-off-by: Kevin Wheatley <kevin.wheatley@framestore.com>
---
 .../ops/fixedfunction/ACES2/Common.h          |  2 ++
 .../ops/fixedfunction/ACES2/Transform.cpp     | 22 +++++++++--------
 .../ops/fixedfunction/FixedFunctionOpGPU.cpp  | 24 ++++++++-----------
 3 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h b/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
index 1ec71fe391..133482b8f7 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
@@ -43,6 +43,8 @@ struct JMhParams
     float A_w_J;
     m33f MATRIX_RGB_to_CAM16_c;
     m33f MATRIX_CAM16_c_to_RGB;
+    m33f MATRIX_cone_response_to_Aab;
+    m33f MATRIX_Aab_to_cone_response;
 };
 
 struct ToneScaleParams
diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
index d582ee40f1..273bce9f65 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
@@ -171,10 +171,8 @@ inline f3 RGB_to_Aab(const f3 &RGB, const JMhParams &p)
         panlrc_forward(rgb_m[2], p.F_L)
     };
 
-    const float A = 2.f * rgb_a[0] + rgb_a[1] + 0.05f * rgb_a[2];
-    const float a = rgb_a[0] - 12.f * rgb_a[1] / 11.f + rgb_a[2] / 11.f;
-    const float b = (rgb_a[0] + rgb_a[1] - 2.f * rgb_a[2]) / 9.f;
-    return {A, a, b};
+    const f3 Aab = mult_f3_f33(rgb_a, p.MATRIX_cone_response_to_Aab);
+    return Aab;
 }
 
 inline f3 Aab_to_JMh(const f3 &Aab, const JMhParams &p)
@@ -219,14 +217,12 @@ inline f3 JMh_to_Aab(const f3 &JMh, const JMhParams &p)
 
 inline f3 Aab_to_RGB(const f3 &Aab, const JMhParams &p)
 {
-    const float red_a = (460.f * Aab[0] + 451.f * Aab[1] + 288.f * Aab[2]) / 1403.f;
-    const float grn_a = (460.f * Aab[0] - 891.f * Aab[1] - 261.f * Aab[2]) / 1403.f;
-    const float blu_a = (460.f * Aab[0] - 220.f * Aab[1] - 6300.f * Aab[2]) / 1403.f;
+    const f3 rgb_a = mult_f3_f33(Aab, p.MATRIX_Aab_to_cone_response);
 
     const f3 rgb_m = {
-        panlrc_inverse(red_a, p.F_L),
-        panlrc_inverse(grn_a, p.F_L),
-        panlrc_inverse(blu_a, p.F_L)
+        panlrc_inverse(rgb_a[0], p.F_L),
+        panlrc_inverse(rgb_a[1], p.F_L),
+        panlrc_inverse(rgb_a[2], p.F_L)
     };
 
     const f3 rgb = mult_f3_f33(rgb_m, p.MATRIX_CAM16_c_to_RGB);
@@ -436,6 +432,12 @@ JMhParams init_JMhParams(const Primaries &prims)
     p.MATRIX_RGB_to_CAM16_c = mult_f33_f33(scale_f33(Identity_M33, D_RGB), MATRIX_RGB_to_CAM16);
     p.MATRIX_CAM16_c_to_RGB = invert_f33(p.MATRIX_RGB_to_CAM16_c);
 
+    p.MATRIX_cone_response_to_Aab = {
+        2.0f, 1.0f, 1.0f / 20.0f,
+        1.0f, -12.0f / 11.0f, 1.0f / 11.0f,
+        1.0f / 9.0f, 1.0f / 9.0f, -2.0f / 9.0f
+    };
+    p.MATRIX_Aab_to_cone_response = invert_f33(p.MATRIX_cone_response_to_Aab);
     return p;
 }
 
diff --git a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
index 668bcd728c..fd123ab4d4 100644
--- a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
@@ -361,15 +361,13 @@ void _Add_RGB_to_JMh_Shader(
     ss.newLine() << ss.float3Decl("F_L_v") << " = pow(" << p.F_L << " * abs(lms) / 100.0, " << ss.float3Const(0.42f) << ");";
     ss.newLine() << ss.float3Decl("rgb_a") << " = (400.0 * sign(lms) * F_L_v) / (27.13 + F_L_v);";
 
-    ss.newLine() << ss.floatDecl("A") << " = 2.0 * rgb_a.r + rgb_a.g + 0.05 * rgb_a.b;";
-    ss.newLine() << ss.floatDecl("a") << " = rgb_a.r - 12.0 * rgb_a.g / 11.0 + rgb_a.b / 11.0;";
-    ss.newLine() << ss.floatDecl("b") << " = (rgb_a.r + rgb_a.g - 2.0 * rgb_a.b) / 9.0;";
+    ss.newLine() << ss.float3Decl("Aab") << " = " << ss.mat3fMul(&p.MATRIX_cone_response_to_Aab[0], "rgb_a.rgb") << ";";
 
-    ss.newLine() << ss.floatDecl("J") << " = 100.0 * pow(A / " << p.A_w << ", " << p.cz << ");";
+    ss.newLine() << ss.floatDecl("J") << " = 100.0 * pow(Aab.r / " << p.A_w << ", " << p.cz << ");";
 
-    ss.newLine() << ss.floatDecl("M") << " = (J == 0.0) ? 0.0 : 43.0 * " << ACES2::surround[2] << " * sqrt(a * a + b * b);";
+    ss.newLine() << ss.floatDecl("M") << " = (J == 0.0) ? 0.0 : 43.0 * " << ACES2::surround[2] << " * sqrt(Aab.g * Aab.g + Aab.b * Aab.b);";
 
-    ss.newLine() << ss.floatDecl("h") << " = (a == 0.0) ? 0.0 : " << ss.atan2("b", "a") << " * 180.0 / 3.14159265358979;";
+    ss.newLine() << ss.floatDecl("h") << " = (Aab.g == 0.0) ? 0.0 : " << ss.atan2("Aab.b", "Aab.g") << " * 180.0 / 3.14159265358979;";
     ss.newLine() << "h = h - floor(h / 360.0) * 360.0;";
     ss.newLine() << "h = (h < 0.0) ? h + 360.0 : h;";
 
@@ -386,14 +384,12 @@ void _Add_JMh_to_RGB_Shader(
     ss.newLine() << ss.floatDecl("h") << " = " << pxl << ".b * 3.14159265358979 / 180.0;";
 
     ss.newLine() << ss.floatDecl("scale") << " = " << pxl << ".g / (43.0 * " << ACES2::surround[2] << ");";
-    ss.newLine() << ss.floatDecl("A") << " = " << p.A_w << " * pow(" << pxl << ".r / 100.0, 1.0 / (" << p.cz << "));";
-    ss.newLine() << ss.floatDecl("a") << " = scale * cos(h);";
-    ss.newLine() << ss.floatDecl("b") << " = scale * sin(h);";
-
-    ss.newLine() << ss.float3Decl("rgb_a") << ";";
-    ss.newLine() << "rgb_a.r = (460.0 * A + 451.0 * a + 288.0 *b) / 1403.0;";
-    ss.newLine() << "rgb_a.g = (460.0 * A - 891.0 * a - 261.0 *b) / 1403.0;";
-    ss.newLine() << "rgb_a.b = (460.0 * A - 220.0 * a - 6300.0 *b) / 1403.0;";
+    ss.newLine() << ss.float3Decl("Aab") << ";";
+    ss.newLine() << "Aab.r = " << p.A_w << " * pow(" << pxl << ".r / 100.0, 1.0 / (" << p.cz << "));";
+    ss.newLine() << "Aab.g = scale * cos(h);";
+    ss.newLine() << "Aab.b = scale * sin(h);";
+
+    ss.newLine() << ss.float3Decl("rgb_a") << " = " << ss.mat3fMul(&p.MATRIX_Aab_to_cone_response[0], "Aab.rgb") << ";";
 
     ss.newLine() << ss.float3Decl("lms") << " = sign(rgb_a) * 100.0 / " << p.F_L << " * pow(27.13 * abs(rgb_a) / (400.0 - abs(rgb_a)), " << ss.float3Const(1.f / 0.42f) << ");";
 

From 9e70f69148ac844cb8cd9dc8d8854b8617ad201d Mon Sep 17 00:00:00 2001
From: Kevin Wheatley <kevin.wheatley@framestore.com>
Date: Mon, 6 Jan 2025 13:58:53 +0000
Subject: [PATCH 11/70] Normalise the F_L parameter by reference_luminance

Signed-off-by: Kevin Wheatley <kevin.wheatley@framestore.com>
---
 .../ops/fixedfunction/ACES2/Common.h          |  2 +-
 .../ops/fixedfunction/ACES2/Transform.cpp     | 43 ++++++++++---------
 .../ops/fixedfunction/FixedFunctionOpGPU.cpp  | 12 +++---
 3 files changed, 29 insertions(+), 28 deletions(-)

diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h b/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
index 133482b8f7..0741c74e85 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
@@ -37,7 +37,7 @@ struct Table1D
 
 struct JMhParams
 {
-    float F_L;
+    float F_L_n;    // F_L normalised
     float cz;
     float A_w;
     float A_w_J;
diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
index 273bce9f65..f4bb17fc8e 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
@@ -111,17 +111,17 @@ float hue_dependent_upper_hull_gamma(float h, const ACES2::Table1D &gt)
 // CAM
 //
 
-inline float _post_adaptation_cone_response_compression_fwd(float Rc, const float F_L)
+inline float _post_adaptation_cone_response_compression_fwd(float Rc, const float F_L_n)
 {
-    const float F_L_Y = powf(Rc * F_L / reference_luminance, 0.42f);
+    const float F_L_Y = powf(Rc * F_L_n, 0.42f);
     const float Ra    = (400.f * F_L_Y) / (27.13f + F_L_Y);
     return Ra;
 }
 
-inline float _post_adaptation_cone_response_compression_inv(float Ra, const float F_L)
+inline float _post_adaptation_cone_response_compression_inv(float Ra, const float F_L_n)
 {
     const float F_L_Y = (27.13f * Ra) / (400.f - Ra); // TODO: what happens when Ra >= 400.0f
-    const float Rc    = reference_luminance / F_L * powf(F_L_Y, 1.f / 0.42f);
+    const float Rc    = powf(F_L_Y, 1.f / 0.42f) / F_L_n;
     return Rc;
 }
 
@@ -156,8 +156,8 @@ float panlrc_inverse(float v, float F_L)
 float Y_to_J(float Y, const JMhParams &params)
 {
     const float abs_Y = std::abs(Y);
-    const float Ra    = _post_adaptation_cone_response_compression_fwd(abs_Y, params.F_L);
-    const float J     = J_scale * powf(Ra / params.A_w_J, params.cz);
+    const float Ra    = _post_adaptation_cone_response_compression_fwd(abs_Y, params.F_L_n);
+    const float J     = J_from_Achromatic(Ra, params, params.cz);
     return std::copysign(J, Y);
 }
 
@@ -166,9 +166,9 @@ inline f3 RGB_to_Aab(const f3 &RGB, const JMhParams &p)
     const f3 rgb_m = mult_f3_f33(RGB, p.MATRIX_RGB_to_CAM16_c);
 
     const f3 rgb_a = {
-        panlrc_forward(rgb_m[0], p.F_L),
-        panlrc_forward(rgb_m[1], p.F_L),
-        panlrc_forward(rgb_m[2], p.F_L)
+        panlrc_forward(rgb_m[0], p.F_L_n),
+        panlrc_forward(rgb_m[1], p.F_L_n),
+        panlrc_forward(rgb_m[2], p.F_L_n)
     };
 
     const f3 Aab = mult_f3_f33(rgb_a, p.MATRIX_cone_response_to_Aab);
@@ -220,9 +220,9 @@ inline f3 Aab_to_RGB(const f3 &Aab, const JMhParams &p)
     const f3 rgb_a = mult_f3_f33(Aab, p.MATRIX_Aab_to_cone_response);
 
     const f3 rgb_m = {
-        panlrc_inverse(rgb_a[0], p.F_L),
-        panlrc_inverse(rgb_a[1], p.F_L),
-        panlrc_inverse(rgb_a[2], p.F_L)
+        panlrc_inverse(rgb_a[0], p.F_L_n),
+        panlrc_inverse(rgb_a[1], p.F_L_n),
+        panlrc_inverse(rgb_a[2], p.F_L_n)
     };
 
     const f3 rgb = mult_f3_f33(rgb_m, p.MATRIX_CAM16_c_to_RGB);
@@ -296,13 +296,13 @@ float tonescale_fwd(const float J, const JMhParams &p, const ToneScaleParams &pt
     // Tonescale applied in Y (convert to and from J)
     const float J_abs = std::abs(J);
     const float Ra    = Achromatic_from_J(J_abs, p, p.cz);
-    const float Y     = _post_adaptation_cone_response_compression_inv(Ra, p.F_L);
+    const float Y     = _post_adaptation_cone_response_compression_inv(Ra, p.F_L_n);
     
     const float Y_norm = Y / 100.f; //TODO
     const float f    = pt.m_2 * powf(Y_norm / (Y_norm + pt.s_2), pt.g);
     const float Y_ts = std::max(0.f, f * f / (f + pt.t_1)) * pt.n_r;  // max prevents -ve values being output also handles division by zero possibility
 
-    const float Ra_ts = _post_adaptation_cone_response_compression_fwd(Y_ts, p.F_L);
+    const float Ra_ts = _post_adaptation_cone_response_compression_fwd(Y_ts, p.F_L_n);
     const float J_ts  = J_from_Achromatic(Ra_ts, p, p.cz);
     return std::copysign(J_ts, Y_ts);
 }
@@ -312,14 +312,14 @@ float tonescale_inv(const float J_ts, const JMhParams &p, const ToneScaleParams
     // Inverse Tonescale applied in Y (convert to and from J)
     const float J_abs = std::abs(J_ts);
     const float Ra    = Achromatic_from_J(J_abs, p, p.cz);
-    const float Y_ts  = _post_adaptation_cone_response_compression_inv(Ra, p.F_L);
+    const float Y_ts  = _post_adaptation_cone_response_compression_inv(Ra, p.F_L_n);
 
     const float Y_ts_norm = Y_ts / 100.0f; // TODO
     const float Z = std::max(0.f, std::min(pt.n / (pt.u_2 * pt.n_r), Y_ts_norm));  //TODO
     const float f = (Z + sqrt(Z * (4.f * pt.t_1 + Z))) / 2.f;
     const float Y = pt.s_2 / (powf((pt.m_2 / f), (1.f / pt.g)) - 1.f) * pt.n_r;
 
-    const float Ra_ts = _post_adaptation_cone_response_compression_fwd(Y, p.F_L);
+    const float Ra_ts = _post_adaptation_cone_response_compression_fwd(Y, p.F_L_n);
     const float J     = J_from_Achromatic(Ra_ts, p, p.cz);
     return std::copysign(J, Y_ts);
 }
@@ -399,6 +399,8 @@ JMhParams init_JMhParams(const Primaries &prims)
     const float F_L = 0.2f * K4 * (5.f * L_A) + 0.1f * powf((1.f - K4), 2.f) * powf(5.f * L_A, 1.f/3.f);
     const float z = 1.48f + sqrt(N);
 
+    p.F_L_n = F_L / reference_luminance;
+
     const f3 D_RGB = {
         Y_W / RGB_w[0],
         Y_W / RGB_w[1],
@@ -412,9 +414,9 @@ JMhParams init_JMhParams(const Primaries &prims)
     };
 
     const f3 RGB_AW = {
-        panlrc_forward(RGB_WC[0], F_L),
-        panlrc_forward(RGB_WC[1], F_L),
-        panlrc_forward(RGB_WC[2], F_L)
+        panlrc_forward(RGB_WC[0], p.F_L_n),
+        panlrc_forward(RGB_WC[1], p.F_L_n),
+        panlrc_forward(RGB_WC[2], p.F_L_n)
     };
 
     const float A_w = ra * RGB_AW[0] + RGB_AW[1] + ba * RGB_AW[2];
@@ -422,13 +424,12 @@ JMhParams init_JMhParams(const Primaries &prims)
     const float F_L_W = powf(F_L, 0.42f);
     const float A_w_J   = (400.f * F_L_W) / (27.13f + F_L_W);
 
-    p.F_L = F_L;
     p.cz = surround[1] * z;
     p.A_w = A_w;
     p.A_w_J = A_w_J;
 
     // Note we are prescaling the CAM16 LMS responses to directly provide for chromatic adaptation.
-    const m33f MATRIX_RGB_to_CAM16 = mult_f33_f33(RGBtoRGB_f33(prims, CAM16::primaries), scale_f33(Identity_M33, f3_from_f(100.f)));
+    const m33f MATRIX_RGB_to_CAM16 = mult_f33_f33(RGBtoRGB_f33(prims, CAM16::primaries), scale_f33(Identity_M33, f3_from_f(reference_luminance)));
     p.MATRIX_RGB_to_CAM16_c = mult_f33_f33(scale_f33(Identity_M33, D_RGB), MATRIX_RGB_to_CAM16);
     p.MATRIX_CAM16_c_to_RGB = invert_f33(p.MATRIX_RGB_to_CAM16_c);
 
diff --git a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
index fd123ab4d4..d5040abf55 100644
--- a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
@@ -358,7 +358,7 @@ void _Add_RGB_to_JMh_Shader(
 
     ss.newLine() << ss.float3Decl("lms") << " = " << ss.mat3fMul(&p.MATRIX_RGB_to_CAM16_c[0], pxl + ".rgb") << ";";
 
-    ss.newLine() << ss.float3Decl("F_L_v") << " = pow(" << p.F_L << " * abs(lms) / 100.0, " << ss.float3Const(0.42f) << ");";
+    ss.newLine() << ss.float3Decl("F_L_v") << " = pow(" << p.F_L_n << " * abs(lms), " << ss.float3Const(0.42f) << ");";
     ss.newLine() << ss.float3Decl("rgb_a") << " = (400.0 * sign(lms) * F_L_v) / (27.13 + F_L_v);";
 
     ss.newLine() << ss.float3Decl("Aab") << " = " << ss.mat3fMul(&p.MATRIX_cone_response_to_Aab[0], "rgb_a.rgb") << ";";
@@ -391,7 +391,7 @@ void _Add_JMh_to_RGB_Shader(
 
     ss.newLine() << ss.float3Decl("rgb_a") << " = " << ss.mat3fMul(&p.MATRIX_Aab_to_cone_response[0], "Aab.rgb") << ";";
 
-    ss.newLine() << ss.float3Decl("lms") << " = sign(rgb_a) * 100.0 / " << p.F_L << " * pow(27.13 * abs(rgb_a) / (400.0 - abs(rgb_a)), " << ss.float3Const(1.f / 0.42f) << ");";
+    ss.newLine() << ss.float3Decl("lms") << " = sign(rgb_a) * pow(27.13 * abs(rgb_a) / (400.0 - abs(rgb_a)), " << ss.float3Const(1.f / 0.42f) << ")/ " << p.F_L_n << ";";
 
     ss.newLine() << pxl << ".rgb = " << ss.mat3fMul(&p.MATRIX_CAM16_c_to_RGB[0], "lms") << ";";
 }
@@ -545,12 +545,12 @@ void _Add_Tonescale_Compress_Fwd_Shader(
 
     // Tonescale applied in Y (convert to and from J)
     ss.newLine() << ss.floatDecl("A") << " = " << p.A_w_J << " * pow(abs(J) / 100.0, 1.0 / (" << p.cz << "));";
-    ss.newLine() << ss.floatDecl("Y") << " = sign(J) * 100.0 / " << p.F_L << " * pow((27.13 * A) / (400.0 - A), 1.0 / 0.42) / 100.0;";
+    ss.newLine() << ss.floatDecl("Y") << " = sign(J) * pow((27.13 * A) / (400.0 - A), 1.0 / 0.42) / " << p.F_L_n << " / 100.0;";
 
     ss.newLine() << ss.floatDecl("f") << " = " << t.m_2  << " * pow(max(0.0, Y) / (Y + " << t.s_2 << "), " << t.g << ");";
     ss.newLine() << ss.floatDecl("Y_ts") << " = max(0.0, f * f / (f + " << t.t_1 << ")) * " << t.n_r << ";";
 
-    ss.newLine() << ss.floatDecl("F_L_Y") << " = pow(" << p.F_L << " * abs(Y_ts) / 100.0, 0.42);";
+    ss.newLine() << ss.floatDecl("F_L_Y") << " = pow(" << p.F_L_n << " * abs(Y_ts), 0.42);";
     ss.newLine() << ss.floatDecl("J_ts") << " = sign(Y_ts) * 100.0 * pow(((400.0 * F_L_Y) / (27.13 + F_L_Y)) / " << p.A_w_J << ", " << p.cz << ");";
 
     // ChromaCompress
@@ -616,13 +616,13 @@ void _Add_Tonescale_Compress_Inv_Shader(
 
     // Inverse Tonescale applied in Y (convert to and from J)
     ss.newLine() << ss.floatDecl("A") << " = " << p.A_w_J << " * pow(abs(J_ts) / 100.0, 1.0 / (" << p.cz << "));";
-    ss.newLine() << ss.floatDecl("Y_ts") << " = sign(J_ts) * 100.0 / " << p.F_L << " * pow((27.13 * A) / (400.0 - A), 1.0 / 0.42) / 100.0;";
+    ss.newLine() << ss.floatDecl("Y_ts") << " = sign(J_ts) * pow((27.13 * A) / (400.0 - A), 1.0 / 0.42) / " << p.F_L_n << " / 100.0;";
 
     ss.newLine() << ss.floatDecl("Z") << " = max(0.0, min(" << t.n << " / (" << t.u_2 * t.n_r << "), Y_ts));";
     ss.newLine() << ss.floatDecl("ht") << " = (Z + sqrt(Z * (4.0 * " << t.t_1 << " + Z))) / 2.0;";
     ss.newLine() << ss.floatDecl("Y") << " = " << t.s_2 << " / (pow((" << t.m_2 << " / ht), (1.0 / " << t.g << ")) - 1.0);";
 
-    ss.newLine() << ss.floatDecl("F_L_Y") << " = pow(" << p.F_L << " * abs(Y * 100.0) / 100.0, 0.42);";
+    ss.newLine() << ss.floatDecl("F_L_Y") << " = pow(" << p.F_L_n << " * abs(Y * 100.0), 0.42);";
     ss.newLine() << ss.floatDecl("J") << " = sign(Y) * 100.0 * pow(((400.0 * F_L_Y) / (27.13 + F_L_Y)) / " << p.A_w_J << ", " << p.cz << ");";
 
     // ChromaCompress

From 26a08813d53053b4cab2e0a5600947e2876a88d0 Mon Sep 17 00:00:00 2001
From: Kevin Wheatley <kevin.wheatley@framestore.com>
Date: Mon, 6 Jan 2025 13:53:43 +0000
Subject: [PATCH 12/70] Remove ra and ba related variables to avoid them being
 out of sync with opponent calculation

Signed-off-by: Kevin Wheatley <kevin.wheatley@framestore.com>
---
 .../ops/fixedfunction/ACES2/Common.h          |  3 ---
 .../ops/fixedfunction/ACES2/Transform.cpp     | 19 +++++++++----------
 2 files changed, 9 insertions(+), 13 deletions(-)

diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h b/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
index 0741c74e85..9b01f1285f 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
@@ -87,9 +87,6 @@ struct GamutCompressParams
 constexpr float reference_luminance = 100.f;
 constexpr float L_A = 100.f;
 constexpr float Y_b = 20.f;
-constexpr float ac_resp = 1.f;
-constexpr float ra = 2.f * ac_resp;
-constexpr float ba = 0.05f + (2.f - ra);
 constexpr f3 surround = {0.9f, 0.59f, 0.9f}; // Dim surround
 constexpr float J_scale = 100.0f;
 
diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
index f4bb17fc8e..33a4c39792 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
@@ -384,6 +384,13 @@ JMhParams init_JMhParams(const Primaries &prims)
 {
     JMhParams p;
 
+    p.MATRIX_cone_response_to_Aab = {
+        2.0f, 1.0f, 1.0f / 20.0f,
+        1.0f, -12.0f / 11.0f, 1.0f / 11.0f,
+        1.0f / 9.0f, 1.0f / 9.0f, -2.0f / 9.0f
+    };
+    p.MATRIX_Aab_to_cone_response = invert_f33(p.MATRIX_cone_response_to_Aab);
+
     const m33f MATRIX_16 = XYZtoRGB_f33(CAM16::primaries);
     const m33f RGB_to_XYZ = RGBtoXYZ_f33(prims);
     const f3 XYZ_w = mult_f3_f33(f3_from_f(reference_luminance), RGB_to_XYZ);
@@ -419,10 +426,8 @@ JMhParams init_JMhParams(const Primaries &prims)
         panlrc_forward(RGB_WC[2], p.F_L_n)
     };
 
-    const float A_w = ra * RGB_AW[0] + RGB_AW[1] + ba * RGB_AW[2];
-
-    const float F_L_W = powf(F_L, 0.42f);
-    const float A_w_J   = (400.f * F_L_W) / (27.13f + F_L_W);
+    const float A_w = p.MATRIX_cone_response_to_Aab[0] * RGB_AW[0] + p.MATRIX_cone_response_to_Aab[1] * RGB_AW[1] + p.MATRIX_cone_response_to_Aab[2] * RGB_AW[2];
+    const float A_w_J = _post_adaptation_cone_response_compression_fwd(reference_luminance, p.F_L_n);
 
     p.cz = surround[1] * z;
     p.A_w = A_w;
@@ -433,12 +438,6 @@ JMhParams init_JMhParams(const Primaries &prims)
     p.MATRIX_RGB_to_CAM16_c = mult_f33_f33(scale_f33(Identity_M33, D_RGB), MATRIX_RGB_to_CAM16);
     p.MATRIX_CAM16_c_to_RGB = invert_f33(p.MATRIX_RGB_to_CAM16_c);
 
-    p.MATRIX_cone_response_to_Aab = {
-        2.0f, 1.0f, 1.0f / 20.0f,
-        1.0f, -12.0f / 11.0f, 1.0f / 11.0f,
-        1.0f / 9.0f, 1.0f / 9.0f, -2.0f / 9.0f
-    };
-    p.MATRIX_Aab_to_cone_response = invert_f33(p.MATRIX_cone_response_to_Aab);
     return p;
 }
 

From 1f3b1000e91610693b5f3d4ace0bacd769200034 Mon Sep 17 00:00:00 2001
From: Kevin Wheatley <kevin.wheatley@framestore.com>
Date: Fri, 3 Jan 2025 11:48:25 +0000
Subject: [PATCH 13/70] Make A<->J conversion function generic

Signed-off-by: Kevin Wheatley <kevin.wheatley@framestore.com>
---
 .../ops/fixedfunction/ACES2/Transform.cpp     | 35 +++++++++----------
 1 file changed, 16 insertions(+), 19 deletions(-)

diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
index 33a4c39792..a09882d04d 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
@@ -125,14 +125,14 @@ inline float _post_adaptation_cone_response_compression_inv(float Ra, const floa
     return Rc;
 }
 
-inline float J_from_Achromatic(float A, const JMhParams &p, const float cz)
+inline float J_from_Achromatic(float A, const float A_w, const float cz)
 {
-    return J_scale * powf(A / p.A_w_J, cz);
+    return J_scale * powf(A / A_w, cz);
 }
 
-inline float Achromatic_from_J(float J, const JMhParams &p, const float cz)
+inline float Achromatic_from_J(float J, const float A_w, const float cz)
 {
-    return  p.A_w_J * powf(J / J_scale, 1.f / cz);  //TODO
+    return  A_w * powf(J / J_scale, 1.f / cz);  //TODO
 }
 
 // Post adaptation non linear response compression
@@ -153,11 +153,11 @@ float panlrc_inverse(float v, float F_L)
 }
 
 // Optimization used during initialization
-float Y_to_J(float Y, const JMhParams &params)
+float Y_to_J(float Y, const JMhParams &p)
 {
     const float abs_Y = std::abs(Y);
-    const float Ra    = _post_adaptation_cone_response_compression_fwd(abs_Y, params.F_L_n);
-    const float J     = J_from_Achromatic(Ra, params, params.cz);
+    const float Ra    = _post_adaptation_cone_response_compression_fwd(abs_Y, p.F_L_n);
+    const float J     = J_from_Achromatic(Ra, p.A_w_J, p.cz);
     return std::copysign(J, Y);
 }
 
@@ -177,7 +177,7 @@ inline f3 RGB_to_Aab(const f3 &RGB, const JMhParams &p)
 
 inline f3 Aab_to_JMh(const f3 &Aab, const JMhParams &p)
 {
-    const float J = J_scale * powf(Aab[0] / p.A_w, p.cz); //TODO
+    const float J = J_from_Achromatic(Aab[0], p.A_w, p.cz);
 
     const float M = J == 0.f ? 0.f : 43.f * surround[2] * sqrt(Aab[1] * Aab[1] + Aab[2] * Aab[2]);
 
@@ -209,7 +209,7 @@ inline f3 JMh_to_Aab(const f3 &JMh, const JMhParams &p)
     const float h_rad = h * PI / 180.f;
 
     const float scale = M / (43.f * surround[2]);
-    const float A = p.A_w * powf(J / J_scale, 1.f / p.cz); // TODO
+    const float A = Achromatic_from_J(J, p.A_w, p.cz);
     const float a = scale * cos(h_rad);
     const float b = scale * sin(h_rad);
     return {A, a, b};
@@ -295,7 +295,7 @@ float tonescale_fwd(const float J, const JMhParams &p, const ToneScaleParams &pt
 {
     // Tonescale applied in Y (convert to and from J)
     const float J_abs = std::abs(J);
-    const float Ra    = Achromatic_from_J(J_abs, p, p.cz);
+    const float Ra    = Achromatic_from_J(J_abs, p.A_w_J, p.cz);
     const float Y     = _post_adaptation_cone_response_compression_inv(Ra, p.F_L_n);
     
     const float Y_norm = Y / 100.f; //TODO
@@ -303,7 +303,7 @@ float tonescale_fwd(const float J, const JMhParams &p, const ToneScaleParams &pt
     const float Y_ts = std::max(0.f, f * f / (f + pt.t_1)) * pt.n_r;  // max prevents -ve values being output also handles division by zero possibility
 
     const float Ra_ts = _post_adaptation_cone_response_compression_fwd(Y_ts, p.F_L_n);
-    const float J_ts  = J_from_Achromatic(Ra_ts, p, p.cz);
+    const float J_ts  = J_from_Achromatic(Ra_ts, p.A_w_J, p.cz);
     return std::copysign(J_ts, Y_ts);
 }
 
@@ -311,7 +311,7 @@ float tonescale_inv(const float J_ts, const JMhParams &p, const ToneScaleParams
 {
     // Inverse Tonescale applied in Y (convert to and from J)
     const float J_abs = std::abs(J_ts);
-    const float Ra    = Achromatic_from_J(J_abs, p, p.cz);
+    const float Ra    = Achromatic_from_J(J_abs, p.A_w_J, p.cz);
     const float Y_ts  = _post_adaptation_cone_response_compression_inv(Ra, p.F_L_n);
 
     const float Y_ts_norm = Y_ts / 100.0f; // TODO
@@ -320,7 +320,7 @@ float tonescale_inv(const float J_ts, const JMhParams &p, const ToneScaleParams
     const float Y = pt.s_2 / (powf((pt.m_2 / f), (1.f / pt.g)) - 1.f) * pt.n_r;
 
     const float Ra_ts = _post_adaptation_cone_response_compression_fwd(Y, p.F_L_n);
-    const float J     = J_from_Achromatic(Ra_ts, p, p.cz);
+    const float J     = J_from_Achromatic(Ra_ts, p.A_w_J, p.cz);
     return std::copysign(J, Y_ts);
 }
 
@@ -407,6 +407,7 @@ JMhParams init_JMhParams(const Primaries &prims)
     const float z = 1.48f + sqrt(N);
 
     p.F_L_n = F_L / reference_luminance;
+    p.cz    = surround[1] * z;
 
     const f3 D_RGB = {
         Y_W / RGB_w[0],
@@ -426,12 +427,8 @@ JMhParams init_JMhParams(const Primaries &prims)
         panlrc_forward(RGB_WC[2], p.F_L_n)
     };
 
-    const float A_w = p.MATRIX_cone_response_to_Aab[0] * RGB_AW[0] + p.MATRIX_cone_response_to_Aab[1] * RGB_AW[1] + p.MATRIX_cone_response_to_Aab[2] * RGB_AW[2];
-    const float A_w_J = _post_adaptation_cone_response_compression_fwd(reference_luminance, p.F_L_n);
-
-    p.cz = surround[1] * z;
-    p.A_w = A_w;
-    p.A_w_J = A_w_J;
+    p.A_w   = p.MATRIX_cone_response_to_Aab[0] * RGB_AW[0] + p.MATRIX_cone_response_to_Aab[1] * RGB_AW[1] + p.MATRIX_cone_response_to_Aab[2] * RGB_AW[2];
+    p.A_w_J = _post_adaptation_cone_response_compression_fwd(reference_luminance, p.F_L_n);
 
     // Note we are prescaling the CAM16 LMS responses to directly provide for chromatic adaptation.
     const m33f MATRIX_RGB_to_CAM16 = mult_f33_f33(RGBtoRGB_f33(prims, CAM16::primaries), scale_f33(Identity_M33, f3_from_f(reference_luminance)));

From b62139117040f0307351f6f1ac36d08307efbdc6 Mon Sep 17 00:00:00 2001
From: Kevin Wheatley <kevin.wheatley@framestore.com>
Date: Fri, 3 Jan 2025 12:19:08 +0000
Subject: [PATCH 14/70] Deduplicate Y<->J conversions

Signed-off-by: Kevin Wheatley <kevin.wheatley@framestore.com>
---
 .../ops/fixedfunction/ACES2/Transform.cpp     | 73 +++++++++++--------
 1 file changed, 41 insertions(+), 32 deletions(-)

diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
index a09882d04d..aa6564d099 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
@@ -125,18 +125,8 @@ inline float _post_adaptation_cone_response_compression_inv(float Ra, const floa
     return Rc;
 }
 
-inline float J_from_Achromatic(float A, const float A_w, const float cz)
-{
-    return J_scale * powf(A / A_w, cz);
-}
 
-inline float Achromatic_from_J(float J, const float A_w, const float cz)
-{
-    return  A_w * powf(J / J_scale, 1.f / cz);  //TODO
-}
-
-// Post adaptation non linear response compression
-float panlrc_forward(float v, float F_L)
+float post_adaptation_cone_response_compression_fwd(float v, float F_L)
 {
     const float abs_v = std::abs(v);
     const float Ra =  _post_adaptation_cone_response_compression_fwd(abs_v, F_L);
@@ -145,19 +135,42 @@ float panlrc_forward(float v, float F_L)
     return std::copysign(Ra, v);
 }
 
-float panlrc_inverse(float v, float F_L)
+float post_adaptation_cone_response_compression_inv(float v, float F_L)
 {
     const float abs_v = std::abs(v);
     const float Rc    = _post_adaptation_cone_response_compression_inv(abs_v, F_L);
     return std::copysign(Rc, v);
 }
 
-// Optimization used during initialization
-float Y_to_J(float Y, const JMhParams &p)
+inline float J_from_Achromatic(float A, const float A_w, const float cz)
+{
+    return J_scale * powf(A / A_w, cz);
+}
+
+inline float Achromatic_from_J(float J, const float A_w, const float cz)
+{
+    return  A_w * powf(J / J_scale, 1.f / cz);  //TODO
+}
+
+// Optimization for achromatic values
+
+inline float _J_to_Y(float abs_J, const JMhParams &p)
+{
+    const float Ra = Achromatic_from_J(abs_J, p.A_w_J, p.cz);
+    const float Y  = _post_adaptation_cone_response_compression_inv(Ra, p.F_L_n);
+    return Y;
+}
+inline float _Y_to_J(float abs_Y, const JMhParams &p)
 {
-    const float abs_Y = std::abs(Y);
     const float Ra    = _post_adaptation_cone_response_compression_fwd(abs_Y, p.F_L_n);
     const float J     = J_from_Achromatic(Ra, p.A_w_J, p.cz);
+    return J;
+}
+
+float Y_to_J(float Y, const JMhParams &p)
+{
+    const float abs_Y = std::abs(Y);
+    const float J     = _Y_to_J(abs_Y, p);
     return std::copysign(J, Y);
 }
 
@@ -166,9 +179,9 @@ inline f3 RGB_to_Aab(const f3 &RGB, const JMhParams &p)
     const f3 rgb_m = mult_f3_f33(RGB, p.MATRIX_RGB_to_CAM16_c);
 
     const f3 rgb_a = {
-        panlrc_forward(rgb_m[0], p.F_L_n),
-        panlrc_forward(rgb_m[1], p.F_L_n),
-        panlrc_forward(rgb_m[2], p.F_L_n)
+        post_adaptation_cone_response_compression_fwd(rgb_m[0], p.F_L_n),
+        post_adaptation_cone_response_compression_fwd(rgb_m[1], p.F_L_n),
+        post_adaptation_cone_response_compression_fwd(rgb_m[2], p.F_L_n)
     };
 
     const f3 Aab = mult_f3_f33(rgb_a, p.MATRIX_cone_response_to_Aab);
@@ -220,9 +233,9 @@ inline f3 Aab_to_RGB(const f3 &Aab, const JMhParams &p)
     const f3 rgb_a = mult_f3_f33(Aab, p.MATRIX_Aab_to_cone_response);
 
     const f3 rgb_m = {
-        panlrc_inverse(rgb_a[0], p.F_L_n),
-        panlrc_inverse(rgb_a[1], p.F_L_n),
-        panlrc_inverse(rgb_a[2], p.F_L_n)
+        post_adaptation_cone_response_compression_inv(rgb_a[0], p.F_L_n),
+        post_adaptation_cone_response_compression_inv(rgb_a[1], p.F_L_n),
+        post_adaptation_cone_response_compression_inv(rgb_a[2], p.F_L_n)
     };
 
     const f3 rgb = mult_f3_f33(rgb_m, p.MATRIX_CAM16_c_to_RGB);
@@ -295,15 +308,13 @@ float tonescale_fwd(const float J, const JMhParams &p, const ToneScaleParams &pt
 {
     // Tonescale applied in Y (convert to and from J)
     const float J_abs = std::abs(J);
-    const float Ra    = Achromatic_from_J(J_abs, p.A_w_J, p.cz);
-    const float Y     = _post_adaptation_cone_response_compression_inv(Ra, p.F_L_n);
+    const float Y     = _J_to_Y(J_abs, p);
     
     const float Y_norm = Y / 100.f; //TODO
     const float f    = pt.m_2 * powf(Y_norm / (Y_norm + pt.s_2), pt.g);
     const float Y_ts = std::max(0.f, f * f / (f + pt.t_1)) * pt.n_r;  // max prevents -ve values being output also handles division by zero possibility
 
-    const float Ra_ts = _post_adaptation_cone_response_compression_fwd(Y_ts, p.F_L_n);
-    const float J_ts  = J_from_Achromatic(Ra_ts, p.A_w_J, p.cz);
+    const float J_ts  = _Y_to_J(Y_ts, p);
     return std::copysign(J_ts, Y_ts);
 }
 
@@ -311,16 +322,14 @@ float tonescale_inv(const float J_ts, const JMhParams &p, const ToneScaleParams
 {
     // Inverse Tonescale applied in Y (convert to and from J)
     const float J_abs = std::abs(J_ts);
-    const float Ra    = Achromatic_from_J(J_abs, p.A_w_J, p.cz);
-    const float Y_ts  = _post_adaptation_cone_response_compression_inv(Ra, p.F_L_n);
+    const float Y_ts  = _J_to_Y(J_abs, p);
 
     const float Y_ts_norm = Y_ts / 100.0f; // TODO
     const float Z = std::max(0.f, std::min(pt.n / (pt.u_2 * pt.n_r), Y_ts_norm));  //TODO
     const float f = (Z + sqrt(Z * (4.f * pt.t_1 + Z))) / 2.f;
     const float Y = pt.s_2 / (powf((pt.m_2 / f), (1.f / pt.g)) - 1.f) * pt.n_r;
 
-    const float Ra_ts = _post_adaptation_cone_response_compression_fwd(Y, p.F_L_n);
-    const float J     = J_from_Achromatic(Ra_ts, p.A_w_J, p.cz);
+    const float J     = _Y_to_J(Y, p);
     return std::copysign(J, Y_ts);
 }
 
@@ -422,9 +431,9 @@ JMhParams init_JMhParams(const Primaries &prims)
     };
 
     const f3 RGB_AW = {
-        panlrc_forward(RGB_WC[0], p.F_L_n),
-        panlrc_forward(RGB_WC[1], p.F_L_n),
-        panlrc_forward(RGB_WC[2], p.F_L_n)
+        post_adaptation_cone_response_compression_fwd(RGB_WC[0], p.F_L_n),
+        post_adaptation_cone_response_compression_fwd(RGB_WC[1], p.F_L_n),
+        post_adaptation_cone_response_compression_fwd(RGB_WC[2], p.F_L_n)
     };
 
     p.A_w   = p.MATRIX_cone_response_to_Aab[0] * RGB_AW[0] + p.MATRIX_cone_response_to_Aab[1] * RGB_AW[1] + p.MATRIX_cone_response_to_Aab[2] * RGB_AW[2];

From 593e2caabc12d9581dc8561b1cae724680416f06 Mon Sep 17 00:00:00 2001
From: Kevin Wheatley <kevin.wheatley@framestore.com>
Date: Fri, 3 Jan 2025 13:17:57 +0000
Subject: [PATCH 15/70] Factor JMh scaling parameters into Aab matrices

Signed-off-by: Kevin Wheatley <kevin.wheatley@framestore.com>
---
 .../ops/fixedfunction/ACES2/Transform.cpp     | 36 ++++++++++---------
 .../ops/fixedfunction/FixedFunctionOpGPU.cpp  | 11 +++---
 2 files changed, 25 insertions(+), 22 deletions(-)

diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
index aa6564d099..192de5c693 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
@@ -142,28 +142,28 @@ float post_adaptation_cone_response_compression_inv(float v, float F_L)
     return std::copysign(Rc, v);
 }
 
-inline float J_from_Achromatic(float A, const float A_w, const float cz)
+inline float Achromatic_n_to_J(float A, const float cz)
 {
-    return J_scale * powf(A / A_w, cz);
+    return J_scale * powf(A, cz);
 }
 
-inline float Achromatic_from_J(float J, const float A_w, const float cz)
+inline float J_to_Achromatic_n(float J, const float cz)
 {
-    return  A_w * powf(J / J_scale, 1.f / cz);  //TODO
+    return powf(J / J_scale, 1.f / cz);
 }
 
 // Optimization for achromatic values
 
 inline float _J_to_Y(float abs_J, const JMhParams &p)
 {
-    const float Ra = Achromatic_from_J(abs_J, p.A_w_J, p.cz);
+    const float Ra = p.A_w_J * J_to_Achromatic_n(abs_J, p.cz);
     const float Y  = _post_adaptation_cone_response_compression_inv(Ra, p.F_L_n);
     return Y;
 }
 inline float _Y_to_J(float abs_Y, const JMhParams &p)
 {
     const float Ra    = _post_adaptation_cone_response_compression_fwd(abs_Y, p.F_L_n);
-    const float J     = J_from_Achromatic(Ra, p.A_w_J, p.cz);
+    const float J     = Achromatic_n_to_J(Ra / p.A_w_J, p.cz);
     return J;
 }
 
@@ -190,9 +190,9 @@ inline f3 RGB_to_Aab(const f3 &RGB, const JMhParams &p)
 
 inline f3 Aab_to_JMh(const f3 &Aab, const JMhParams &p)
 {
-    const float J = J_from_Achromatic(Aab[0], p.A_w, p.cz);
+    const float J = Achromatic_n_to_J(Aab[0], p.cz);
 
-    const float M = J == 0.f ? 0.f : 43.f * surround[2] * sqrt(Aab[1] * Aab[1] + Aab[2] * Aab[2]);
+    const float M = J == 0.f ? 0.f : sqrt(Aab[1] * Aab[1] + Aab[2] * Aab[2]);
 
     const float PI = 3.14159265358979f;
     const float h_rad = std::atan2(Aab[2], Aab[1]);
@@ -221,10 +221,9 @@ inline f3 JMh_to_Aab(const f3 &JMh, const JMhParams &p)
     const float PI = 3.14159265358979f;
     const float h_rad = h * PI / 180.f;
 
-    const float scale = M / (43.f * surround[2]);
-    const float A = Achromatic_from_J(J, p.A_w, p.cz);
-    const float a = scale * cos(h_rad);
-    const float b = scale * sin(h_rad);
+    const float A = J_to_Achromatic_n(J, p.cz);
+    const float a = M * cos(h_rad);
+    const float b = M * sin(h_rad);
     return {A, a, b};
 }
 
@@ -392,13 +391,11 @@ f3 tonescale_chroma_compress_inv(const f3 &JMh, const JMhParams &p, const ToneSc
 JMhParams init_JMhParams(const Primaries &prims)
 {
     JMhParams p;
-
-    p.MATRIX_cone_response_to_Aab = {
+    const m33f cone_response_to_Aab = {
         2.0f, 1.0f, 1.0f / 20.0f,
         1.0f, -12.0f / 11.0f, 1.0f / 11.0f,
         1.0f / 9.0f, 1.0f / 9.0f, -2.0f / 9.0f
     };
-    p.MATRIX_Aab_to_cone_response = invert_f33(p.MATRIX_cone_response_to_Aab);
 
     const m33f MATRIX_16 = XYZtoRGB_f33(CAM16::primaries);
     const m33f RGB_to_XYZ = RGBtoXYZ_f33(prims);
@@ -436,7 +433,7 @@ JMhParams init_JMhParams(const Primaries &prims)
         post_adaptation_cone_response_compression_fwd(RGB_WC[2], p.F_L_n)
     };
 
-    p.A_w   = p.MATRIX_cone_response_to_Aab[0] * RGB_AW[0] + p.MATRIX_cone_response_to_Aab[1] * RGB_AW[1] + p.MATRIX_cone_response_to_Aab[2] * RGB_AW[2];
+    p.A_w   = cone_response_to_Aab[0] * RGB_AW[0] + cone_response_to_Aab[1] * RGB_AW[1] + cone_response_to_Aab[2] * RGB_AW[2];
     p.A_w_J = _post_adaptation_cone_response_compression_fwd(reference_luminance, p.F_L_n);
 
     // Note we are prescaling the CAM16 LMS responses to directly provide for chromatic adaptation.
@@ -444,6 +441,13 @@ JMhParams init_JMhParams(const Primaries &prims)
     p.MATRIX_RGB_to_CAM16_c = mult_f33_f33(scale_f33(Identity_M33, D_RGB), MATRIX_RGB_to_CAM16);
     p.MATRIX_CAM16_c_to_RGB = invert_f33(p.MATRIX_RGB_to_CAM16_c);
 
+    p.MATRIX_cone_response_to_Aab = {
+        cone_response_to_Aab[0] / p.A_w,              cone_response_to_Aab[1] / p.A_w,              cone_response_to_Aab[2] / p.A_w,
+        cone_response_to_Aab[3] * 43.f * surround[2], cone_response_to_Aab[4] * 43.f * surround[2], cone_response_to_Aab[5] * 43.f * surround[2],
+        cone_response_to_Aab[6] * 43.f * surround[2], cone_response_to_Aab[7] * 43.f * surround[2], cone_response_to_Aab[8] * 43.f * surround[2],
+    };
+    p.MATRIX_Aab_to_cone_response = invert_f33(p.MATRIX_cone_response_to_Aab);
+
     return p;
 }
 
diff --git a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
index d5040abf55..bc9a2f582c 100644
--- a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
@@ -363,9 +363,9 @@ void _Add_RGB_to_JMh_Shader(
 
     ss.newLine() << ss.float3Decl("Aab") << " = " << ss.mat3fMul(&p.MATRIX_cone_response_to_Aab[0], "rgb_a.rgb") << ";";
 
-    ss.newLine() << ss.floatDecl("J") << " = 100.0 * pow(Aab.r / " << p.A_w << ", " << p.cz << ");";
+    ss.newLine() << ss.floatDecl("J") << " = 100.0 * pow(Aab.r, " << p.cz << ");";
 
-    ss.newLine() << ss.floatDecl("M") << " = (J == 0.0) ? 0.0 : 43.0 * " << ACES2::surround[2] << " * sqrt(Aab.g * Aab.g + Aab.b * Aab.b);";
+    ss.newLine() << ss.floatDecl("M") << " = (J == 0.0) ? 0.0 : sqrt(Aab.g * Aab.g + Aab.b * Aab.b);";
 
     ss.newLine() << ss.floatDecl("h") << " = (Aab.g == 0.0) ? 0.0 : " << ss.atan2("Aab.b", "Aab.g") << " * 180.0 / 3.14159265358979;";
     ss.newLine() << "h = h - floor(h / 360.0) * 360.0;";
@@ -383,11 +383,10 @@ void _Add_JMh_to_RGB_Shader(
 
     ss.newLine() << ss.floatDecl("h") << " = " << pxl << ".b * 3.14159265358979 / 180.0;";
 
-    ss.newLine() << ss.floatDecl("scale") << " = " << pxl << ".g / (43.0 * " << ACES2::surround[2] << ");";
     ss.newLine() << ss.float3Decl("Aab") << ";";
-    ss.newLine() << "Aab.r = " << p.A_w << " * pow(" << pxl << ".r / 100.0, 1.0 / (" << p.cz << "));";
-    ss.newLine() << "Aab.g = scale * cos(h);";
-    ss.newLine() << "Aab.b = scale * sin(h);";
+    ss.newLine() << "Aab.r = pow(" << pxl << ".r / 100.0, 1.0 / (" << p.cz << "));";
+    ss.newLine() << "Aab.g = " << pxl << ".g * cos(h);";
+    ss.newLine() << "Aab.b = " << pxl << ".g * sin(h);";
 
     ss.newLine() << ss.float3Decl("rgb_a") << " = " << ss.mat3fMul(&p.MATRIX_Aab_to_cone_response[0], "Aab.rgb") << ";";
 

From db07f8217b55521e1f32099560450957e2f87caa Mon Sep 17 00:00:00 2001
From: Kevin Wheatley <kevin.wheatley@framestore.com>
Date: Fri, 3 Jan 2025 15:49:14 +0000
Subject: [PATCH 16/70] Factor out references to PI, 360 and 180 constants.
 Avoid looking up cusp twice during inverse. Whilst searching for the cusp we
 have already constrained the search so we do not need to clamp.

Signed-off-by: Kevin Wheatley <kevin.wheatley@framestore.com>
---
 .../ops/fixedfunction/ACES2/Common.h          |  2 +
 .../ops/fixedfunction/ACES2/Transform.cpp     | 66 +++++++++----------
 2 files changed, 32 insertions(+), 36 deletions(-)

diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h b/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
index 9b01f1285f..65b7bc7c48 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
@@ -89,6 +89,8 @@ constexpr float L_A = 100.f;
 constexpr float Y_b = 20.f;
 constexpr f3 surround = {0.9f, 0.59f, 0.9f}; // Dim surround
 constexpr float J_scale = 100.0f;
+constexpr float PI = 3.14159265358979f;
+constexpr float hue_limit = 360.0f; // 2.0f * PI;
 
 // Chroma compression
 constexpr float chroma_compress = 2.4f;
diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
index 192de5c693..9e828af36d 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
@@ -13,27 +13,36 @@ namespace ACES2
 //
 // Table lookups
 //
+inline constexpr float degrees_to_radians(float d)
+{
+    return d / 180.0f * PI;
+}
 
-float wrap_to_360(float hue)
+inline constexpr float radians_to_degrees(float r)
 {
-    float y = std::fmod(hue, 360.f);
+    return r / PI * 180.f;
+}
+
+inline float wrap_to_hue_limit(float hue) // TODO: track this and strip out unneeded calls
+{
+    float y = std::fmod(hue, hue_limit);
     if ( y < 0.f)
     {
-        y = y + 360.f;
+        y = y + hue_limit;
     }
     return y;
 }
 
 float base_hue_for_position(int i_lo, int table_size) 
 {
-    const float result = i_lo * 360.f / table_size;
+    const float result = i_lo * hue_limit / table_size;
     return result;
 }
 
 int hue_position_in_uniform_table(float hue, int table_size)
 {
-    const float wrapped_hue = wrap_to_360(hue);
-    return int(wrapped_hue / 360.f * (float) table_size);
+    const float wrapped_hue = wrap_to_hue_limit(hue);
+    return int(wrapped_hue / hue_limit * (float) table_size);
 }
 
 int next_position_in_table(int entry, int table_size)
@@ -62,7 +71,7 @@ f2 cusp_from_table(float h, const Table3D &gt)
         {
             i_hi = i;
         }
-        i = clamp_to_table_bounds((i_lo + i_hi) / 2, gt.total_size);
+        i = (i_lo + i_hi) / 2, gt.total_size;
     }
 
     i_hi = std::max(1, i_hi);
@@ -102,7 +111,7 @@ float hue_dependent_upper_hull_gamma(float h, const ACES2::Table1D &gt)
 
     const float base_hue = (float) (i_lo - gt.base_index);
 
-    const float t = wrap_to_360(h) - base_hue;
+    const float t = wrap_to_hue_limit(h) - base_hue;
 
     return lerpf(gt.table[i_lo], gt.table[i_hi], t);
 }
@@ -194,13 +203,8 @@ inline f3 Aab_to_JMh(const f3 &Aab, const JMhParams &p)
 
     const float M = J == 0.f ? 0.f : sqrt(Aab[1] * Aab[1] + Aab[2] * Aab[2]);
 
-    const float PI = 3.14159265358979f;
     const float h_rad = std::atan2(Aab[2], Aab[1]);
-    float h = std::fmod(h_rad * 180.f / PI, 360.f);
-    if (h < 0.f)
-    {
-        h += 360.f;
-    }
+    float h = wrap_to_hue_limit(radians_to_degrees(h_rad));
 
     return {J, M, h};
 }
@@ -218,8 +222,7 @@ inline f3 JMh_to_Aab(const f3 &JMh, const JMhParams &p)
     const float M = JMh[1];
     const float h = JMh[2];
 
-    const float PI = 3.14159265358979f;
-    const float h_rad = h * PI / 180.f;
+    const float h_rad = degrees_to_radians(h);
 
     const float A = J_to_Achromatic_n(J, p.cz);
     const float a = M * cos(h_rad);
@@ -254,8 +257,7 @@ f3 JMh_to_RGB(const f3 &JMh, const JMhParams &p)
 
 float chroma_compress_norm(float h, float chroma_compress_scale)
 {
-    const float PI = 3.14159265358979f;
-    const float h_rad = h / 180.f * PI;
+    const float h_rad = degrees_to_radians(h);
     const float a = cos(h_rad);
     const float b = sin(h_rad);
     const float cos_hr2 = a * a - b * b;
@@ -494,9 +496,9 @@ Table3D make_gamut_table(const Primaries &P, float peakLuminance)
     gamutCuspTable.table[gamutCuspTable.base_index + gamutCuspTable.size][1] = gamutCuspTable.table[gamutCuspTable.base_index][1];
     gamutCuspTable.table[gamutCuspTable.base_index + gamutCuspTable.size][2] = gamutCuspTable.table[gamutCuspTable.base_index][2];
 
-    // Wrap the hues, to maintain monotonicity. These entries will fall outside [0.0, 360.0]
-    gamutCuspTable.table[0][2] = gamutCuspTable.table[0][2] - 360.f;
-    gamutCuspTable.table[gamutCuspTable.size+1][2] = gamutCuspTable.table[gamutCuspTable.size+1][2] + 360.f;
+    // Wrap the hues, to maintain monotonicity. These entries will fall outside [0.0, 360.0)
+    gamutCuspTable.table[0][2] = gamutCuspTable.table[0][2] - hue_limit;
+    gamutCuspTable.table[gamutCuspTable.size+1][2] = gamutCuspTable.table[gamutCuspTable.size+1][2] + hue_limit;
 
     return gamutCuspTable;
 }
@@ -713,7 +715,7 @@ float compression_function(
     return vCompressed;
 }
 
-f3 compressGamut(const f3 &JMh, float Jx, const ACES2::GamutCompressParams& p, bool invert)
+f3 compressGamut(const f3 &JMh, float Jx, const ACES2::GamutCompressParams& p, const f2& JMcusp, bool invert)
 {
     const float J = JMh[0];
     const float M = JMh[1];
@@ -726,7 +728,6 @@ f3 compressGamut(const f3 &JMh, float Jx, const ACES2::GamutCompressParams& p, b
     else
     {
         const f2 project_from = {J, M};
-        const f2 JMcusp = cusp_from_table(h, p.gamut_cusp_table);
         const float focusJ = lerpf(JMcusp[0], p.mid_J, std::min(1.f, cusp_mid_blend - (JMcusp[0] / p.limit_J_max)));
         const float slope_gain = p.limit_J_max * p.focus_dist * get_focus_gain(Jx, JMcusp[0], p.limit_J_max);
 
@@ -761,7 +762,8 @@ f3 compressGamut(const f3 &JMh, float Jx, const ACES2::GamutCompressParams& p, b
 
 f3 gamut_compress_fwd(const f3 &JMh, const GamutCompressParams &p)
 {
-    return compressGamut(JMh, JMh[0], p, false);
+    const f2 JMcusp = cusp_from_table(JMh[2], p.gamut_cusp_table);
+    return compressGamut(JMh, JMh[0], p, JMcusp, false);
 }
 
 f3 gamut_compress_inv(const f3 &JMh, const GamutCompressParams &p)
@@ -769,21 +771,13 @@ f3 gamut_compress_inv(const f3 &JMh, const GamutCompressParams &p)
     const f2 JMcusp = cusp_from_table(JMh[2], p.gamut_cusp_table);
     float Jx = JMh[0];
 
-    f3 unCompressedJMh;
-
     // Analytic inverse below threshold
-    if (Jx <= lerpf(JMcusp[0], p.limit_J_max, focus_gain_blend))
-    {
-        unCompressedJMh = compressGamut(JMh, Jx, p, true);
-    }
-    // Approximation above threshold
-    else
+    if (Jx > lerpf(JMcusp[0], p.limit_J_max, focus_gain_blend))
     {
-        Jx = compressGamut(JMh, Jx, p, true)[0];
-        unCompressedJMh = compressGamut(JMh, Jx, p, true);
+        // Approximation above threshold
+        Jx = compressGamut(JMh, Jx, p, JMcusp, true)[0];
     }
-
-    return unCompressedJMh;
+    return compressGamut(JMh, Jx, p, JMcusp, true);
 }
 
 bool evaluate_gamma_fit(

From c7415c77611f850cb339573cf3f7ddc324a5dd71 Mon Sep 17 00:00:00 2001
From: Kevin Wheatley <kevin.wheatley@framestore.com>
Date: Mon, 6 Jan 2025 09:53:36 +0000
Subject: [PATCH 17/70] Add functions to explain some of the calculations

Signed-off-by: Kevin Wheatley <kevin.wheatley@framestore.com>
---
 .../ops/fixedfunction/ACES2/Transform.cpp     | 34 ++++++++++++++++---
 1 file changed, 30 insertions(+), 4 deletions(-)

diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
index 9e828af36d..7aeaeca2d5 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
@@ -626,9 +626,35 @@ float smin(float a, float b, float s)
     return std::min(a, b) - h * h * h * s * (1.f / 6.f);
 }
 
+inline float compute_compression_vector_slope(const float intersectJ, const float focusJ, const float limitJmax, const float slope_gain)
+{
+    const float direction_scaler = (intersectJ < focusJ) ? intersectJ : (limitJmax - intersectJ); // TODO < vs <=
+    return direction_scaler * (intersectJ - focusJ) / (focusJ * slope_gain);
+}
+
+inline float estimate_line_and_boundary_intersection_M(const float J_axis_intersect, const float slope, const float inv_gamma,
+                                                       const float J_max, const float M_max, const float J_intersection_reference)
+{
+    // Line defined by     J = slope * x + J_axis_intersect
+    // Boundary defined by J = J_max * (x / M_max) ^ (1/inv_gamma)
+    // Approximate as we do not want to iteratively solve intersection of a straight line and an exponential
+
+    // We calculate a shifted intersection from the original intersection using the inverse of the exponential
+    // and the provided reference
+    const float normalised_J         = J_axis_intersect / J_intersection_reference;
+    const float shifted_intersection = J_intersection_reference * powf(normalised_J, inv_gamma);
+
+    // Now we find the M intersection of two lines
+    // line from origin to J,M Max       l1(x) = J/M * x
+    // line from J Intersect' with slope l2(x) = slope * x + Intersect'
+
+    return shifted_intersection / ((J_max / M_max) - slope);
+    //return shifted_intersection * M_max / (J_max - slope * M_max);
+}
+
 f3 find_gamut_boundary_intersection(const f3 &JMh_s, const f2 &JM_cusp_in, float J_focus, float J_max, float slope_gain, float gamma_top, float gamma_bottom)
 {
-    const float s = std::max(0.000001f, smooth_cusps);
+    const float s = std::max(0.000001f, smooth_cusps); // TODO: pre smooth the cusp
     const f2 JM_cusp = {
         JM_cusp_in[0],
         JM_cusp_in[1] * (1.f + smooth_m * s)
@@ -650,7 +676,7 @@ f3 find_gamut_boundary_intersection(const f3 &JMh_s, const f2 &JM_cusp_in, float
     const float M_boundary_lower = J_intersect_cusp * powf(J_intersect_source / J_intersect_cusp, 1.f / gamma_bottom) / (JM_cusp[0] / JM_cusp[1] - slope);
     const float M_boundary_upper = JM_cusp[1] * (J_max - J_intersect_cusp) * powf((J_max - J_intersect_source) / (J_max - J_intersect_cusp), 1.f / gamma_top) / (slope * JM_cusp[1] + J_max - JM_cusp[0]);
     const float M_boundary = JM_cusp[1] * smin(M_boundary_lower / JM_cusp[1], M_boundary_upper / JM_cusp[1], s);
-    const float J_boundary = J_intersect_source + slope * M_boundary;
+    const float J_boundary = J_intersect_source + slope * M_boundary; // TODO don't recalculate this
 
     return {J_boundary, M_boundary, J_intersect_source};
 }
@@ -762,13 +788,13 @@ f3 compressGamut(const f3 &JMh, float Jx, const ACES2::GamutCompressParams& p, c
 
 f3 gamut_compress_fwd(const f3 &JMh, const GamutCompressParams &p)
 {
-    const f2 JMcusp = cusp_from_table(JMh[2], p.gamut_cusp_table);
+    const f2 JMcusp = cusp_from_table(JMh[2], p.gamut_cusp_table); // TODO: call once and pass this into compress functions ?
     return compressGamut(JMh, JMh[0], p, JMcusp, false);
 }
 
 f3 gamut_compress_inv(const f3 &JMh, const GamutCompressParams &p)
 {
-    const f2 JMcusp = cusp_from_table(JMh[2], p.gamut_cusp_table);
+    const f2 JMcusp = cusp_from_table(JMh[2], p.gamut_cusp_table); // TODO: call once and pass this into compress functions ?
     float Jx = JMh[0];
 
     // Analytic inverse below threshold

From 3651734b1c64837cca12808a92ca964e9bfd386d Mon Sep 17 00:00:00 2001
From: Kevin Wheatley <kevin.wheatley@framestore.com>
Date: Tue, 7 Jan 2025 10:23:34 +0000
Subject: [PATCH 18/70] Further clarify when 100 means reference luminance.
 Migrate rescaling into tonescale s_2 parameter. Rename model_gamma to reflect
 it is actually the inverse.

Signed-off-by: Kevin Wheatley <kevin.wheatley@framestore.com>
---
 .../ops/fixedfunction/ACES2/Common.h          |  4 +-
 .../ops/fixedfunction/ACES2/Transform.cpp     | 49 ++++++++++---------
 .../ops/fixedfunction/FixedFunctionOpGPU.cpp  | 14 +++---
 3 files changed, 35 insertions(+), 32 deletions(-)

diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h b/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
index 65b7bc7c48..eb7f569b35 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
@@ -62,7 +62,7 @@ struct ToneScaleParams
 struct ChromaCompressParams
 {
     float limit_J_max;
-    float model_gamma;
+    float model_gamma_inv;
     float sat;
     float sat_thr;
     float compr;
@@ -75,7 +75,7 @@ struct GamutCompressParams
 {
     float limit_J_max;
     float mid_J;
-    float model_gamma;
+    float model_gamma_inv;
     float focus_dist;
     float lower_hull_gamma;
     Table1D reach_m_table;
diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
index 7aeaeca2d5..6b0aa60abc 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
@@ -311,8 +311,7 @@ float tonescale_fwd(const float J, const JMhParams &p, const ToneScaleParams &pt
     const float J_abs = std::abs(J);
     const float Y     = _J_to_Y(J_abs, p);
     
-    const float Y_norm = Y / 100.f; //TODO
-    const float f    = pt.m_2 * powf(Y_norm / (Y_norm + pt.s_2), pt.g);
+    const float f    = pt.m_2 * powf(Y / (Y + pt.s_2), pt.g);
     const float Y_ts = std::max(0.f, f * f / (f + pt.t_1)) * pt.n_r;  // max prevents -ve values being output also handles division by zero possibility
 
     const float J_ts  = _Y_to_J(Y_ts, p);
@@ -325,10 +324,10 @@ float tonescale_inv(const float J_ts, const JMhParams &p, const ToneScaleParams
     const float J_abs = std::abs(J_ts);
     const float Y_ts  = _J_to_Y(J_abs, p);
 
-    const float Y_ts_norm = Y_ts / 100.0f; // TODO
+    const float Y_ts_norm = Y_ts / reference_luminance; // TODO
     const float Z = std::max(0.f, std::min(pt.n / (pt.u_2 * pt.n_r), Y_ts_norm));  //TODO
     const float f = (Z + sqrt(Z * (4.f * pt.t_1 + Z))) / 2.f;
-    const float Y = pt.s_2 / (powf((pt.m_2 / f), (1.f / pt.g)) - 1.f) * pt.n_r;
+    const float Y = pt.s_2 / (powf((pt.m_2 / f), (1.f / pt.g)) - 1.f);
 
     const float J     = _Y_to_J(Y, p);
     return std::copysign(J, Y_ts);
@@ -350,9 +349,9 @@ f3 tonescale_chroma_compress_fwd(const f3 &JMh, const JMhParams &p, const ToneSc
         const float nJ = J_ts / pc.limit_J_max;
         const float snJ = std::max(0.f, 1.f - nJ);
         const float Mnorm = chroma_compress_norm(h, pc.chroma_compress_scale);
-        const float limit = powf(nJ, pc.model_gamma) * reach_m_from_table(h, pc.reach_m_table) / Mnorm;
+        const float limit = powf(nJ, pc.model_gamma_inv) * reach_m_from_table(h, pc.reach_m_table) / Mnorm;
 
-        M_cp = M * powf(J_ts / J, pc.model_gamma);
+        M_cp = M * powf(J_ts / J, pc.model_gamma_inv);
         M_cp = M_cp / Mnorm;
         M_cp = limit - toe_fwd(limit - M_cp, limit - 0.001f, snJ * pc.sat, sqrt(nJ * nJ + pc.sat_thr));
         M_cp = toe_fwd(M_cp, limit, nJ * pc.compr, snJ);
@@ -378,18 +377,24 @@ f3 tonescale_chroma_compress_inv(const f3 &JMh, const JMhParams &p, const ToneSc
         const float nJ = J_ts / pc.limit_J_max;
         const float snJ = std::max(0.f, 1.f - nJ);
         const float Mnorm = chroma_compress_norm(h, pc.chroma_compress_scale);
-        const float limit = powf(nJ, pc.model_gamma) * reach_m_from_table(h, pc.reach_m_table) / Mnorm;
+        const float limit = powf(nJ, pc.model_gamma_inv) * reach_m_from_table(h, pc.reach_m_table) / Mnorm;
 
         M = M_cp / Mnorm;
         M = toe_inv(M, limit, nJ * pc.compr, snJ);
         M = limit - toe_inv(limit - M, limit - 0.001f, snJ * pc.sat, sqrt(nJ * nJ + pc.sat_thr));
         M = M * Mnorm;
-        M = M * powf(J_ts / J, -pc.model_gamma);
+        M = M * powf(J_ts / J, -pc.model_gamma_inv);
     }
 
     return {J, M, h};
 }
 
+inline float model_gamma(void)
+{
+    // c * z nonlinearity
+    return surround[1] * (1.48f + sqrt(Y_b / reference_luminance));
+}
+
 JMhParams init_JMhParams(const Primaries &prims)
 {
     JMhParams p;
@@ -408,14 +413,12 @@ JMhParams init_JMhParams(const Primaries &prims)
     const f3 RGB_w = mult_f3_f33(XYZ_w, MATRIX_16);
 
     // Viewing condition dependent parameters
-    const float K = 1.f / (5.f * L_A + 1.f);
-    const float K4 = powf(K, 4.f);
-    const float N = Y_b / Y_W;
+    constexpr float K = 1.f / (5.f * L_A + 1.f);
+    constexpr float K4 = K * K * K * K;
     const float F_L = 0.2f * K4 * (5.f * L_A) + 0.1f * powf((1.f - K4), 2.f) * powf(5.f * L_A, 1.f/3.f);
-    const float z = 1.48f + sqrt(N);
 
     p.F_L_n = F_L / reference_luminance;
-    p.cz    = surround[1] * z;
+    p.cz    = model_gamma();
 
     const f3 D_RGB = {
         Y_W / RGB_w[0],
@@ -450,7 +453,7 @@ JMhParams init_JMhParams(const Primaries &prims)
     };
     p.MATRIX_Aab_to_cone_response = invert_f33(p.MATRIX_cone_response_to_Aab);
 
-    return p;
+   return p;
 }
 
 Table3D make_gamut_table(const Primaries &P, float peakLuminance)
@@ -688,7 +691,7 @@ f3 get_reach_boundary(
     const f2 &JMcusp,
     float focusJ,
     float limit_J_max,
-    float model_gamma,
+    float model_gamma_inv,
     float focus_dist,
     const ACES2::Table1D & reach_m_table
 )
@@ -709,7 +712,7 @@ f3 get_reach_boundary(
         slope = (limit_J_max - intersectJ) * (intersectJ - focusJ) / (focusJ * slope_gain);
     }
 
-    const float boundary = limit_J_max * powf(intersectJ / limit_J_max, model_gamma) * reachMaxM / (limit_J_max - slope * reachMaxM);
+    const float boundary = limit_J_max * powf(intersectJ / limit_J_max, model_gamma_inv) * reachMaxM / (limit_J_max - slope * reachMaxM);
     return {J, boundary, h};
 }
 
@@ -769,7 +772,7 @@ f3 compressGamut(const f3 &JMh, float Jx, const ACES2::GamutCompressParams& p, c
             return {J, 0.f, h};
         }
 
-        const f3 reachBoundary = get_reach_boundary(JMboundary[0], JMboundary[1], h, JMcusp, focusJ, p.limit_J_max, p.model_gamma, p.focus_dist, p.reach_m_table);
+        const f3 reachBoundary = get_reach_boundary(JMboundary[0], JMboundary[1], h, JMcusp, focusJ, p.limit_J_max, p.model_gamma_inv, p.focus_dist, p.reach_m_table);
 
         const float difference = std::max(1.0001f, reachBoundary[1] / JMboundary[1]);
         const float threshold = std::max(compression_threshold, 1.f / difference);
@@ -943,7 +946,7 @@ ToneScaleParams init_ToneScaleParams(float peakLuminance)
     const float g_ip = 0.5f * (c_t + sqrt(c_t * (c_t + 4.f * t_1)));
     const float g_ipp2 = -(m_1 * powf((g_ip/m),(1.f/g))) / (powf(g_ip/m , 1.f/g)-1.f);
     const float w_2 = c / g_ipp2;
-    const float s_2 = w_2 * m_1;
+    const float s_2 = w_2 * m_1 * reference_luminance;
     const float u_2 = powf((r_hit/m_1)/((r_hit/m_1) + w_2), g);
     const float m_2 = m_1 / u_2;
 
@@ -973,11 +976,11 @@ ChromaCompressParams init_ChromaCompressParams(float peakLuminance)
     const float compr = chroma_compress + (chroma_compress * chroma_compress_fact) * log_peak;
     const float sat = std::max(0.2f, chroma_expand - (chroma_expand * chroma_expand_fact) * log_peak);
     const float sat_thr = chroma_expand_thr / tsParams.n;
-    const float model_gamma = 1.f / (surround[1] * (1.48f + sqrt(Y_b / L_A)));
+    const float model_gamma_inv = 1.f / model_gamma();
 
     ChromaCompressParams params{};
     params.limit_J_max = limit_J_max;
-    params.model_gamma = model_gamma;
+    params.model_gamma_inv = model_gamma_inv;
     params.sat = sat;
     params.sat_thr = sat_thr;
     params.compr = compr;
@@ -992,11 +995,11 @@ GamutCompressParams init_GamutCompressParams(float peakLuminance, const Primarie
     const JMhParams inputJMhParams = init_JMhParams(ACES_AP0::primaries);
 
     float limit_J_max = Y_to_J(peakLuminance, inputJMhParams);
-    float mid_J = Y_to_J(tsParams.c_t * 100.f, inputJMhParams);
+    float mid_J = Y_to_J(tsParams.c_t * reference_luminance, inputJMhParams);
 
     // Calculated chroma compress variables
     const float log_peak = log10( tsParams.n / tsParams.n_r);
-    const float model_gamma = 1.f / (surround[1] * (1.48f + sqrt(Y_b / L_A)));
+    const float model_gamma_inv = 1.f / model_gamma();
     const float focus_dist = focus_distance + focus_distance * focus_distance_scaling * log_peak;
     const float lower_hull_gamma =  1.14f + 0.07f * log_peak;
 
@@ -1005,7 +1008,7 @@ GamutCompressParams init_GamutCompressParams(float peakLuminance, const Primarie
     GamutCompressParams params{};
     params.limit_J_max = limit_J_max;
     params.mid_J = mid_J;
-    params.model_gamma = model_gamma;
+    params.model_gamma_inv = model_gamma_inv;
     params.focus_dist = focus_dist;
     params.lower_hull_gamma = lower_hull_gamma;
     params.reach_m_table = make_reach_m_table(ACES_AP1::primaries, peakLuminance);
diff --git a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
index bc9a2f582c..5dcd9eaabe 100644
--- a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
@@ -544,7 +544,7 @@ void _Add_Tonescale_Compress_Fwd_Shader(
 
     // Tonescale applied in Y (convert to and from J)
     ss.newLine() << ss.floatDecl("A") << " = " << p.A_w_J << " * pow(abs(J) / 100.0, 1.0 / (" << p.cz << "));";
-    ss.newLine() << ss.floatDecl("Y") << " = sign(J) * pow((27.13 * A) / (400.0 - A), 1.0 / 0.42) / " << p.F_L_n << " / 100.0;";
+    ss.newLine() << ss.floatDecl("Y") << " = sign(J) * pow((27.13 * A) / (400.0 - A), 1.0 / 0.42) / " << p.F_L_n << ";";
 
     ss.newLine() << ss.floatDecl("f") << " = " << t.m_2  << " * pow(max(0.0, Y) / (Y + " << t.s_2 << "), " << t.g << ");";
     ss.newLine() << ss.floatDecl("Y_ts") << " = max(0.0, f * f / (f + " << t.t_1 << ")) * " << t.n_r << ";";
@@ -582,8 +582,8 @@ void _Add_Tonescale_Compress_Fwd_Shader(
     ss.newLine() << "}";
 
     ss.newLine() << ss.floatDecl("reachM") << " = " << reachName << "_sample(h);";
-    ss.newLine() << ss.floatDecl("limit") << " = pow(nJ, " << c.model_gamma << ") * reachM / Mnorm;";
-    ss.newLine() << "M_cp = M * pow(J_ts / J, " << c.model_gamma << ");";
+    ss.newLine() << ss.floatDecl("limit") << " = pow(nJ, " << c.model_gamma_inv << ") * reachM / Mnorm;";
+    ss.newLine() << "M_cp = M * pow(J_ts / J, " << c.model_gamma_inv << ");";
     ss.newLine() << "M_cp = M_cp / Mnorm;";
 
     ss.newLine() << "M_cp = limit - " << toeName << "(limit - M_cp, limit - 0.001, snJ * " << c.sat << ", sqrt(nJ * nJ + " << c.sat_thr << "));";
@@ -621,7 +621,7 @@ void _Add_Tonescale_Compress_Inv_Shader(
     ss.newLine() << ss.floatDecl("ht") << " = (Z + sqrt(Z * (4.0 * " << t.t_1 << " + Z))) / 2.0;";
     ss.newLine() << ss.floatDecl("Y") << " = " << t.s_2 << " / (pow((" << t.m_2 << " / ht), (1.0 / " << t.g << ")) - 1.0);";
 
-    ss.newLine() << ss.floatDecl("F_L_Y") << " = pow(" << p.F_L_n << " * abs(Y * 100.0), 0.42);";
+    ss.newLine() << ss.floatDecl("F_L_Y") << " = pow(" << p.F_L_n << " * abs(Y), 0.42);";
     ss.newLine() << ss.floatDecl("J") << " = sign(Y) * 100.0 * pow(((400.0 * F_L_Y) / (27.13 + F_L_Y)) / " << p.A_w_J << ", " << p.cz << ");";
 
     // ChromaCompress
@@ -654,13 +654,13 @@ void _Add_Tonescale_Compress_Inv_Shader(
     ss.newLine() << "}";
 
     ss.newLine() << ss.floatDecl("reachM") << " = " << reachName << "_sample(h);";
-    ss.newLine() << ss.floatDecl("limit") << " = pow(nJ, " << c.model_gamma << ") * reachM / Mnorm;";
+    ss.newLine() << ss.floatDecl("limit") << " = pow(nJ, " << c.model_gamma_inv << ") * reachM / Mnorm;";
 
     ss.newLine() << "M = M_cp / Mnorm;";
     ss.newLine() << "M = " << toeName << "(M, limit, nJ * " << c.compr << ", snJ);";
     ss.newLine() << "M = limit - " << toeName << "(limit - M, limit - 0.001, snJ * " << c.sat << ", sqrt(nJ * nJ + " << c.sat_thr << "));";
     ss.newLine() << "M = M * Mnorm;";
-    ss.newLine() << "M = M * pow(J_ts / J, " << -c.model_gamma << ");";
+    ss.newLine() << "M = M * pow(J_ts / J, " << -c.model_gamma_inv << ");";
 
     ss.dedent();
     ss.newLine() << "}";
@@ -1113,7 +1113,7 @@ std::string _Add_Reach_Boundary_func(
     ss.dedent();
     ss.newLine() << "}";
 
-    ss.newLine() << ss.floatDecl("boundary") << " = " << g.limit_J_max << " * pow(intersectJ / " << g.limit_J_max << ", " << g.model_gamma << ") * reachMaxM / (" << g.limit_J_max << " - slope * reachMaxM);";
+    ss.newLine() << ss.floatDecl("boundary") << " = " << g.limit_J_max << " * pow(intersectJ / " << g.limit_J_max << ", " << g.model_gamma_inv << ") * reachMaxM / (" << g.limit_J_max << " - slope * reachMaxM);";
 
     ss.newLine() << "return " << ss.float3Const("J", "boundary", "h") << ";";
 

From 590b3eba59249ce11c4efb3170610ba02158aebd Mon Sep 17 00:00:00 2001
From: Kevin Wheatley <kevin.wheatley@framestore.com>
Date: Tue, 7 Jan 2025 16:49:23 +0000
Subject: [PATCH 19/70] Migrate init steps performed within other init
 functions to the top level to avoid repeated initialization of precomputed
 values.

Signed-off-by: Kevin Wheatley <kevin.wheatley@framestore.com>
---
 .../ops/fixedfunction/ACES2/Common.h          |  12 +-
 .../ops/fixedfunction/ACES2/Transform.cpp     | 164 +++++++++---------
 .../ops/fixedfunction/ACES2/Transform.h       |  14 +-
 .../ops/fixedfunction/FixedFunctionOpCPU.cpp  |  33 ++--
 .../ops/fixedfunction/FixedFunctionOpGPU.cpp  | 147 +++++++++-------
 5 files changed, 199 insertions(+), 171 deletions(-)

diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h b/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
index eb7f569b35..eeecf8c953 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
@@ -57,28 +57,30 @@ struct ToneScaleParams
     float s_2;
     float u_2;
     float m_2;
+    float log_peak;
 };
 
-struct ChromaCompressParams
+struct SharedCompressionParameters
 {
     float limit_J_max;
     float model_gamma_inv;
+    Table1D reach_m_table;
+};
+
+struct ChromaCompressParams
+{
     float sat;
     float sat_thr;
     float compr;
-    Table1D reach_m_table;
     float chroma_compress_scale;
     static constexpr float cusp_mid_blend = 1.3f;
 };
 
 struct GamutCompressParams
 {
-    float limit_J_max;
     float mid_J;
-    float model_gamma_inv;
     float focus_dist;
     float lower_hull_gamma;
-    Table1D reach_m_table;
     Table3D gamut_cusp_table;
     Table1D upper_hull_gamma_table;
 };
diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
index 6b0aa60abc..710fa46e5b 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
@@ -333,7 +333,7 @@ float tonescale_inv(const float J_ts, const JMhParams &p, const ToneScaleParams
     return std::copysign(J, Y_ts);
 }
 
-f3 tonescale_chroma_compress_fwd(const f3 &JMh, const JMhParams &p, const ToneScaleParams &pt, const ChromaCompressParams &pc)
+f3 tonescale_chroma_compress_fwd(const f3 &JMh, const JMhParams &p, const ToneScaleParams &pt, const SharedCompressionParameters &ps, const ChromaCompressParams &pc)
 {
     const float J = JMh[0];
     const float M = JMh[1];
@@ -346,12 +346,12 @@ f3 tonescale_chroma_compress_fwd(const f3 &JMh, const JMhParams &p, const ToneSc
 
     if (M != 0.0)
     {
-        const float nJ = J_ts / pc.limit_J_max;
+        const float nJ = J_ts / ps.limit_J_max;
         const float snJ = std::max(0.f, 1.f - nJ);
         const float Mnorm = chroma_compress_norm(h, pc.chroma_compress_scale);
-        const float limit = powf(nJ, pc.model_gamma_inv) * reach_m_from_table(h, pc.reach_m_table) / Mnorm;
+        const float limit = powf(nJ, ps.model_gamma_inv) * reach_m_from_table(h, ps.reach_m_table) / Mnorm;
 
-        M_cp = M * powf(J_ts / J, pc.model_gamma_inv);
+        M_cp = M * powf(J_ts / J, ps.model_gamma_inv);
         M_cp = M_cp / Mnorm;
         M_cp = limit - toe_fwd(limit - M_cp, limit - 0.001f, snJ * pc.sat, sqrt(nJ * nJ + pc.sat_thr));
         M_cp = toe_fwd(M_cp, limit, nJ * pc.compr, snJ);
@@ -361,7 +361,7 @@ f3 tonescale_chroma_compress_fwd(const f3 &JMh, const JMhParams &p, const ToneSc
     return {J_ts, M_cp, h};
 }
 
-f3 tonescale_chroma_compress_inv(const f3 &JMh, const JMhParams &p, const ToneScaleParams &pt, const ChromaCompressParams &pc)
+f3 tonescale_chroma_compress_inv(const f3 &JMh, const JMhParams &p, const ToneScaleParams &pt, const SharedCompressionParameters &ps, const ChromaCompressParams &pc)
 {
     const float J_ts = JMh[0];
     const float M_cp = JMh[1];
@@ -374,16 +374,16 @@ f3 tonescale_chroma_compress_inv(const f3 &JMh, const JMhParams &p, const ToneSc
 
     if (M_cp != 0.0)
     {
-        const float nJ = J_ts / pc.limit_J_max;
+        const float nJ = J_ts / ps.limit_J_max;
         const float snJ = std::max(0.f, 1.f - nJ);
         const float Mnorm = chroma_compress_norm(h, pc.chroma_compress_scale);
-        const float limit = powf(nJ, pc.model_gamma_inv) * reach_m_from_table(h, pc.reach_m_table) / Mnorm;
+        const float limit = powf(nJ, ps.model_gamma_inv) * reach_m_from_table(h, ps.reach_m_table) / Mnorm;
 
         M = M_cp / Mnorm;
         M = toe_inv(M, limit, nJ * pc.compr, snJ);
         M = limit - toe_inv(limit - M, limit - 0.001f, snJ * pc.sat, sqrt(nJ * nJ + pc.sat_thr));
         M = M * Mnorm;
-        M = M * powf(J_ts / J, -pc.model_gamma_inv);
+        M = M * powf(J_ts / J, -ps.model_gamma_inv);
     }
 
     return {J, M, h};
@@ -397,7 +397,6 @@ inline float model_gamma(void)
 
 JMhParams init_JMhParams(const Primaries &prims)
 {
-    JMhParams p;
     const m33f cone_response_to_Aab = {
         2.0f, 1.0f, 1.0f / 20.0f,
         1.0f, -12.0f / 11.0f, 1.0f / 11.0f,
@@ -417,8 +416,8 @@ JMhParams init_JMhParams(const Primaries &prims)
     constexpr float K4 = K * K * K * K;
     const float F_L = 0.2f * K4 * (5.f * L_A) + 0.1f * powf((1.f - K4), 2.f) * powf(5.f * L_A, 1.f/3.f);
 
-    p.F_L_n = F_L / reference_luminance;
-    p.cz    = model_gamma();
+    const float F_L_n = F_L / reference_luminance;
+    const float cz    = model_gamma();
 
     const f3 D_RGB = {
         Y_W / RGB_w[0],
@@ -433,33 +432,41 @@ JMhParams init_JMhParams(const Primaries &prims)
     };
 
     const f3 RGB_AW = {
-        post_adaptation_cone_response_compression_fwd(RGB_WC[0], p.F_L_n),
-        post_adaptation_cone_response_compression_fwd(RGB_WC[1], p.F_L_n),
-        post_adaptation_cone_response_compression_fwd(RGB_WC[2], p.F_L_n)
+        post_adaptation_cone_response_compression_fwd(RGB_WC[0], F_L_n),
+        post_adaptation_cone_response_compression_fwd(RGB_WC[1], F_L_n),
+        post_adaptation_cone_response_compression_fwd(RGB_WC[2], F_L_n)
     };
 
-    p.A_w   = cone_response_to_Aab[0] * RGB_AW[0] + cone_response_to_Aab[1] * RGB_AW[1] + cone_response_to_Aab[2] * RGB_AW[2];
-    p.A_w_J = _post_adaptation_cone_response_compression_fwd(reference_luminance, p.F_L_n);
+    const float A_w   = cone_response_to_Aab[0] * RGB_AW[0] + cone_response_to_Aab[1] * RGB_AW[1] + cone_response_to_Aab[2] * RGB_AW[2];
+    const float A_w_J = _post_adaptation_cone_response_compression_fwd(reference_luminance, F_L_n);
 
     // Note we are prescaling the CAM16 LMS responses to directly provide for chromatic adaptation.
     const m33f MATRIX_RGB_to_CAM16 = mult_f33_f33(RGBtoRGB_f33(prims, CAM16::primaries), scale_f33(Identity_M33, f3_from_f(reference_luminance)));
-    p.MATRIX_RGB_to_CAM16_c = mult_f33_f33(scale_f33(Identity_M33, D_RGB), MATRIX_RGB_to_CAM16);
-    p.MATRIX_CAM16_c_to_RGB = invert_f33(p.MATRIX_RGB_to_CAM16_c);
+    const m33f MATRIX_RGB_to_CAM16_c = mult_f33_f33(scale_f33(Identity_M33, D_RGB), MATRIX_RGB_to_CAM16);
+    const m33f MATRIX_CAM16_c_to_RGB = invert_f33(MATRIX_RGB_to_CAM16_c);
 
-    p.MATRIX_cone_response_to_Aab = {
-        cone_response_to_Aab[0] / p.A_w,              cone_response_to_Aab[1] / p.A_w,              cone_response_to_Aab[2] / p.A_w,
+    const m33f MATRIX_cone_response_to_Aab = {
+        cone_response_to_Aab[0] / A_w,                cone_response_to_Aab[1] / A_w,                cone_response_to_Aab[2] / A_w,
         cone_response_to_Aab[3] * 43.f * surround[2], cone_response_to_Aab[4] * 43.f * surround[2], cone_response_to_Aab[5] * 43.f * surround[2],
         cone_response_to_Aab[6] * 43.f * surround[2], cone_response_to_Aab[7] * 43.f * surround[2], cone_response_to_Aab[8] * 43.f * surround[2],
     };
-    p.MATRIX_Aab_to_cone_response = invert_f33(p.MATRIX_cone_response_to_Aab);
-
+    const m33f MATRIX_Aab_to_cone_response = invert_f33(MATRIX_cone_response_to_Aab);
+
+    JMhParams p = {
+        F_L_n,
+        cz,
+        A_w,
+        A_w_J,
+        MATRIX_RGB_to_CAM16_c,
+        MATRIX_CAM16_c_to_RGB,
+        MATRIX_cone_response_to_Aab,
+        MATRIX_Aab_to_cone_response
+    };
    return p;
 }
 
-Table3D make_gamut_table(const Primaries &P, float peakLuminance)
+Table3D make_gamut_table(const JMhParams &params, float peakLuminance)
 {
-    const JMhParams params = init_JMhParams(P);
-
     Table3D gamutCuspTableUnsorted{};
     for (int i = 0; i < gamutCuspTableUnsorted.size; i++)
     {
@@ -511,9 +518,8 @@ bool any_below_zero(const f3 &rgb)
     return (rgb[0] < 0. || rgb[1] < 0. || rgb[2] < 0.);
 }
 
-Table1D make_reach_m_table(const Primaries &P, float peakLuminance)
+Table1D make_reach_m_table(const JMhParams &params, float peakLuminance)
 {
-    const JMhParams params = init_JMhParams(P);
     const float limit_J_max = Y_to_J(peakLuminance, params);
 
     Table1D gamutReachTable{};
@@ -744,26 +750,26 @@ float compression_function(
     return vCompressed;
 }
 
-f3 compressGamut(const f3 &JMh, float Jx, const ACES2::GamutCompressParams& p, const f2& JMcusp, bool invert)
+f3 compressGamut(const f3 &JMh, float Jx, const ACES2::SharedCompressionParameters& sp, const ACES2::GamutCompressParams& p, const f2& JMcusp, bool invert)
 {
     const float J = JMh[0];
     const float M = JMh[1];
     const float h = JMh[2];
 
-    if (M < 0.0001f || J > p.limit_J_max)
+    if (M < 0.0001f || J > sp.limit_J_max)
     {
         return {J, 0.f, h};
     }
     else
     {
         const f2 project_from = {J, M};
-        const float focusJ = lerpf(JMcusp[0], p.mid_J, std::min(1.f, cusp_mid_blend - (JMcusp[0] / p.limit_J_max)));
-        const float slope_gain = p.limit_J_max * p.focus_dist * get_focus_gain(Jx, JMcusp[0], p.limit_J_max);
+        const float focusJ = lerpf(JMcusp[0], p.mid_J, std::min(1.f, cusp_mid_blend - (JMcusp[0] / sp.limit_J_max)));
+        const float slope_gain = sp.limit_J_max * p.focus_dist * get_focus_gain(Jx, JMcusp[0], sp.limit_J_max);
 
         const float gamma_top = hue_dependent_upper_hull_gamma(h, p.upper_hull_gamma_table);
         const float gamma_bottom = p.lower_hull_gamma;
 
-        const f3 boundaryReturn = find_gamut_boundary_intersection({J, M, h}, JMcusp, focusJ, p.limit_J_max, slope_gain, gamma_top, gamma_bottom);
+        const f3 boundaryReturn = find_gamut_boundary_intersection({J, M, h}, JMcusp, focusJ, sp.limit_J_max, slope_gain, gamma_top, gamma_bottom);
         const f2 JMboundary = {boundaryReturn[0], boundaryReturn[1]};
         const f2 project_to = {boundaryReturn[2], 0.f};
 
@@ -772,7 +778,7 @@ f3 compressGamut(const f3 &JMh, float Jx, const ACES2::GamutCompressParams& p, c
             return {J, 0.f, h};
         }
 
-        const f3 reachBoundary = get_reach_boundary(JMboundary[0], JMboundary[1], h, JMcusp, focusJ, p.limit_J_max, p.model_gamma_inv, p.focus_dist, p.reach_m_table);
+        const f3 reachBoundary = get_reach_boundary(JMboundary[0], JMboundary[1], h, JMcusp, focusJ, sp.limit_J_max, sp.model_gamma_inv, p.focus_dist, sp.reach_m_table); // TODO
 
         const float difference = std::max(1.0001f, reachBoundary[1] / JMboundary[1]);
         const float threshold = std::max(compression_threshold, 1.f / difference);
@@ -789,24 +795,24 @@ f3 compressGamut(const f3 &JMh, float Jx, const ACES2::GamutCompressParams& p, c
     }
 }
 
-f3 gamut_compress_fwd(const f3 &JMh, const GamutCompressParams &p)
+f3 gamut_compress_fwd(const f3 &JMh, const SharedCompressionParameters &sp, const GamutCompressParams &p)
 {
     const f2 JMcusp = cusp_from_table(JMh[2], p.gamut_cusp_table); // TODO: call once and pass this into compress functions ?
-    return compressGamut(JMh, JMh[0], p, JMcusp, false);
+    return compressGamut(JMh, JMh[0], sp, p, JMcusp, false);
 }
 
-f3 gamut_compress_inv(const f3 &JMh, const GamutCompressParams &p)
+f3 gamut_compress_inv(const f3 &JMh, const SharedCompressionParameters &sp, const GamutCompressParams &p)
 {
     const f2 JMcusp = cusp_from_table(JMh[2], p.gamut_cusp_table); // TODO: call once and pass this into compress functions ?
     float Jx = JMh[0];
 
     // Analytic inverse below threshold
-    if (Jx > lerpf(JMcusp[0], p.limit_J_max, focus_gain_blend))
+    if (Jx > lerpf(JMcusp[0], sp.limit_J_max, focus_gain_blend))
     {
         // Approximation above threshold
-        Jx = compressGamut(JMh, Jx, p, JMcusp, true)[0];
+        Jx = compressGamut(JMh, Jx, sp, p, JMcusp, true)[0];
     }
-    return compressGamut(JMh, Jx, p, JMcusp, true);
+    return compressGamut(JMh, Jx, sp, p, JMcusp, true);
 }
 
 bool evaluate_gamma_fit(
@@ -949,6 +955,7 @@ ToneScaleParams init_ToneScaleParams(float peakLuminance)
     const float s_2 = w_2 * m_1 * reference_luminance;
     const float u_2 = powf((r_hit/m_1)/((r_hit/m_1) + w_2), g);
     const float m_2 = m_1 / u_2;
+    const float log_peak = log10( n / n_r);
 
     ToneScaleParams TonescaleParams = {
         n,
@@ -958,70 +965,61 @@ ToneScaleParams init_ToneScaleParams(float peakLuminance)
         c_t,
         s_2,
         u_2,
-        m_2
+        m_2,
+        log_peak
     };
 
     return TonescaleParams;
 }
 
-ChromaCompressParams init_ChromaCompressParams(float peakLuminance)
+SharedCompressionParameters init_SharedCompressionParams(float peakLuminance, const JMhParams &inputJMhParams)
 {
-    const ToneScaleParams tsParams = init_ToneScaleParams(peakLuminance);
-    const JMhParams inputJMhParams = init_JMhParams(ACES_AP0::primaries);
+    const float limit_J_max = Y_to_J(peakLuminance, inputJMhParams);
+    const float model_gamma_inv = 1.f / model_gamma();
+    const JMhParams compressionGamut = init_JMhParams(ACES_AP1::primaries);
 
-    float limit_J_max = Y_to_J(peakLuminance, inputJMhParams);
+    SharedCompressionParameters params = {
+        limit_J_max,
+        model_gamma_inv,
+        make_reach_m_table(compressionGamut, peakLuminance)
+    };
+    return params;
+}
 
+ChromaCompressParams init_ChromaCompressParams(float peakLuminance, const ToneScaleParams &tsParams)
+{
     // Calculated chroma compress variables
-    const float log_peak = log10( tsParams.n / tsParams.n_r);
-    const float compr = chroma_compress + (chroma_compress * chroma_compress_fact) * log_peak;
-    const float sat = std::max(0.2f, chroma_expand - (chroma_expand * chroma_expand_fact) * log_peak);
+    const float compr = chroma_compress + (chroma_compress * chroma_compress_fact) * tsParams.log_peak;
+    const float sat = std::max(0.2f, chroma_expand - (chroma_expand * chroma_expand_fact) * tsParams.log_peak);
     const float sat_thr = chroma_expand_thr / tsParams.n;
-    const float model_gamma_inv = 1.f / model_gamma();
+    const float chroma_compress_scale =  powf(0.03379f * peakLuminance, 0.30596f) - 0.45135f;
 
-    ChromaCompressParams params{};
-    params.limit_J_max = limit_J_max;
-    params.model_gamma_inv = model_gamma_inv;
-    params.sat = sat;
-    params.sat_thr = sat_thr;
-    params.compr = compr;
-    params.chroma_compress_scale = powf(0.03379f * peakLuminance, 0.30596f) - 0.45135f;
-    params.reach_m_table = make_reach_m_table(ACES_AP1::primaries, peakLuminance);
+    ChromaCompressParams params = {
+        sat,
+        sat_thr,
+        compr,
+        chroma_compress_scale
+    };
     return params;
 }
 
-GamutCompressParams init_GamutCompressParams(float peakLuminance, const Primaries &limitingPrimaries)
+GamutCompressParams init_GamutCompressParams(float peakLuminance, const JMhParams &inputJMhParams, const JMhParams &limitJMhParams,
+                                             const ToneScaleParams &tsParams, const SharedCompressionParameters &shParams)
 {
-    const ToneScaleParams tsParams = init_ToneScaleParams(peakLuminance);
-    const JMhParams inputJMhParams = init_JMhParams(ACES_AP0::primaries);
-
-    float limit_J_max = Y_to_J(peakLuminance, inputJMhParams);
-    float mid_J = Y_to_J(tsParams.c_t * reference_luminance, inputJMhParams);
+    float mid_J = Y_to_J(tsParams.c_t * reference_luminance, inputJMhParams); // TODO
 
     // Calculated chroma compress variables
-    const float log_peak = log10( tsParams.n / tsParams.n_r);
-    const float model_gamma_inv = 1.f / model_gamma();
-    const float focus_dist = focus_distance + focus_distance * focus_distance_scaling * log_peak;
-    const float lower_hull_gamma =  1.14f + 0.07f * log_peak;
-
-    const JMhParams limitJMhParams = init_JMhParams(limitingPrimaries);
-
-    GamutCompressParams params{};
-    params.limit_J_max = limit_J_max;
-    params.mid_J = mid_J;
-    params.model_gamma_inv = model_gamma_inv;
-    params.focus_dist = focus_dist;
-    params.lower_hull_gamma = lower_hull_gamma;
-    params.reach_m_table = make_reach_m_table(ACES_AP1::primaries, peakLuminance);
-    params.gamut_cusp_table = make_gamut_table(limitingPrimaries, peakLuminance);
-    params.upper_hull_gamma_table = make_upper_hull_gamma(
-        params.gamut_cusp_table,
-        peakLuminance,
-        limit_J_max,
+    const float focus_dist = focus_distance + focus_distance * focus_distance_scaling * tsParams.log_peak;
+    const float lower_hull_gamma =  1.14f + 0.07f * tsParams.log_peak;
+
+    GamutCompressParams params = {
         mid_J,
         focus_dist,
         lower_hull_gamma,
-        limitJMhParams);
-
+        make_gamut_table(limitJMhParams, peakLuminance),
+        make_upper_hull_gamma(params.gamut_cusp_table, peakLuminance, shParams.limit_J_max,
+                              mid_J, focus_dist, lower_hull_gamma, limitJMhParams)
+    };
     return params;
 }
 
diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.h b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.h
index ff3f969e5a..9d41ad4225 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.h
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.h
@@ -14,17 +14,19 @@ namespace ACES2
 
 JMhParams init_JMhParams(const Primaries &P);
 ToneScaleParams init_ToneScaleParams(float peakLuminance);
-ChromaCompressParams init_ChromaCompressParams(float peakLuminance);
-GamutCompressParams init_GamutCompressParams(float peakLuminance, const Primaries &P);
+SharedCompressionParameters init_SharedCompressionParams(float peakLuminance, const JMhParams &inputJMhParams);
+ChromaCompressParams init_ChromaCompressParams(float peakLuminance, const ToneScaleParams &tsParams);
+GamutCompressParams init_GamutCompressParams(float peakLuminance, const JMhParams &inputJMhParams, const JMhParams &limitJMhParams,
+                                             const ToneScaleParams &tsParams, const SharedCompressionParameters &shParams);
 
 f3 RGB_to_JMh(const f3 &RGB, const JMhParams &p);
 f3 JMh_to_RGB(const f3 &JMh, const JMhParams &p);
 
-f3 tonescale_chroma_compress_fwd(const f3 &JMh, const JMhParams &p, const ToneScaleParams &pt, const ChromaCompressParams &pc);
-f3 tonescale_chroma_compress_inv(const f3 &JMh, const JMhParams &p, const ToneScaleParams &pt, const ChromaCompressParams &pc);
+f3 tonescale_chroma_compress_fwd(const f3 &JMh, const JMhParams &p, const ToneScaleParams &pt, const SharedCompressionParameters &ps, const ChromaCompressParams &pc);
+f3 tonescale_chroma_compress_inv(const f3 &JMh, const JMhParams &p, const ToneScaleParams &pt, const SharedCompressionParameters &ps, const ChromaCompressParams &pc);
 
-f3 gamut_compress_fwd(const f3 &JMh, const GamutCompressParams &p);
-f3 gamut_compress_inv(const f3 &JMh, const GamutCompressParams &p);
+f3 gamut_compress_fwd(const f3 &JMh, const SharedCompressionParameters &ps, const GamutCompressParams &p);
+f3 gamut_compress_inv(const f3 &JMh, const SharedCompressionParameters &ps, const GamutCompressParams &p);
 
 
 } // namespace ACES2
diff --git a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpCPU.cpp b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpCPU.cpp
index e9e9774338..7af8f0a0b3 100644
--- a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpCPU.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpCPU.cpp
@@ -147,6 +147,7 @@ class Renderer_ACES_OutputTransform20 : public OpCPU
     ACES2::JMhParams m_pIn;
     ACES2::JMhParams m_pOut;
     ACES2::ToneScaleParams m_t;
+    ACES2::SharedCompressionParameters m_s;
     ACES2::ChromaCompressParams m_c;
     ACES2::GamutCompressParams m_g;
 };
@@ -184,6 +185,7 @@ class Renderer_ACES_TONESCALE_COMPRESS_20 : public OpCPU
     bool m_fwd;
     ACES2::JMhParams m_p;
     ACES2::ToneScaleParams m_t;
+    ACES2::SharedCompressionParameters m_s;
     ACES2::ChromaCompressParams m_c;
 };
 
@@ -201,6 +203,7 @@ class Renderer_ACES_GAMUT_COMPRESS_20 : public OpCPU
 
 protected:
     bool m_fwd;
+    ACES2::SharedCompressionParameters m_s;
     ACES2::GamutCompressParams m_g;
 };
 
@@ -1023,8 +1026,9 @@ Renderer_ACES_OutputTransform20::Renderer_ACES_OutputTransform20(ConstFixedFunct
     m_pIn = ACES2::init_JMhParams(ACES_AP0::primaries);
     m_pOut = ACES2::init_JMhParams(lim_primaries);
     m_t = ACES2::init_ToneScaleParams(peak_luminance);
-    m_c = ACES2::init_ChromaCompressParams(peak_luminance);
-    m_g = ACES2::init_GamutCompressParams(peak_luminance, lim_primaries);
+    m_s = ACES2::init_SharedCompressionParams(peak_luminance, m_pIn);
+    m_c = ACES2::init_ChromaCompressParams(peak_luminance, m_t);
+    m_g = ACES2::init_GamutCompressParams(peak_luminance, m_pIn, m_pOut, m_t, m_s);
 }
 
 void Renderer_ACES_OutputTransform20::apply(const void * inImg, void * outImg, long numPixels) const
@@ -1048,8 +1052,8 @@ void Renderer_ACES_OutputTransform20::fwd(const void * inImg, void * outImg, lon
     {
         const ACES2::f3 RGBIn {in[0], in[1], in[2]};
         const ACES2::f3 JMh           = ACES2::RGB_to_JMh(RGBIn, m_pIn);
-        const ACES2::f3 tonemappedJMh = ACES2::tonescale_chroma_compress_fwd(JMh, m_pIn, m_t, m_c);
-        const ACES2::f3 compressedJMh = ACES2::gamut_compress_fwd(tonemappedJMh, m_g);
+        const ACES2::f3 tonemappedJMh = ACES2::tonescale_chroma_compress_fwd(JMh, m_pIn, m_t, m_s, m_c);
+        const ACES2::f3 compressedJMh = ACES2::gamut_compress_fwd(tonemappedJMh, m_s, m_g);
         const ACES2::f3 RGBOut        = ACES2::JMh_to_RGB(compressedJMh, m_pOut);
 
         out[0] = RGBOut[0];
@@ -1071,8 +1075,8 @@ void Renderer_ACES_OutputTransform20::inv(const void * inImg, void * outImg, lon
     {
         const ACES2::f3 RGBout {in[0], in[1], in[2]};
         const ACES2::f3 compressedJMh = ACES2::RGB_to_JMh(RGBout, m_pOut);
-        const ACES2::f3 tonemappedJMh = ACES2::gamut_compress_inv(compressedJMh, m_g);
-        const ACES2::f3 JMh           = ACES2::tonescale_chroma_compress_inv(tonemappedJMh, m_pIn, m_t, m_c);
+        const ACES2::f3 tonemappedJMh = ACES2::gamut_compress_inv(compressedJMh, m_s, m_g);
+        const ACES2::f3 JMh           = ACES2::tonescale_chroma_compress_inv(tonemappedJMh, m_pIn, m_t, m_s, m_c);
         const ACES2::f3 RGBin         = ACES2::JMh_to_RGB(JMh, m_pIn);
 
         out[0] = RGBin[0];
@@ -1168,7 +1172,8 @@ Renderer_ACES_TONESCALE_COMPRESS_20::Renderer_ACES_TONESCALE_COMPRESS_20(ConstFi
 
     m_p = ACES2::init_JMhParams(ACES_AP0::primaries);
     m_t = ACES2::init_ToneScaleParams(peak_luminance);
-    m_c = ACES2::init_ChromaCompressParams(peak_luminance);
+    m_s = ACES2::init_SharedCompressionParams(peak_luminance, m_p);
+    m_c = ACES2::init_ChromaCompressParams(peak_luminance, m_t);
 }
 
 void Renderer_ACES_TONESCALE_COMPRESS_20::apply(const void * inImg, void * outImg, long numPixels) const
@@ -1190,7 +1195,7 @@ void Renderer_ACES_TONESCALE_COMPRESS_20::fwd(const void * inImg, void * outImg,
 
     for(long idx=0; idx<numPixels; ++idx)
     {
-        const ACES2::f3 JMh = ACES2::tonescale_chroma_compress_fwd({in[0], in[1], in[2]}, m_p, m_t, m_c);
+        const ACES2::f3 JMh = ACES2::tonescale_chroma_compress_fwd({in[0], in[1], in[2]}, m_p, m_t, m_s, m_c);
 
         out[0] = JMh[0];
         out[1] = JMh[1];
@@ -1209,7 +1214,7 @@ void Renderer_ACES_TONESCALE_COMPRESS_20::inv(const void * inImg, void * outImg,
 
     for(long idx=0; idx<numPixels; ++idx)
     {
-        const ACES2::f3 JMh = ACES2::tonescale_chroma_compress_inv({in[0], in[1], in[2]}, m_p, m_t, m_c);
+        const ACES2::f3 JMh = ACES2::tonescale_chroma_compress_inv({in[0], in[1], in[2]}, m_p, m_t, m_s, m_c);
 
         out[0] = JMh[0];
         out[1] = JMh[1];
@@ -1244,7 +1249,11 @@ Renderer_ACES_GAMUT_COMPRESS_20::Renderer_ACES_GAMUT_COMPRESS_20(ConstFixedFunct
         {white_x, white_y}
     };
 
-    m_g = ACES2::init_GamutCompressParams(peakLuminance, limitingPrimaries);
+    const ACES2::JMhParams pIn = ACES2::init_JMhParams(ACES_AP0::primaries);
+    const ACES2::JMhParams pLim = ACES2::init_JMhParams(limitingPrimaries);
+    const ACES2::ToneScaleParams t = ACES2::init_ToneScaleParams(peakLuminance);
+    m_s = ACES2::init_SharedCompressionParams(peakLuminance, pIn);
+    m_g = ACES2::init_GamutCompressParams(peakLuminance, pIn, pLim, t, m_s);
 }
 
 void Renderer_ACES_GAMUT_COMPRESS_20::apply(const void * inImg, void * outImg, long numPixels) const
@@ -1266,7 +1275,7 @@ void Renderer_ACES_GAMUT_COMPRESS_20::fwd(const void * inImg, void * outImg, lon
 
     for(long idx=0; idx<numPixels; ++idx)
     {
-        const ACES2::f3 JMh = ACES2::gamut_compress_fwd({in[0], in[1], in[2]}, m_g);
+        const ACES2::f3 JMh = ACES2::gamut_compress_fwd({in[0], in[1], in[2]}, m_s, m_g);
 
         out[0] = JMh[0];
         out[1] = JMh[1];
@@ -1285,7 +1294,7 @@ void Renderer_ACES_GAMUT_COMPRESS_20::inv(const void * inImg, void * outImg, lon
 
     for(long idx=0; idx<numPixels; ++idx)
     {
-        const ACES2::f3 JMh = ACES2::gamut_compress_inv({in[0], in[1], in[2]}, m_g);
+        const ACES2::f3 JMh = ACES2::gamut_compress_inv({in[0], in[1], in[2]}, m_s, m_g);
 
         out[0] = JMh[0];
         out[1] = JMh[1];
diff --git a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
index 5dcd9eaabe..76937c4de2 100644
--- a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
@@ -531,6 +531,7 @@ void _Add_Tonescale_Compress_Fwd_Shader(
     unsigned resourceIndex,
     const ACES2::JMhParams & p,
     const ACES2::ToneScaleParams & t,
+    const ACES2::SharedCompressionParameters & s,
     const ACES2::ChromaCompressParams & c,
     const std::string & reachName)
 {
@@ -559,7 +560,7 @@ void _Add_Tonescale_Compress_Fwd_Shader(
     ss.newLine() << "{";
     ss.indent();
 
-    ss.newLine() << ss.floatDecl("nJ") << " = J_ts / " << c.limit_J_max << ";";
+    ss.newLine() << ss.floatDecl("nJ") << " = J_ts / " << s.limit_J_max << ";";
     ss.newLine() << ss.floatDecl("snJ") << " = max(0.0, 1.0 - nJ);";
 
     // Mnorm
@@ -582,8 +583,8 @@ void _Add_Tonescale_Compress_Fwd_Shader(
     ss.newLine() << "}";
 
     ss.newLine() << ss.floatDecl("reachM") << " = " << reachName << "_sample(h);";
-    ss.newLine() << ss.floatDecl("limit") << " = pow(nJ, " << c.model_gamma_inv << ") * reachM / Mnorm;";
-    ss.newLine() << "M_cp = M * pow(J_ts / J, " << c.model_gamma_inv << ");";
+    ss.newLine() << ss.floatDecl("limit") << " = pow(nJ, " << s.model_gamma_inv << ") * reachM / Mnorm;";
+    ss.newLine() << "M_cp = M * pow(J_ts / J, " << s.model_gamma_inv << ");";
     ss.newLine() << "M_cp = M_cp / Mnorm;";
 
     ss.newLine() << "M_cp = limit - " << toeName << "(limit - M_cp, limit - 0.001, snJ * " << c.sat << ", sqrt(nJ * nJ + " << c.sat_thr << "));";
@@ -602,6 +603,7 @@ void _Add_Tonescale_Compress_Inv_Shader(
     unsigned resourceIndex,
     const ACES2::JMhParams & p,
     const ACES2::ToneScaleParams & t,
+    const ACES2::SharedCompressionParameters & s,
     const ACES2::ChromaCompressParams & c,
     const std::string & reachName)
 {
@@ -631,7 +633,7 @@ void _Add_Tonescale_Compress_Inv_Shader(
     ss.newLine() << "{";
     ss.indent();
 
-    ss.newLine() << ss.floatDecl("nJ") << " = J_ts / " << c.limit_J_max << ";";
+    ss.newLine() << ss.floatDecl("nJ") << " = J_ts / " << s.limit_J_max << ";";
     ss.newLine() << ss.floatDecl("snJ") << " = max(0.0, 1.0 - nJ);";
 
     // Mnorm
@@ -654,13 +656,13 @@ void _Add_Tonescale_Compress_Inv_Shader(
     ss.newLine() << "}";
 
     ss.newLine() << ss.floatDecl("reachM") << " = " << reachName << "_sample(h);";
-    ss.newLine() << ss.floatDecl("limit") << " = pow(nJ, " << c.model_gamma_inv << ") * reachM / Mnorm;";
+    ss.newLine() << ss.floatDecl("limit") << " = pow(nJ, " << s.model_gamma_inv << ") * reachM / Mnorm;";
 
     ss.newLine() << "M = M_cp / Mnorm;";
     ss.newLine() << "M = " << toeName << "(M, limit, nJ * " << c.compr << ", snJ);";
     ss.newLine() << "M = limit - " << toeName << "(limit - M, limit - 0.001, snJ * " << c.sat << ", sqrt(nJ * nJ + " << c.sat_thr << "));";
     ss.newLine() << "M = M * Mnorm;";
-    ss.newLine() << "M = M * pow(J_ts / J, " << -c.model_gamma_inv << ");";
+    ss.newLine() << "M = M * pow(J_ts / J, " << -s.model_gamma_inv << ");";
 
     ss.dedent();
     ss.newLine() << "}";
@@ -878,6 +880,7 @@ std::string _Add_Gamma_table(
 std::string _Add_Focus_Gain_func(
     GpuShaderCreatorRcPtr & shaderCreator,
     unsigned resourceIndex,
+    const ACES2::SharedCompressionParameters & s,
     const ACES2::GamutCompressParams & g)
 {
     // Reserve name
@@ -897,12 +900,12 @@ std::string _Add_Focus_Gain_func(
     ss.newLine() << "{";
     ss.indent();
 
-    ss.newLine() << ss.floatDecl("thr") << " = " << ss.lerp("cuspJ", std::to_string(g.limit_J_max), std::to_string(ACES2::focus_gain_blend)) << ";";
+    ss.newLine() << ss.floatDecl("thr") << " = " << ss.lerp("cuspJ", std::to_string(s.limit_J_max), std::to_string(ACES2::focus_gain_blend)) << ";";
 
     ss.newLine() << "if (J > thr)";
     ss.newLine() << "{";
     ss.indent();
-    ss.newLine() << ss.floatDecl("gain") << " = ( " << g.limit_J_max << " - thr) / max(0.0001, (" << g.limit_J_max << " - min(" << g.limit_J_max << ", J)));";
+    ss.newLine() << ss.floatDecl("gain") << " = ( " << s.limit_J_max << " - thr) / max(0.0001, (" << s.limit_J_max << " - min(" << s.limit_J_max << ", J)));";
     ss.newLine() << "return pow(log(gain)/log(10.0), 1.0 / " << ACES2::focus_adjust_gain << ") + 1.0;";
     ss.dedent();
     ss.newLine() << "}";
@@ -924,6 +927,7 @@ std::string _Add_Focus_Gain_func(
 std::string _Add_Solve_J_Intersect_func(
     GpuShaderCreatorRcPtr & shaderCreator,
     unsigned resourceIndex,
+    const ACES2::SharedCompressionParameters & s,
     const ACES2::GamutCompressParams & g)
 {
     // Reserve name
@@ -957,7 +961,7 @@ std::string _Add_Solve_J_Intersect_func(
     ss.newLine() << "else";
     ss.newLine() << "{";
     ss.indent();
-    ss.newLine() << "b = - (1.0 + M / slope_gain + " << g.limit_J_max << " * M / (focusJ * slope_gain));";
+    ss.newLine() << "b = - (1.0 + M / slope_gain + " << s.limit_J_max << " * M / (focusJ * slope_gain));";
     ss.dedent();
     ss.newLine() << "}";
 
@@ -970,7 +974,7 @@ std::string _Add_Solve_J_Intersect_func(
     ss.newLine() << "else";
     ss.newLine() << "{";
     ss.indent();
-    ss.newLine() << "c = " << g.limit_J_max << " * M / slope_gain + J;";
+    ss.newLine() << "c = " << s.limit_J_max << " * M / slope_gain + J;";
     ss.dedent();
     ss.newLine() << "}";
 
@@ -1002,6 +1006,7 @@ std::string _Add_Solve_J_Intersect_func(
 std::string _Add_Find_Gamut_Boundary_Intersection_func(
     GpuShaderCreatorRcPtr & shaderCreator,
     unsigned resourceIndex,
+    const ACES2::SharedCompressionParameters & s,
     const ACES2::GamutCompressParams & g,
     const std::string & solveJIntersectName)
 {
@@ -1037,12 +1042,12 @@ std::string _Add_Find_Gamut_Boundary_Intersection_func(
     ss.newLine() << "else";
     ss.newLine() << "{";
     ss.indent();
-    ss.newLine() << "slope = (" << g.limit_J_max << " - J_intersect_source) * (J_intersect_source - J_focus) / (J_focus * slope_gain);";
+    ss.newLine() << "slope = (" << s.limit_J_max << " - J_intersect_source) * (J_intersect_source - J_focus) / (J_focus * slope_gain);";
     ss.dedent();
     ss.newLine() << "}";
 
     ss.newLine() << ss.floatDecl("M_boundary_lower ") << " = J_intersect_cusp * pow(J_intersect_source / J_intersect_cusp, 1.0 / gamma_bottom) / (JM_cusp.r / JM_cusp.g - slope);";
-    ss.newLine() << ss.floatDecl("M_boundary_upper") << " = JM_cusp.g * (" << g.limit_J_max << " - J_intersect_cusp) * pow((" << g.limit_J_max << " - J_intersect_source) / (" << g.limit_J_max << " - J_intersect_cusp), 1.0 / gamma_top) / (slope * JM_cusp.g + " << g.limit_J_max << " - JM_cusp.r);";
+    ss.newLine() << ss.floatDecl("M_boundary_upper") << " = JM_cusp.g * (" << s.limit_J_max << " - J_intersect_cusp) * pow((" << s.limit_J_max << " - J_intersect_source) / (" << s.limit_J_max << " - J_intersect_cusp), 1.0 / gamma_top) / (slope * JM_cusp.g + " << s.limit_J_max << " - JM_cusp.r);";
 
     ss.newLine() << ss.floatDecl("smin") << " = 0.0;";
     ss.newLine() << "{";
@@ -1073,6 +1078,7 @@ std::string _Add_Find_Gamut_Boundary_Intersection_func(
 std::string _Add_Reach_Boundary_func(
     GpuShaderCreatorRcPtr & shaderCreator,
     unsigned resourceIndex,
+    const ACES2::SharedCompressionParameters & s,
     const ACES2::GamutCompressParams & g,
     const std::string & reachName,
     const std::string & getFocusGainName,
@@ -1096,7 +1102,7 @@ std::string _Add_Reach_Boundary_func(
     ss.indent();
 
     ss.newLine() << ss.floatDecl("reachMaxM") << " = " << reachName << "_sample(h);";
-    ss.newLine() << ss.floatDecl("slope_gain") << " = " << g.limit_J_max << " * " << g.focus_dist << " * " << getFocusGainName << "(J, JMcusp.r);";
+    ss.newLine() << ss.floatDecl("slope_gain") << " = " << s.limit_J_max << " * " << g.focus_dist << " * " << getFocusGainName << "(J, JMcusp.r);";
     ss.newLine() << ss.floatDecl("intersectJ") << " = " << solveJIntersectName << "(J, M, focusJ, slope_gain);";
 
     ss.newLine() << ss.floatDecl("slope") << " = 0.0;";
@@ -1109,11 +1115,11 @@ std::string _Add_Reach_Boundary_func(
     ss.newLine() << "else";
     ss.newLine() << "{";
     ss.indent();
-    ss.newLine() << "slope = (" << g.limit_J_max << " - intersectJ) * (intersectJ - focusJ) / (focusJ * slope_gain);";
+    ss.newLine() << "slope = (" << s.limit_J_max << " - intersectJ) * (intersectJ - focusJ) / (focusJ * slope_gain);";
     ss.dedent();
     ss.newLine() << "}";
 
-    ss.newLine() << ss.floatDecl("boundary") << " = " << g.limit_J_max << " * pow(intersectJ / " << g.limit_J_max << ", " << g.model_gamma_inv << ") * reachMaxM / (" << g.limit_J_max << " - slope * reachMaxM);";
+    ss.newLine() << ss.floatDecl("boundary") << " = " << s.limit_J_max << " * pow(intersectJ / " << s.limit_J_max << ", " << s.model_gamma_inv << ") * reachMaxM / (" << s.limit_J_max << " - slope * reachMaxM);";
 
     ss.newLine() << "return " << ss.float3Const("J", "boundary", "h") << ";";
 
@@ -1198,6 +1204,7 @@ std::string _Add_Compression_func(
 std::string _Add_Compress_Gamut_func(
     GpuShaderCreatorRcPtr & shaderCreator,
     unsigned resourceIndex,
+    const ACES2::SharedCompressionParameters & s,
     const ACES2::GamutCompressParams & g,
     const std::string & cuspName,
     const std::string & getFocusGainName,
@@ -1227,7 +1234,7 @@ std::string _Add_Compress_Gamut_func(
     ss.newLine() << ss.floatDecl("M") << " = JMh.g;";
     ss.newLine() << ss.floatDecl("h") << " = JMh.b;";
 
-    ss.newLine() << "if (M < 0.0001 || J > " << g.limit_J_max << ")";
+    ss.newLine() << "if (M < 0.0001 || J > " << s.limit_J_max << ")";
     ss.newLine() << "{";
     ss.indent();
     ss.newLine() << "return " << ss.float3Const("J", "0.0", "h") << ";";
@@ -1240,8 +1247,8 @@ std::string _Add_Compress_Gamut_func(
     ss.newLine() << ss.float2Decl("project_from") << " = " << ss.float2Const("J", "M") << ";";
     ss.newLine() << ss.float2Decl("JMcusp") << " = " << cuspName << "_sample(h);";
 
-    ss.newLine() << ss.floatDecl("focusJ") << " = " << ss.lerp("JMcusp.r", std::to_string(g.mid_J), std::string("min(1.0, ") + std::to_string(ACES2::cusp_mid_blend) + " - (JMcusp.r / " + std::to_string(g.limit_J_max)) << "));";
-    ss.newLine() << ss.floatDecl("slope_gain") << " = " << g.limit_J_max << " * " << g.focus_dist << " * " << getFocusGainName << "(Jx, JMcusp.r);";
+    ss.newLine() << ss.floatDecl("focusJ") << " = " << ss.lerp("JMcusp.r", std::to_string(g.mid_J), std::string("min(1.0, ") + std::to_string(ACES2::cusp_mid_blend) + " - (JMcusp.r / " + std::to_string(s.limit_J_max)) << "));";
+    ss.newLine() << ss.floatDecl("slope_gain") << " = " << s.limit_J_max << " * " << g.focus_dist << " * " << getFocusGainName << "(Jx, JMcusp.r);";
 
     ss.newLine() << ss.floatDecl("gamma_top") << " = " << gammaName << "_sample(h);";
     ss.newLine() << ss.floatDecl("gamma_bottom") << " = " << g.lower_hull_gamma << ";";
@@ -1287,17 +1294,18 @@ void _Add_Gamut_Compress_Fwd_Shader(
     GpuShaderCreatorRcPtr & shaderCreator,
     GpuShaderText & ss,
     unsigned int resourceIndex,
+    const ACES2::SharedCompressionParameters & s,
     const ACES2::GamutCompressParams & g,
     const std::string & reachName)
 {
     std::string cuspName = _Add_Cusp_table(shaderCreator, resourceIndex, g);
     std::string gammaName = _Add_Gamma_table(shaderCreator, resourceIndex, g);
-    std::string getFocusGainName = _Add_Focus_Gain_func(shaderCreator, resourceIndex, g);
-    std::string solveJIntersectName = _Add_Solve_J_Intersect_func(shaderCreator, resourceIndex, g);
-    std::string findGamutBoundaryIntersectionName = _Add_Find_Gamut_Boundary_Intersection_func(shaderCreator, resourceIndex, g, solveJIntersectName);
-    std::string getReachBoundaryName = _Add_Reach_Boundary_func(shaderCreator, resourceIndex, g, reachName, getFocusGainName, solveJIntersectName);
+    std::string getFocusGainName = _Add_Focus_Gain_func(shaderCreator, resourceIndex, s, g);
+    std::string solveJIntersectName = _Add_Solve_J_Intersect_func(shaderCreator, resourceIndex, s, g);
+    std::string findGamutBoundaryIntersectionName = _Add_Find_Gamut_Boundary_Intersection_func(shaderCreator, resourceIndex, s, g, solveJIntersectName);
+    std::string getReachBoundaryName = _Add_Reach_Boundary_func(shaderCreator, resourceIndex, s, g, reachName, getFocusGainName, solveJIntersectName);
     std::string compressionName = _Add_Compression_func(shaderCreator, resourceIndex, false);
-    std::string gamutCompressName = _Add_Compress_Gamut_func(shaderCreator, resourceIndex, g, cuspName, getFocusGainName, gammaName, findGamutBoundaryIntersectionName, getReachBoundaryName, compressionName);
+    std::string gamutCompressName = _Add_Compress_Gamut_func(shaderCreator, resourceIndex, s, g, cuspName, getFocusGainName, gammaName, findGamutBoundaryIntersectionName, getReachBoundaryName, compressionName);
 
     const std::string pxl(shaderCreator->getPixelName());
 
@@ -1308,17 +1316,18 @@ void _Add_Gamut_Compress_Inv_Shader(
     GpuShaderCreatorRcPtr & shaderCreator,
     GpuShaderText & ss,
     unsigned int resourceIndex,
+    const ACES2::SharedCompressionParameters & s,
     const ACES2::GamutCompressParams & g,
     const std::string & reachName)
 {
     std::string cuspName = _Add_Cusp_table(shaderCreator, resourceIndex, g);
     std::string gammaName = _Add_Gamma_table(shaderCreator, resourceIndex, g);
-    std::string getFocusGainName = _Add_Focus_Gain_func(shaderCreator, resourceIndex, g);
-    std::string solveJIntersectName = _Add_Solve_J_Intersect_func(shaderCreator, resourceIndex, g);
-    std::string findGamutBoundaryIntersectionName = _Add_Find_Gamut_Boundary_Intersection_func(shaderCreator, resourceIndex, g, solveJIntersectName);
-    std::string getReachBoundaryName = _Add_Reach_Boundary_func(shaderCreator, resourceIndex, g, reachName, getFocusGainName, solveJIntersectName);
+    std::string getFocusGainName = _Add_Focus_Gain_func(shaderCreator, resourceIndex, s, g);
+    std::string solveJIntersectName = _Add_Solve_J_Intersect_func(shaderCreator, resourceIndex, s, g);
+    std::string findGamutBoundaryIntersectionName = _Add_Find_Gamut_Boundary_Intersection_func(shaderCreator, resourceIndex, s, g, solveJIntersectName);
+    std::string getReachBoundaryName = _Add_Reach_Boundary_func(shaderCreator, resourceIndex, s, g, reachName, getFocusGainName, solveJIntersectName);
     std::string compressionName = _Add_Compression_func(shaderCreator, resourceIndex, true);
-    std::string gamutCompressName = _Add_Compress_Gamut_func(shaderCreator, resourceIndex, g, cuspName, getFocusGainName, gammaName, findGamutBoundaryIntersectionName, getReachBoundaryName, compressionName);
+    std::string gamutCompressName = _Add_Compress_Gamut_func(shaderCreator, resourceIndex, s, g, cuspName, getFocusGainName, gammaName, findGamutBoundaryIntersectionName, getReachBoundaryName, compressionName);
 
     const std::string pxl(shaderCreator->getPixelName());
 
@@ -1327,7 +1336,7 @@ void _Add_Gamut_Compress_Inv_Shader(
     ss.newLine() << ss.float3Decl("unCompressedJMh") << ";";
 
     // Analytic inverse below threshold
-    ss.newLine() << "if (Jx <= " << ss.lerp("JMcusp.r", std::to_string(g.limit_J_max), std::to_string(ACES2::focus_gain_blend)) << ")";
+    ss.newLine() << "if (Jx <= " << ss.lerp("JMcusp.r", std::to_string(s.limit_J_max), std::to_string(ACES2::focus_gain_blend)) << ")";
     ss.newLine() << "{";
     ss.indent();
     ss.newLine() << "unCompressedJMh = " << gamutCompressName << "(" << pxl << ".rgb, Jx);";
@@ -1361,8 +1370,6 @@ void Add_ACES_OutputTransform_Fwd_Shader(
     const float white_x = (float) params[7];
     const float white_y = (float) params[8];
 
-    const Primaries in_primaries = ACES_AP0::primaries;
-
     const Primaries lim_primaries = {
         {red_x  , red_y  },
         {green_x, green_y},
@@ -1370,15 +1377,16 @@ void Add_ACES_OutputTransform_Fwd_Shader(
         {white_x, white_y}
     };
 
-    ACES2::JMhParams pIn = ACES2::init_JMhParams(in_primaries);
-    ACES2::JMhParams pLim = ACES2::init_JMhParams(lim_primaries);
-    ACES2::ToneScaleParams t = ACES2::init_ToneScaleParams(peak_luminance);
-    ACES2::ChromaCompressParams c = ACES2::init_ChromaCompressParams(peak_luminance);
-    const ACES2::GamutCompressParams g = ACES2::init_GamutCompressParams(peak_luminance, lim_primaries);
+    const ACES2::JMhParams pIn = ACES2::init_JMhParams(ACES_AP0::primaries);
+    const ACES2::JMhParams pLim = ACES2::init_JMhParams(lim_primaries);
+    const ACES2::ToneScaleParams t = ACES2::init_ToneScaleParams(peak_luminance);
+    const ACES2::SharedCompressionParameters s = ACES2::init_SharedCompressionParams(peak_luminance, pIn);
+    const ACES2::ChromaCompressParams c = ACES2::init_ChromaCompressParams(peak_luminance, t);
+    const ACES2::GamutCompressParams g = ACES2::init_GamutCompressParams(peak_luminance, pIn, pLim, t, s);
 
     unsigned resourceIndex = shaderCreator->getNextResourceIndex();
 
-    std::string reachName = _Add_Reach_table(shaderCreator, resourceIndex, g.reach_m_table);
+    std::string reachName = _Add_Reach_table(shaderCreator, resourceIndex, s.reach_m_table);
 
     ss.newLine() << "";
     ss.newLine() << "// Add RGB to JMh";
@@ -1394,7 +1402,7 @@ void Add_ACES_OutputTransform_Fwd_Shader(
     ss.newLine() << "";
     ss.newLine() << "{";
     ss.indent();
-        _Add_Tonescale_Compress_Fwd_Shader(shaderCreator, ss, resourceIndex, pIn, t, c, reachName);
+        _Add_Tonescale_Compress_Fwd_Shader(shaderCreator, ss, resourceIndex, pIn, t, s, c, reachName);
     ss.dedent();
     ss.newLine() << "}";
 
@@ -1403,7 +1411,7 @@ void Add_ACES_OutputTransform_Fwd_Shader(
     ss.newLine() << "";
     ss.newLine() << "{";
     ss.indent();
-        _Add_Gamut_Compress_Fwd_Shader(shaderCreator, ss, resourceIndex, g, reachName);
+        _Add_Gamut_Compress_Fwd_Shader(shaderCreator, ss, resourceIndex, s, g, reachName);
     ss.dedent();
     ss.newLine() << "}";
 
@@ -1434,8 +1442,6 @@ void Add_ACES_OutputTransform_Inv_Shader(
     const float white_x = (float) params[7];
     const float white_y = (float) params[8];
 
-    const Primaries in_primaries = ACES_AP0::primaries;
-
     const Primaries lim_primaries = {
         {red_x  , red_y  },
         {green_x, green_y},
@@ -1443,15 +1449,16 @@ void Add_ACES_OutputTransform_Inv_Shader(
         {white_x, white_y}
     };
 
-    ACES2::JMhParams pIn = ACES2::init_JMhParams(in_primaries);
-    ACES2::JMhParams pLim = ACES2::init_JMhParams(lim_primaries);
-    ACES2::ToneScaleParams t = ACES2::init_ToneScaleParams(peak_luminance);
-    ACES2::ChromaCompressParams c = ACES2::init_ChromaCompressParams(peak_luminance);
-    const ACES2::GamutCompressParams g = ACES2::init_GamutCompressParams(peak_luminance, lim_primaries);
+    const ACES2::JMhParams pIn = ACES2::init_JMhParams(ACES_AP0::primaries);
+    const ACES2::JMhParams pLim = ACES2::init_JMhParams(lim_primaries);
+    const ACES2::ToneScaleParams t = ACES2::init_ToneScaleParams(peak_luminance);
+    const ACES2::SharedCompressionParameters s = ACES2::init_SharedCompressionParams(peak_luminance, pIn);
+    const ACES2::ChromaCompressParams c = ACES2::init_ChromaCompressParams(peak_luminance, t);
+    const ACES2::GamutCompressParams g = ACES2::init_GamutCompressParams(peak_luminance, pIn, pLim, t, s);
 
     unsigned resourceIndex = shaderCreator->getNextResourceIndex();
 
-    std::string reachName = _Add_Reach_table(shaderCreator, resourceIndex, c.reach_m_table);
+    std::string reachName = _Add_Reach_table(shaderCreator, resourceIndex, s.reach_m_table);
 
     ss.newLine() << "";
     ss.newLine() << "// Add RGB to JMh";
@@ -1467,7 +1474,7 @@ void Add_ACES_OutputTransform_Inv_Shader(
     ss.newLine() << "";
     ss.newLine() << "{";
     ss.indent();
-        _Add_Gamut_Compress_Inv_Shader(shaderCreator, ss, resourceIndex, g, reachName);
+        _Add_Gamut_Compress_Inv_Shader(shaderCreator, ss, resourceIndex, s, g, reachName);
     ss.dedent();
     ss.newLine() << "}";
 
@@ -1476,7 +1483,7 @@ void Add_ACES_OutputTransform_Inv_Shader(
     ss.newLine() << "";
     ss.newLine() << "{";
     ss.indent();
-        _Add_Tonescale_Compress_Inv_Shader(shaderCreator, ss, resourceIndex, pIn, t, c, reachName);
+        _Add_Tonescale_Compress_Inv_Shader(shaderCreator, ss, resourceIndex, pIn, t, s, c, reachName);
     ss.dedent();
     ss.newLine() << "}";
 
@@ -1548,15 +1555,16 @@ void Add_Tonescale_Compress_Fwd_Shader(
 {
     const float peak_luminance = (float) params[0];
 
-    ACES2::JMhParams p = ACES2::init_JMhParams(ACES_AP0::primaries);
-    ACES2::ToneScaleParams t = ACES2::init_ToneScaleParams(peak_luminance);
-    ACES2::ChromaCompressParams c = ACES2::init_ChromaCompressParams(peak_luminance);
+    const ACES2::JMhParams p = ACES2::init_JMhParams(ACES_AP0::primaries);
+    const ACES2::ToneScaleParams t = ACES2::init_ToneScaleParams(peak_luminance);
+    const ACES2::SharedCompressionParameters s = ACES2::init_SharedCompressionParams(peak_luminance, p);
+    const ACES2::ChromaCompressParams c = ACES2::init_ChromaCompressParams(peak_luminance, t);
 
     unsigned resourceIndex = shaderCreator->getNextResourceIndex();
 
-    std::string reachName = _Add_Reach_table(shaderCreator, resourceIndex, c.reach_m_table);
+    std::string reachName = _Add_Reach_table(shaderCreator, resourceIndex, s.reach_m_table);
 
-    _Add_Tonescale_Compress_Fwd_Shader(shaderCreator, ss, resourceIndex, p, t, c, reachName);
+    _Add_Tonescale_Compress_Fwd_Shader(shaderCreator, ss, resourceIndex, p, t, s, c, reachName);
 }
 
 void Add_Tonescale_Compress_Inv_Shader(
@@ -1566,15 +1574,16 @@ void Add_Tonescale_Compress_Inv_Shader(
 {
     const float peak_luminance = (float) params[0];
 
-    ACES2::JMhParams p = ACES2::init_JMhParams(ACES_AP0::primaries);
-    ACES2::ToneScaleParams t = ACES2::init_ToneScaleParams(peak_luminance);
-    ACES2::ChromaCompressParams c = ACES2::init_ChromaCompressParams(peak_luminance);
+    const ACES2::JMhParams p = ACES2::init_JMhParams(ACES_AP0::primaries);
+    const ACES2::ToneScaleParams t = ACES2::init_ToneScaleParams(peak_luminance);
+    const ACES2::SharedCompressionParameters s = ACES2::init_SharedCompressionParams(peak_luminance, p);
+    const ACES2::ChromaCompressParams c = ACES2::init_ChromaCompressParams(peak_luminance, t);
 
     unsigned resourceIndex = shaderCreator->getNextResourceIndex();
 
-    std::string reachName = _Add_Reach_table(shaderCreator, resourceIndex, c.reach_m_table);
+    std::string reachName = _Add_Reach_table(shaderCreator, resourceIndex, s.reach_m_table);
 
-    _Add_Tonescale_Compress_Inv_Shader(shaderCreator, ss, resourceIndex, p, t, c, reachName);
+    _Add_Tonescale_Compress_Inv_Shader(shaderCreator, ss, resourceIndex, p, t, s, c, reachName);
 }
 
 void Add_Gamut_Compress_Fwd_Shader(
@@ -1600,13 +1609,17 @@ void Add_Gamut_Compress_Fwd_Shader(
         {white_x, white_y}
     };
 
-    const ACES2::GamutCompressParams g = ACES2::init_GamutCompressParams(peak_luminance, primaries);
+    const ACES2::JMhParams pIn = ACES2::init_JMhParams(ACES_AP0::primaries);
+    const ACES2::JMhParams pLim = ACES2::init_JMhParams(primaries);
+    const ACES2::ToneScaleParams t = ACES2::init_ToneScaleParams(peak_luminance);
+    const ACES2::SharedCompressionParameters s = ACES2::init_SharedCompressionParams(peak_luminance, pIn);
+    const ACES2::GamutCompressParams g = ACES2::init_GamutCompressParams(peak_luminance, pIn, pLim, t, s);
 
     unsigned resourceIndex = shaderCreator->getNextResourceIndex();
 
-    std::string reachName = _Add_Reach_table(shaderCreator, resourceIndex, g.reach_m_table);
+    std::string reachName = _Add_Reach_table(shaderCreator, resourceIndex, s.reach_m_table);
 
-    _Add_Gamut_Compress_Fwd_Shader(shaderCreator, ss, resourceIndex, g, reachName);
+    _Add_Gamut_Compress_Fwd_Shader(shaderCreator, ss, resourceIndex, s, g, reachName);
 }
 
 void Add_Gamut_Compress_Inv_Shader(
@@ -1632,13 +1645,17 @@ void Add_Gamut_Compress_Inv_Shader(
         {white_x, white_y}
     };
 
-    const ACES2::GamutCompressParams g = ACES2::init_GamutCompressParams(peak_luminance, primaries);
+    const ACES2::JMhParams pIn = ACES2::init_JMhParams(ACES_AP0::primaries);
+    const ACES2::JMhParams pLim = ACES2::init_JMhParams(primaries);
+    const ACES2::ToneScaleParams t = ACES2::init_ToneScaleParams(peak_luminance);
+    const ACES2::SharedCompressionParameters s = ACES2::init_SharedCompressionParams(peak_luminance, pIn);
+    const ACES2::GamutCompressParams g = ACES2::init_GamutCompressParams(peak_luminance, pIn, pLim, t, s);
 
     unsigned resourceIndex = shaderCreator->getNextResourceIndex();
 
-    std::string reachName = _Add_Reach_table(shaderCreator, resourceIndex, g.reach_m_table);
+    std::string reachName = _Add_Reach_table(shaderCreator, resourceIndex, s.reach_m_table);
 
-    _Add_Gamut_Compress_Inv_Shader(shaderCreator, ss, resourceIndex, g, reachName);
+    _Add_Gamut_Compress_Inv_Shader(shaderCreator, ss, resourceIndex, s, g, reachName);
 }
 
 void Add_Surround_10_Fwd_Shader(GpuShaderCreatorRcPtr & shaderCreator, GpuShaderText & ss, float gamma)

From ae7ddaa9b30617b01f35d2b1d795e3f0bb372738 Mon Sep 17 00:00:00 2001
From: Kevin Wheatley <kevin.wheatley@framestore.com>
Date: Tue, 7 Jan 2025 18:05:16 +0000
Subject: [PATCH 20/70] extract some of the fixed values that only depend on
 the hue to reduce recomputation during inverse gamut mapping

Signed-off-by: Kevin Wheatley <kevin.wheatley@framestore.com>
---
 .../ops/fixedfunction/ACES2/Common.h          |  7 +++
 .../ops/fixedfunction/ACES2/Transform.cpp     | 51 ++++++++++++-------
 2 files changed, 39 insertions(+), 19 deletions(-)

diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h b/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
index eeecf8c953..3d46fe2de2 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
@@ -76,6 +76,13 @@ struct ChromaCompressParams
     static constexpr float cusp_mid_blend = 1.3f;
 };
 
+struct HueDependantGamutParams
+{
+    float gamma_top;
+    float gamma_bottom;
+    f2 JMcusp;
+    float focusJ;
+};
 struct GamutCompressParams
 {
     float mid_J;
diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
index 710fa46e5b..46ee471998 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
@@ -661,7 +661,7 @@ inline float estimate_line_and_boundary_intersection_M(const float J_axis_inters
     //return shifted_intersection * M_max / (J_max - slope * M_max);
 }
 
-f3 find_gamut_boundary_intersection(const f3 &JMh_s, const f2 &JM_cusp_in, float J_focus, float J_max, float slope_gain, float gamma_top, float gamma_bottom)
+f2 find_gamut_boundary_intersection(const f2 &JM_cusp_in, float J_focus, float J_max, float slope_gain, float gamma_top, float gamma_bottom, const float J_intersect_source)
 {
     const float s = std::max(0.000001f, smooth_cusps); // TODO: pre smooth the cusp
     const f2 JM_cusp = {
@@ -669,7 +669,6 @@ f3 find_gamut_boundary_intersection(const f3 &JMh_s, const f2 &JM_cusp_in, float
         JM_cusp_in[1] * (1.f + smooth_m * s)
     };
 
-    const float J_intersect_source = solve_J_intersect(JMh_s[0], JMh_s[1], J_focus, J_max, slope_gain);
     const float J_intersect_cusp = solve_J_intersect(JM_cusp[0], JM_cusp[1], J_focus, J_max, slope_gain);
 
     float slope = 0.f;
@@ -687,7 +686,7 @@ f3 find_gamut_boundary_intersection(const f3 &JMh_s, const f2 &JM_cusp_in, float
     const float M_boundary = JM_cusp[1] * smin(M_boundary_lower / JM_cusp[1], M_boundary_upper / JM_cusp[1], s);
     const float J_boundary = J_intersect_source + slope * M_boundary; // TODO don't recalculate this
 
-    return {J_boundary, M_boundary, J_intersect_source};
+    return {J_boundary, M_boundary};
 }
 
 f3 get_reach_boundary(
@@ -750,7 +749,7 @@ float compression_function(
     return vCompressed;
 }
 
-f3 compressGamut(const f3 &JMh, float Jx, const ACES2::SharedCompressionParameters& sp, const ACES2::GamutCompressParams& p, const f2& JMcusp, bool invert)
+f3 compressGamut(const f3 &JMh, float Jx, const ACES2::SharedCompressionParameters& sp, const ACES2::GamutCompressParams& p, const HueDependantGamutParams hdp, bool invert)
 {
     const float J = JMh[0];
     const float M = JMh[1];
@@ -763,22 +762,20 @@ f3 compressGamut(const f3 &JMh, float Jx, const ACES2::SharedCompressionParamete
     else
     {
         const f2 project_from = {J, M};
-        const float focusJ = lerpf(JMcusp[0], p.mid_J, std::min(1.f, cusp_mid_blend - (JMcusp[0] / sp.limit_J_max)));
-        const float slope_gain = sp.limit_J_max * p.focus_dist * get_focus_gain(Jx, JMcusp[0], sp.limit_J_max);
+        const float slope_gain = sp.limit_J_max * p.focus_dist * get_focus_gain(Jx, hdp.JMcusp[0], sp.limit_J_max);
 
-        const float gamma_top = hue_dependent_upper_hull_gamma(h, p.upper_hull_gamma_table);
-        const float gamma_bottom = p.lower_hull_gamma;
+        const float J_intersect_source = solve_J_intersect(J, M, hdp.focusJ, sp.limit_J_max, slope_gain);
 
-        const f3 boundaryReturn = find_gamut_boundary_intersection({J, M, h}, JMcusp, focusJ, sp.limit_J_max, slope_gain, gamma_top, gamma_bottom);
+        const f2 boundaryReturn = find_gamut_boundary_intersection(hdp.JMcusp, hdp.focusJ, sp.limit_J_max, slope_gain, hdp.gamma_top, hdp.gamma_bottom, J_intersect_source);
         const f2 JMboundary = {boundaryReturn[0], boundaryReturn[1]};
-        const f2 project_to = {boundaryReturn[2], 0.f};
+        const f2 project_to = {J_intersect_source, 0.f};
 
         if (JMboundary[1] <= 0.0f)
         {
             return {J, 0.f, h};
         }
 
-        const f3 reachBoundary = get_reach_boundary(JMboundary[0], JMboundary[1], h, JMcusp, focusJ, sp.limit_J_max, sp.model_gamma_inv, p.focus_dist, sp.reach_m_table); // TODO
+        const f3 reachBoundary = get_reach_boundary(JMboundary[0], JMboundary[1], h, hdp.JMcusp, hdp.focusJ, sp.limit_J_max, sp.model_gamma_inv, p.focus_dist, sp.reach_m_table); // TODO
 
         const float difference = std::max(1.0001f, reachBoundary[1] / JMboundary[1]);
         const float threshold = std::max(compression_threshold, 1.f / difference);
@@ -795,24 +792,39 @@ f3 compressGamut(const f3 &JMh, float Jx, const ACES2::SharedCompressionParamete
     }
 }
 
+inline float compute_focusJ(float cusp_J, float mid_J, float limit_J_max)
+{
+    return lerpf(cusp_J, mid_J, std::min(1.f, cusp_mid_blend - (cusp_J / limit_J_max)));
+}
+
 f3 gamut_compress_fwd(const f3 &JMh, const SharedCompressionParameters &sp, const GamutCompressParams &p)
 {
-    const f2 JMcusp = cusp_from_table(JMh[2], p.gamut_cusp_table); // TODO: call once and pass this into compress functions ?
-    return compressGamut(JMh, JMh[0], sp, p, JMcusp, false);
+    HueDependantGamutParams hdp;
+    hdp.gamma_top = hue_dependent_upper_hull_gamma(JMh[2], p.upper_hull_gamma_table);
+    hdp.gamma_bottom = p.lower_hull_gamma;
+    hdp.JMcusp = cusp_from_table(JMh[2], p.gamut_cusp_table);
+    hdp.focusJ = compute_focusJ(hdp.JMcusp[0], p.mid_J, sp.limit_J_max);
+    
+    return compressGamut(JMh, JMh[0], sp, p, hdp, false);
 }
 
 f3 gamut_compress_inv(const f3 &JMh, const SharedCompressionParameters &sp, const GamutCompressParams &p)
 {
-    const f2 JMcusp = cusp_from_table(JMh[2], p.gamut_cusp_table); // TODO: call once and pass this into compress functions ?
+    HueDependantGamutParams hdp;
+    hdp.gamma_top = hue_dependent_upper_hull_gamma(JMh[2], p.upper_hull_gamma_table);
+    hdp.gamma_bottom = p.lower_hull_gamma;
+    hdp.JMcusp = cusp_from_table(JMh[2], p.gamut_cusp_table);
+    hdp.focusJ = compute_focusJ(hdp.JMcusp[0], p.mid_J, sp.limit_J_max);
+
     float Jx = JMh[0];
 
     // Analytic inverse below threshold
-    if (Jx > lerpf(JMcusp[0], sp.limit_J_max, focus_gain_blend))
+    if (Jx > lerpf(hdp.JMcusp[0], sp.limit_J_max, focus_gain_blend))
     {
         // Approximation above threshold
-        Jx = compressGamut(JMh, Jx, sp, p, JMcusp, true)[0];
+        Jx = compressGamut(JMh, Jx, sp, p, hdp, true)[0];
     }
-    return compressGamut(JMh, Jx, sp, p, JMcusp, true);
+    return compressGamut(JMh, Jx, sp, p, hdp, true);
 }
 
 bool evaluate_gamma_fit(
@@ -826,12 +838,13 @@ bool evaluate_gamma_fit(
     float lower_hull_gamma,
     const JMhParams &limitJMhParams)
 {
-    const float focusJ = lerpf(JMcusp[0], mid_J, std::min(1.f, cusp_mid_blend - (JMcusp[0] / limit_J_max)));
+    const float focusJ = compute_focusJ(JMcusp[0], mid_J, limit_J_max);
 
     for (size_t testIndex = 0; testIndex < 3; testIndex++)
     {
         const float slope_gain = limit_J_max * focus_dist * get_focus_gain(testJMh[testIndex][0], JMcusp[0], limit_J_max);
-        const f3 approxLimit = find_gamut_boundary_intersection(testJMh[testIndex], JMcusp, focusJ, limit_J_max, slope_gain, topGamma, lower_hull_gamma);
+        const float J_intersect_source = solve_J_intersect(testJMh[testIndex][0], testJMh[testIndex][1], focusJ, limit_J_max, slope_gain);
+        const f2 approxLimit = find_gamut_boundary_intersection(JMcusp, focusJ, limit_J_max, slope_gain, topGamma, lower_hull_gamma, J_intersect_source);
         const f3 approximate_JMh = {approxLimit[0], approxLimit[1], testJMh[testIndex][2]};
         const f3 newLimitRGB = JMh_to_RGB(approximate_JMh, limitJMhParams);
         const f3 newLimitRGBScaled = mult_f_f3(reference_luminance / peakLuminance, newLimitRGB);

From f466d846a0edb87c3a3dd2944886a33454c1251c Mon Sep 17 00:00:00 2001
From: Kevin Wheatley <kevin.wheatley@framestore.com>
Date: Thu, 9 Jan 2025 09:18:56 +0000
Subject: [PATCH 21/70] Avoid double lookup for reachMaxM value by resolving
 once the hue is known.

Also reduces the size of the object on the stack by not passing the whole table.

Signed-off-by: Kevin Wheatley <kevin.wheatley@framestore.com>
---
 .../ops/fixedfunction/ACES2/Common.h          |  7 ++
 .../ops/fixedfunction/ACES2/Transform.cpp     | 70 ++++++++++---------
 .../ops/fixedfunction/ACES2/Transform.h       |  9 +--
 .../ops/fixedfunction/FixedFunctionOpCPU.cpp  | 22 +++---
 4 files changed, 64 insertions(+), 44 deletions(-)

diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h b/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
index 3d46fe2de2..97c38bf904 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
@@ -67,6 +67,13 @@ struct SharedCompressionParameters
     Table1D reach_m_table;
 };
 
+struct ResolvedSharedCompressionParameters
+{
+    float limit_J_max;
+    float model_gamma_inv;
+    float reachMaxM;
+};
+
 struct ChromaCompressParams
 {
     float sat;
diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
index 46ee471998..f702cbd86b 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
@@ -255,7 +255,7 @@ f3 JMh_to_RGB(const f3 &JMh, const JMhParams &p)
 // Tonescale / Chroma compress
 //
 
-float chroma_compress_norm(float h, float chroma_compress_scale)
+inline float chroma_compress_norm(float h, float chroma_compress_scale)
 {
     const float h_rad = degrees_to_radians(h);
     const float a = cos(h_rad);
@@ -333,7 +333,7 @@ float tonescale_inv(const float J_ts, const JMhParams &p, const ToneScaleParams
     return std::copysign(J, Y_ts);
 }
 
-f3 tonescale_chroma_compress_fwd(const f3 &JMh, const JMhParams &p, const ToneScaleParams &pt, const SharedCompressionParameters &ps, const ChromaCompressParams &pc)
+f3 tonescale_chroma_compress_fwd(const f3 &JMh, const JMhParams &p, const ToneScaleParams &pt, const ResolvedSharedCompressionParameters &pr, const ChromaCompressParams &pc)
 {
     const float J = JMh[0];
     const float M = JMh[1];
@@ -346,12 +346,12 @@ f3 tonescale_chroma_compress_fwd(const f3 &JMh, const JMhParams &p, const ToneSc
 
     if (M != 0.0)
     {
-        const float nJ = J_ts / ps.limit_J_max;
+        const float nJ = J_ts / pr.limit_J_max;
         const float snJ = std::max(0.f, 1.f - nJ);
         const float Mnorm = chroma_compress_norm(h, pc.chroma_compress_scale);
-        const float limit = powf(nJ, ps.model_gamma_inv) * reach_m_from_table(h, ps.reach_m_table) / Mnorm;
+        const float limit = powf(nJ, pr.model_gamma_inv) * pr.reachMaxM / Mnorm;
 
-        M_cp = M * powf(J_ts / J, ps.model_gamma_inv);
+        M_cp = M * powf(J_ts / J, pr.model_gamma_inv);
         M_cp = M_cp / Mnorm;
         M_cp = limit - toe_fwd(limit - M_cp, limit - 0.001f, snJ * pc.sat, sqrt(nJ * nJ + pc.sat_thr));
         M_cp = toe_fwd(M_cp, limit, nJ * pc.compr, snJ);
@@ -361,7 +361,7 @@ f3 tonescale_chroma_compress_fwd(const f3 &JMh, const JMhParams &p, const ToneSc
     return {J_ts, M_cp, h};
 }
 
-f3 tonescale_chroma_compress_inv(const f3 &JMh, const JMhParams &p, const ToneScaleParams &pt, const SharedCompressionParameters &ps, const ChromaCompressParams &pc)
+f3 tonescale_chroma_compress_inv(const f3 &JMh, const JMhParams &p, const ToneScaleParams &pt, const ResolvedSharedCompressionParameters &pr, const ChromaCompressParams &pc)
 {
     const float J_ts = JMh[0];
     const float M_cp = JMh[1];
@@ -374,16 +374,16 @@ f3 tonescale_chroma_compress_inv(const f3 &JMh, const JMhParams &p, const ToneSc
 
     if (M_cp != 0.0)
     {
-        const float nJ = J_ts / ps.limit_J_max;
+        const float nJ = J_ts / pr.limit_J_max;
         const float snJ = std::max(0.f, 1.f - nJ);
         const float Mnorm = chroma_compress_norm(h, pc.chroma_compress_scale);
-        const float limit = powf(nJ, ps.model_gamma_inv) * reach_m_from_table(h, ps.reach_m_table) / Mnorm;
+        const float limit = powf(nJ, pr.model_gamma_inv) * pr.reachMaxM / Mnorm;
 
         M = M_cp / Mnorm;
         M = toe_inv(M, limit, nJ * pc.compr, snJ);
         M = limit - toe_inv(limit - M, limit - 0.001f, snJ * pc.sat, sqrt(nJ * nJ + pc.sat_thr));
         M = M * Mnorm;
-        M = M * powf(J_ts / J, -ps.model_gamma_inv);
+        M = M * powf(J_ts / J, -pr.model_gamma_inv);
     }
 
     return {J, M, h};
@@ -695,17 +695,13 @@ f3 get_reach_boundary(
     float h,
     const f2 &JMcusp,
     float focusJ,
-    float limit_J_max,
-    float model_gamma_inv,
     float focus_dist,
-    const ACES2::Table1D & reach_m_table
+    const ACES2::ResolvedSharedCompressionParameters& sr
 )
 {
-    const float reachMaxM = reach_m_from_table(h, reach_m_table);
-
-    const float slope_gain = limit_J_max * focus_dist * get_focus_gain(J, JMcusp[0], limit_J_max);
+    const float slope_gain = sr.limit_J_max * focus_dist * get_focus_gain(J, JMcusp[0], sr.limit_J_max);
 
-    const float intersectJ = solve_J_intersect(J, M, focusJ, limit_J_max, slope_gain);
+    const float intersectJ = solve_J_intersect(J, M, focusJ, sr.limit_J_max, slope_gain);
 
     float slope;
     if (intersectJ < focusJ)
@@ -714,10 +710,10 @@ f3 get_reach_boundary(
     }
     else
     {
-        slope = (limit_J_max - intersectJ) * (intersectJ - focusJ) / (focusJ * slope_gain);
+        slope = (sr.limit_J_max - intersectJ) * (intersectJ - focusJ) / (focusJ * slope_gain);
     }
 
-    const float boundary = limit_J_max * powf(intersectJ / limit_J_max, model_gamma_inv) * reachMaxM / (limit_J_max - slope * reachMaxM);
+    const float boundary = sr.limit_J_max * powf(intersectJ / sr.limit_J_max, sr.model_gamma_inv) * sr.reachMaxM / (sr.limit_J_max - slope * sr.reachMaxM);
     return {J, boundary, h};
 }
 
@@ -749,24 +745,24 @@ float compression_function(
     return vCompressed;
 }
 
-f3 compressGamut(const f3 &JMh, float Jx, const ACES2::SharedCompressionParameters& sp, const ACES2::GamutCompressParams& p, const HueDependantGamutParams hdp, bool invert)
+f3 compressGamut(const f3 &JMh, float Jx, const ACES2::ResolvedSharedCompressionParameters& sr, const ACES2::GamutCompressParams& p, const HueDependantGamutParams hdp, bool invert)
 {
     const float J = JMh[0];
     const float M = JMh[1];
     const float h = JMh[2];
 
-    if (M < 0.0001f || J > sp.limit_J_max)
+    if (M < 0.0001f || J > sr.limit_J_max)
     {
         return {J, 0.f, h};
     }
     else
     {
         const f2 project_from = {J, M};
-        const float slope_gain = sp.limit_J_max * p.focus_dist * get_focus_gain(Jx, hdp.JMcusp[0], sp.limit_J_max);
+        const float slope_gain = sr.limit_J_max * p.focus_dist * get_focus_gain(Jx, hdp.JMcusp[0], sr.limit_J_max);
 
-        const float J_intersect_source = solve_J_intersect(J, M, hdp.focusJ, sp.limit_J_max, slope_gain);
+        const float J_intersect_source = solve_J_intersect(J, M, hdp.focusJ, sr.limit_J_max, slope_gain);
 
-        const f2 boundaryReturn = find_gamut_boundary_intersection(hdp.JMcusp, hdp.focusJ, sp.limit_J_max, slope_gain, hdp.gamma_top, hdp.gamma_bottom, J_intersect_source);
+        const f2 boundaryReturn = find_gamut_boundary_intersection(hdp.JMcusp, hdp.focusJ, sr.limit_J_max, slope_gain, hdp.gamma_top, hdp.gamma_bottom, J_intersect_source);
         const f2 JMboundary = {boundaryReturn[0], boundaryReturn[1]};
         const f2 project_to = {J_intersect_source, 0.f};
 
@@ -775,7 +771,7 @@ f3 compressGamut(const f3 &JMh, float Jx, const ACES2::SharedCompressionParamete
             return {J, 0.f, h};
         }
 
-        const f3 reachBoundary = get_reach_boundary(JMboundary[0], JMboundary[1], h, hdp.JMcusp, hdp.focusJ, sp.limit_J_max, sp.model_gamma_inv, p.focus_dist, sp.reach_m_table); // TODO
+        const f3 reachBoundary = get_reach_boundary(JMboundary[0], JMboundary[1], h, hdp.JMcusp, hdp.focusJ, p.focus_dist, sr); // TODO
 
         const float difference = std::max(1.0001f, reachBoundary[1] / JMboundary[1]);
         const float threshold = std::max(compression_threshold, 1.f / difference);
@@ -797,34 +793,34 @@ inline float compute_focusJ(float cusp_J, float mid_J, float limit_J_max)
     return lerpf(cusp_J, mid_J, std::min(1.f, cusp_mid_blend - (cusp_J / limit_J_max)));
 }
 
-f3 gamut_compress_fwd(const f3 &JMh, const SharedCompressionParameters &sp, const GamutCompressParams &p)
+f3 gamut_compress_fwd(const f3 &JMh, const ResolvedSharedCompressionParameters &sr, const GamutCompressParams &p)
 {
     HueDependantGamutParams hdp;
     hdp.gamma_top = hue_dependent_upper_hull_gamma(JMh[2], p.upper_hull_gamma_table);
     hdp.gamma_bottom = p.lower_hull_gamma;
     hdp.JMcusp = cusp_from_table(JMh[2], p.gamut_cusp_table);
-    hdp.focusJ = compute_focusJ(hdp.JMcusp[0], p.mid_J, sp.limit_J_max);
+    hdp.focusJ = compute_focusJ(hdp.JMcusp[0], p.mid_J, sr.limit_J_max);
     
-    return compressGamut(JMh, JMh[0], sp, p, hdp, false);
+    return compressGamut(JMh, JMh[0], sr, p, hdp, false);
 }
 
-f3 gamut_compress_inv(const f3 &JMh, const SharedCompressionParameters &sp, const GamutCompressParams &p)
+f3 gamut_compress_inv(const f3 &JMh, const ResolvedSharedCompressionParameters &sr, const GamutCompressParams &p)
 {
     HueDependantGamutParams hdp;
     hdp.gamma_top = hue_dependent_upper_hull_gamma(JMh[2], p.upper_hull_gamma_table);
     hdp.gamma_bottom = p.lower_hull_gamma;
     hdp.JMcusp = cusp_from_table(JMh[2], p.gamut_cusp_table);
-    hdp.focusJ = compute_focusJ(hdp.JMcusp[0], p.mid_J, sp.limit_J_max);
+    hdp.focusJ = compute_focusJ(hdp.JMcusp[0], p.mid_J, sr.limit_J_max);
 
     float Jx = JMh[0];
 
     // Analytic inverse below threshold
-    if (Jx > lerpf(hdp.JMcusp[0], sp.limit_J_max, focus_gain_blend))
+    if (Jx > lerpf(hdp.JMcusp[0], sr.limit_J_max, focus_gain_blend))
     {
         // Approximation above threshold
-        Jx = compressGamut(JMh, Jx, sp, p, hdp, true)[0];
+        Jx = compressGamut(JMh, Jx, sr, p, hdp, true)[0];
     }
-    return compressGamut(JMh, Jx, sp, p, hdp, true);
+    return compressGamut(JMh, Jx, sr, p, hdp, true);
 }
 
 bool evaluate_gamma_fit(
@@ -999,6 +995,16 @@ SharedCompressionParameters init_SharedCompressionParams(float peakLuminance, co
     return params;
 }
 
+ResolvedSharedCompressionParameters resolve_CompressionParams(float hue, const SharedCompressionParameters &p)
+{
+    ResolvedSharedCompressionParameters params = {
+        p.limit_J_max,
+        p.model_gamma_inv,
+        reach_m_from_table(hue, p.reach_m_table)
+    };
+    return params;
+}
+
 ChromaCompressParams init_ChromaCompressParams(float peakLuminance, const ToneScaleParams &tsParams)
 {
     // Calculated chroma compress variables
diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.h b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.h
index 9d41ad4225..9c2314c5b3 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.h
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.h
@@ -15,6 +15,7 @@ namespace ACES2
 JMhParams init_JMhParams(const Primaries &P);
 ToneScaleParams init_ToneScaleParams(float peakLuminance);
 SharedCompressionParameters init_SharedCompressionParams(float peakLuminance, const JMhParams &inputJMhParams);
+ResolvedSharedCompressionParameters resolve_CompressionParams(float hue, const SharedCompressionParameters &p);
 ChromaCompressParams init_ChromaCompressParams(float peakLuminance, const ToneScaleParams &tsParams);
 GamutCompressParams init_GamutCompressParams(float peakLuminance, const JMhParams &inputJMhParams, const JMhParams &limitJMhParams,
                                              const ToneScaleParams &tsParams, const SharedCompressionParameters &shParams);
@@ -22,11 +23,11 @@ GamutCompressParams init_GamutCompressParams(float peakLuminance, const JMhParam
 f3 RGB_to_JMh(const f3 &RGB, const JMhParams &p);
 f3 JMh_to_RGB(const f3 &JMh, const JMhParams &p);
 
-f3 tonescale_chroma_compress_fwd(const f3 &JMh, const JMhParams &p, const ToneScaleParams &pt, const SharedCompressionParameters &ps, const ChromaCompressParams &pc);
-f3 tonescale_chroma_compress_inv(const f3 &JMh, const JMhParams &p, const ToneScaleParams &pt, const SharedCompressionParameters &ps, const ChromaCompressParams &pc);
+f3 tonescale_chroma_compress_fwd(const f3 &JMh, const JMhParams &p, const ToneScaleParams &pt, const ResolvedSharedCompressionParameters &ps, const ChromaCompressParams &pc);
+f3 tonescale_chroma_compress_inv(const f3 &JMh, const JMhParams &p, const ToneScaleParams &pt, const ResolvedSharedCompressionParameters &ps, const ChromaCompressParams &pc);
 
-f3 gamut_compress_fwd(const f3 &JMh, const SharedCompressionParameters &ps, const GamutCompressParams &p);
-f3 gamut_compress_inv(const f3 &JMh, const SharedCompressionParameters &ps, const GamutCompressParams &p);
+f3 gamut_compress_fwd(const f3 &JMh, const ResolvedSharedCompressionParameters &ps, const GamutCompressParams &p);
+f3 gamut_compress_inv(const f3 &JMh, const ResolvedSharedCompressionParameters &ps, const GamutCompressParams &p);
 
 
 } // namespace ACES2
diff --git a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpCPU.cpp b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpCPU.cpp
index 7af8f0a0b3..1476dfc866 100644
--- a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpCPU.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpCPU.cpp
@@ -1052,8 +1052,9 @@ void Renderer_ACES_OutputTransform20::fwd(const void * inImg, void * outImg, lon
     {
         const ACES2::f3 RGBIn {in[0], in[1], in[2]};
         const ACES2::f3 JMh           = ACES2::RGB_to_JMh(RGBIn, m_pIn);
-        const ACES2::f3 tonemappedJMh = ACES2::tonescale_chroma_compress_fwd(JMh, m_pIn, m_t, m_s, m_c);
-        const ACES2::f3 compressedJMh = ACES2::gamut_compress_fwd(tonemappedJMh, m_s, m_g);
+        const ACES2::ResolvedSharedCompressionParameters rp = resolve_CompressionParams(JMh[2], m_s);
+        const ACES2::f3 tonemappedJMh = ACES2::tonescale_chroma_compress_fwd(JMh, m_pIn, m_t, rp, m_c);
+        const ACES2::f3 compressedJMh = ACES2::gamut_compress_fwd(tonemappedJMh, rp, m_g);
         const ACES2::f3 RGBOut        = ACES2::JMh_to_RGB(compressedJMh, m_pOut);
 
         out[0] = RGBOut[0];
@@ -1075,8 +1076,9 @@ void Renderer_ACES_OutputTransform20::inv(const void * inImg, void * outImg, lon
     {
         const ACES2::f3 RGBout {in[0], in[1], in[2]};
         const ACES2::f3 compressedJMh = ACES2::RGB_to_JMh(RGBout, m_pOut);
-        const ACES2::f3 tonemappedJMh = ACES2::gamut_compress_inv(compressedJMh, m_s, m_g);
-        const ACES2::f3 JMh           = ACES2::tonescale_chroma_compress_inv(tonemappedJMh, m_pIn, m_t, m_s, m_c);
+        const ACES2::ResolvedSharedCompressionParameters rp = resolve_CompressionParams(compressedJMh[2], m_s);
+        const ACES2::f3 tonemappedJMh = ACES2::gamut_compress_inv(compressedJMh, rp, m_g);
+        const ACES2::f3 JMh           = ACES2::tonescale_chroma_compress_inv(tonemappedJMh, m_pIn, m_t, rp, m_c);
         const ACES2::f3 RGBin         = ACES2::JMh_to_RGB(JMh, m_pIn);
 
         out[0] = RGBin[0];
@@ -1195,7 +1197,8 @@ void Renderer_ACES_TONESCALE_COMPRESS_20::fwd(const void * inImg, void * outImg,
 
     for(long idx=0; idx<numPixels; ++idx)
     {
-        const ACES2::f3 JMh = ACES2::tonescale_chroma_compress_fwd({in[0], in[1], in[2]}, m_p, m_t, m_s, m_c);
+        const ACES2::ResolvedSharedCompressionParameters rp = resolve_CompressionParams(in[2], m_s);
+        const ACES2::f3 JMh = ACES2::tonescale_chroma_compress_fwd({in[0], in[1], in[2]}, m_p, m_t, rp, m_c);
 
         out[0] = JMh[0];
         out[1] = JMh[1];
@@ -1214,7 +1217,8 @@ void Renderer_ACES_TONESCALE_COMPRESS_20::inv(const void * inImg, void * outImg,
 
     for(long idx=0; idx<numPixels; ++idx)
     {
-        const ACES2::f3 JMh = ACES2::tonescale_chroma_compress_inv({in[0], in[1], in[2]}, m_p, m_t, m_s, m_c);
+        const ACES2::ResolvedSharedCompressionParameters rp = resolve_CompressionParams(in[2], m_s);
+        const ACES2::f3 JMh = ACES2::tonescale_chroma_compress_inv({in[0], in[1], in[2]}, m_p, m_t, rp, m_c);
 
         out[0] = JMh[0];
         out[1] = JMh[1];
@@ -1275,7 +1279,8 @@ void Renderer_ACES_GAMUT_COMPRESS_20::fwd(const void * inImg, void * outImg, lon
 
     for(long idx=0; idx<numPixels; ++idx)
     {
-        const ACES2::f3 JMh = ACES2::gamut_compress_fwd({in[0], in[1], in[2]}, m_s, m_g);
+        const ACES2::ResolvedSharedCompressionParameters rp = resolve_CompressionParams(in[2], m_s);
+        const ACES2::f3 JMh = ACES2::gamut_compress_fwd({in[0], in[1], in[2]}, rp, m_g);
 
         out[0] = JMh[0];
         out[1] = JMh[1];
@@ -1294,7 +1299,8 @@ void Renderer_ACES_GAMUT_COMPRESS_20::inv(const void * inImg, void * outImg, lon
 
     for(long idx=0; idx<numPixels; ++idx)
     {
-        const ACES2::f3 JMh = ACES2::gamut_compress_inv({in[0], in[1], in[2]}, m_s, m_g);
+        const ACES2::ResolvedSharedCompressionParameters rp = resolve_CompressionParams(in[2], m_s);
+        const ACES2::f3 JMh = ACES2::gamut_compress_inv({in[0], in[1], in[2]}, rp, m_g);
 
         out[0] = JMh[0];
         out[1] = JMh[1];

From 34794cdad24b8372a2ae03e15c7d883eabd97a31 Mon Sep 17 00:00:00 2001
From: Kevin Wheatley <kevin.wheatley@framestore.com>
Date: Thu, 9 Jan 2025 18:09:49 +0000
Subject: [PATCH 22/70] Push wrapping of hues to the boundary; mark up
 conversion points from external inputs, etc.

Signed-off-by: Kevin Wheatley <kevin.wheatley@framestore.com>
---
 .../ops/fixedfunction/ACES2/Common.h          | 23 +++++++-
 .../ops/fixedfunction/ACES2/Transform.cpp     | 52 ++++++-------------
 .../ops/fixedfunction/FixedFunctionOpCPU.cpp  | 33 +++++++-----
 3 files changed, 58 insertions(+), 50 deletions(-)

diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h b/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
index 97c38bf904..3757aa229d 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
@@ -106,7 +106,28 @@ constexpr float Y_b = 20.f;
 constexpr f3 surround = {0.9f, 0.59f, 0.9f}; // Dim surround
 constexpr float J_scale = 100.0f;
 constexpr float PI = 3.14159265358979f;
-constexpr float hue_limit = 360.0f; // 2.0f * PI;
+
+constexpr float hue_limit = 360.0f;
+//constexpr float hue_limit = 2.0f * PI;
+inline float wrap_to_hue_limit(float hue)
+{
+    float y = std::fmod(hue, hue_limit);
+    if ( y < 0.f)
+    {
+        y = y + hue_limit;
+    }
+    return y;
+}
+inline constexpr float to_degrees(const float v) { return v; }
+inline float from_degrees(const float v) { return wrap_to_hue_limit(v); }
+inline constexpr float to_radians(const float v) { return PI * v / 180.0f; };
+inline float from_radians(const float v) { return wrap_to_hue_limit(180.0f * v / PI); };
+/*
+inline constexpr float to_degrees(const float v) { return 180.0f * v / PI; }
+inline float from_degrees(const float v) { return wrap_to_hue_limit(PI * v / 180.0f); }
+inline constexpr float to_radians(const float v) { return v; }
+inline float from_radians(const float v) { return wrap_to_hue_limit(v); };
+*/
 
 // Chroma compression
 constexpr float chroma_compress = 2.4f;
diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
index f702cbd86b..574b343955 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
@@ -13,53 +13,34 @@ namespace ACES2
 //
 // Table lookups
 //
-inline constexpr float degrees_to_radians(float d)
-{
-    return d / 180.0f * PI;
-}
-
-inline constexpr float radians_to_degrees(float r)
-{
-    return r / PI * 180.f;
-}
 
-inline float wrap_to_hue_limit(float hue) // TODO: track this and strip out unneeded calls
-{
-    float y = std::fmod(hue, hue_limit);
-    if ( y < 0.f)
-    {
-        y = y + hue_limit;
-    }
-    return y;
-}
-
-float base_hue_for_position(int i_lo, int table_size) 
+inline float base_hue_for_position(int i_lo, int table_size) 
 {
     const float result = i_lo * hue_limit / table_size;
     return result;
 }
 
-int hue_position_in_uniform_table(float hue, int table_size)
+inline int hue_position_in_uniform_table(float wrapped_hue, int table_size)
 {
-    const float wrapped_hue = wrap_to_hue_limit(hue);
     return int(wrapped_hue / hue_limit * (float) table_size);
 }
 
-int next_position_in_table(int entry, int table_size)
+inline int next_position_in_table(int entry, int table_size)
 {
     return (entry + 1) % table_size;
 }
 
-int clamp_to_table_bounds(int entry, int table_size)
+inline int clamp_to_table_bounds(int entry, int table_size)
 {
     return std::min(table_size - 1, std::max(0, entry));
 }
 
 f2 cusp_from_table(float h, const Table3D &gt)
 {
-    int i_lo = 0;
-    int i_hi = gt.base_index + gt.size; // allowed as we have an extra entry in the table
-    int i = clamp_to_table_bounds(hue_position_in_uniform_table(h, gt.size) + gt.base_index, gt.total_size);
+    constexpr int search_range = 64; // BUG: TODO: FIXME: This is arbitrary and should be calculated based on gamut parameters
+    int i = hue_position_in_uniform_table(h, gt.size) + gt.base_index;
+    int i_lo = std::max(0, i - search_range);
+    int i_hi = std::min(gt.total_size - 1, i + search_range); // allowed as we have an extra entry in the table
 
     while (i_lo + 1 < i_hi)
     {
@@ -71,7 +52,7 @@ f2 cusp_from_table(float h, const Table3D &gt)
         {
             i_hi = i;
         }
-        i = (i_lo + i_hi) / 2, gt.total_size;
+        i = (i_lo + i_hi) / 2;
     }
 
     i_hi = std::max(1, i_hi);
@@ -98,7 +79,7 @@ f2 cusp_from_table(float h, const Table3D &gt)
 float reach_m_from_table(float h, const ACES2::Table1D &gt)
 {
     const int i_lo = clamp_to_table_bounds(hue_position_in_uniform_table(h, gt.size), gt.total_size);
-    const int i_hi = clamp_to_table_bounds(next_position_in_table(i_lo, gt.size), gt.total_size);
+    const int i_hi = next_position_in_table(i_lo, gt.size);
 
     const float t = (h - i_lo) / (i_hi - i_lo);
     return lerpf(gt.table[i_lo], gt.table[i_hi], t);
@@ -107,11 +88,11 @@ float reach_m_from_table(float h, const ACES2::Table1D &gt)
 float hue_dependent_upper_hull_gamma(float h, const ACES2::Table1D &gt)
 {
     const int i_lo = clamp_to_table_bounds(hue_position_in_uniform_table(h, gt.size) + gt.base_index, gt.total_size);
-    const int i_hi = clamp_to_table_bounds(next_position_in_table(i_lo, gt.size), gt.total_size);
+    const int i_hi = next_position_in_table(i_lo, gt.size);
 
     const float base_hue = (float) (i_lo - gt.base_index);
 
-    const float t = wrap_to_hue_limit(h) - base_hue;
+    const float t = h - base_hue;
 
     return lerpf(gt.table[i_lo], gt.table[i_hi], t);
 }
@@ -204,7 +185,7 @@ inline f3 Aab_to_JMh(const f3 &Aab, const JMhParams &p)
     const float M = J == 0.f ? 0.f : sqrt(Aab[1] * Aab[1] + Aab[2] * Aab[2]);
 
     const float h_rad = std::atan2(Aab[2], Aab[1]);
-    float h = wrap_to_hue_limit(radians_to_degrees(h_rad));
+    float h = from_radians(h_rad);
 
     return {J, M, h};
 }
@@ -222,7 +203,7 @@ inline f3 JMh_to_Aab(const f3 &JMh, const JMhParams &p)
     const float M = JMh[1];
     const float h = JMh[2];
 
-    const float h_rad = degrees_to_radians(h);
+    const float h_rad = to_radians(h);
 
     const float A = J_to_Achromatic_n(J, p.cz);
     const float a = M * cos(h_rad);
@@ -257,7 +238,7 @@ f3 JMh_to_RGB(const f3 &JMh, const JMhParams &p)
 
 inline float chroma_compress_norm(float h, float chroma_compress_scale)
 {
-    const float h_rad = degrees_to_radians(h);
+    const float h_rad = to_radians(h);
     const float a = cos(h_rad);
     const float b = sin(h_rad);
     const float cos_hr2 = a * a - b * b;
@@ -509,7 +490,8 @@ Table3D make_gamut_table(const JMhParams &params, float peakLuminance)
     // Wrap the hues, to maintain monotonicity. These entries will fall outside [0.0, 360.0)
     gamutCuspTable.table[0][2] = gamutCuspTable.table[0][2] - hue_limit;
     gamutCuspTable.table[gamutCuspTable.size+1][2] = gamutCuspTable.table[gamutCuspTable.size+1][2] + hue_limit;
-
+    //for (int i = 0; i < gamutCuspTableUnsorted.total_size; i++)
+    //  std::cout << "i " << i << " " << hue_position_in_uniform_table(gamutCuspTable.table[i][2], gamutCuspTableUnsorted.size) << " " << gamutCuspTable.table[i][0]  << " " << gamutCuspTable.table[i][1]  << " " << gamutCuspTable.table[i][2]  << "\n";
     return gamutCuspTable;
 }
 
diff --git a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpCPU.cpp b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpCPU.cpp
index 1476dfc866..626f379d0b 100644
--- a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpCPU.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpCPU.cpp
@@ -1138,7 +1138,7 @@ void Renderer_ACES_RGB_TO_JMh_20::fwd(const void * inImg, void * outImg, long nu
 
         out[0] = JMh[0];
         out[1] = JMh[1];
-        out[2] = JMh[2];
+        out[2] = ACES2::to_degrees(JMh[2]);
         out[3] = in[3];
 
         in  += 4;
@@ -1153,7 +1153,8 @@ void Renderer_ACES_RGB_TO_JMh_20::inv(const void * inImg, void * outImg, long nu
 
     for(long idx=0; idx<numPixels; ++idx)
     {
-        const ACES2::f3 RGB = ACES2::JMh_to_RGB({in[0], in[1], in[2]}, m_p);
+        const float normalised_hue = ACES2::from_degrees(in[2]);
+        const ACES2::f3 RGB = ACES2::JMh_to_RGB({in[0], in[1], normalised_hue}, m_p);
 
         out[0] = RGB[0];
         out[1] = RGB[1];
@@ -1197,12 +1198,13 @@ void Renderer_ACES_TONESCALE_COMPRESS_20::fwd(const void * inImg, void * outImg,
 
     for(long idx=0; idx<numPixels; ++idx)
     {
-        const ACES2::ResolvedSharedCompressionParameters rp = resolve_CompressionParams(in[2], m_s);
-        const ACES2::f3 JMh = ACES2::tonescale_chroma_compress_fwd({in[0], in[1], in[2]}, m_p, m_t, rp, m_c);
+        const float normalised_hue = ACES2::from_degrees(in[2]);
+        const ACES2::ResolvedSharedCompressionParameters rp = resolve_CompressionParams(normalised_hue, m_s);
+        const ACES2::f3 JMh = ACES2::tonescale_chroma_compress_fwd({in[0], in[1], normalised_hue}, m_p, m_t, rp, m_c);
 
         out[0] = JMh[0];
         out[1] = JMh[1];
-        out[2] = JMh[2];
+        out[2] = ACES2::to_degrees(JMh[2]);
         out[3] = in[3];
 
         in  += 4;
@@ -1217,12 +1219,13 @@ void Renderer_ACES_TONESCALE_COMPRESS_20::inv(const void * inImg, void * outImg,
 
     for(long idx=0; idx<numPixels; ++idx)
     {
-        const ACES2::ResolvedSharedCompressionParameters rp = resolve_CompressionParams(in[2], m_s);
-        const ACES2::f3 JMh = ACES2::tonescale_chroma_compress_inv({in[0], in[1], in[2]}, m_p, m_t, rp, m_c);
+        const float normalised_hue = ACES2::from_degrees(in[2]);
+        const ACES2::ResolvedSharedCompressionParameters rp = resolve_CompressionParams(normalised_hue, m_s);
+        const ACES2::f3 JMh = ACES2::tonescale_chroma_compress_inv({in[0], in[1],  normalised_hue}, m_p, m_t, rp, m_c);
 
         out[0] = JMh[0];
         out[1] = JMh[1];
-        out[2] = JMh[2];
+        out[2] = ACES2::to_degrees(JMh[2]);
         out[3] = in[3];
 
         in  += 4;
@@ -1279,12 +1282,13 @@ void Renderer_ACES_GAMUT_COMPRESS_20::fwd(const void * inImg, void * outImg, lon
 
     for(long idx=0; idx<numPixels; ++idx)
     {
-        const ACES2::ResolvedSharedCompressionParameters rp = resolve_CompressionParams(in[2], m_s);
-        const ACES2::f3 JMh = ACES2::gamut_compress_fwd({in[0], in[1], in[2]}, rp, m_g);
+        const float normalised_hue = ACES2::from_degrees(in[2]);
+        const ACES2::ResolvedSharedCompressionParameters rp = resolve_CompressionParams(normalised_hue, m_s);
+        const ACES2::f3 JMh = ACES2::gamut_compress_fwd({in[0], in[1], normalised_hue}, rp, m_g);
 
         out[0] = JMh[0];
         out[1] = JMh[1];
-        out[2] = JMh[2];
+        out[2] = ACES2::to_degrees(JMh[2]);
         out[3] = in[3];
 
         in  += 4;
@@ -1299,12 +1303,13 @@ void Renderer_ACES_GAMUT_COMPRESS_20::inv(const void * inImg, void * outImg, lon
 
     for(long idx=0; idx<numPixels; ++idx)
     {
-        const ACES2::ResolvedSharedCompressionParameters rp = resolve_CompressionParams(in[2], m_s);
-        const ACES2::f3 JMh = ACES2::gamut_compress_inv({in[0], in[1], in[2]}, rp, m_g);
+        const float normalised_hue = ACES2::from_degrees(in[2]);
+        const ACES2::ResolvedSharedCompressionParameters rp = resolve_CompressionParams(normalised_hue, m_s);
+        const ACES2::f3 JMh = ACES2::gamut_compress_inv({in[0], in[1], normalised_hue}, rp, m_g);
 
         out[0] = JMh[0];
         out[1] = JMh[1];
-        out[2] = JMh[2];
+        out[2] = ACES2::to_degrees(JMh[2]);
         out[3] = in[3];
 
         in  += 4;

From 07cab5d027f9b1a323df05f49387e94ae95d1f63 Mon Sep 17 00:00:00 2001
From: Kevin Wheatley <kevin.wheatley@framestore.com>
Date: Thu, 9 Jan 2025 19:25:44 +0000
Subject: [PATCH 23/70] Store gamma values as reciprocals, move more magic
 constants into const variables, and factor some of the complex expressions
 into functions (temporarily makes things slower)

Signed-off-by: Kevin Wheatley <kevin.wheatley@framestore.com>
---
 .../ops/fixedfunction/ACES2/Common.h          | 12 ++-
 .../ops/fixedfunction/ACES2/Transform.cpp     | 83 ++++++++-----------
 .../ops/fixedfunction/FixedFunctionOpGPU.cpp  | 38 ++++-----
 3 files changed, 63 insertions(+), 70 deletions(-)

diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h b/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
index 3757aa229d..837bdd9438 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
@@ -85,8 +85,8 @@ struct ChromaCompressParams
 
 struct HueDependantGamutParams
 {
-    float gamma_top;
-    float gamma_bottom;
+    float gamma_top_inv;
+    float gamma_bottom_inv;
     f2 JMcusp;
     float focusJ;
 };
@@ -94,9 +94,9 @@ struct GamutCompressParams
 {
     float mid_J;
     float focus_dist;
-    float lower_hull_gamma;
+    float lower_hull_gamma_inv;
     Table3D gamut_cusp_table;
-    Table1D upper_hull_gamma_table;
+    Table1D upper_hull_gamma_inv_table;
 };
 
 // CAM
@@ -104,7 +104,11 @@ constexpr float reference_luminance = 100.f;
 constexpr float L_A = 100.f;
 constexpr float Y_b = 20.f;
 constexpr f3 surround = {0.9f, 0.59f, 0.9f}; // Dim surround
+
 constexpr float J_scale = 100.0f;
+constexpr float cam_nl_Y_reference = 100.0f;
+constexpr float cam_nl_offset = 0.2713f * cam_nl_Y_reference;
+constexpr float cam_nl_scale = 4.0f * cam_nl_Y_reference;
 constexpr float PI = 3.14159265358979f;
 
 constexpr float hue_limit = 360.0f;
diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
index 574b343955..23b885b54e 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
@@ -30,7 +30,7 @@ inline int next_position_in_table(int entry, int table_size)
     return (entry + 1) % table_size;
 }
 
-inline int clamp_to_table_bounds(int entry, int table_size)
+inline int clamp_to_table_bounds(int entry, int table_size) // TODO: this should be removed if we can constrain the hue range properly
 {
     return std::min(table_size - 1, std::max(0, entry));
 }
@@ -78,7 +78,7 @@ f2 cusp_from_table(float h, const Table3D &gt)
 
 float reach_m_from_table(float h, const ACES2::Table1D &gt)
 {
-    const int i_lo = clamp_to_table_bounds(hue_position_in_uniform_table(h, gt.size), gt.total_size);
+    const int i_lo = clamp_to_table_bounds(hue_position_in_uniform_table(h, gt.size), gt.total_size);  // TODO: this should be removed if we can constrain the hue range properly
     const int i_hi = next_position_in_table(i_lo, gt.size);
 
     const float t = (h - i_lo) / (i_hi - i_lo);
@@ -87,7 +87,7 @@ float reach_m_from_table(float h, const ACES2::Table1D &gt)
 
 float hue_dependent_upper_hull_gamma(float h, const ACES2::Table1D &gt)
 {
-    const int i_lo = clamp_to_table_bounds(hue_position_in_uniform_table(h, gt.size) + gt.base_index, gt.total_size);
+    const int i_lo = clamp_to_table_bounds(hue_position_in_uniform_table(h, gt.size) + gt.base_index, gt.total_size);  // TODO: this should be removed if we can constrain the hue range properly
     const int i_hi = next_position_in_table(i_lo, gt.size);
 
     const float base_hue = (float) (i_lo - gt.base_index);
@@ -104,13 +104,13 @@ float hue_dependent_upper_hull_gamma(float h, const ACES2::Table1D &gt)
 inline float _post_adaptation_cone_response_compression_fwd(float Rc, const float F_L_n)
 {
     const float F_L_Y = powf(Rc * F_L_n, 0.42f);
-    const float Ra    = (400.f * F_L_Y) / (27.13f + F_L_Y);
+    const float Ra    = (cam_nl_scale * F_L_Y) / (cam_nl_offset + F_L_Y);
     return Ra;
 }
 
 inline float _post_adaptation_cone_response_compression_inv(float Ra, const float F_L_n)
 {
-    const float F_L_Y = (27.13f * Ra) / (400.f - Ra); // TODO: what happens when Ra >= 400.0f
+    const float F_L_Y = (cam_nl_offset * Ra) / (cam_nl_scale - Ra); // TODO: what happens when Ra >= cam_nl_scale (400.0f)
     const float Rc    = powf(F_L_Y, 1.f / 0.42f) / F_L_n;
     return Rc;
 }
@@ -257,7 +257,7 @@ inline float chroma_compress_norm(float h, float chroma_compress_scale)
     return M * chroma_compress_scale;
 }
 
-float toe_fwd( float x, float limit, float k1_in, float k2_in)
+inline float toe_fwd( float x, float limit, float k1_in, float k2_in)
 {
     if (x > limit)
     {
@@ -273,7 +273,7 @@ float toe_fwd( float x, float limit, float k1_in, float k2_in)
     return 0.5f * (minus_b + sqrt(minus_b * minus_b + 4.f * minus_ac)); // a is 1.0, mins_b squared == b^2
 }
 
-float toe_inv( float x, float limit, float k1_in, float k2_in)
+inline float toe_inv( float x, float limit, float k1_in, float k2_in)
 {
     if (x > limit)
     {
@@ -643,7 +643,7 @@ inline float estimate_line_and_boundary_intersection_M(const float J_axis_inters
     //return shifted_intersection * M_max / (J_max - slope * M_max);
 }
 
-f2 find_gamut_boundary_intersection(const f2 &JM_cusp_in, float J_focus, float J_max, float slope_gain, float gamma_top, float gamma_bottom, const float J_intersect_source)
+f2 find_gamut_boundary_intersection(const f2 &JM_cusp_in, float J_focus, float J_max, float slope_gain, float gamma_top_inv, float gamma_bottom_inv, const float J_intersect_source)
 {
     const float s = std::max(0.000001f, smooth_cusps); // TODO: pre smooth the cusp
     const f2 JM_cusp = {
@@ -652,19 +652,19 @@ f2 find_gamut_boundary_intersection(const f2 &JM_cusp_in, float J_focus, float J
     };
 
     const float J_intersect_cusp = solve_J_intersect(JM_cusp[0], JM_cusp[1], J_focus, J_max, slope_gain);
+    const float slope = compute_compression_vector_slope(J_intersect_source, J_focus, J_max, slope_gain);
 
-    float slope = 0.f;
-    if (J_intersect_source < J_focus)
-    {
-        slope = J_intersect_source * (J_intersect_source - J_focus) / (J_focus * slope_gain);
-    }
-    else
-    {
-        slope = (J_max - J_intersect_source) * (J_intersect_source - J_focus) / (J_focus * slope_gain);
-    }
+    const float M_boundary_lower = estimate_line_and_boundary_intersection_M(J_intersect_source, slope, gamma_bottom_inv, JM_cusp[0], JM_cusp[1], J_intersect_cusp);
+
+    // The upper hull is flipped and thus 'zeroed' at J_max
+    // Also note we negate the slope
+    const float f_J_intersect_cusp   = J_max - J_intersect_cusp;
+    const float f_J_intersect_source = J_max - J_intersect_source;
+    const float f_JM_cusp_J          = J_max - JM_cusp[0];
+    const float M_boundary_upper =
+      estimate_line_and_boundary_intersection_M(f_J_intersect_source, -slope, gamma_top_inv, f_JM_cusp_J, JM_cusp[1], f_J_intersect_cusp);
 
-    const float M_boundary_lower = J_intersect_cusp * powf(J_intersect_source / J_intersect_cusp, 1.f / gamma_bottom) / (JM_cusp[0] / JM_cusp[1] - slope);
-    const float M_boundary_upper = JM_cusp[1] * (J_max - J_intersect_cusp) * powf((J_max - J_intersect_source) / (J_max - J_intersect_cusp), 1.f / gamma_top) / (slope * JM_cusp[1] + J_max - JM_cusp[0]);
+    // Smooth minimum between the two calculated values for the M component
     const float M_boundary = JM_cusp[1] * smin(M_boundary_lower / JM_cusp[1], M_boundary_upper / JM_cusp[1], s);
     const float J_boundary = J_intersect_source + slope * M_boundary; // TODO don't recalculate this
 
@@ -682,20 +682,9 @@ f3 get_reach_boundary(
 )
 {
     const float slope_gain = sr.limit_J_max * focus_dist * get_focus_gain(J, JMcusp[0], sr.limit_J_max);
-
     const float intersectJ = solve_J_intersect(J, M, focusJ, sr.limit_J_max, slope_gain);
-
-    float slope;
-    if (intersectJ < focusJ)
-    {
-        slope = intersectJ * (intersectJ - focusJ) / (focusJ * slope_gain);
-    }
-    else
-    {
-        slope = (sr.limit_J_max - intersectJ) * (intersectJ - focusJ) / (focusJ * slope_gain);
-    }
-
-    const float boundary = sr.limit_J_max * powf(intersectJ / sr.limit_J_max, sr.model_gamma_inv) * sr.reachMaxM / (sr.limit_J_max - slope * sr.reachMaxM);
+    const float slope = compute_compression_vector_slope(intersectJ, focusJ, sr.limit_J_max, slope_gain);
+    const float boundary = estimate_line_and_boundary_intersection_M(intersectJ, slope, sr.model_gamma_inv, sr.limit_J_max, sr.reachMaxM, sr.limit_J_max);
     return {J, boundary, h};
 }
 
@@ -744,7 +733,7 @@ f3 compressGamut(const f3 &JMh, float Jx, const ACES2::ResolvedSharedCompression
 
         const float J_intersect_source = solve_J_intersect(J, M, hdp.focusJ, sr.limit_J_max, slope_gain);
 
-        const f2 boundaryReturn = find_gamut_boundary_intersection(hdp.JMcusp, hdp.focusJ, sr.limit_J_max, slope_gain, hdp.gamma_top, hdp.gamma_bottom, J_intersect_source);
+        const f2 boundaryReturn = find_gamut_boundary_intersection(hdp.JMcusp, hdp.focusJ, sr.limit_J_max, slope_gain, hdp.gamma_top_inv, hdp.gamma_bottom_inv, J_intersect_source);
         const f2 JMboundary = {boundaryReturn[0], boundaryReturn[1]};
         const f2 project_to = {J_intersect_source, 0.f};
 
@@ -778,8 +767,8 @@ inline float compute_focusJ(float cusp_J, float mid_J, float limit_J_max)
 f3 gamut_compress_fwd(const f3 &JMh, const ResolvedSharedCompressionParameters &sr, const GamutCompressParams &p)
 {
     HueDependantGamutParams hdp;
-    hdp.gamma_top = hue_dependent_upper_hull_gamma(JMh[2], p.upper_hull_gamma_table);
-    hdp.gamma_bottom = p.lower_hull_gamma;
+    hdp.gamma_top_inv = hue_dependent_upper_hull_gamma(JMh[2], p.upper_hull_gamma_inv_table);
+    hdp.gamma_bottom_inv = p.lower_hull_gamma_inv;
     hdp.JMcusp = cusp_from_table(JMh[2], p.gamut_cusp_table);
     hdp.focusJ = compute_focusJ(hdp.JMcusp[0], p.mid_J, sr.limit_J_max);
     
@@ -789,8 +778,8 @@ f3 gamut_compress_fwd(const f3 &JMh, const ResolvedSharedCompressionParameters &
 f3 gamut_compress_inv(const f3 &JMh, const ResolvedSharedCompressionParameters &sr, const GamutCompressParams &p)
 {
     HueDependantGamutParams hdp;
-    hdp.gamma_top = hue_dependent_upper_hull_gamma(JMh[2], p.upper_hull_gamma_table);
-    hdp.gamma_bottom = p.lower_hull_gamma;
+    hdp.gamma_top_inv = hue_dependent_upper_hull_gamma(JMh[2], p.upper_hull_gamma_inv_table);
+    hdp.gamma_bottom_inv = p.lower_hull_gamma_inv;
     hdp.JMcusp = cusp_from_table(JMh[2], p.gamut_cusp_table);
     hdp.focusJ = compute_focusJ(hdp.JMcusp[0], p.mid_J, sr.limit_J_max);
 
@@ -808,12 +797,12 @@ f3 gamut_compress_inv(const f3 &JMh, const ResolvedSharedCompressionParameters &
 bool evaluate_gamma_fit(
     const f2 &JMcusp,
     const f3 testJMh[3],
-    float topGamma,
+    float topGamma_inv,
     float peakLuminance,
     float limit_J_max,
     float mid_J,
     float focus_dist,
-    float lower_hull_gamma,
+    float lower_hull_gamma_inv,
     const JMhParams &limitJMhParams)
 {
     const float focusJ = compute_focusJ(JMcusp[0], mid_J, limit_J_max);
@@ -822,7 +811,7 @@ bool evaluate_gamma_fit(
     {
         const float slope_gain = limit_J_max * focus_dist * get_focus_gain(testJMh[testIndex][0], JMcusp[0], limit_J_max);
         const float J_intersect_source = solve_J_intersect(testJMh[testIndex][0], testJMh[testIndex][1], focusJ, limit_J_max, slope_gain);
-        const f2 approxLimit = find_gamut_boundary_intersection(JMcusp, focusJ, limit_J_max, slope_gain, topGamma, lower_hull_gamma, J_intersect_source);
+        const f2 approxLimit = find_gamut_boundary_intersection(JMcusp, focusJ, limit_J_max, slope_gain, topGamma_inv, lower_hull_gamma_inv, J_intersect_source);
         const f3 approximate_JMh = {approxLimit[0], approxLimit[1], testJMh[testIndex][2]};
         const f3 newLimitRGB = JMh_to_RGB(approximate_JMh, limitJMhParams);
         const f3 newLimitRGBScaled = mult_f_f3(reference_luminance / peakLuminance, newLimitRGB);
@@ -842,7 +831,7 @@ Table1D make_upper_hull_gamma(
     float limit_J_max,
     float mid_J,
     float focus_dist,
-    float lower_hull_gamma,
+    float lower_hull_gamma_inv,
     const JMhParams &limitJMhParams)
 {
     const int test_count = 3;
@@ -876,7 +865,7 @@ Table1D make_upper_hull_gamma(
 
         while (!(outside) && (high < 5.f))
         {
-            const bool gammaFound = evaluate_gamma_fit(JMcusp, testJMh, high, peakLuminance, limit_J_max, mid_J, focus_dist, lower_hull_gamma, limitJMhParams);
+            const bool gammaFound = evaluate_gamma_fit(JMcusp, testJMh, 1.0f / high, peakLuminance, limit_J_max, mid_J, focus_dist, lower_hull_gamma_inv, limitJMhParams);
             if (!gammaFound)
             {
                 low = high;
@@ -892,11 +881,11 @@ Table1D make_upper_hull_gamma(
         while ( (high-low) > gammaAccuracy)
         {
             testGamma = (high + low) / 2.f;
-            const bool gammaFound = evaluate_gamma_fit(JMcusp, testJMh, testGamma, peakLuminance, limit_J_max, mid_J, focus_dist, lower_hull_gamma, limitJMhParams);
+            const bool gammaFound = evaluate_gamma_fit(JMcusp, testJMh, 1.0f / testGamma, peakLuminance, limit_J_max, mid_J, focus_dist, lower_hull_gamma_inv, limitJMhParams);
             if (gammaFound)
             {
                 high = testGamma;
-                gammaTable.table[i] = high;
+                gammaTable.table[i] = 1.0f / high;
             }
             else
             {
@@ -1011,15 +1000,15 @@ GamutCompressParams init_GamutCompressParams(float peakLuminance, const JMhParam
 
     // Calculated chroma compress variables
     const float focus_dist = focus_distance + focus_distance * focus_distance_scaling * tsParams.log_peak;
-    const float lower_hull_gamma =  1.14f + 0.07f * tsParams.log_peak;
+    const float lower_hull_gamma_inv =  1.0f / (1.14f + 0.07f * tsParams.log_peak);
 
     GamutCompressParams params = {
         mid_J,
         focus_dist,
-        lower_hull_gamma,
+        lower_hull_gamma_inv,
         make_gamut_table(limitJMhParams, peakLuminance),
         make_upper_hull_gamma(params.gamut_cusp_table, peakLuminance, shParams.limit_J_max,
-                              mid_J, focus_dist, lower_hull_gamma, limitJMhParams)
+                              mid_J, focus_dist, lower_hull_gamma_inv, limitJMhParams) //TODO
     };
     return params;
 }
diff --git a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
index 76937c4de2..c0c63a0ae5 100644
--- a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
@@ -359,7 +359,7 @@ void _Add_RGB_to_JMh_Shader(
     ss.newLine() << ss.float3Decl("lms") << " = " << ss.mat3fMul(&p.MATRIX_RGB_to_CAM16_c[0], pxl + ".rgb") << ";";
 
     ss.newLine() << ss.float3Decl("F_L_v") << " = pow(" << p.F_L_n << " * abs(lms), " << ss.float3Const(0.42f) << ");";
-    ss.newLine() << ss.float3Decl("rgb_a") << " = (400.0 * sign(lms) * F_L_v) / (27.13 + F_L_v);";
+    ss.newLine() << ss.float3Decl("rgb_a") << " = ( " << ACES2::cam_nl_scale << " * sign(lms) * F_L_v) / ( " << ACES2::cam_nl_offset << " + F_L_v);";
 
     ss.newLine() << ss.float3Decl("Aab") << " = " << ss.mat3fMul(&p.MATRIX_cone_response_to_Aab[0], "rgb_a.rgb") << ";";
 
@@ -390,7 +390,7 @@ void _Add_JMh_to_RGB_Shader(
 
     ss.newLine() << ss.float3Decl("rgb_a") << " = " << ss.mat3fMul(&p.MATRIX_Aab_to_cone_response[0], "Aab.rgb") << ";";
 
-    ss.newLine() << ss.float3Decl("lms") << " = sign(rgb_a) * pow(27.13 * abs(rgb_a) / (400.0 - abs(rgb_a)), " << ss.float3Const(1.f / 0.42f) << ")/ " << p.F_L_n << ";";
+    ss.newLine() << ss.float3Decl("lms") << " = sign(rgb_a) * pow( " << ACES2::cam_nl_offset << " * abs(rgb_a) / ( " << ACES2::cam_nl_scale << " - abs(rgb_a)), " << ss.float3Const(1.f / 0.42f) << ")/ " << p.F_L_n << ";";
 
     ss.newLine() << pxl << ".rgb = " << ss.mat3fMul(&p.MATRIX_CAM16_c_to_RGB[0], "lms") << ";";
 }
@@ -545,13 +545,13 @@ void _Add_Tonescale_Compress_Fwd_Shader(
 
     // Tonescale applied in Y (convert to and from J)
     ss.newLine() << ss.floatDecl("A") << " = " << p.A_w_J << " * pow(abs(J) / 100.0, 1.0 / (" << p.cz << "));";
-    ss.newLine() << ss.floatDecl("Y") << " = sign(J) * pow((27.13 * A) / (400.0 - A), 1.0 / 0.42) / " << p.F_L_n << ";";
+    ss.newLine() << ss.floatDecl("Y") << " = sign(J) * pow(( " << ACES2::cam_nl_offset << " * A) / ( " << ACES2::cam_nl_scale << " - A), 1.0 / 0.42) / " << p.F_L_n << ";";
 
     ss.newLine() << ss.floatDecl("f") << " = " << t.m_2  << " * pow(max(0.0, Y) / (Y + " << t.s_2 << "), " << t.g << ");";
     ss.newLine() << ss.floatDecl("Y_ts") << " = max(0.0, f * f / (f + " << t.t_1 << ")) * " << t.n_r << ";";
 
     ss.newLine() << ss.floatDecl("F_L_Y") << " = pow(" << p.F_L_n << " * abs(Y_ts), 0.42);";
-    ss.newLine() << ss.floatDecl("J_ts") << " = sign(Y_ts) * 100.0 * pow(((400.0 * F_L_Y) / (27.13 + F_L_Y)) / " << p.A_w_J << ", " << p.cz << ");";
+    ss.newLine() << ss.floatDecl("J_ts") << " = sign(Y_ts) * 100.0 * pow((( " << ACES2::cam_nl_scale << " * F_L_Y) / ( " << ACES2::cam_nl_offset << " + F_L_Y)) / " << p.A_w_J << ", " << p.cz << ");";
 
     // ChromaCompress
     ss.newLine() << ss.floatDecl("M_cp") << " = M;";
@@ -617,14 +617,14 @@ void _Add_Tonescale_Compress_Inv_Shader(
 
     // Inverse Tonescale applied in Y (convert to and from J)
     ss.newLine() << ss.floatDecl("A") << " = " << p.A_w_J << " * pow(abs(J_ts) / 100.0, 1.0 / (" << p.cz << "));";
-    ss.newLine() << ss.floatDecl("Y_ts") << " = sign(J_ts) * pow((27.13 * A) / (400.0 - A), 1.0 / 0.42) / " << p.F_L_n << " / 100.0;";
+    ss.newLine() << ss.floatDecl("Y_ts") << " = sign(J_ts) * pow(( " << ACES2::cam_nl_offset << " * A) / ( " << ACES2::cam_nl_scale << " - A), 1.0 / 0.42) / " << p.F_L_n << " / 100.0;";
 
     ss.newLine() << ss.floatDecl("Z") << " = max(0.0, min(" << t.n << " / (" << t.u_2 * t.n_r << "), Y_ts));";
     ss.newLine() << ss.floatDecl("ht") << " = (Z + sqrt(Z * (4.0 * " << t.t_1 << " + Z))) / 2.0;";
     ss.newLine() << ss.floatDecl("Y") << " = " << t.s_2 << " / (pow((" << t.m_2 << " / ht), (1.0 / " << t.g << ")) - 1.0);";
 
     ss.newLine() << ss.floatDecl("F_L_Y") << " = pow(" << p.F_L_n << " * abs(Y), 0.42);";
-    ss.newLine() << ss.floatDecl("J") << " = sign(Y) * 100.0 * pow(((400.0 * F_L_Y) / (27.13 + F_L_Y)) / " << p.A_w_J << ", " << p.cz << ");";
+    ss.newLine() << ss.floatDecl("J") << " = sign(Y) * 100.0 * pow((( " << ACES2::cam_nl_scale << " * F_L_Y) / ( " << ACES2::cam_nl_offset << " + F_L_Y)) / " << p.A_w_J << ", " << p.cz << ");";
 
     // ChromaCompress
     ss.newLine() << ss.floatDecl("M") << " = M_cp;";
@@ -822,7 +822,7 @@ std::string _Add_Gamma_table(
         GpuShaderCreator::TEXTURE_RED_CHANNEL,
         dimensions,
         INTERP_NEAREST,
-        &(g.upper_hull_gamma_table.table[0]));
+        &(g.upper_hull_gamma_inv_table.table[0]));
 
 
     if (dimensions == GpuShaderDesc::TEXTURE_1D)
@@ -849,21 +849,21 @@ std::string _Add_Gamma_table(
     ss.newLine() << "hwrap = hwrap - floor(hwrap / 360.0) * 360.0;";
     ss.newLine() << "hwrap = (hwrap < 0.0) ? hwrap + 360.0 : hwrap;";
 
-    ss.newLine() << ss.floatDecl("i_lo") << " = floor(hwrap) + " << g.upper_hull_gamma_table.base_index << ";";
+    ss.newLine() << ss.floatDecl("i_lo") << " = floor(hwrap) + " << g.upper_hull_gamma_inv_table.base_index << ";";
     ss.newLine() << ss.floatDecl("i_hi") << " = (i_lo + 1);";
     ss.newLine() << "i_hi = i_hi - floor(i_hi / 360.0) * 360.0;";
 
-    ss.newLine() << ss.floatDecl("base_hue") << " = i_lo - " << g.upper_hull_gamma_table.base_index << ";";
+    ss.newLine() << ss.floatDecl("base_hue") << " = i_lo - " << g.upper_hull_gamma_inv_table.base_index << ";";
 
     if (dimensions == GpuShaderDesc::TEXTURE_1D)
     {
-        ss.newLine() << ss.floatDecl("lo") << " = " << ss.sampleTex1D(name, std::string("(i_lo + 0.5) / ") + std::to_string(g.upper_hull_gamma_table.total_size)) << ".r;";
-        ss.newLine() << ss.floatDecl("hi") << " = " << ss.sampleTex1D(name, std::string("(i_hi + 0.5) / ") + std::to_string(g.upper_hull_gamma_table.total_size)) << ".r;";
+        ss.newLine() << ss.floatDecl("lo") << " = " << ss.sampleTex1D(name, std::string("(i_lo + 0.5) / ") + std::to_string(g.upper_hull_gamma_inv_table.total_size)) << ".r;";
+        ss.newLine() << ss.floatDecl("hi") << " = " << ss.sampleTex1D(name, std::string("(i_hi + 0.5) / ") + std::to_string(g.upper_hull_gamma_inv_table.total_size)) << ".r;";
     }
     else
     {
-        ss.newLine() << ss.floatDecl("lo") << " = " << ss.sampleTex2D(name, ss.float2Const(std::string("(i_lo + 0.5) / ") + std::to_string(g.upper_hull_gamma_table.total_size), "0.5")) << ".r;";
-        ss.newLine() << ss.floatDecl("hi") << " = " << ss.sampleTex2D(name, ss.float2Const(std::string("(i_hi + 0.5) / ") + std::to_string(g.upper_hull_gamma_table.total_size), "0.5")) << ".r;";
+        ss.newLine() << ss.floatDecl("lo") << " = " << ss.sampleTex2D(name, ss.float2Const(std::string("(i_lo + 0.5) / ") + std::to_string(g.upper_hull_gamma_inv_table.total_size), "0.5")) << ".r;";
+        ss.newLine() << ss.floatDecl("hi") << " = " << ss.sampleTex2D(name, ss.float2Const(std::string("(i_hi + 0.5) / ") + std::to_string(g.upper_hull_gamma_inv_table.total_size), "0.5")) << ".r;";
     }
 
     ss.newLine() << ss.floatDecl("t") << " = hwrap - base_hue;";
@@ -1023,7 +1023,7 @@ std::string _Add_Find_Gamut_Boundary_Intersection_func(
 
     GpuShaderText ss(shaderCreator->getLanguage());
 
-    ss.newLine() << ss.float3Keyword() << " " << name << "(" << ss.float3Keyword() << " JMh_s, " << ss.float2Keyword() << " JM_cusp_in, float J_focus, float slope_gain, float gamma_top, float gamma_bottom)";
+    ss.newLine() << ss.float3Keyword() << " " << name << "(" << ss.float3Keyword() << " JMh_s, " << ss.float2Keyword() << " JM_cusp_in, float J_focus, float slope_gain, float gamma_top_inv, float gamma_bottom_inv)";
     ss.newLine() << "{";
     ss.indent();
 
@@ -1046,8 +1046,8 @@ std::string _Add_Find_Gamut_Boundary_Intersection_func(
     ss.dedent();
     ss.newLine() << "}";
 
-    ss.newLine() << ss.floatDecl("M_boundary_lower ") << " = J_intersect_cusp * pow(J_intersect_source / J_intersect_cusp, 1.0 / gamma_bottom) / (JM_cusp.r / JM_cusp.g - slope);";
-    ss.newLine() << ss.floatDecl("M_boundary_upper") << " = JM_cusp.g * (" << s.limit_J_max << " - J_intersect_cusp) * pow((" << s.limit_J_max << " - J_intersect_source) / (" << s.limit_J_max << " - J_intersect_cusp), 1.0 / gamma_top) / (slope * JM_cusp.g + " << s.limit_J_max << " - JM_cusp.r);";
+    ss.newLine() << ss.floatDecl("M_boundary_lower ") << " = J_intersect_cusp * pow(J_intersect_source / J_intersect_cusp, gamma_bottom_inv) / (JM_cusp.r / JM_cusp.g - slope);";
+    ss.newLine() << ss.floatDecl("M_boundary_upper") << " = JM_cusp.g * (" << s.limit_J_max << " - J_intersect_cusp) * pow((" << s.limit_J_max << " - J_intersect_source) / (" << s.limit_J_max << " - J_intersect_cusp), gamma_top_inv) / (slope * JM_cusp.g + " << s.limit_J_max << " - JM_cusp.r);";
 
     ss.newLine() << ss.floatDecl("smin") << " = 0.0;";
     ss.newLine() << "{";
@@ -1250,10 +1250,10 @@ std::string _Add_Compress_Gamut_func(
     ss.newLine() << ss.floatDecl("focusJ") << " = " << ss.lerp("JMcusp.r", std::to_string(g.mid_J), std::string("min(1.0, ") + std::to_string(ACES2::cusp_mid_blend) + " - (JMcusp.r / " + std::to_string(s.limit_J_max)) << "));";
     ss.newLine() << ss.floatDecl("slope_gain") << " = " << s.limit_J_max << " * " << g.focus_dist << " * " << getFocusGainName << "(Jx, JMcusp.r);";
 
-    ss.newLine() << ss.floatDecl("gamma_top") << " = " << gammaName << "_sample(h);";
-    ss.newLine() << ss.floatDecl("gamma_bottom") << " = " << g.lower_hull_gamma << ";";
+    ss.newLine() << ss.floatDecl("gamma_top_inv") << " = " << gammaName << "_sample(h);";
+    ss.newLine() << ss.floatDecl("gamma_bottom_inv") << " = " << g.lower_hull_gamma_inv << ";";
 
-    ss.newLine() << ss.float3Decl("boundaryReturn") << " = " << findGamutBoundaryIntersectionName << "(" << ss.float3Const("J", "M", "h") << ", JMcusp, focusJ, slope_gain, gamma_top, gamma_bottom);";
+    ss.newLine() << ss.float3Decl("boundaryReturn") << " = " << findGamutBoundaryIntersectionName << "(" << ss.float3Const("J", "M", "h") << ", JMcusp, focusJ, slope_gain, gamma_top_inv, gamma_bottom_inv);";
     ss.newLine() << ss.float2Decl("JMboundary") << " = " << ss.float2Const("boundaryReturn.r", "boundaryReturn.g") << ";";
     ss.newLine() << ss.float2Decl("project_to") << " = " << ss.float2Const("boundaryReturn.b", "0.0") << ";";
 

From a6c7800be384e5e74389615260a45445f48713a5 Mon Sep 17 00:00:00 2001
From: Kevin Wheatley <kevin.wheatley@framestore.com>
Date: Mon, 13 Jan 2025 15:08:16 +0000
Subject: [PATCH 24/70] Add some missing includes to headers

Signed-off-by: Kevin Wheatley <kevin.wheatley@framestore.com>
---
 src/OpenColorIO/ops/fixedfunction/ACES2/ColorLib.h  | 1 +
 src/OpenColorIO/ops/fixedfunction/ACES2/Common.h    | 1 +
 src/OpenColorIO/ops/fixedfunction/ACES2/MatrixLib.h | 1 +
 3 files changed, 3 insertions(+)

diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/ColorLib.h b/src/OpenColorIO/ops/fixedfunction/ACES2/ColorLib.h
index f10ddf3d56..17d5022ee5 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/ColorLib.h
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/ColorLib.h
@@ -7,6 +7,7 @@
 #include "transforms/builtins/ColorMatrixHelpers.h"
 #include "MatrixLib.h"
 
+#include <cmath>
 
 namespace OCIO_NAMESPACE
 {
diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h b/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
index 837bdd9438..0246a256b7 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
@@ -7,6 +7,7 @@
 #include "MatrixLib.h"
 #include "ColorLib.h"
 
+#include <cmath>
 
 namespace OCIO_NAMESPACE
 {
diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/MatrixLib.h b/src/OpenColorIO/ops/fixedfunction/ACES2/MatrixLib.h
index 27a24df0c9..d552742ccb 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/MatrixLib.h
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/MatrixLib.h
@@ -6,6 +6,7 @@
 
 #include "ops/matrix/MatrixOpData.h"
 
+#include <array>
 
 namespace OCIO_NAMESPACE
 {

From e567701f8c6e52dadddc94ae50fcee720ea5307f Mon Sep 17 00:00:00 2001
From: Kevin Wheatley <kevin.wheatley@framestore.com>
Date: Mon, 13 Jan 2025 15:26:23 +0000
Subject: [PATCH 25/70] Minor cleanup to use std::array instead of plain array
 for test samples

Signed-off-by: Kevin Wheatley <kevin.wheatley@framestore.com>
---
 .../ops/fixedfunction/ACES2/Transform.cpp     | 22 +++++++++++--------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
index 23b885b54e..3f1be3bae7 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
@@ -3,6 +3,9 @@
 
 #include "Transform.h"
 
+#include <array>
+#include <algorithm>
+#include <cmath>
 
 namespace OCIO_NAMESPACE
 {
@@ -510,7 +513,7 @@ Table1D make_reach_m_table(const JMhParams &params, float peakLuminance)
         const float hue = (float) i;
 
         const float search_range = 50.f;
-        float low = 0.;
+        float low = 0.f;
         float high = low + search_range;
         bool outside = false;
 
@@ -796,7 +799,7 @@ f3 gamut_compress_inv(const f3 &JMh, const ResolvedSharedCompressionParameters &
 
 bool evaluate_gamma_fit(
     const f2 &JMcusp,
-    const f3 testJMh[3],
+    const std::array<f3, 3> JMh_values,
     float topGamma_inv,
     float peakLuminance,
     float limit_J_max,
@@ -807,12 +810,13 @@ bool evaluate_gamma_fit(
 {
     const float focusJ = compute_focusJ(JMcusp[0], mid_J, limit_J_max);
 
-    for (size_t testIndex = 0; testIndex < 3; testIndex++)
+    for (auto testJMh: JMh_values)
     {
-        const float slope_gain = limit_J_max * focus_dist * get_focus_gain(testJMh[testIndex][0], JMcusp[0], limit_J_max);
-        const float J_intersect_source = solve_J_intersect(testJMh[testIndex][0], testJMh[testIndex][1], focusJ, limit_J_max, slope_gain);
+        const float slope_gain = limit_J_max * focus_dist * get_focus_gain(testJMh[0], JMcusp[0], limit_J_max);
+        const float J_intersect_source = solve_J_intersect(testJMh[0], testJMh[1], focusJ, limit_J_max, slope_gain);
         const f2 approxLimit = find_gamut_boundary_intersection(JMcusp, focusJ, limit_J_max, slope_gain, topGamma_inv, lower_hull_gamma_inv, J_intersect_source);
-        const f3 approximate_JMh = {approxLimit[0], approxLimit[1], testJMh[testIndex][2]};
+        const f3 approximate_JMh = {approxLimit[0], approxLimit[1], testJMh[2]};
+
         const f3 newLimitRGB = JMh_to_RGB(approximate_JMh, limitJMhParams);
         const f3 newLimitRGBScaled = mult_f_f3(reference_luminance / peakLuminance, newLimitRGB);
 
@@ -834,8 +838,8 @@ Table1D make_upper_hull_gamma(
     float lower_hull_gamma_inv,
     const JMhParams &limitJMhParams)
 {
-    const int test_count = 3;
-    const float testPositions[test_count] = {0.01f, 0.5f, 0.99f};
+    constexpr int test_count = 3;
+    const std::array<float, test_count> testPositions = {0.01f, 0.5f, 0.99f};
 
     Table1D gammaTable{};
     Table1D gamutTopGamma{};
@@ -847,7 +851,7 @@ Table1D make_upper_hull_gamma(
         const float hue = (float) i;
         const f2 JMcusp = cusp_from_table(hue, gamutCuspTable);
 
-        f3 testJMh[test_count]{};
+        std::array<f3, test_count> testJMh;
         for (int testIndex = 0; testIndex < test_count; testIndex++)
         {
             const float testJ = JMcusp[0] + ((limit_J_max - JMcusp[0]) * testPositions[testIndex]);

From 4a9e1f79dcd54e7fcf4e7e218a95468e4a83da06 Mon Sep 17 00:00:00 2001
From: Kevin Wheatley <kevin.wheatley@framestore.com>
Date: Fri, 10 Jan 2025 10:51:09 +0000
Subject: [PATCH 26/70] Inline reach boundary finding; restructure
 find_gamut_boundary_intersection to highlight common patterns.

Signed-off-by: Kevin Wheatley <kevin.wheatley@framestore.com>
---
 .../ops/fixedfunction/ACES2/Common.h          |  2 +-
 .../ops/fixedfunction/ACES2/Transform.cpp     | 51 +++++++------------
 2 files changed, 20 insertions(+), 33 deletions(-)

diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h b/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
index 0246a256b7..429676afdc 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
@@ -142,7 +142,7 @@ constexpr float chroma_expand_fact = 0.69f;
 constexpr float chroma_expand_thr = 0.5f;
 
 // Gamut compression
-constexpr float smooth_cusps = 0.12f;
+constexpr float smooth_cusps = std::max(0.000001f, 0.12f);
 constexpr float smooth_m = 0.27f;
 constexpr float cusp_mid_blend = 1.3f;
 constexpr float focus_gain_blend = 0.3f;
diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
index 3f1be3bae7..a3d7e718c7 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
@@ -646,16 +646,14 @@ inline float estimate_line_and_boundary_intersection_M(const float J_axis_inters
     //return shifted_intersection * M_max / (J_max - slope * M_max);
 }
 
-f2 find_gamut_boundary_intersection(const f2 &JM_cusp_in, float J_focus, float J_max, float slope_gain, float gamma_top_inv, float gamma_bottom_inv, const float J_intersect_source)
+f2 find_gamut_boundary_intersection(const f2 &JM_cusp_in, float J_focus, float J_max, float slope_gain, float gamma_top_inv, float gamma_bottom_inv, const float J_intersect_source, const float slope)
 {
-    const float s = std::max(0.000001f, smooth_cusps); // TODO: pre smooth the cusp
     const f2 JM_cusp = {
         JM_cusp_in[0],
-        JM_cusp_in[1] * (1.f + smooth_m * s)
+        JM_cusp_in[1] * (1.f + smooth_m * smooth_cusps)
     };
 
     const float J_intersect_cusp = solve_J_intersect(JM_cusp[0], JM_cusp[1], J_focus, J_max, slope_gain);
-    const float slope = compute_compression_vector_slope(J_intersect_source, J_focus, J_max, slope_gain);
 
     const float M_boundary_lower = estimate_line_and_boundary_intersection_M(J_intersect_source, slope, gamma_bottom_inv, JM_cusp[0], JM_cusp[1], J_intersect_cusp);
 
@@ -668,29 +666,12 @@ f2 find_gamut_boundary_intersection(const f2 &JM_cusp_in, float J_focus, float J
       estimate_line_and_boundary_intersection_M(f_J_intersect_source, -slope, gamma_top_inv, f_JM_cusp_J, JM_cusp[1], f_J_intersect_cusp);
 
     // Smooth minimum between the two calculated values for the M component
-    const float M_boundary = JM_cusp[1] * smin(M_boundary_lower / JM_cusp[1], M_boundary_upper / JM_cusp[1], s);
+    const float M_boundary = JM_cusp[1] * smin(M_boundary_lower / JM_cusp[1], M_boundary_upper / JM_cusp[1], smooth_cusps);
     const float J_boundary = J_intersect_source + slope * M_boundary; // TODO don't recalculate this
 
     return {J_boundary, M_boundary};
 }
 
-f3 get_reach_boundary(
-    float J,
-    float M,
-    float h,
-    const f2 &JMcusp,
-    float focusJ,
-    float focus_dist,
-    const ACES2::ResolvedSharedCompressionParameters& sr
-)
-{
-    const float slope_gain = sr.limit_J_max * focus_dist * get_focus_gain(J, JMcusp[0], sr.limit_J_max);
-    const float intersectJ = solve_J_intersect(J, M, focusJ, sr.limit_J_max, slope_gain);
-    const float slope = compute_compression_vector_slope(intersectJ, focusJ, sr.limit_J_max, slope_gain);
-    const float boundary = estimate_line_and_boundary_intersection_M(intersectJ, slope, sr.model_gamma_inv, sr.limit_J_max, sr.reachMaxM, sr.limit_J_max);
-    return {J, boundary, h};
-}
-
 float compression_function(
     float v,
     float thr,
@@ -731,12 +712,12 @@ f3 compressGamut(const f3 &JMh, float Jx, const ACES2::ResolvedSharedCompression
     }
     else
     {
-        const f2 project_from = {J, M};
         const float slope_gain = sr.limit_J_max * p.focus_dist * get_focus_gain(Jx, hdp.JMcusp[0], sr.limit_J_max);
-
         const float J_intersect_source = solve_J_intersect(J, M, hdp.focusJ, sr.limit_J_max, slope_gain);
+        const float gamut_slope = compute_compression_vector_slope(J_intersect_source, hdp.focusJ, sr.limit_J_max, slope_gain);
+        const f2 boundaryReturn = find_gamut_boundary_intersection(hdp.JMcusp, hdp.focusJ, sr.limit_J_max, slope_gain, hdp.gamma_top_inv, hdp.gamma_bottom_inv, J_intersect_source, gamut_slope);
 
-        const f2 boundaryReturn = find_gamut_boundary_intersection(hdp.JMcusp, hdp.focusJ, sr.limit_J_max, slope_gain, hdp.gamma_top_inv, hdp.gamma_bottom_inv, J_intersect_source);
+        const f2 project_from = {J, M};
         const f2 JMboundary = {boundaryReturn[0], boundaryReturn[1]};
         const f2 project_to = {J_intersect_source, 0.f};
 
@@ -745,20 +726,25 @@ f3 compressGamut(const f3 &JMh, float Jx, const ACES2::ResolvedSharedCompression
             return {J, 0.f, h};
         }
 
-        const f3 reachBoundary = get_reach_boundary(JMboundary[0], JMboundary[1], h, hdp.JMcusp, hdp.focusJ, p.focus_dist, sr); // TODO
+        const float reach_slope_gain = sr.limit_J_max * p.focus_dist * get_focus_gain(JMboundary[0], hdp.JMcusp[0], sr.limit_J_max);
+        const float reach_intersectJ = solve_J_intersect(JMboundary[0], JMboundary[1], hdp.focusJ, sr.limit_J_max, reach_slope_gain);
+        const float reach_slope      = compute_compression_vector_slope(reach_intersectJ, hdp.focusJ, sr.limit_J_max, reach_slope_gain);
+        const float reachBoundaryM   = estimate_line_and_boundary_intersection_M(reach_intersectJ, reach_slope, sr.model_gamma_inv, sr.limit_J_max, sr.reachMaxM, sr.limit_J_max);
+
 
-        const float difference = std::max(1.0001f, reachBoundary[1] / JMboundary[1]);
+        const float difference = std::max(1.0001f, reachBoundaryM / JMboundary[1]);
         const float threshold = std::max(compression_threshold, 1.f / difference);
 
         float v = project_from[1] / JMboundary[1];
         v = compression_function(v, threshold, difference, invert);
 
-        const f2 JMcompressed {
+        const f3 JMcompressed {
             project_to[0] + v * (JMboundary[0] - project_to[0]),
-            project_to[1] + v * (JMboundary[1] - project_to[1])
+            project_to[1] + v * (JMboundary[1] - project_to[1]),
+            h
         };
 
-        return {JMcompressed[0], JMcompressed[1], h};
+        return JMcompressed;
     }
 }
 
@@ -814,9 +800,10 @@ bool evaluate_gamma_fit(
     {
         const float slope_gain = limit_J_max * focus_dist * get_focus_gain(testJMh[0], JMcusp[0], limit_J_max);
         const float J_intersect_source = solve_J_intersect(testJMh[0], testJMh[1], focusJ, limit_J_max, slope_gain);
-        const f2 approxLimit = find_gamut_boundary_intersection(JMcusp, focusJ, limit_J_max, slope_gain, topGamma_inv, lower_hull_gamma_inv, J_intersect_source);
-        const f3 approximate_JMh = {approxLimit[0], approxLimit[1], testJMh[2]};
+        const float slope = compute_compression_vector_slope(J_intersect_source, focusJ, limit_J_max, slope_gain);
 
+        const f2 approxLimit = find_gamut_boundary_intersection(JMcusp, focusJ, limit_J_max, slope_gain, topGamma_inv, lower_hull_gamma_inv, J_intersect_source, slope);
+        const f3 approximate_JMh = {approxLimit[0], approxLimit[1], testJMh[2]};
         const f3 newLimitRGB = JMh_to_RGB(approximate_JMh, limitJMhParams);
         const f3 newLimitRGBScaled = mult_f_f3(reference_luminance / peakLuminance, newLimitRGB);
 

From b0f39629ee20e44267610e96e573af0d2e4b9d6a Mon Sep 17 00:00:00 2001
From: Kevin Wheatley <kevin.wheatley@framestore.com>
Date: Fri, 10 Jan 2025 16:33:04 +0000
Subject: [PATCH 27/70] Extract gamut mapper compression function, rework
 get_focus_gain to directly compute the slope_gain, share calculation of
 analytical threshold

Signed-off-by: Kevin Wheatley <kevin.wheatley@framestore.com>
---
 .../ops/fixedfunction/ACES2/Common.h          |   3 +-
 .../ops/fixedfunction/ACES2/Transform.cpp     | 171 ++++++++++--------
 .../ops/fixedfunction/FixedFunctionOpGPU.cpp  |   2 +-
 3 files changed, 98 insertions(+), 78 deletions(-)

diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h b/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
index 429676afdc..1209d98410 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
@@ -90,6 +90,7 @@ struct HueDependantGamutParams
     float gamma_bottom_inv;
     f2 JMcusp;
     float focusJ;
+    float analytical_threshold;
 };
 struct GamutCompressParams
 {
@@ -146,7 +147,7 @@ constexpr float smooth_cusps = std::max(0.000001f, 0.12f);
 constexpr float smooth_m = 0.27f;
 constexpr float cusp_mid_blend = 1.3f;
 constexpr float focus_gain_blend = 0.3f;
-constexpr float focus_adjust_gain = 0.55f;
+constexpr float focus_adjust_gain_inv = 1.0f / 0.55f;
 constexpr float focus_distance = 1.35f;
 constexpr float focus_distance_scaling = 1.75f;
 constexpr float compression_threshold = 0.75f;
diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
index a3d7e718c7..4183aa0275 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
@@ -558,21 +558,30 @@ bool outside_hull(const f3 &rgb)
     return rgb[0] > maxRGBtestVal || rgb[1] > maxRGBtestVal || rgb[2] > maxRGBtestVal;
 }
 
-float get_focus_gain(float J, float cuspJ, float limit_J_max)
+inline float get_focus_gain(float J, float cuspJ, float limit_J_max, float focus_dist)
 {
     const float thr = lerpf(cuspJ, limit_J_max, focus_gain_blend);
 
+    /* Note from Pekka
+
+    If we’d be willing to make a tiny change in the pixel values, we can eliminate one pow() from getFocusGain() function by changing the focusAdjustGain value to 0.5 from the current 0.55. It then becomes:
+
+      float gain = log10((limitJmax - thr) / max(0.0001f, (limitJmax - min(limitJmax, J))));
+      return gain * gain + 1.0f;
+    from the current:
+
+      float gain = (limitJmax - thr) / max(0.0001f, (limitJmax - min(limitJmax, J)));
+      return pow(log10(gain), 1.0f / focusAdjustGain) + 1.0f;
+    
+    */
+    float gain = 1.0f;
     if (J > thr)
     {
         // Approximate inverse required above threshold
-        float gain = (limit_J_max - thr) / std::max(0.0001f, (limit_J_max - std::min(limit_J_max, J)));
-        return powf(log10(gain), 1.f / focus_adjust_gain) + 1.f;
-    }
-    else
-    {
-        // Analytic inverse possible below cusp
-        return 1.f;
+        gain = (limit_J_max - thr) / std::max(0.0001f, (limit_J_max - std::min(limit_J_max, J)));
+        gain = powf(log10(gain), focus_adjust_gain_inv) + 1.f;
     }
+    return limit_J_max * focus_dist * gain;
 }
 
 float solve_J_intersect(float J, float M, float focusJ, float maxJ, float slope_gain)
@@ -672,80 +681,89 @@ f2 find_gamut_boundary_intersection(const f2 &JM_cusp_in, float J_focus, float J
     return {J_boundary, M_boundary};
 }
 
-float compression_function(
-    float v,
-    float thr,
-    float lim,
-    bool invert)
+template <bool invert>
+inline float reinhard_compand(const float scale, const float factor)
+{
+    if (invert)
+    {
+        if (factor >= 1.0f)
+            return scale;
+        else
+            return scale * -(factor / (factor - 1.0f));
+    }
+    return scale * factor / (1.0f + factor);
+}
+
+template <bool invert>
+inline float compression_function(float v, float thr, float lim)
 {
     float s = (lim - thr) * (1.f - thr) / (lim - 1.f);
     float nd = (v - thr) / s;
 
-    float vCompressed;
-
-    if (invert) {
-        if (v < thr || lim <= 1.0001f || v > thr + s) {
-            vCompressed = v;
-        } else {
-            vCompressed = thr + s * (-nd / (nd - 1.f));
-        }
-    } else {
-        if (v < thr || lim <= 1.0001f) {
-            vCompressed = v;
-        } else {
-            vCompressed = thr + s * nd / (1.f + nd);
-        }
+    if (invert)
+    {
+        if (v < thr || lim <= 1.0001f || v > thr + s)
+            return v;
+    }
+    else
+    {
+        if (v < thr || lim <= 1.0001f) 
+            return v;
     }
 
-    return vCompressed;
+    return thr + reinhard_compand<invert>(s, nd);
 }
 
-f3 compressGamut(const f3 &JMh, float Jx, const ACES2::ResolvedSharedCompressionParameters& sr, const ACES2::GamutCompressParams& p, const HueDependantGamutParams hdp, bool invert)
+template <bool invert>
+f3 compress_JMh(const float initial_M, const float hue, const float J_intersect_source, const f2 JMboundary, const float reachBoundaryM)
+{
+    const float difference = std::max(1.0001f, reachBoundaryM / JMboundary[1]);
+    const float threshold = std::max(compression_threshold, 1.f / difference);
+
+    float v = initial_M / JMboundary[1];
+    v = compression_function<invert>(v, threshold, difference);
+
+    const f3 JMcompressed {
+        J_intersect_source + v * (JMboundary[0] - J_intersect_source),
+        0.0f               + v * (JMboundary[1] - 0.0f),
+        hue
+    };
+    return JMcompressed;
+}
+
+template <bool invert>
+f3 compressGamut(const f3 &JMh, float Jx, const ACES2::ResolvedSharedCompressionParameters& sr, const ACES2::GamutCompressParams& p, const HueDependantGamutParams hdp)
 {
     const float J = JMh[0];
     const float M = JMh[1];
     const float h = JMh[2];
-
-    if (M < 0.0001f || J > sr.limit_J_max)
+    
+    if (J <= 0.0f) // Limit to +ve J values // TODO test this is needed
     {
-        return {J, 0.f, h};
+        return {0.0f, 0.f, h};
     }
-    else
+    if (M <= 0.0f || J > sr.limit_J_max) // We compress M only so avoid mapping zero
+                                         // Above the expected maximum we explicitly map to 0 M
     {
-        const float slope_gain = sr.limit_J_max * p.focus_dist * get_focus_gain(Jx, hdp.JMcusp[0], sr.limit_J_max);
-        const float J_intersect_source = solve_J_intersect(J, M, hdp.focusJ, sr.limit_J_max, slope_gain);
-        const float gamut_slope = compute_compression_vector_slope(J_intersect_source, hdp.focusJ, sr.limit_J_max, slope_gain);
-        const f2 boundaryReturn = find_gamut_boundary_intersection(hdp.JMcusp, hdp.focusJ, sr.limit_J_max, slope_gain, hdp.gamma_top_inv, hdp.gamma_bottom_inv, J_intersect_source, gamut_slope);
-
-        const f2 project_from = {J, M};
-        const f2 JMboundary = {boundaryReturn[0], boundaryReturn[1]};
-        const f2 project_to = {J_intersect_source, 0.f};
-
-        if (JMboundary[1] <= 0.0f)
-        {
-            return {J, 0.f, h};
-        }
-
-        const float reach_slope_gain = sr.limit_J_max * p.focus_dist * get_focus_gain(JMboundary[0], hdp.JMcusp[0], sr.limit_J_max);
-        const float reach_intersectJ = solve_J_intersect(JMboundary[0], JMboundary[1], hdp.focusJ, sr.limit_J_max, reach_slope_gain);
-        const float reach_slope      = compute_compression_vector_slope(reach_intersectJ, hdp.focusJ, sr.limit_J_max, reach_slope_gain);
-        const float reachBoundaryM   = estimate_line_and_boundary_intersection_M(reach_intersectJ, reach_slope, sr.model_gamma_inv, sr.limit_J_max, sr.reachMaxM, sr.limit_J_max);
-
+        return {J, 0.f, h};
+    }
 
-        const float difference = std::max(1.0001f, reachBoundaryM / JMboundary[1]);
-        const float threshold = std::max(compression_threshold, 1.f / difference);
+    const float slope_gain = get_focus_gain(Jx, hdp.JMcusp[0], sr.limit_J_max, p.focus_dist);
+    const float J_intersect_source = solve_J_intersect(J, M, hdp.focusJ, sr.limit_J_max, slope_gain);
+    const float gamut_slope = compute_compression_vector_slope(J_intersect_source, hdp.focusJ, sr.limit_J_max, slope_gain);
+    const f2 gamut_boundary = find_gamut_boundary_intersection(hdp.JMcusp, hdp.focusJ, sr.limit_J_max, slope_gain, hdp.gamma_top_inv, hdp.gamma_bottom_inv, J_intersect_source, gamut_slope);
 
-        float v = project_from[1] / JMboundary[1];
-        v = compression_function(v, threshold, difference, invert);
+    if (gamut_boundary[1] <= 0.0f) // TODO: when does this happen?
+    {
+        return {J, 0.f, h};
+    }
 
-        const f3 JMcompressed {
-            project_to[0] + v * (JMboundary[0] - project_to[0]),
-            project_to[1] + v * (JMboundary[1] - project_to[1]),
-            h
-        };
+    const float reach_slope_gain = get_focus_gain(gamut_boundary[0], hdp.JMcusp[0], sr.limit_J_max, p.focus_dist);
+    const float reach_intersectJ = solve_J_intersect(gamut_boundary[0], gamut_boundary[1], hdp.focusJ, sr.limit_J_max, reach_slope_gain);
+    const float reach_slope      = compute_compression_vector_slope(reach_intersectJ, hdp.focusJ, sr.limit_J_max, reach_slope_gain);
+    const float reachBoundaryM   = estimate_line_and_boundary_intersection_M(reach_intersectJ, reach_slope, sr.model_gamma_inv, sr.limit_J_max, sr.reachMaxM, sr.limit_J_max);
 
-        return JMcompressed;
-    }
+    return compress_JMh<invert>(M, h, J_intersect_source, gamut_boundary, reachBoundaryM);
 }
 
 inline float compute_focusJ(float cusp_J, float mid_J, float limit_J_max)
@@ -753,34 +771,35 @@ inline float compute_focusJ(float cusp_J, float mid_J, float limit_J_max)
     return lerpf(cusp_J, mid_J, std::min(1.f, cusp_mid_blend - (cusp_J / limit_J_max)));
 }
 
-f3 gamut_compress_fwd(const f3 &JMh, const ResolvedSharedCompressionParameters &sr, const GamutCompressParams &p)
+HueDependantGamutParams init_HueDependantGamutParams(const f3 &JMh, const ResolvedSharedCompressionParameters &sr, const GamutCompressParams &p)
 {
     HueDependantGamutParams hdp;
     hdp.gamma_top_inv = hue_dependent_upper_hull_gamma(JMh[2], p.upper_hull_gamma_inv_table);
     hdp.gamma_bottom_inv = p.lower_hull_gamma_inv;
     hdp.JMcusp = cusp_from_table(JMh[2], p.gamut_cusp_table);
     hdp.focusJ = compute_focusJ(hdp.JMcusp[0], p.mid_J, sr.limit_J_max);
+    hdp.analytical_threshold = lerpf(hdp.JMcusp[0], sr.limit_J_max, focus_gain_blend);
+    return hdp;
+}
+
+f3 gamut_compress_fwd(const f3 &JMh, const ResolvedSharedCompressionParameters &sr, const GamutCompressParams &p)
+{
+    const HueDependantGamutParams hdp = init_HueDependantGamutParams(JMh, sr, p);
     
-    return compressGamut(JMh, JMh[0], sr, p, hdp, false);
+    return compressGamut<false>(JMh, JMh[0], sr, p, hdp);
 }
 
 f3 gamut_compress_inv(const f3 &JMh, const ResolvedSharedCompressionParameters &sr, const GamutCompressParams &p)
 {
-    HueDependantGamutParams hdp;
-    hdp.gamma_top_inv = hue_dependent_upper_hull_gamma(JMh[2], p.upper_hull_gamma_inv_table);
-    hdp.gamma_bottom_inv = p.lower_hull_gamma_inv;
-    hdp.JMcusp = cusp_from_table(JMh[2], p.gamut_cusp_table);
-    hdp.focusJ = compute_focusJ(hdp.JMcusp[0], p.mid_J, sr.limit_J_max);
+    const HueDependantGamutParams hdp = init_HueDependantGamutParams(JMh, sr, p);
 
     float Jx = JMh[0];
-
-    // Analytic inverse below threshold
-    if (Jx > lerpf(hdp.JMcusp[0], sr.limit_J_max, focus_gain_blend))
+    if (Jx > hdp.analytical_threshold)
     {
         // Approximation above threshold
-        Jx = compressGamut(JMh, Jx, sr, p, hdp, true)[0];
+        Jx = compressGamut<true>(JMh, Jx, sr, p, hdp)[0];
     }
-    return compressGamut(JMh, Jx, sr, p, hdp, true);
+    return compressGamut<true>(JMh, Jx, sr, p, hdp);
 }
 
 bool evaluate_gamma_fit(
@@ -798,7 +817,7 @@ bool evaluate_gamma_fit(
 
     for (auto testJMh: JMh_values)
     {
-        const float slope_gain = limit_J_max * focus_dist * get_focus_gain(testJMh[0], JMcusp[0], limit_J_max);
+        const float slope_gain = get_focus_gain(testJMh[0], JMcusp[0], limit_J_max, focus_dist);
         const float J_intersect_source = solve_J_intersect(testJMh[0], testJMh[1], focusJ, limit_J_max, slope_gain);
         const float slope = compute_compression_vector_slope(J_intersect_source, focusJ, limit_J_max, slope_gain);
 
diff --git a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
index c0c63a0ae5..c50ce327c1 100644
--- a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
@@ -906,7 +906,7 @@ std::string _Add_Focus_Gain_func(
     ss.newLine() << "{";
     ss.indent();
     ss.newLine() << ss.floatDecl("gain") << " = ( " << s.limit_J_max << " - thr) / max(0.0001, (" << s.limit_J_max << " - min(" << s.limit_J_max << ", J)));";
-    ss.newLine() << "return pow(log(gain)/log(10.0), 1.0 / " << ACES2::focus_adjust_gain << ") + 1.0;";
+    ss.newLine() << "return pow(log(gain)/log(10.0)," << ACES2::focus_adjust_gain_inv << ") + 1.0;";
     ss.dedent();
     ss.newLine() << "}";
     ss.newLine() << "else";

From 05da229614cae64292760198f9453792ac0d7889 Mon Sep 17 00:00:00 2001
From: Kevin Wheatley <kevin.wheatley@framestore.com>
Date: Fri, 10 Jan 2025 16:51:04 +0000
Subject: [PATCH 28/70] Rework gamut mapper to compress absolute M then only
 recalculate J

Signed-off-by: Kevin Wheatley <kevin.wheatley@framestore.com>
---
 .../ops/fixedfunction/ACES2/Transform.cpp     | 62 ++++++++-----------
 1 file changed, 27 insertions(+), 35 deletions(-)

diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
index 4183aa0275..09f07841f1 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
@@ -682,53 +682,38 @@ f2 find_gamut_boundary_intersection(const f2 &JM_cusp_in, float J_focus, float J
 }
 
 template <bool invert>
-inline float reinhard_compand(const float scale, const float factor)
+inline float reinhard_remap(const float scale, const float nd)
 {
     if (invert)
     {
-        if (factor >= 1.0f)
+        if (nd >= 1.0f)
             return scale;
         else
-            return scale * -(factor / (factor - 1.0f));
+            return scale * -(nd / (nd - 1.0f));
     }
-    return scale * factor / (1.0f + factor);
+    return scale * nd / (1.0f + nd);
 }
 
 template <bool invert>
-inline float compression_function(float v, float thr, float lim)
+inline float remap_M(const float M, const float gamut_boundary_M, const float reach_boundary_M)
 {
-    float s = (lim - thr) * (1.f - thr) / (lim - 1.f);
-    float nd = (v - thr) / s;
+    const float boundary_ratio = gamut_boundary_M / reach_boundary_M;
+    const float proportion = std::min(1.0f, std::max(boundary_ratio, compression_threshold));
+    const float threshold  = proportion * gamut_boundary_M;
 
-    if (invert)
-    {
-        if (v < thr || lim <= 1.0001f || v > thr + s)
-            return v;
-    }
-    else
-    {
-        if (v < thr || lim <= 1.0001f) 
-            return v;
-    }
-
-    return thr + reinhard_compand<invert>(s, nd);
-}
-
-template <bool invert>
-f3 compress_JMh(const float initial_M, const float hue, const float J_intersect_source, const f2 JMboundary, const float reachBoundaryM)
-{
-    const float difference = std::max(1.0001f, reachBoundaryM / JMboundary[1]);
-    const float threshold = std::max(compression_threshold, 1.f / difference);
+    if (M <= threshold || proportion == 1.0f) //boundary_ratio >= 1.0f) // Removed above asymptote v > threshold + s // TODO compare effects
+        return M;
 
-    float v = initial_M / JMboundary[1];
-    v = compression_function<invert>(v, threshold, difference);
+    // Translate to place threshold at zero
+    const float m_offset     = M - threshold;
+    const float gamut_offset = gamut_boundary_M - threshold;
+    const float reach_offset = reach_boundary_M - threshold;
 
-    const f3 JMcompressed {
-        J_intersect_source + v * (JMboundary[0] - J_intersect_source),
-        0.0f               + v * (JMboundary[1] - 0.0f),
-        hue
-    };
-    return JMcompressed;
+    const float scale = reach_offset / ((reach_offset / gamut_offset) - 1.0f);
+    const float nd = m_offset / scale;
+ 
+    // shift back to absolute
+    return threshold + reinhard_remap<invert>(scale, nd);
 }
 
 template <bool invert>
@@ -763,7 +748,14 @@ f3 compressGamut(const f3 &JMh, float Jx, const ACES2::ResolvedSharedCompression
     const float reach_slope      = compute_compression_vector_slope(reach_intersectJ, hdp.focusJ, sr.limit_J_max, reach_slope_gain);
     const float reachBoundaryM   = estimate_line_and_boundary_intersection_M(reach_intersectJ, reach_slope, sr.model_gamma_inv, sr.limit_J_max, sr.reachMaxM, sr.limit_J_max);
 
-    return compress_JMh<invert>(M, h, J_intersect_source, gamut_boundary, reachBoundaryM);
+    const float remapped_M = remap_M<invert>(M, gamut_boundary[1], reachBoundaryM);
+
+    const f3 JMcompressed {
+        J_intersect_source + remapped_M * gamut_slope,
+        remapped_M,
+        h
+    };
+    return JMcompressed;
 }
 
 inline float compute_focusJ(float cusp_J, float mid_J, float limit_J_max)

From f8498c643edf795404b6ac907a3417a942007374 Mon Sep 17 00:00:00 2001
From: Kevin Wheatley <kevin.wheatley@framestore.com>
Date: Mon, 13 Jan 2025 18:51:42 +0000
Subject: [PATCH 29/70] Precalculate maximum search range for cusp lookup

Next steps would be to factor hue into a separate table to improve cache hits, followed by redistribution to more uniform hues, which should narrow the search range.

Signed-off-by: Kevin Wheatley <kevin.wheatley@framestore.com>
---
 .../ops/fixedfunction/ACES2/Common.h          |  1 +
 .../ops/fixedfunction/ACES2/Transform.cpp     | 54 +++++++++++--------
 2 files changed, 33 insertions(+), 22 deletions(-)

diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h b/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
index 1209d98410..50678ac0d2 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
@@ -97,6 +97,7 @@ struct GamutCompressParams
     float mid_J;
     float focus_dist;
     float lower_hull_gamma_inv;
+    std::array<int, 2> hue_linearity_search_range;
     Table3D gamut_cusp_table;
     Table1D upper_hull_gamma_inv_table;
 };
diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
index 09f07841f1..3c8cba0a8d 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
@@ -38,12 +38,11 @@ inline int clamp_to_table_bounds(int entry, int table_size) // TODO: this should
     return std::min(table_size - 1, std::max(0, entry));
 }
 
-f2 cusp_from_table(float h, const Table3D &gt)
+f2 cusp_from_table(float h, const Table3D &gt, const std::array<int, 2> & hue_linearity_search_range)
 {
-    constexpr int search_range = 64; // BUG: TODO: FIXME: This is arbitrary and should be calculated based on gamut parameters
     int i = hue_position_in_uniform_table(h, gt.size) + gt.base_index;
-    int i_lo = std::max(0, i - search_range);
-    int i_hi = std::min(gt.total_size - 1, i + search_range); // allowed as we have an extra entry in the table
+    int i_lo = std::max(0, i + hue_linearity_search_range[0]);
+    int i_hi = std::min(gt.total_size - 1, i + hue_linearity_search_range[1]); // allowed as we have an extra entry in the table
 
     while (i_lo + 1 < i_hi)
     {
@@ -452,6 +451,7 @@ JMhParams init_JMhParams(const Primaries &prims)
 Table3D make_gamut_table(const JMhParams &params, float peakLuminance)
 {
     Table3D gamutCuspTableUnsorted{};
+    int minhIndex = 0;
     for (int i = 0; i < gamutCuspTableUnsorted.size; i++)
     {
         const float hNorm = (float) i / gamutCuspTableUnsorted.size;
@@ -463,11 +463,6 @@ Table3D make_gamut_table(const JMhParams &params, float peakLuminance)
         gamutCuspTableUnsorted.table[i][0] = JMh[0];
         gamutCuspTableUnsorted.table[i][1] = JMh[1];
         gamutCuspTableUnsorted.table[i][2] = JMh[2];
-    }
-
-    int minhIndex = 0;
-    for (int i = 0; i < gamutCuspTableUnsorted.size; i++)
-    {
         if ( gamutCuspTableUnsorted.table[i][2] < gamutCuspTableUnsorted.table[minhIndex][2])
             minhIndex = i;
     }
@@ -493,8 +488,6 @@ Table3D make_gamut_table(const JMhParams &params, float peakLuminance)
     // Wrap the hues, to maintain monotonicity. These entries will fall outside [0.0, 360.0)
     gamutCuspTable.table[0][2] = gamutCuspTable.table[0][2] - hue_limit;
     gamutCuspTable.table[gamutCuspTable.size+1][2] = gamutCuspTable.table[gamutCuspTable.size+1][2] + hue_limit;
-    //for (int i = 0; i < gamutCuspTableUnsorted.total_size; i++)
-    //  std::cout << "i " << i << " " << hue_position_in_uniform_table(gamutCuspTable.table[i][2], gamutCuspTableUnsorted.size) << " " << gamutCuspTable.table[i][0]  << " " << gamutCuspTable.table[i][1]  << " " << gamutCuspTable.table[i][2]  << "\n";
     return gamutCuspTable;
 }
 
@@ -623,7 +616,7 @@ float solve_J_intersect(float J, float M, float focusJ, float maxJ, float slope_
     return intersectJ;
 }
 
-float smin(float a, float b, float s)
+inline float smin(float a, float b, float s)
 {
     const float h = std::max(s - std::abs(a - b), 0.f) / s;
     return std::min(a, b) - h * h * h * s * (1.f / 6.f);
@@ -768,7 +761,7 @@ HueDependantGamutParams init_HueDependantGamutParams(const f3 &JMh, const Resolv
     HueDependantGamutParams hdp;
     hdp.gamma_top_inv = hue_dependent_upper_hull_gamma(JMh[2], p.upper_hull_gamma_inv_table);
     hdp.gamma_bottom_inv = p.lower_hull_gamma_inv;
-    hdp.JMcusp = cusp_from_table(JMh[2], p.gamut_cusp_table);
+    hdp.JMcusp = cusp_from_table(JMh[2], p.gamut_cusp_table, p.hue_linearity_search_range);
     hdp.focusJ = compute_focusJ(hdp.JMcusp[0], p.mid_J, sr.limit_J_max);
     hdp.analytical_threshold = lerpf(hdp.JMcusp[0], sr.limit_J_max, focus_gain_blend);
     return hdp;
@@ -829,6 +822,7 @@ bool evaluate_gamma_fit(
 
 Table1D make_upper_hull_gamma(
     const Table3D &gamutCuspTable,
+    const std::array<int, 2> & hue_linearity_search_range,
     float peakLuminance,
     float limit_J_max,
     float mid_J,
@@ -847,7 +841,7 @@ Table1D make_upper_hull_gamma(
         gammaTable.table[i] = -1.f;
 
         const float hue = (float) i;
-        const f2 JMcusp = cusp_from_table(hue, gamutCuspTable);
+        const f2 JMcusp = cusp_from_table(hue, gamutCuspTable, hue_linearity_search_range);
 
         std::array<f3, test_count> testJMh;
         for (int testIndex = 0; testIndex < test_count; testIndex++)
@@ -995,6 +989,21 @@ ChromaCompressParams init_ChromaCompressParams(float peakLuminance, const ToneSc
     return params;
 }
 
+std::array<int, 2> determine_hue_linearity_search_range(const Table3D &gamutCuspTable)
+{
+    std::array<int, 2> hue_linearity_search_range = {0, 0};
+    for (int i = gamutCuspTable.base_index; i < gamutCuspTable.base_index + gamutCuspTable.size; i++)
+    {
+      const int pos = hue_position_in_uniform_table(gamutCuspTable.table[i][2], gamutCuspTable.size) + gamutCuspTable.base_index;
+      const int delta = i - pos;
+      hue_linearity_search_range[0] = std::min(hue_linearity_search_range[0], delta-2);
+      hue_linearity_search_range[1] = std::max(hue_linearity_search_range[1], delta+1);
+      //std:: cout << "hue_linearity_search_range " << i << " " << pos << " " << hue_linearity_search_range[0] << " " << hue_linearity_search_range[1] << "\n";
+    }
+    //std:: cout << "hue_linearity_search_range " << hue_linearity_search_range[0] << " " << hue_linearity_search_range[1] << "\n";
+    return hue_linearity_search_range;
+}
+
 GamutCompressParams init_GamutCompressParams(float peakLuminance, const JMhParams &inputJMhParams, const JMhParams &limitJMhParams,
                                              const ToneScaleParams &tsParams, const SharedCompressionParameters &shParams)
 {
@@ -1004,14 +1013,15 @@ GamutCompressParams init_GamutCompressParams(float peakLuminance, const JMhParam
     const float focus_dist = focus_distance + focus_distance * focus_distance_scaling * tsParams.log_peak;
     const float lower_hull_gamma_inv =  1.0f / (1.14f + 0.07f * tsParams.log_peak);
 
-    GamutCompressParams params = {
-        mid_J,
-        focus_dist,
-        lower_hull_gamma_inv,
-        make_gamut_table(limitJMhParams, peakLuminance),
-        make_upper_hull_gamma(params.gamut_cusp_table, peakLuminance, shParams.limit_J_max,
-                              mid_J, focus_dist, lower_hull_gamma_inv, limitJMhParams) //TODO
-    };
+    GamutCompressParams params;
+    params.mid_J = mid_J;
+    params.focus_dist = focus_dist;
+    params.lower_hull_gamma_inv = lower_hull_gamma_inv;
+    params.gamut_cusp_table = make_gamut_table(limitJMhParams, peakLuminance);
+    params.hue_linearity_search_range = determine_hue_linearity_search_range(params.gamut_cusp_table);
+    params.upper_hull_gamma_inv_table = make_upper_hull_gamma(params.gamut_cusp_table, params.hue_linearity_search_range,
+                                                              peakLuminance, shParams.limit_J_max, mid_J, focus_dist,
+                                                              lower_hull_gamma_inv, limitJMhParams) ;//TODO
     return params;
 }
 

From 90c6765686f5ac74b390584aeadd697ab23072c6 Mon Sep 17 00:00:00 2001
From: Kevin Wheatley <kevin.wheatley@framestore.com>
Date: Wed, 15 Jan 2025 11:04:50 +0000
Subject: [PATCH 30/70] Experiment with reusing slope calculations in gamut
 mapper presmooth cusp values

Signed-off-by: Kevin Wheatley <kevin.wheatley@framestore.com>
---
 .../ops/fixedfunction/ACES2/Transform.cpp     | 44 +++++++++----------
 1 file changed, 22 insertions(+), 22 deletions(-)

diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
index 3c8cba0a8d..6f5f8ba17c 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
@@ -461,7 +461,7 @@ Table3D make_gamut_table(const JMhParams &params, float peakLuminance)
         const f3 JMh = RGB_to_JMh(scaledRGB, params);
 
         gamutCuspTableUnsorted.table[i][0] = JMh[0];
-        gamutCuspTableUnsorted.table[i][1] = JMh[1];
+        gamutCuspTableUnsorted.table[i][1] = JMh[1]  * (1.f + smooth_m * smooth_cusps);
         gamutCuspTableUnsorted.table[i][2] = JMh[2];
         if ( gamutCuspTableUnsorted.table[i][2] < gamutCuspTableUnsorted.table[minhIndex][2])
             minhIndex = i;
@@ -648,15 +648,8 @@ inline float estimate_line_and_boundary_intersection_M(const float J_axis_inters
     //return shifted_intersection * M_max / (J_max - slope * M_max);
 }
 
-f2 find_gamut_boundary_intersection(const f2 &JM_cusp_in, float J_focus, float J_max, float slope_gain, float gamma_top_inv, float gamma_bottom_inv, const float J_intersect_source, const float slope)
+float find_gamut_boundary_intersection(const f2 &JM_cusp, float J_max, float gamma_top_inv, float gamma_bottom_inv, const float J_intersect_source, const float slope, const float J_intersect_cusp)
 {
-    const f2 JM_cusp = {
-        JM_cusp_in[0],
-        JM_cusp_in[1] * (1.f + smooth_m * smooth_cusps)
-    };
-
-    const float J_intersect_cusp = solve_J_intersect(JM_cusp[0], JM_cusp[1], J_focus, J_max, slope_gain);
-
     const float M_boundary_lower = estimate_line_and_boundary_intersection_M(J_intersect_source, slope, gamma_bottom_inv, JM_cusp[0], JM_cusp[1], J_intersect_cusp);
 
     // The upper hull is flipped and thus 'zeroed' at J_max
@@ -669,9 +662,8 @@ f2 find_gamut_boundary_intersection(const f2 &JM_cusp_in, float J_focus, float J
 
     // Smooth minimum between the two calculated values for the M component
     const float M_boundary = JM_cusp[1] * smin(M_boundary_lower / JM_cusp[1], M_boundary_upper / JM_cusp[1], smooth_cusps);
-    const float J_boundary = J_intersect_source + slope * M_boundary; // TODO don't recalculate this
 
-    return {J_boundary, M_boundary};
+    return M_boundary;
 }
 
 template <bool invert>
@@ -691,10 +683,10 @@ template <bool invert>
 inline float remap_M(const float M, const float gamut_boundary_M, const float reach_boundary_M)
 {
     const float boundary_ratio = gamut_boundary_M / reach_boundary_M;
-    const float proportion = std::min(1.0f, std::max(boundary_ratio, compression_threshold));
+    const float proportion = std::max(boundary_ratio, compression_threshold);
     const float threshold  = proportion * gamut_boundary_M;
 
-    if (M <= threshold || proportion == 1.0f) //boundary_ratio >= 1.0f) // Removed above asymptote v > threshold + s // TODO compare effects
+    if (proportion >= 1.0f || M <= threshold)
         return M;
 
     // Translate to place threshold at zero
@@ -729,19 +721,23 @@ f3 compressGamut(const f3 &JMh, float Jx, const ACES2::ResolvedSharedCompression
     const float slope_gain = get_focus_gain(Jx, hdp.JMcusp[0], sr.limit_J_max, p.focus_dist);
     const float J_intersect_source = solve_J_intersect(J, M, hdp.focusJ, sr.limit_J_max, slope_gain);
     const float gamut_slope = compute_compression_vector_slope(J_intersect_source, hdp.focusJ, sr.limit_J_max, slope_gain);
-    const f2 gamut_boundary = find_gamut_boundary_intersection(hdp.JMcusp, hdp.focusJ, sr.limit_J_max, slope_gain, hdp.gamma_top_inv, hdp.gamma_bottom_inv, J_intersect_source, gamut_slope);
+ 
 
-    if (gamut_boundary[1] <= 0.0f) // TODO: when does this happen?
+    const float J_intersect_cusp = solve_J_intersect(hdp.JMcusp[0], hdp.JMcusp[1], hdp.focusJ, sr.limit_J_max, slope_gain);
+    const float gamut_boundary_M = find_gamut_boundary_intersection(hdp.JMcusp, sr.limit_J_max, hdp.gamma_top_inv, hdp.gamma_bottom_inv, J_intersect_source, gamut_slope, J_intersect_cusp);
+
+    if (gamut_boundary_M <= 0.0f) // TODO: when does this happen?
     {
         return {J, 0.f, h};
     }
 
-    const float reach_slope_gain = get_focus_gain(gamut_boundary[0], hdp.JMcusp[0], sr.limit_J_max, p.focus_dist);
-    const float reach_intersectJ = solve_J_intersect(gamut_boundary[0], gamut_boundary[1], hdp.focusJ, sr.limit_J_max, reach_slope_gain);
-    const float reach_slope      = compute_compression_vector_slope(reach_intersectJ, hdp.focusJ, sr.limit_J_max, reach_slope_gain);
-    const float reachBoundaryM   = estimate_line_and_boundary_intersection_M(reach_intersectJ, reach_slope, sr.model_gamma_inv, sr.limit_J_max, sr.reachMaxM, sr.limit_J_max);
+    //const float reach_slope_gain = get_focus_gain(gamut_boundary[0], hdp.JMcusp[0], sr.limit_J_max, p.focus_dist);
+    //const float reach_intersectJ = solve_J_intersect(gamut_boundary[0], gamut_boundary[1], hdp.focusJ, sr.limit_J_max, reach_slope_gain);
+    //const float reach_slope      = compute_compression_vector_slope(reach_intersectJ, hdp.focusJ, sr.limit_J_max, reach_slope_gain);
+    //const float reachBoundaryM   = estimate_line_and_boundary_intersection_M(reach_intersectJ, reach_slope, sr.model_gamma_inv, sr.limit_J_max, sr.reachMaxM, sr.limit_J_max);
+    const float reachBoundaryM   = estimate_line_and_boundary_intersection_M(J_intersect_source, gamut_slope, sr.model_gamma_inv, sr.limit_J_max, sr.reachMaxM, sr.limit_J_max);
 
-    const float remapped_M = remap_M<invert>(M, gamut_boundary[1], reachBoundaryM);
+    const float remapped_M = remap_M<invert>(M, gamut_boundary_M, reachBoundaryM);
 
     const f3 JMcompressed {
         J_intersect_source + remapped_M * gamut_slope,
@@ -798,6 +794,7 @@ bool evaluate_gamma_fit(
     float lower_hull_gamma_inv,
     const JMhParams &limitJMhParams)
 {
+
     const float focusJ = compute_focusJ(JMcusp[0], mid_J, limit_J_max);
 
     for (auto testJMh: JMh_values)
@@ -805,9 +802,12 @@ bool evaluate_gamma_fit(
         const float slope_gain = get_focus_gain(testJMh[0], JMcusp[0], limit_J_max, focus_dist);
         const float J_intersect_source = solve_J_intersect(testJMh[0], testJMh[1], focusJ, limit_J_max, slope_gain);
         const float slope = compute_compression_vector_slope(J_intersect_source, focusJ, limit_J_max, slope_gain);
+        const float J_intersect_cusp = solve_J_intersect(JMcusp[0], JMcusp[1], focusJ, limit_J_max, slope_gain);
+
+        const float approxLimit_M = find_gamut_boundary_intersection(JM_cusp_s, limit_J_max, topGamma_inv, lower_hull_gamma_inv, J_intersect_source, slope, J_intersect_cusp);
+        const float approxLimit_J = J_intersect_source + slope * approxLimit_M;
 
-        const f2 approxLimit = find_gamut_boundary_intersection(JMcusp, focusJ, limit_J_max, slope_gain, topGamma_inv, lower_hull_gamma_inv, J_intersect_source, slope);
-        const f3 approximate_JMh = {approxLimit[0], approxLimit[1], testJMh[2]};
+        const f3 approximate_JMh = {approxLimit_J, approxLimit_M, testJMh[2]};
         const f3 newLimitRGB = JMh_to_RGB(approximate_JMh, limitJMhParams);
         const f3 newLimitRGBScaled = mult_f_f3(reference_luminance / peakLuminance, newLimitRGB);
 

From 4a834790f5560096b3414a6522ff47e9adad335e Mon Sep 17 00:00:00 2001
From: Kevin Wheatley <kevin.wheatley@framestore.com>
Date: Wed, 15 Jan 2025 11:05:06 +0000
Subject: [PATCH 31/70] Add a collection of TODO's

Signed-off-by: Kevin Wheatley <kevin.wheatley@framestore.com>
---
 .../ops/fixedfunction/ACES2/Transform.cpp     | 75 +++++++++++--------
 1 file changed, 43 insertions(+), 32 deletions(-)

diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
index 6f5f8ba17c..4669b37390 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
@@ -17,15 +17,9 @@ namespace ACES2
 // Table lookups
 //
 
-inline float base_hue_for_position(int i_lo, int table_size) 
-{
-    const float result = i_lo * hue_limit / table_size;
-    return result;
-}
-
 inline int hue_position_in_uniform_table(float wrapped_hue, int table_size)
 {
-    return int(wrapped_hue / hue_limit * (float) table_size);
+    return int(wrapped_hue / hue_limit * float(table_size)); // TODO: can we use the 'lost' fraction for the lerps?
 }
 
 inline int next_position_in_table(int entry, int table_size)
@@ -42,7 +36,7 @@ f2 cusp_from_table(float h, const Table3D &gt, const std::array<int, 2> & hue_li
 {
     int i = hue_position_in_uniform_table(h, gt.size) + gt.base_index;
     int i_lo = std::max(0, i + hue_linearity_search_range[0]);
-    int i_hi = std::min(gt.total_size - 1, i + hue_linearity_search_range[1]); // allowed as we have an extra entry in the table
+    int i_hi = std::min(gt.total_size - 1, i + hue_linearity_search_range[1]);
 
     while (i_lo + 1 < i_hi)
     {
@@ -92,7 +86,7 @@ float hue_dependent_upper_hull_gamma(float h, const ACES2::Table1D &gt)
     const int i_lo = clamp_to_table_bounds(hue_position_in_uniform_table(h, gt.size) + gt.base_index, gt.total_size);  // TODO: this should be removed if we can constrain the hue range properly
     const int i_hi = next_position_in_table(i_lo, gt.size);
 
-    const float base_hue = (float) (i_lo - gt.base_index);
+    const float base_hue = float(i_lo - gt.base_index);
 
     const float t = h - base_hue;
 
@@ -256,7 +250,7 @@ inline float chroma_compress_norm(float h, float chroma_compress_scale)
                9.19364f * sin_hr3 +
               77.12896f;
 
-    return M * chroma_compress_scale;
+    return M * chroma_compress_scale; // TODO: is it worth prescaling the above M constants ?
 }
 
 inline float toe_fwd( float x, float limit, float k1_in, float k2_in)
@@ -288,7 +282,7 @@ inline float toe_inv( float x, float limit, float k1_in, float k2_in)
     return (x * x + k1 * x) / (k3 * (x + k2));
 }
 
-float tonescale_fwd(const float J, const JMhParams &p, const ToneScaleParams &pt)
+float tonescale_fwd(const float J, const JMhParams &p, const ToneScaleParams &pt) // TODO: consider computing tonescale from and to A rather than J to avoid extra pow() calls
 {
     // Tonescale applied in Y (convert to and from J)
     const float J_abs = std::abs(J);
@@ -423,6 +417,9 @@ JMhParams init_JMhParams(const Primaries &prims)
     const float A_w   = cone_response_to_Aab[0] * RGB_AW[0] + cone_response_to_Aab[1] * RGB_AW[1] + cone_response_to_Aab[2] * RGB_AW[2];
     const float A_w_J = _post_adaptation_cone_response_compression_fwd(reference_luminance, F_L_n);
 
+    // TODO: evaluate the condition number of the below matrices and consider extracting a power of 2 scale out of them may improve noise behaviour
+    //       power of 2 to make cost of extra scaling limited vs generalised multiply/divide ?
+
     // Note we are prescaling the CAM16 LMS responses to directly provide for chromatic adaptation.
     const m33f MATRIX_RGB_to_CAM16 = mult_f33_f33(RGBtoRGB_f33(prims, CAM16::primaries), scale_f33(Identity_M33, f3_from_f(reference_luminance)));
     const m33f MATRIX_RGB_to_CAM16_c = mult_f33_f33(scale_f33(Identity_M33, D_RGB), MATRIX_RGB_to_CAM16);
@@ -454,7 +451,7 @@ Table3D make_gamut_table(const JMhParams &params, float peakLuminance)
     int minhIndex = 0;
     for (int i = 0; i < gamutCuspTableUnsorted.size; i++)
     {
-        const float hNorm = (float) i / gamutCuspTableUnsorted.size;
+        const float hNorm = float(i) / float(gamutCuspTableUnsorted.size);
         const f3 HSV = {hNorm, 1., 1.};
         const f3 RGB = HSV_to_RGB(HSV);
         const f3 scaledRGB = mult_f_f3(peakLuminance / reference_luminance, RGB);
@@ -485,7 +482,7 @@ Table3D make_gamut_table(const JMhParams &params, float peakLuminance)
     gamutCuspTable.table[gamutCuspTable.base_index + gamutCuspTable.size][1] = gamutCuspTable.table[gamutCuspTable.base_index][1];
     gamutCuspTable.table[gamutCuspTable.base_index + gamutCuspTable.size][2] = gamutCuspTable.table[gamutCuspTable.base_index][2];
 
-    // Wrap the hues, to maintain monotonicity. These entries will fall outside [0.0, 360.0)
+    // Wrap the hues, to maintain monotonicity. These entries will fall outside [0.0, hue_limit)
     gamutCuspTable.table[0][2] = gamutCuspTable.table[0][2] - hue_limit;
     gamutCuspTable.table[gamutCuspTable.size+1][2] = gamutCuspTable.table[gamutCuspTable.size+1][2] + hue_limit;
     return gamutCuspTable;
@@ -496,14 +493,12 @@ bool any_below_zero(const f3 &rgb)
     return (rgb[0] < 0. || rgb[1] < 0. || rgb[2] < 0.);
 }
 
-Table1D make_reach_m_table(const JMhParams &params, float peakLuminance)
+Table1D make_reach_m_table(const JMhParams &params, const float limit_J_max)
 {
-    const float limit_J_max = Y_to_J(peakLuminance, params);
-
     Table1D gamutReachTable{};
 
     for (int i = 0; i < gamutReachTable.size; i++) {
-        const float hue = (float) i;
+        const float hue = float(i);
 
         const float search_range = 50.f;
         float low = 0.f;
@@ -661,6 +656,7 @@ float find_gamut_boundary_intersection(const f2 &JM_cusp, float J_max, float gam
       estimate_line_and_boundary_intersection_M(f_J_intersect_source, -slope, gamma_top_inv, f_JM_cusp_J, JM_cusp[1], f_J_intersect_cusp);
 
     // Smooth minimum between the two calculated values for the M component
+    // TODO: do we need to normalise based on JM_cusp[1]
     const float M_boundary = JM_cusp[1] * smin(M_boundary_lower / JM_cusp[1], M_boundary_upper / JM_cusp[1], smooth_cusps);
 
     return M_boundary;
@@ -671,7 +667,7 @@ inline float reinhard_remap(const float scale, const float nd)
 {
     if (invert)
     {
-        if (nd >= 1.0f)
+        if (nd >= 1.0f) // TODO: given remap_M already tests against proportion do we need this asymptote test
             return scale;
         else
             return scale * -(nd / (nd - 1.0f));
@@ -708,6 +704,7 @@ f3 compressGamut(const f3 &JMh, float Jx, const ACES2::ResolvedSharedCompression
     const float M = JMh[1];
     const float h = JMh[2];
     
+    // TODO: test migrating these to calling function so they are only tested once for the inverse?
     if (J <= 0.0f) // Limit to +ve J values // TODO test this is needed
     {
         return {0.0f, 0.f, h};
@@ -726,7 +723,7 @@ f3 compressGamut(const f3 &JMh, float Jx, const ACES2::ResolvedSharedCompression
     const float J_intersect_cusp = solve_J_intersect(hdp.JMcusp[0], hdp.JMcusp[1], hdp.focusJ, sr.limit_J_max, slope_gain);
     const float gamut_boundary_M = find_gamut_boundary_intersection(hdp.JMcusp, sr.limit_J_max, hdp.gamma_top_inv, hdp.gamma_bottom_inv, J_intersect_source, gamut_slope, J_intersect_cusp);
 
-    if (gamut_boundary_M <= 0.0f) // TODO: when does this happen?
+    if (gamut_boundary_M <= 0.0f) // TODO: when/why does this happen?
     {
         return {J, 0.f, h};
     }
@@ -804,12 +801,12 @@ bool evaluate_gamma_fit(
         const float slope = compute_compression_vector_slope(J_intersect_source, focusJ, limit_J_max, slope_gain);
         const float J_intersect_cusp = solve_J_intersect(JMcusp[0], JMcusp[1], focusJ, limit_J_max, slope_gain);
 
-        const float approxLimit_M = find_gamut_boundary_intersection(JM_cusp_s, limit_J_max, topGamma_inv, lower_hull_gamma_inv, J_intersect_source, slope, J_intersect_cusp);
+        const float approxLimit_M = find_gamut_boundary_intersection(JMcusp, limit_J_max, topGamma_inv, lower_hull_gamma_inv, J_intersect_source, slope, J_intersect_cusp);
         const float approxLimit_J = J_intersect_source + slope * approxLimit_M;
 
         const f3 approximate_JMh = {approxLimit_J, approxLimit_M, testJMh[2]};
         const f3 newLimitRGB = JMh_to_RGB(approximate_JMh, limitJMhParams);
-        const f3 newLimitRGBScaled = mult_f_f3(reference_luminance / peakLuminance, newLimitRGB);
+        const f3 newLimitRGBScaled = mult_f_f3(reference_luminance / peakLuminance, newLimitRGB); // TODO: can this be avoided by amending outside_hull to account for peak?
 
         if (!outside_hull(newLimitRGBScaled))
         {
@@ -833,14 +830,14 @@ Table1D make_upper_hull_gamma(
     constexpr int test_count = 3;
     const std::array<float, test_count> testPositions = {0.01f, 0.5f, 0.99f};
 
-    Table1D gammaTable{};
+    Table1D gammaTable{}; // TODO: do we need the 2nd table or can this be done "in place"
     Table1D gamutTopGamma{};
 
     for (int i = 0; i < gammaTable.size; i++)
     {
         gammaTable.table[i] = -1.f;
 
-        const float hue = (float) i;
+        const float hue = float(i);
         const f2 JMcusp = cusp_from_table(hue, gamutCuspTable, hue_linearity_search_range);
 
         std::array<f3, test_count> testJMh;
@@ -854,6 +851,9 @@ Table1D make_upper_hull_gamma(
             };
         }
 
+        // TODO: calculate best fitting inverse gamma directly rather than reciprocating it in the loop
+        // TODO: adjacent found gamma values typically fall close to each other could initialise the search range
+        //       with values near to speed up searching. Note that discrepancies do occur at/near corners of gamut
         const float search_range = gammaSearchStep;
         float low = gammaMinimum;
         float high = low + search_range;
@@ -918,6 +918,8 @@ ToneScaleParams init_ToneScaleParams(float peakLuminance)
     const float r_hit_max = 896.f;   // scene-referred value "hitting the roof"
 
     // Calculate output constants
+    // TODO: factor these calculations into well named functions
+    // TODO: ensure correct use of n_r vs n vs reference_luminance
     const float r_hit = r_hit_min + (r_hit_max - r_hit_min) * (log(n/n_r)/log(10000.f/100.f));
     const float m_0 = (n / n_r);
     const float m_1 = 0.5f * (m_0 + sqrt(m_0 * (m_0 + 4.f * t_1)));
@@ -957,7 +959,7 @@ SharedCompressionParameters init_SharedCompressionParams(float peakLuminance, co
     SharedCompressionParameters params = {
         limit_J_max,
         model_gamma_inv,
-        make_reach_m_table(compressionGamut, peakLuminance)
+        make_reach_m_table(compressionGamut, limit_J_max)
     };
     return params;
 }
@@ -975,6 +977,7 @@ ResolvedSharedCompressionParameters resolve_CompressionParams(float hue, const S
 ChromaCompressParams init_ChromaCompressParams(float peakLuminance, const ToneScaleParams &tsParams)
 {
     // Calculated chroma compress variables
+    // TODO: name these magic constants
     const float compr = chroma_compress + (chroma_compress * chroma_compress_fact) * tsParams.log_peak;
     const float sat = std::max(0.2f, chroma_expand - (chroma_expand * chroma_expand_fact) * tsParams.log_peak);
     const float sat_thr = chroma_expand_thr / tsParams.n;
@@ -991,27 +994,35 @@ ChromaCompressParams init_ChromaCompressParams(float peakLuminance, const ToneSc
 
 std::array<int, 2> determine_hue_linearity_search_range(const Table3D &gamutCuspTable)
 {
-    std::array<int, 2> hue_linearity_search_range = {0, 0};
+    // This function searches through the hues looking for the largest deviations from a linear distribution
+    // We can then use this to initialise the binary search range to something smaller than the full one to
+    // reduce the number of lookups per hue lookup from ~ceil(log2(table size)) to ~ceil(log2(range))
+    // during image rendering.
+
+    // TODO: Padding values are a quick hack to ensure the range encloses the needed range, left as an exersize
+    // for the reader to fully reason if these values could be smaller, probably best done the closer the hue
+    // distribution comes to linear as the overhead becomes a potentially greater issue
+    constexpr int lower_padding = -2;
+    constexpr int upper_padding = 1;
+    std::array<int, 2> hue_linearity_search_range = {lower_padding, upper_padding};
     for (int i = gamutCuspTable.base_index; i < gamutCuspTable.base_index + gamutCuspTable.size; i++)
     {
       const int pos = hue_position_in_uniform_table(gamutCuspTable.table[i][2], gamutCuspTable.size) + gamutCuspTable.base_index;
       const int delta = i - pos;
-      hue_linearity_search_range[0] = std::min(hue_linearity_search_range[0], delta-2);
-      hue_linearity_search_range[1] = std::max(hue_linearity_search_range[1], delta+1);
-      //std:: cout << "hue_linearity_search_range " << i << " " << pos << " " << hue_linearity_search_range[0] << " " << hue_linearity_search_range[1] << "\n";
+      hue_linearity_search_range[0] = std::min(hue_linearity_search_range[0], delta + lower_padding);
+      hue_linearity_search_range[1] = std::max(hue_linearity_search_range[1], delta + upper_padding);
     }
-    //std:: cout << "hue_linearity_search_range " << hue_linearity_search_range[0] << " " << hue_linearity_search_range[1] << "\n";
     return hue_linearity_search_range;
 }
 
 GamutCompressParams init_GamutCompressParams(float peakLuminance, const JMhParams &inputJMhParams, const JMhParams &limitJMhParams,
                                              const ToneScaleParams &tsParams, const SharedCompressionParameters &shParams)
 {
-    float mid_J = Y_to_J(tsParams.c_t * reference_luminance, inputJMhParams); // TODO
+    float mid_J = Y_to_J(tsParams.c_t * reference_luminance, inputJMhParams);
 
     // Calculated chroma compress variables
     const float focus_dist = focus_distance + focus_distance * focus_distance_scaling * tsParams.log_peak;
-    const float lower_hull_gamma_inv =  1.0f / (1.14f + 0.07f * tsParams.log_peak);
+    const float lower_hull_gamma_inv =  1.0f / (1.14f + 0.07f * tsParams.log_peak); // TODO: name these magic constants
 
     GamutCompressParams params;
     params.mid_J = mid_J;
@@ -1021,7 +1032,7 @@ GamutCompressParams init_GamutCompressParams(float peakLuminance, const JMhParam
     params.hue_linearity_search_range = determine_hue_linearity_search_range(params.gamut_cusp_table);
     params.upper_hull_gamma_inv_table = make_upper_hull_gamma(params.gamut_cusp_table, params.hue_linearity_search_range,
                                                               peakLuminance, shParams.limit_J_max, mid_J, focus_dist,
-                                                              lower_hull_gamma_inv, limitJMhParams) ;//TODO
+                                                              lower_hull_gamma_inv, limitJMhParams); //TODO: mess of parameters
     return params;
 }
 

From a9a5c9a6710f63dd3e4e25b71ebbae072cfd79c0 Mon Sep 17 00:00:00 2001
From: Kevin Wheatley <kevin.wheatley@framestore.com>
Date: Wed, 15 Jan 2025 12:56:45 +0000
Subject: [PATCH 32/70] Restore function mapping table index to hue

Signed-off-by: Kevin Wheatley <kevin.wheatley@framestore.com>
---
 .../ops/fixedfunction/ACES2/Transform.cpp            | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
index 4669b37390..d233704158 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
@@ -17,6 +17,12 @@ namespace ACES2
 // Table lookups
 //
 
+inline float base_hue_for_position(int i_lo, int table_size) 
+{
+    const float result = i_lo * hue_limit / table_size;
+    return result;
+}
+
 inline int hue_position_in_uniform_table(float wrapped_hue, int table_size)
 {
     return int(wrapped_hue / hue_limit * float(table_size)); // TODO: can we use the 'lost' fraction for the lerps?
@@ -86,7 +92,7 @@ float hue_dependent_upper_hull_gamma(float h, const ACES2::Table1D &gt)
     const int i_lo = clamp_to_table_bounds(hue_position_in_uniform_table(h, gt.size) + gt.base_index, gt.total_size);  // TODO: this should be removed if we can constrain the hue range properly
     const int i_hi = next_position_in_table(i_lo, gt.size);
 
-    const float base_hue = float(i_lo - gt.base_index);
+    const float base_hue = base_hue_for_position(i_lo - gt.base_index, gt.size);
 
     const float t = h - base_hue;
 
@@ -498,7 +504,7 @@ Table1D make_reach_m_table(const JMhParams &params, const float limit_J_max)
     Table1D gamutReachTable{};
 
     for (int i = 0; i < gamutReachTable.size; i++) {
-        const float hue = float(i);
+        const float hue = base_hue_for_position(i, gamutReachTable.size);
 
         const float search_range = 50.f;
         float low = 0.f;
@@ -837,7 +843,7 @@ Table1D make_upper_hull_gamma(
     {
         gammaTable.table[i] = -1.f;
 
-        const float hue = float(i);
+        const float hue = base_hue_for_position(i, gamutCuspTable.size);
         const f2 JMcusp = cusp_from_table(hue, gamutCuspTable, hue_linearity_search_range);
 
         std::array<f3, test_count> testJMh;

From a2ba5343bd118db1bcff1e5c8068e7f15321748d Mon Sep 17 00:00:00 2001
From: Kevin Wheatley <kevin.wheatley@framestore.com>
Date: Thu, 16 Jan 2025 14:14:08 +0000
Subject: [PATCH 33/70] Minor tweaks to tonescale inverse clamp

Signed-off-by: Kevin Wheatley <kevin.wheatley@framestore.com>
---
 .../ops/fixedfunction/ACES2/Common.h          |  1 +
 .../ops/fixedfunction/ACES2/Transform.cpp     | 52 ++++++++++---------
 2 files changed, 29 insertions(+), 24 deletions(-)

diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h b/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
index 50678ac0d2..ed9e1a21e3 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
@@ -58,6 +58,7 @@ struct ToneScaleParams
     float s_2;
     float u_2;
     float m_2;
+    float inverse_limit;
     float log_peak;
 };
 
diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
index d233704158..4e3c46f21d 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
@@ -42,7 +42,7 @@ f2 cusp_from_table(float h, const Table3D &gt, const std::array<int, 2> & hue_li
 {
     int i = hue_position_in_uniform_table(h, gt.size) + gt.base_index;
     int i_lo = std::max(0, i + hue_linearity_search_range[0]);
-    int i_hi = std::min(gt.total_size - 1, i + hue_linearity_search_range[1]);
+    int i_hi = std::min(gt.base_index + gt.size, i + hue_linearity_search_range[1]);
 
     while (i_lo + 1 < i_hi)
     {
@@ -288,32 +288,32 @@ inline float toe_inv( float x, float limit, float k1_in, float k2_in)
     return (x * x + k1 * x) / (k3 * (x + k2));
 }
 
-float tonescale_fwd(const float J, const JMhParams &p, const ToneScaleParams &pt) // TODO: consider computing tonescale from and to A rather than J to avoid extra pow() calls
+template <bool inverse>
+inline float aces_tonescale(const float Y_in, const JMhParams &p, const ToneScaleParams &pt)
 {
-    // Tonescale applied in Y (convert to and from J)
-    const float J_abs = std::abs(J);
-    const float Y     = _J_to_Y(J_abs, p);
-    
-    const float f    = pt.m_2 * powf(Y / (Y + pt.s_2), pt.g);
-    const float Y_ts = std::max(0.f, f * f / (f + pt.t_1)) * pt.n_r;  // max prevents -ve values being output also handles division by zero possibility
+    if (inverse)
+    {
+        const float Y_ts_norm = Y_in / reference_luminance; // TODO
+        const float Z = std::max(0.f, std::min(pt.inverse_limit, Y_ts_norm)); // TODO: could eliminate max in the context of the full tonescale
+        const float f = (Z + sqrt(Z * (4.f * pt.t_1 + Z))) / 2.f;
+        const float Y = pt.s_2 / (powf((pt.m_2 / f), (1.f / pt.g)) - 1.f);
+        return Y;
+    }
 
-    const float J_ts  = _Y_to_J(Y_ts, p);
-    return std::copysign(J_ts, Y_ts);
+    const float f    = pt.m_2 * powf(Y_in / (Y_in + pt.s_2), pt.g);
+    const float Y_ts = std::max(0.f, f * f / (f + pt.t_1)) * pt.n_r;  // max prevents -ve values being output also handles division by zero possibility
+    return Y_ts;
 }
 
-float tonescale_inv(const float J_ts, const JMhParams &p, const ToneScaleParams &pt)
+template <bool inverse>
+float tonescale(const float J, const JMhParams &p, const ToneScaleParams &pt) // TODO: consider computing tonescale from and to A rather than J to avoid extra pow() calls
 {
-    // Inverse Tonescale applied in Y (convert to and from J)
-    const float J_abs = std::abs(J_ts);
-    const float Y_ts  = _J_to_Y(J_abs, p);
-
-    const float Y_ts_norm = Y_ts / reference_luminance; // TODO
-    const float Z = std::max(0.f, std::min(pt.n / (pt.u_2 * pt.n_r), Y_ts_norm));  //TODO
-    const float f = (Z + sqrt(Z * (4.f * pt.t_1 + Z))) / 2.f;
-    const float Y = pt.s_2 / (powf((pt.m_2 / f), (1.f / pt.g)) - 1.f);
-
-    const float J     = _Y_to_J(Y, p);
-    return std::copysign(J, Y_ts);
+    // Tonescale applied in Y (convert to and from J)
+    const float J_abs = std::abs(J);
+    const float Y_in  = _J_to_Y(J_abs, p);
+    const float Y_out = aces_tonescale<inverse>(Y_in, p, pt);
+    const float J_out = _Y_to_J(Y_out, p);
+    return std::copysign(J_out, J);
 }
 
 f3 tonescale_chroma_compress_fwd(const f3 &JMh, const JMhParams &p, const ToneScaleParams &pt, const ResolvedSharedCompressionParameters &pr, const ChromaCompressParams &pc)
@@ -322,7 +322,8 @@ f3 tonescale_chroma_compress_fwd(const f3 &JMh, const JMhParams &p, const ToneSc
     const float M = JMh[1];
     const float h = JMh[2];
 
-    const float J_ts = tonescale_fwd(J, p, pt);
+    constexpr bool inverse = false;
+    const float J_ts = tonescale<inverse>(J, p, pt);
 
     // ChromaCompress
     float M_cp = M;
@@ -350,7 +351,8 @@ f3 tonescale_chroma_compress_inv(const f3 &JMh, const JMhParams &p, const ToneSc
     const float M_cp = JMh[1];
     const float h    = JMh[2];
 
-    const float J = tonescale_inv(J_ts, p, pt);
+    constexpr bool inverse = true;
+    const float J = tonescale<inverse>(J_ts, p, pt);
 
     // Inverse ChromaCompress
     float M = M_cp;
@@ -939,6 +941,7 @@ ToneScaleParams init_ToneScaleParams(float peakLuminance)
     const float s_2 = w_2 * m_1 * reference_luminance;
     const float u_2 = powf((r_hit/m_1)/((r_hit/m_1) + w_2), g);
     const float m_2 = m_1 / u_2;
+    const float inverse_limit = n / (u_2 * n_r);
     const float log_peak = log10( n / n_r);
 
     ToneScaleParams TonescaleParams = {
@@ -950,6 +953,7 @@ ToneScaleParams init_ToneScaleParams(float peakLuminance)
         s_2,
         u_2,
         m_2,
+        inverse_limit,
         log_peak
     };
 

From 9720a153febeb2584e783a76df635f301f4953ff Mon Sep 17 00:00:00 2001
From: Kevin Wheatley <kevin.wheatley@framestore.com>
Date: Thu, 16 Jan 2025 16:35:23 +0000
Subject: [PATCH 34/70] Remove duplicate table whilst calculating upper hull
 gamma

Signed-off-by: Kevin Wheatley <kevin.wheatley@framestore.com>
---
 .../ops/fixedfunction/ACES2/Transform.cpp     | 24 +++++++------------
 1 file changed, 9 insertions(+), 15 deletions(-)

diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
index 4e3c46f21d..64b554a391 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
@@ -296,12 +296,12 @@ inline float aces_tonescale(const float Y_in, const JMhParams &p, const ToneScal
         const float Y_ts_norm = Y_in / reference_luminance; // TODO
         const float Z = std::max(0.f, std::min(pt.inverse_limit, Y_ts_norm)); // TODO: could eliminate max in the context of the full tonescale
         const float f = (Z + sqrt(Z * (4.f * pt.t_1 + Z))) / 2.f;
-        const float Y = pt.s_2 / (powf((pt.m_2 / f), (1.f / pt.g)) - 1.f);
+        const float Y = pt.s_2 / (powf((pt.m_2 / f), (1.f / pt.g)) - 1.f); // TODO: investigate precomputing reciprocal m_2  and a negative power? may swap a division for a multiply? powf(pt.m_2_recip * f, (-1.f / pt.g))
         return Y;
     }
 
     const float f    = pt.m_2 * powf(Y_in / (Y_in + pt.s_2), pt.g);
-    const float Y_ts = std::max(0.f, f * f / (f + pt.t_1)) * pt.n_r;  // max prevents -ve values being output also handles division by zero possibility
+    const float Y_ts = std::max(0.f, f * f / (f + pt.t_1)) * pt.n_r;  // max prevents -ve values being output also handles division by zero possibility // TODO: can we eliminate the n_r scaling? should it be reference_luminance
     return Y_ts;
 }
 
@@ -838,14 +838,13 @@ Table1D make_upper_hull_gamma(
     constexpr int test_count = 3;
     const std::array<float, test_count> testPositions = {0.01f, 0.5f, 0.99f};
 
-    Table1D gammaTable{}; // TODO: do we need the 2nd table or can this be done "in place"
     Table1D gamutTopGamma{};
 
-    for (int i = 0; i < gammaTable.size; i++)
+    for (int i = 0; i < gamutTopGamma.size; i++)
     {
-        gammaTable.table[i] = -1.f;
+        gamutTopGamma.table[i + gamutTopGamma.base_index] = -1.f;
 
-        const float hue = base_hue_for_position(i, gamutCuspTable.size);
+        const float hue = base_hue_for_position(i, gamutTopGamma.size);
         const f2 JMcusp = cusp_from_table(hue, gamutCuspTable, hue_linearity_search_range);
 
         std::array<f3, test_count> testJMh;
@@ -889,23 +888,18 @@ Table1D make_upper_hull_gamma(
             if (gammaFound)
             {
                 high = testGamma;
-                gammaTable.table[i] = 1.0f / high;
+                gamutTopGamma.table[i + gamutTopGamma.base_index] = 1.0f / high;
             }
             else
             {
                 low = testGamma;
             }
         }
-
-        // Duplicate gamma value to array, leaving empty entries at first and last position
-        gamutTopGamma.table[i+gamutTopGamma.base_index] = gammaTable.table[i];
     }
 
-    // Copy last populated entry to first empty spot
-    gamutTopGamma.table[0] = gammaTable.table[gammaTable.size-1];
-
-    // Copy first populated entry to last empty spot
-    gamutTopGamma.table[gamutTopGamma.total_size-1] = gammaTable.table[0];
+    // Copy last populated entries to empty spot 'wrapping' entries
+    gamutTopGamma.table[0] = gamutTopGamma.table[gamutTopGamma.base_index + gamutTopGamma.size - 1];
+    gamutTopGamma.table[gamutTopGamma.base_index + gamutTopGamma.size] = gamutTopGamma.table[gamutCuspTable.base_index];
 
     return gamutTopGamma;
 }

From f9b80870d7c9f2316bd7cad3b745329ee145be8c Mon Sep 17 00:00:00 2001
From: Kevin Wheatley <kevin.wheatley@framestore.com>
Date: Fri, 17 Jan 2025 10:32:22 +0000
Subject: [PATCH 35/70] Add some additional sample points for the upper hull
 gamma finder

Signed-off-by: Kevin Wheatley <kevin.wheatley@framestore.com>
---
 .../ops/fixedfunction/ACES2/Transform.cpp           | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
index 64b554a391..b87468fa39 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
@@ -788,9 +788,11 @@ f3 gamut_compress_inv(const f3 &JMh, const ResolvedSharedCompressionParameters &
     return compressGamut<true>(JMh, Jx, sr, p, hdp);
 }
 
+static constexpr int gamma_test_count = 5;
+
 bool evaluate_gamma_fit(
     const f2 &JMcusp,
-    const std::array<f3, 3> JMh_values,
+    const std::array<f3, gamma_test_count> JMh_values,
     float topGamma_inv,
     float peakLuminance,
     float limit_J_max,
@@ -835,8 +837,7 @@ Table1D make_upper_hull_gamma(
     float lower_hull_gamma_inv,
     const JMhParams &limitJMhParams)
 {
-    constexpr int test_count = 3;
-    const std::array<float, test_count> testPositions = {0.01f, 0.5f, 0.99f};
+    const std::array<float, gamma_test_count> testPositions = {0.01f, 0.1f, 0.5f, 0.8f, 0.99f};
 
     Table1D gamutTopGamma{};
 
@@ -847,8 +848,8 @@ Table1D make_upper_hull_gamma(
         const float hue = base_hue_for_position(i, gamutTopGamma.size);
         const f2 JMcusp = cusp_from_table(hue, gamutCuspTable, hue_linearity_search_range);
 
-        std::array<f3, test_count> testJMh;
-        for (int testIndex = 0; testIndex < test_count; testIndex++)
+        std::array<f3, gamma_test_count> testJMh;
+        for (int testIndex = 0; testIndex < testJMh.size(); testIndex++)
         {
             const float testJ = JMcusp[0] + ((limit_J_max - JMcusp[0]) * testPositions[testIndex]);
             testJMh[testIndex] = {
@@ -866,7 +867,7 @@ Table1D make_upper_hull_gamma(
         float high = low + search_range;
         bool outside = false;
 
-        while (!(outside) && (high < 5.f))
+        while (!(outside) && (high < gammaMaximum))
         {
             const bool gammaFound = evaluate_gamma_fit(JMcusp, testJMh, 1.0f / high, peakLuminance, limit_J_max, mid_J, focus_dist, lower_hull_gamma_inv, limitJMhParams);
             if (!gammaFound)

From d661f07ee886b165f8ed67ecd66dcc51cc9dfe65 Mon Sep 17 00:00:00 2001
From: Kevin Wheatley <kevin.wheatley@framestore.com>
Date: Fri, 17 Jan 2025 13:07:02 +0000
Subject: [PATCH 36/70] Slight tidy up of gamma fitting code

Signed-off-by: Kevin Wheatley <kevin.wheatley@framestore.com>
---
 .../ops/fixedfunction/ACES2/Transform.cpp     | 23 ++++++++++---------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
index b87468fa39..835cc23e53 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
@@ -849,15 +849,12 @@ Table1D make_upper_hull_gamma(
         const f2 JMcusp = cusp_from_table(hue, gamutCuspTable, hue_linearity_search_range);
 
         std::array<f3, gamma_test_count> testJMh;
-        for (int testIndex = 0; testIndex < testJMh.size(); testIndex++)
-        {
-            const float testJ = JMcusp[0] + ((limit_J_max - JMcusp[0]) * testPositions[testIndex]);
-            testJMh[testIndex] = {
-                testJ,
-                JMcusp[1],
-                hue
-            };
-        }
+        std::generate(testJMh.begin(), testJMh.end(), [JMcusp, limit_J_max, testPositions, hue, testIndex = 0]() mutable {
+            const float testJ = lerpf(JMcusp[0], limit_J_max, testPositions[testIndex]);
+            f3 result = { testJ, JMcusp[1], hue };
+            testIndex++;
+            return result;
+        });
 
         // TODO: calculate best fitting inverse gamma directly rather than reciprocating it in the loop
         // TODO: adjacent found gamma values typically fall close to each other could initialise the search range
@@ -867,9 +864,13 @@ Table1D make_upper_hull_gamma(
         float high = low + search_range;
         bool outside = false;
 
+        auto gamma_fit_predicate = [JMcusp, &testJMh, peakLuminance, limit_J_max, mid_J, focus_dist, lower_hull_gamma_inv, limitJMhParams](float gamma)
+        {
+            return evaluate_gamma_fit(JMcusp, testJMh, 1.0f / gamma, peakLuminance, limit_J_max, mid_J, focus_dist, lower_hull_gamma_inv, limitJMhParams);
+        };
         while (!(outside) && (high < gammaMaximum))
         {
-            const bool gammaFound = evaluate_gamma_fit(JMcusp, testJMh, 1.0f / high, peakLuminance, limit_J_max, mid_J, focus_dist, lower_hull_gamma_inv, limitJMhParams);
+            const bool gammaFound = gamma_fit_predicate(high);
             if (!gammaFound)
             {
                 low = high;
@@ -885,7 +886,7 @@ Table1D make_upper_hull_gamma(
         while ( (high-low) > gammaAccuracy)
         {
             testGamma = (high + low) / 2.f;
-            const bool gammaFound = evaluate_gamma_fit(JMcusp, testJMh, 1.0f / testGamma, peakLuminance, limit_J_max, mid_J, focus_dist, lower_hull_gamma_inv, limitJMhParams);
+            const bool gammaFound = gamma_fit_predicate(testGamma);
             if (gammaFound)
             {
                 high = testGamma;

From 16f9d209b37672ca44fbac694456a52df6060dc6 Mon Sep 17 00:00:00 2001
From: Kevin Wheatley <kevin.wheatley@framestore.com>
Date: Fri, 17 Jan 2025 14:45:16 +0000
Subject: [PATCH 37/70] Experiment with alternate smin implementation

Signed-off-by: Kevin Wheatley <kevin.wheatley@framestore.com>
---
 src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
index 835cc23e53..e1acf556ef 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
@@ -625,6 +625,13 @@ inline float smin(float a, float b, float s)
     return std::min(a, b) - h * h * h * s * (1.f / 6.f);
 }
 
+// Redefined smin function to handle scaling by adjusting k
+inline float smin_scaled(float a, float b, float s, float scale) {
+    float s_scaled = s * scale;
+    float h = std::max(s_scaled - std::abs(a - b), 0.0f) / s_scaled;
+    return std::min(a, b) - h * h * h * s_scaled * (1.f / 6.f);
+}
+
 inline float compute_compression_vector_slope(const float intersectJ, const float focusJ, const float limitJmax, const float slope_gain)
 {
     const float direction_scaler = (intersectJ < focusJ) ? intersectJ : (limitJmax - intersectJ); // TODO < vs <=
@@ -665,8 +672,8 @@ float find_gamut_boundary_intersection(const f2 &JM_cusp, float J_max, float gam
 
     // Smooth minimum between the two calculated values for the M component
     // TODO: do we need to normalise based on JM_cusp[1]
-    const float M_boundary = JM_cusp[1] * smin(M_boundary_lower / JM_cusp[1], M_boundary_upper / JM_cusp[1], smooth_cusps);
-
+    //const float M_boundary = JM_cusp[1] * smin(M_boundary_lower / JM_cusp[1], M_boundary_upper / JM_cusp[1], smooth_cusps);
+    const float M_boundary = smin_scaled(M_boundary_lower, M_boundary_upper, smooth_cusps, JM_cusp[1]);
     return M_boundary;
 }
 

From d8a253694ad86b2c2f2d1f1b813ff61d0ee0b7d1 Mon Sep 17 00:00:00 2001
From: Kevin Wheatley <kevin.wheatley@framestore.com>
Date: Mon, 20 Jan 2025 14:52:01 +0000
Subject: [PATCH 38/70] Remove unused function and tidy up comments

Signed-off-by: Kevin Wheatley <kevin.wheatley@framestore.com>
---
 .../ops/fixedfunction/ACES2/Transform.cpp     | 21 +++++--------------
 1 file changed, 5 insertions(+), 16 deletions(-)

diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
index e1acf556ef..0302f4bfb0 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
@@ -619,16 +619,11 @@ float solve_J_intersect(float J, float M, float focusJ, float maxJ, float slope_
     return intersectJ;
 }
 
-inline float smin(float a, float b, float s)
+// Smooth minimum about the scaled reference, based upon a cubic polynomial
+inline float smin_scaled(float a, float b, float scale_reference)
 {
-    const float h = std::max(s - std::abs(a - b), 0.f) / s;
-    return std::min(a, b) - h * h * h * s * (1.f / 6.f);
-}
-
-// Redefined smin function to handle scaling by adjusting k
-inline float smin_scaled(float a, float b, float s, float scale) {
-    float s_scaled = s * scale;
-    float h = std::max(s_scaled - std::abs(a - b), 0.0f) / s_scaled;
+    const float s_scaled = smooth_cusps * scale_reference;
+    const float h = std::max(s_scaled - std::abs(a - b), 0.0f) / s_scaled;
     return std::min(a, b) - h * h * h * s_scaled * (1.f / 6.f);
 }
 
@@ -671,9 +666,7 @@ float find_gamut_boundary_intersection(const f2 &JM_cusp, float J_max, float gam
       estimate_line_and_boundary_intersection_M(f_J_intersect_source, -slope, gamma_top_inv, f_JM_cusp_J, JM_cusp[1], f_J_intersect_cusp);
 
     // Smooth minimum between the two calculated values for the M component
-    // TODO: do we need to normalise based on JM_cusp[1]
-    //const float M_boundary = JM_cusp[1] * smin(M_boundary_lower / JM_cusp[1], M_boundary_upper / JM_cusp[1], smooth_cusps);
-    const float M_boundary = smin_scaled(M_boundary_lower, M_boundary_upper, smooth_cusps, JM_cusp[1]);
+    const float M_boundary = smin_scaled(M_boundary_lower, M_boundary_upper, JM_cusp[1]);
     return M_boundary;
 }
 
@@ -743,10 +736,6 @@ f3 compressGamut(const f3 &JMh, float Jx, const ACES2::ResolvedSharedCompression
         return {J, 0.f, h};
     }
 
-    //const float reach_slope_gain = get_focus_gain(gamut_boundary[0], hdp.JMcusp[0], sr.limit_J_max, p.focus_dist);
-    //const float reach_intersectJ = solve_J_intersect(gamut_boundary[0], gamut_boundary[1], hdp.focusJ, sr.limit_J_max, reach_slope_gain);
-    //const float reach_slope      = compute_compression_vector_slope(reach_intersectJ, hdp.focusJ, sr.limit_J_max, reach_slope_gain);
-    //const float reachBoundaryM   = estimate_line_and_boundary_intersection_M(reach_intersectJ, reach_slope, sr.model_gamma_inv, sr.limit_J_max, sr.reachMaxM, sr.limit_J_max);
     const float reachBoundaryM   = estimate_line_and_boundary_intersection_M(J_intersect_source, gamut_slope, sr.model_gamma_inv, sr.limit_J_max, sr.reachMaxM, sr.limit_J_max);
 
     const float remapped_M = remap_M<invert>(M, gamut_boundary_M, reachBoundaryM);

From c4e05c1b8b028965727ca4e6a972325d19b73218 Mon Sep 17 00:00:00 2001
From: Kevin Wheatley <kevin.wheatley@framestore.com>
Date: Mon, 20 Jan 2025 16:48:36 +0000
Subject: [PATCH 39/70] Extract hue search into separate function

Signed-off-by: Kevin Wheatley <kevin.wheatley@framestore.com>
---
 .../ops/fixedfunction/ACES2/Transform.cpp     | 21 ++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
index 0302f4bfb0..0ada3a6477 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
@@ -38,8 +38,10 @@ inline int clamp_to_table_bounds(int entry, int table_size) // TODO: this should
     return std::min(table_size - 1, std::max(0, entry));
 }
 
-f2 cusp_from_table(float h, const Table3D &gt, const std::array<int, 2> & hue_linearity_search_range)
+int lookup_hue_interval(float h, const Table3D &gt, const std::array<int, 2> & hue_linearity_search_range)
 {
+    // Search the given Table for the interval containing the desired hue
+    // Retruns the upper index of the interval
     int i = hue_position_in_uniform_table(h, gt.size) + gt.base_index;
     int i_lo = std::max(0, i + hue_linearity_search_range[0]);
     int i_hi = std::min(gt.base_index + gt.size, i + hue_linearity_search_range[1]);
@@ -59,6 +61,19 @@ f2 cusp_from_table(float h, const Table3D &gt, const std::array<int, 2> & hue_li
 
     i_hi = std::max(1, i_hi);
 
+    return i_hi;
+}
+
+inline float interpolation_weight(float h, float h_lo, float h_hi)
+{
+    return (h - h_lo) / (h_hi - h_lo);
+}
+
+
+f2 cusp_from_table(float h, const Table3D &gt, const std::array<int, 2> & hue_linearity_search_range)
+{
+    const int i_hi = lookup_hue_interval(h, gt, hue_linearity_search_range);
+
     const f3 lo {
         gt.table[i_hi-1][0],
         gt.table[i_hi-1][1],
@@ -71,7 +86,7 @@ f2 cusp_from_table(float h, const Table3D &gt, const std::array<int, 2> & hue_li
         gt.table[i_hi][2]
     };
 
-    const float t = (h - lo[2]) / (hi[2] - lo[2]);
+    const float t = interpolation_weight(h, lo[2], hi[2]);
     const float cuspJ = lerpf(lo[0], hi[0], t);
     const float cuspM = lerpf(lo[1], hi[1], t);
 
@@ -83,7 +98,7 @@ float reach_m_from_table(float h, const ACES2::Table1D &gt)
     const int i_lo = clamp_to_table_bounds(hue_position_in_uniform_table(h, gt.size), gt.total_size);  // TODO: this should be removed if we can constrain the hue range properly
     const int i_hi = next_position_in_table(i_lo, gt.size);
 
-    const float t = (h - i_lo) / (i_hi - i_lo);
+    const float t = interpolation_weight(h, i_lo, i_hi);
     return lerpf(gt.table[i_lo], gt.table[i_hi], t);
 }
 

From ff6ca090a07339a676654f58c98c2f6ea31e26fa Mon Sep 17 00:00:00 2001
From: Kevin Wheatley <kevin.wheatley@framestore.com>
Date: Mon, 27 Jan 2025 13:22:31 +0000
Subject: [PATCH 40/70] Extract hues into separate table, merge gamma values
 into their place (gamma values now sampled on cusp hue intervals). Removes
 extra texture from GPU path.

Signed-off-by: Kevin Wheatley <kevin.wheatley@framestore.com>
---
 .../ops/fixedfunction/ACES2/Common.h          |   4 +-
 .../ops/fixedfunction/ACES2/Transform.cpp     | 114 +++++++--------
 .../ops/fixedfunction/FixedFunctionOpGPU.cpp  | 135 +++---------------
 3 files changed, 70 insertions(+), 183 deletions(-)

diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h b/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
index ed9e1a21e3..74dfbea3cf 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
@@ -87,9 +87,9 @@ struct ChromaCompressParams
 
 struct HueDependantGamutParams
 {
-    float gamma_top_inv;
     float gamma_bottom_inv;
     f2 JMcusp;
+    float gamma_top_inv;
     float focusJ;
     float analytical_threshold;
 };
@@ -99,8 +99,8 @@ struct GamutCompressParams
     float focus_dist;
     float lower_hull_gamma_inv;
     std::array<int, 2> hue_linearity_search_range;
+    Table1D hue_table;;
     Table3D gamut_cusp_table;
-    Table1D upper_hull_gamma_inv_table;
 };
 
 // CAM
diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
index 0ada3a6477..2af5b8f9d6 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
@@ -38,17 +38,17 @@ inline int clamp_to_table_bounds(int entry, int table_size) // TODO: this should
     return std::min(table_size - 1, std::max(0, entry));
 }
 
-int lookup_hue_interval(float h, const Table3D &gt, const std::array<int, 2> & hue_linearity_search_range)
+int lookup_hue_interval(float h, const Table1D &hues, const std::array<int, 2> & hue_linearity_search_range)
 {
     // Search the given Table for the interval containing the desired hue
-    // Retruns the upper index of the interval
-    int i = hue_position_in_uniform_table(h, gt.size) + gt.base_index;
+    // Returns the upper index of the interval
+    int i = hue_position_in_uniform_table(h, hues.size) + hues.base_index;
     int i_lo = std::max(0, i + hue_linearity_search_range[0]);
-    int i_hi = std::min(gt.base_index + gt.size, i + hue_linearity_search_range[1]);
+    int i_hi = std::min(hues.base_index + hues.size, i + hue_linearity_search_range[1]);
 
     while (i_lo + 1 < i_hi)
     {
-        if (h > gt.table[i][2])
+        if (h > hues.table[i])
         {
             i_lo = i;
         }
@@ -69,49 +69,21 @@ inline float interpolation_weight(float h, float h_lo, float h_hi)
     return (h - h_lo) / (h_hi - h_lo);
 }
 
-
-f2 cusp_from_table(float h, const Table3D &gt, const std::array<int, 2> & hue_linearity_search_range)
+inline f3 cusp_from_table(int i_hi, float t, const Table3D &gt)
 {
-    const int i_hi = lookup_hue_interval(h, gt, hue_linearity_search_range);
-
-    const f3 lo {
-        gt.table[i_hi-1][0],
-        gt.table[i_hi-1][1],
-        gt.table[i_hi-1][2]
-    };
-
-    const f3 hi {
-        gt.table[i_hi][0],
-        gt.table[i_hi][1],
-        gt.table[i_hi][2]
-    };
-
-    const float t = interpolation_weight(h, lo[2], hi[2]);
-    const float cuspJ = lerpf(lo[0], hi[0], t);
-    const float cuspM = lerpf(lo[1], hi[1], t);
-
-    return { cuspJ, cuspM };
+    const float cuspJ = lerpf(gt.table[i_hi-1][0], gt.table[i_hi][0], t);
+    const float cuspM = lerpf(gt.table[i_hi-1][1], gt.table[i_hi][1], t);
+    const float upperGamma = lerpf(gt.table[i_hi-1][2], gt.table[i_hi][2], t);
+    return { cuspJ, cuspM, upperGamma };
 }
 
-float reach_m_from_table(float h, const ACES2::Table1D &gt)
+float reach_m_from_table(float h, const ACES2::Table1D &rt)
 {
-    const int i_lo = clamp_to_table_bounds(hue_position_in_uniform_table(h, gt.size), gt.total_size);  // TODO: this should be removed if we can constrain the hue range properly
-    const int i_hi = next_position_in_table(i_lo, gt.size);
+    const int i_lo = clamp_to_table_bounds(hue_position_in_uniform_table(h, rt.size), rt.total_size);  // TODO: this should be removed if we can constrain the hue range properly
+    const int i_hi = next_position_in_table(i_lo, rt.size);
 
     const float t = interpolation_weight(h, i_lo, i_hi);
-    return lerpf(gt.table[i_lo], gt.table[i_hi], t);
-}
-
-float hue_dependent_upper_hull_gamma(float h, const ACES2::Table1D &gt)
-{
-    const int i_lo = clamp_to_table_bounds(hue_position_in_uniform_table(h, gt.size) + gt.base_index, gt.total_size);  // TODO: this should be removed if we can constrain the hue range properly
-    const int i_hi = next_position_in_table(i_lo, gt.size);
-
-    const float base_hue = base_hue_for_position(i_lo - gt.base_index, gt.size);
-
-    const float t = h - base_hue;
-
-    return lerpf(gt.table[i_lo], gt.table[i_hi], t);
+    return lerpf(rt.table[i_lo], rt.table[i_hi], t);
 }
 
 //
@@ -468,7 +440,7 @@ JMhParams init_JMhParams(const Primaries &prims)
    return p;
 }
 
-Table3D make_gamut_table(const JMhParams &params, float peakLuminance)
+Table3D make_gamut_table(const JMhParams &params, float peakLuminance, Table1D& hue_table)
 {
     Table3D gamutCuspTableUnsorted{};
     int minhIndex = 0;
@@ -508,6 +480,12 @@ Table3D make_gamut_table(const JMhParams &params, float peakLuminance)
     // Wrap the hues, to maintain monotonicity. These entries will fall outside [0.0, hue_limit)
     gamutCuspTable.table[0][2] = gamutCuspTable.table[0][2] - hue_limit;
     gamutCuspTable.table[gamutCuspTable.size+1][2] = gamutCuspTable.table[gamutCuspTable.size+1][2] + hue_limit;
+
+    // Extract hue table
+    for (int i = 0; i < gamutCuspTable.total_size; i++)
+    {
+        hue_table.table[i] = gamutCuspTable.table[i][2];
+    }
     return gamutCuspTable;
 }
 
@@ -768,12 +746,17 @@ inline float compute_focusJ(float cusp_J, float mid_J, float limit_J_max)
     return lerpf(cusp_J, mid_J, std::min(1.f, cusp_mid_blend - (cusp_J / limit_J_max)));
 }
 
-HueDependantGamutParams init_HueDependantGamutParams(const f3 &JMh, const ResolvedSharedCompressionParameters &sr, const GamutCompressParams &p)
+HueDependantGamutParams init_HueDependantGamutParams(const float hue, const ResolvedSharedCompressionParameters &sr, const GamutCompressParams &p)
 {
     HueDependantGamutParams hdp;
-    hdp.gamma_top_inv = hue_dependent_upper_hull_gamma(JMh[2], p.upper_hull_gamma_inv_table);
     hdp.gamma_bottom_inv = p.lower_hull_gamma_inv;
-    hdp.JMcusp = cusp_from_table(JMh[2], p.gamut_cusp_table, p.hue_linearity_search_range);
+
+    const int i_hi = lookup_hue_interval(hue, p.hue_table, p.hue_linearity_search_range);
+    const float t = interpolation_weight(hue, p.hue_table.table[i_hi-1], p.hue_table.table[i_hi]);
+    const f3 cusp = cusp_from_table(i_hi, t, p.gamut_cusp_table);
+
+    hdp.JMcusp = { cusp[0], cusp[1] };
+    hdp.gamma_top_inv = { cusp[2] };
     hdp.focusJ = compute_focusJ(hdp.JMcusp[0], p.mid_J, sr.limit_J_max);
     hdp.analytical_threshold = lerpf(hdp.JMcusp[0], sr.limit_J_max, focus_gain_blend);
     return hdp;
@@ -781,14 +764,14 @@ HueDependantGamutParams init_HueDependantGamutParams(const f3 &JMh, const Resolv
 
 f3 gamut_compress_fwd(const f3 &JMh, const ResolvedSharedCompressionParameters &sr, const GamutCompressParams &p)
 {
-    const HueDependantGamutParams hdp = init_HueDependantGamutParams(JMh, sr, p);
+    const HueDependantGamutParams hdp = init_HueDependantGamutParams(JMh[2], sr, p);
     
     return compressGamut<false>(JMh, JMh[0], sr, p, hdp);
 }
 
 f3 gamut_compress_inv(const f3 &JMh, const ResolvedSharedCompressionParameters &sr, const GamutCompressParams &p)
 {
-    const HueDependantGamutParams hdp = init_HueDependantGamutParams(JMh, sr, p);
+    const HueDependantGamutParams hdp = init_HueDependantGamutParams(JMh[2], sr, p);
 
     float Jx = JMh[0];
     if (Jx > hdp.analytical_threshold)
@@ -838,8 +821,9 @@ bool evaluate_gamma_fit(
     return true;
 }
 
-Table1D make_upper_hull_gamma(
-    const Table3D &gamutCuspTable,
+void make_upper_hull_gamma(
+    const Table1D &hue_table,
+    Table3D &gamutCuspTable,
     const std::array<int, 2> & hue_linearity_search_range,
     float peakLuminance,
     float limit_J_max,
@@ -850,14 +834,16 @@ Table1D make_upper_hull_gamma(
 {
     const std::array<float, gamma_test_count> testPositions = {0.01f, 0.1f, 0.5f, 0.8f, 0.99f};
 
-    Table1D gamutTopGamma{};
-
-    for (int i = 0; i < gamutTopGamma.size; i++)
+    for (int i = gamutCuspTable.base_index; i < gamutCuspTable.base_index + gamutCuspTable.size; i++)
     {
-        gamutTopGamma.table[i + gamutTopGamma.base_index] = -1.f;
+        gamutCuspTable.table[i][2] = -1.f;
+
+        const float hue = hue_table.table[i];
+        const int i_hi = lookup_hue_interval(hue, hue_table, hue_linearity_search_range);
+        const float t = interpolation_weight(hue, hue_table.table[i_hi-1], hue_table.table[i_hi]);
 
-        const float hue = base_hue_for_position(i, gamutTopGamma.size);
-        const f2 JMcusp = cusp_from_table(hue, gamutCuspTable, hue_linearity_search_range);
+        const f3 cusp = cusp_from_table(i_hi, t, gamutCuspTable);
+        const f2 JMcusp = { cusp[0], cusp[1] };
 
         std::array<f3, gamma_test_count> testJMh;
         std::generate(testJMh.begin(), testJMh.end(), [JMcusp, limit_J_max, testPositions, hue, testIndex = 0]() mutable {
@@ -901,7 +887,7 @@ Table1D make_upper_hull_gamma(
             if (gammaFound)
             {
                 high = testGamma;
-                gamutTopGamma.table[i + gamutTopGamma.base_index] = 1.0f / high;
+                gamutCuspTable.table[i][2] = 1.0f / high;
             }
             else
             {
@@ -911,10 +897,8 @@ Table1D make_upper_hull_gamma(
     }
 
     // Copy last populated entries to empty spot 'wrapping' entries
-    gamutTopGamma.table[0] = gamutTopGamma.table[gamutTopGamma.base_index + gamutTopGamma.size - 1];
-    gamutTopGamma.table[gamutTopGamma.base_index + gamutTopGamma.size] = gamutTopGamma.table[gamutCuspTable.base_index];
-
-    return gamutTopGamma;
+    gamutCuspTable.table[0][2] = gamutCuspTable.table[gamutCuspTable.base_index + gamutCuspTable.size - 1][2];
+    gamutCuspTable.table[gamutCuspTable.base_index + gamutCuspTable.size][2] = gamutCuspTable.table[gamutCuspTable.base_index][2];
 }
 
 // Tonescale pre-calculations
@@ -1045,11 +1029,11 @@ GamutCompressParams init_GamutCompressParams(float peakLuminance, const JMhParam
     params.mid_J = mid_J;
     params.focus_dist = focus_dist;
     params.lower_hull_gamma_inv = lower_hull_gamma_inv;
-    params.gamut_cusp_table = make_gamut_table(limitJMhParams, peakLuminance);
+    params.gamut_cusp_table = make_gamut_table(limitJMhParams, peakLuminance, params.hue_table);
     params.hue_linearity_search_range = determine_hue_linearity_search_range(params.gamut_cusp_table);
-    params.upper_hull_gamma_inv_table = make_upper_hull_gamma(params.gamut_cusp_table, params.hue_linearity_search_range,
-                                                              peakLuminance, shParams.limit_J_max, mid_J, focus_dist,
-                                                              lower_hull_gamma_inv, limitJMhParams); //TODO: mess of parameters
+    make_upper_hull_gamma(params.hue_table, params.gamut_cusp_table, params.hue_linearity_search_range,
+                          peakLuminance, shParams.limit_J_max, mid_J, focus_dist,
+                          lower_hull_gamma_inv, limitJMhParams); //TODO: mess of parameters
     return params;
 }
 
diff --git a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
index c50ce327c1..ca03d55621 100644
--- a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
@@ -722,19 +722,14 @@ std::string _Add_Cusp_table(
     GpuShaderText ss(shaderCreator->getLanguage());
 
     const std::string hues_array_name = name + "_hues_array";
-    std::vector<float> hues_array(g.gamut_cusp_table.total_size);
-    for (int i = 0; i < g.gamut_cusp_table.total_size; ++i)
-    {
-        hues_array[i] = g.gamut_cusp_table.table[i][2];
-    }
-    ss.declareFloatArrayConst(hues_array_name, (int) hues_array.size(), hues_array.data());
+    ss.declareFloatArrayConst(hues_array_name, (int) g.hue_table.total_size, g.hue_table.table);
 
-    ss.newLine() << ss.float2Keyword() << " " << name << "_sample(float h)";
+    ss.newLine() << ss.float3Keyword() << " " << name << "_sample(float h)";
     ss.newLine() << "{";
     ss.indent();
 
-    ss.newLine() << ss.floatDecl("i_lo") << " = 0;";
-    ss.newLine() << ss.floatDecl("i_hi") << " = " << g.gamut_cusp_table.base_index + g.gamut_cusp_table.size << ";";
+    ss.newLine() << ss.intDecl("i_lo") << " = 0;";
+    ss.newLine() << ss.intDecl("i_hi") << " = " << g.gamut_cusp_table.base_index + g.gamut_cusp_table.size << ";";
 
     ss.newLine() << ss.floatDecl("hwrap") << " = h;";
     ss.newLine() << "hwrap = hwrap - floor(hwrap / 360.0) * 360.0;";
@@ -759,7 +754,7 @@ std::string _Add_Cusp_table(
     ss.newLine() << "i_hi = i;";
     ss.dedent();
     ss.newLine() << "}";
-    ss.newLine() << "i = " << ss.intKeyword() << "((i_lo + i_hi) / 2.0);";
+    ss.newLine() << "i = (i_lo + i_hi) / 2;";
 
     ss.dedent();
     ss.newLine() << "}";
@@ -775,98 +770,8 @@ std::string _Add_Cusp_table(
         ss.newLine() << ss.float3Decl("hi") << " = " << ss.sampleTex2D(name, ss.float2Const(std::string("(i_hi + 0.5) / ") + std::to_string(g.gamut_cusp_table.total_size), "0.5")) << ".rgb;";
     }
 
-    ss.newLine() << ss.floatDecl("t") << " = (h - lo.b) / (hi.b - lo.b);";
-    ss.newLine() << ss.floatDecl("cuspJ") << " = " << ss.lerp("lo.r", "hi.r", "t") << ";";
-    ss.newLine() << ss.floatDecl("cuspM") << " = " << ss.lerp("lo.g", "hi.g", "t") << ";";
-
-    ss.newLine() << "return " << ss.float2Const("cuspJ", "cuspM") << ";";
-
-    ss.dedent();
-    ss.newLine() << "}";
-
-    shaderCreator->addToHelperShaderCode(ss.string().c_str());
-
-    return name;
-}
-
-std::string _Add_Gamma_table(
-    GpuShaderCreatorRcPtr & shaderCreator,
-    unsigned resourceIndex,
-    const ACES2::GamutCompressParams & g)
-{
-    // Reserve name
-    std::ostringstream resName;
-    resName << shaderCreator->getResourcePrefix()
-            << std::string("_")
-            << std::string("upper_hull_gamma_table_")
-            << resourceIndex;
-
-    // Note: Remove potentially problematic double underscores from GLSL resource names.
-    std::string name(resName.str());
-    StringUtils::ReplaceInPlace(name, "__", "_");
-
-    // Register texture
-    GpuShaderDesc::TextureDimensions dimensions = GpuShaderDesc::TEXTURE_1D;
-    if (shaderCreator->getLanguage() == GPU_LANGUAGE_GLSL_ES_1_0
-        || shaderCreator->getLanguage() == GPU_LANGUAGE_GLSL_ES_3_0
-        || !shaderCreator->getAllowTexture1D())
-    {
-        dimensions = GpuShaderDesc::TEXTURE_2D;
-    }
-
-    shaderCreator->addTexture(
-        name.c_str(),
-        GpuShaderText::getSamplerName(name).c_str(),
-        g.gamut_cusp_table.total_size,
-        1,
-        GpuShaderCreator::TEXTURE_RED_CHANNEL,
-        dimensions,
-        INTERP_NEAREST,
-        &(g.upper_hull_gamma_inv_table.table[0]));
-
-
-    if (dimensions == GpuShaderDesc::TEXTURE_1D)
-    {
-        GpuShaderText ss(shaderCreator->getLanguage());
-        ss.declareTex1D(name);
-        shaderCreator->addToDeclareShaderCode(ss.string().c_str());
-    }
-    else
-    {
-        GpuShaderText ss(shaderCreator->getLanguage());
-        ss.declareTex2D(name);
-        shaderCreator->addToDeclareShaderCode(ss.string().c_str());
-    }
-
-    // Sampler function
-    GpuShaderText ss(shaderCreator->getLanguage());
-
-    ss.newLine() << ss.floatKeyword() << " " << name << "_sample(float h)";
-    ss.newLine() << "{";
-    ss.indent();
-
-    ss.newLine() << ss.floatDecl("hwrap") << " = h;";
-    ss.newLine() << "hwrap = hwrap - floor(hwrap / 360.0) * 360.0;";
-    ss.newLine() << "hwrap = (hwrap < 0.0) ? hwrap + 360.0 : hwrap;";
-
-    ss.newLine() << ss.floatDecl("i_lo") << " = floor(hwrap) + " << g.upper_hull_gamma_inv_table.base_index << ";";
-    ss.newLine() << ss.floatDecl("i_hi") << " = (i_lo + 1);";
-    ss.newLine() << "i_hi = i_hi - floor(i_hi / 360.0) * 360.0;";
-
-    ss.newLine() << ss.floatDecl("base_hue") << " = i_lo - " << g.upper_hull_gamma_inv_table.base_index << ";";
-
-    if (dimensions == GpuShaderDesc::TEXTURE_1D)
-    {
-        ss.newLine() << ss.floatDecl("lo") << " = " << ss.sampleTex1D(name, std::string("(i_lo + 0.5) / ") + std::to_string(g.upper_hull_gamma_inv_table.total_size)) << ".r;";
-        ss.newLine() << ss.floatDecl("hi") << " = " << ss.sampleTex1D(name, std::string("(i_hi + 0.5) / ") + std::to_string(g.upper_hull_gamma_inv_table.total_size)) << ".r;";
-    }
-    else
-    {
-        ss.newLine() << ss.floatDecl("lo") << " = " << ss.sampleTex2D(name, ss.float2Const(std::string("(i_lo + 0.5) / ") + std::to_string(g.upper_hull_gamma_inv_table.total_size), "0.5")) << ".r;";
-        ss.newLine() << ss.floatDecl("hi") << " = " << ss.sampleTex2D(name, ss.float2Const(std::string("(i_hi + 0.5) / ") + std::to_string(g.upper_hull_gamma_inv_table.total_size), "0.5")) << ".r;";
-    }
-
-    ss.newLine() << ss.floatDecl("t") << " = hwrap - base_hue;";
+    ss.newLine() << ss.floatDecl("t") << " = (h - " << hues_array_name << "[i_hi - 1]) / "
+                                                "(" << hues_array_name << "[i_hi]" << " - " << hues_array_name << "[i_hi - 1]);";
     ss.newLine() << "return " << ss.lerp("lo", "hi", "t") << ";";
 
     ss.dedent();
@@ -1208,7 +1113,6 @@ std::string _Add_Compress_Gamut_func(
     const ACES2::GamutCompressParams & g,
     const std::string & cuspName,
     const std::string & getFocusGainName,
-    const std::string & gammaName,
     const std::string & findGamutBoundaryIntersectionName,
     const std::string & getReachBoundaryName,
     const std::string & compressionName)
@@ -1226,7 +1130,7 @@ std::string _Add_Compress_Gamut_func(
 
     GpuShaderText ss(shaderCreator->getLanguage());
 
-    ss.newLine() << ss.float3Keyword() << " " << name << "(" << ss.float3Keyword() << " JMh, float Jx)";
+    ss.newLine() << ss.float3Keyword() << " " << name << "(" << ss.float3Keyword() << " JMh, float Jx, " << ss.float3Keyword() << " JMGcusp)";
     ss.newLine() << "{";
     ss.indent();
 
@@ -1245,12 +1149,12 @@ std::string _Add_Compress_Gamut_func(
     ss.indent();
 
     ss.newLine() << ss.float2Decl("project_from") << " = " << ss.float2Const("J", "M") << ";";
-    ss.newLine() << ss.float2Decl("JMcusp") << " = " << cuspName << "_sample(h);";
+    ss.newLine() << ss.float2Decl("JMcusp") << " = JMGcusp.rg;";
 
     ss.newLine() << ss.floatDecl("focusJ") << " = " << ss.lerp("JMcusp.r", std::to_string(g.mid_J), std::string("min(1.0, ") + std::to_string(ACES2::cusp_mid_blend) + " - (JMcusp.r / " + std::to_string(s.limit_J_max)) << "));";
     ss.newLine() << ss.floatDecl("slope_gain") << " = " << s.limit_J_max << " * " << g.focus_dist << " * " << getFocusGainName << "(Jx, JMcusp.r);";
 
-    ss.newLine() << ss.floatDecl("gamma_top_inv") << " = " << gammaName << "_sample(h);";
+    ss.newLine() << ss.floatDecl("gamma_top_inv") << " = JMGcusp.b;";
     ss.newLine() << ss.floatDecl("gamma_bottom_inv") << " = " << g.lower_hull_gamma_inv << ";";
 
     ss.newLine() << ss.float3Decl("boundaryReturn") << " = " << findGamutBoundaryIntersectionName << "(" << ss.float3Const("J", "M", "h") << ", JMcusp, focusJ, slope_gain, gamma_top_inv, gamma_bottom_inv);";
@@ -1299,17 +1203,17 @@ void _Add_Gamut_Compress_Fwd_Shader(
     const std::string & reachName)
 {
     std::string cuspName = _Add_Cusp_table(shaderCreator, resourceIndex, g);
-    std::string gammaName = _Add_Gamma_table(shaderCreator, resourceIndex, g);
     std::string getFocusGainName = _Add_Focus_Gain_func(shaderCreator, resourceIndex, s, g);
     std::string solveJIntersectName = _Add_Solve_J_Intersect_func(shaderCreator, resourceIndex, s, g);
     std::string findGamutBoundaryIntersectionName = _Add_Find_Gamut_Boundary_Intersection_func(shaderCreator, resourceIndex, s, g, solveJIntersectName);
     std::string getReachBoundaryName = _Add_Reach_Boundary_func(shaderCreator, resourceIndex, s, g, reachName, getFocusGainName, solveJIntersectName);
     std::string compressionName = _Add_Compression_func(shaderCreator, resourceIndex, false);
-    std::string gamutCompressName = _Add_Compress_Gamut_func(shaderCreator, resourceIndex, s, g, cuspName, getFocusGainName, gammaName, findGamutBoundaryIntersectionName, getReachBoundaryName, compressionName);
+    std::string gamutCompressName = _Add_Compress_Gamut_func(shaderCreator, resourceIndex, s, g, cuspName, getFocusGainName, findGamutBoundaryIntersectionName, getReachBoundaryName, compressionName);
 
     const std::string pxl(shaderCreator->getPixelName());
 
-    ss.newLine() << pxl << ".rgb = " << gamutCompressName << "(" << pxl << ".rgb, " << pxl << ".r);";
+    ss.newLine() << ss.float3Decl("JMGcusp") << " = " << cuspName << "_sample(" << pxl << ".b);";
+    ss.newLine() << pxl << ".rgb = " << gamutCompressName << "(" << pxl << ".rgb, " << pxl << ".r, JMGcusp);";
 }
 
 void _Add_Gamut_Compress_Inv_Shader(
@@ -1321,33 +1225,32 @@ void _Add_Gamut_Compress_Inv_Shader(
     const std::string & reachName)
 {
     std::string cuspName = _Add_Cusp_table(shaderCreator, resourceIndex, g);
-    std::string gammaName = _Add_Gamma_table(shaderCreator, resourceIndex, g);
     std::string getFocusGainName = _Add_Focus_Gain_func(shaderCreator, resourceIndex, s, g);
     std::string solveJIntersectName = _Add_Solve_J_Intersect_func(shaderCreator, resourceIndex,s, g);
     std::string findGamutBoundaryIntersectionName = _Add_Find_Gamut_Boundary_Intersection_func(shaderCreator, resourceIndex, s, g, solveJIntersectName);
     std::string getReachBoundaryName = _Add_Reach_Boundary_func(shaderCreator, resourceIndex, s, g, reachName, getFocusGainName, solveJIntersectName);
     std::string compressionName = _Add_Compression_func(shaderCreator, resourceIndex, true);
-    std::string gamutCompressName = _Add_Compress_Gamut_func(shaderCreator, resourceIndex, s, g, cuspName, getFocusGainName, gammaName, findGamutBoundaryIntersectionName, getReachBoundaryName, compressionName);
+    std::string gamutCompressName = _Add_Compress_Gamut_func(shaderCreator, resourceIndex, s, g, cuspName, getFocusGainName, findGamutBoundaryIntersectionName, getReachBoundaryName, compressionName);
 
     const std::string pxl(shaderCreator->getPixelName());
 
-    ss.newLine() << ss.float2Decl("JMcusp") << " = " << cuspName << "_sample(" << pxl << ".b);";
+    ss.newLine() << ss.float3Decl("JMGcusp") << " = " << cuspName << "_sample(" << pxl << ".b);";
     ss.newLine() << ss.floatDecl("Jx") << " = " << pxl << ".r;";
     ss.newLine() << ss.float3Decl("unCompressedJMh") << ";";
 
     // Analytic inverse below threshold
-    ss.newLine() << "if (Jx <= " << ss.lerp("JMcusp.r", std::to_string(s.limit_J_max), std::to_string(ACES2::focus_gain_blend)) << ")";
+    ss.newLine() << "if (Jx <= " << ss.lerp("JMGcusp.r", std::to_string(s.limit_J_max), std::to_string(ACES2::focus_gain_blend)) << ")";
     ss.newLine() << "{";
     ss.indent();
-    ss.newLine() << "unCompressedJMh = " << gamutCompressName << "(" << pxl << ".rgb, Jx);";
+    ss.newLine() << "unCompressedJMh = " << gamutCompressName << "(" << pxl << ".rgb, Jx, JMGcusp);";
     ss.dedent();
     ss.newLine() << "}";
     // Approximation above threshold
     ss.newLine() << "else";
     ss.newLine() << "{";
     ss.indent();
-    ss.newLine() << "Jx = " << gamutCompressName << "(" << pxl << ".rgb, Jx).r;";
-    ss.newLine() << "unCompressedJMh = " << gamutCompressName << "(" << pxl << ".rgb, Jx);";
+    ss.newLine() << "Jx = " << gamutCompressName << "(" << pxl << ".rgb, Jx, JMGcusp).r;";
+    ss.newLine() << "unCompressedJMh = " << gamutCompressName << "(" << pxl << ".rgb, Jx, JMGcusp);";
     ss.dedent();
     ss.newLine() << "}";
 

From 11856b93190709d6b6ea4b4de81144af04cf75a8 Mon Sep 17 00:00:00 2001
From: Kevin Wheatley <kevin.wheatley@framestore.com>
Date: Tue, 21 Jan 2025 11:27:36 +0000
Subject: [PATCH 41/70] Simplify upper hull gamma hue lookup to avoid unneeded
 lerping as we are sampling the table entries directly

Signed-off-by: Kevin Wheatley <kevin.wheatley@framestore.com>
---
 .../ops/fixedfunction/ACES2/Transform.cpp     | 19 ++++++++-----------
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
index 2af5b8f9d6..2b470d3ae4 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
@@ -69,6 +69,11 @@ inline float interpolation_weight(float h, float h_lo, float h_hi)
     return (h - h_lo) / (h_hi - h_lo);
 }
 
+inline float interpolation_weight(float h, int h_lo, int h_hi) // Overload for integer bounds: fraction of the way from h_lo to h_hi at which h lies
+{
+    return (h - h_lo) / (h_hi - h_lo); // float / int: the int difference is converted to float; h_hi == h_lo is not guarded
+}
+
 inline f3 cusp_from_table(int i_hi, float t, const Table3D &gt)
 {
     const float cuspJ = lerpf(gt.table[i_hi-1][0], gt.table[i_hi][0], t);
@@ -824,7 +829,6 @@ bool evaluate_gamma_fit(
 void make_upper_hull_gamma(
     const Table1D &hue_table,
     Table3D &gamutCuspTable,
-    const std::array<int, 2> & hue_linearity_search_range,
     float peakLuminance,
     float limit_J_max,
     float mid_J,
@@ -836,14 +840,8 @@ void make_upper_hull_gamma(
 
     for (int i = gamutCuspTable.base_index; i < gamutCuspTable.base_index + gamutCuspTable.size; i++)
     {
-        gamutCuspTable.table[i][2] = -1.f;
-
         const float hue = hue_table.table[i];
-        const int i_hi = lookup_hue_interval(hue, hue_table, hue_linearity_search_range);
-        const float t = interpolation_weight(hue, hue_table.table[i_hi-1], hue_table.table[i_hi]);
-
-        const f3 cusp = cusp_from_table(i_hi, t, gamutCuspTable);
-        const f2 JMcusp = { cusp[0], cusp[1] };
+        const f2 JMcusp = { gamutCuspTable.table[i][0], gamutCuspTable.table[i][1] };
 
         std::array<f3, gamma_test_count> testJMh;
         std::generate(testJMh.begin(), testJMh.end(), [JMcusp, limit_J_max, testPositions, hue, testIndex = 0]() mutable {
@@ -918,7 +916,7 @@ ToneScaleParams init_ToneScaleParams(float peakLuminance)
 
     // Calculate output constants
     // TODO: factor these calculations into well named functions
-    // TODO: ensure correct use of n_r vs n vs reference_luminance
+    // TODO: ensure correct use of n_r vs n vs reference_luminance vs 100.f etc
     const float r_hit = r_hit_min + (r_hit_max - r_hit_min) * (log(n/n_r)/log(10000.f/100.f));
     const float m_0 = (n / n_r);
     const float m_1 = 0.5f * (m_0 + sqrt(m_0 * (m_0 + 4.f * t_1)));
@@ -1031,8 +1029,7 @@ GamutCompressParams init_GamutCompressParams(float peakLuminance, const JMhParam
     params.lower_hull_gamma_inv = lower_hull_gamma_inv;
     params.gamut_cusp_table = make_gamut_table(limitJMhParams, peakLuminance, params.hue_table);
     params.hue_linearity_search_range = determine_hue_linearity_search_range(params.gamut_cusp_table);
-    make_upper_hull_gamma(params.hue_table, params.gamut_cusp_table, params.hue_linearity_search_range,
-                          peakLuminance, shParams.limit_J_max, mid_J, focus_dist,
+    make_upper_hull_gamma(params.hue_table, params.gamut_cusp_table, peakLuminance, shParams.limit_J_max, mid_J, focus_dist,
                           lower_hull_gamma_inv, limitJMhParams); //TODO: mess of parameters
     return params;
 }

From bec1240d31a2a6aeaf3230a94714b98388288af2 Mon Sep 17 00:00:00 2001
From: Kevin Wheatley <kevin.wheatley@framestore.com>
Date: Tue, 21 Jan 2025 14:07:36 +0000
Subject: [PATCH 42/70] Split out tonescale function, minor tweaks to Aab->JMh

Signed-off-by: Kevin Wheatley <kevin.wheatley@framestore.com>
---
 .../ops/fixedfunction/ACES2/Common.h          | 18 ++++++----
 .../ops/fixedfunction/ACES2/Transform.cpp     | 35 ++++++++++---------
 .../ops/fixedfunction/ACES2/Transform.h       |  7 ++--
 .../ops/fixedfunction/FixedFunctionOpCPU.cpp  | 12 ++++---
 4 files changed, 44 insertions(+), 28 deletions(-)

diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h b/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
index 74dfbea3cf..7cdd2bbce9 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
@@ -18,11 +18,11 @@ namespace ACES2
 constexpr int TABLE_SIZE = 360;
 constexpr int TABLE_ADDITION_ENTRIES = 2;
 constexpr int TABLE_TOTAL_SIZE = TABLE_SIZE + TABLE_ADDITION_ENTRIES;
-constexpr int GAMUT_TABLE_BASE_INDEX = 1;
+constexpr int TABLE_BASE_INDEX = 1;
 
 struct Table3D
 {
-    static constexpr int base_index = GAMUT_TABLE_BASE_INDEX;
+    static constexpr int base_index = TABLE_BASE_INDEX;
     static constexpr int size = TABLE_SIZE;
     static constexpr int total_size = TABLE_TOTAL_SIZE;
     float table[TABLE_TOTAL_SIZE][3];
@@ -30,7 +30,7 @@ struct Table3D
 
 struct Table1D
 {
-    static constexpr int base_index = GAMUT_TABLE_BASE_INDEX;
+    static constexpr int base_index = TABLE_BASE_INDEX;
     static constexpr int size = TABLE_SIZE;
     static constexpr int total_size = TABLE_TOTAL_SIZE;
     float table[TABLE_TOTAL_SIZE];
@@ -117,23 +117,30 @@ constexpr float PI = 3.14159265358979f;
 
 constexpr float hue_limit = 360.0f;
 //constexpr float hue_limit = 2.0f * PI;
-inline float wrap_to_hue_limit(float hue)
+inline float _wrap_to_hue_limit(float y)
 {
-    float y = std::fmod(hue, hue_limit);
     if ( y < 0.f)
     {
         y = y + hue_limit;
     }
     return y;
 }
+
+inline float wrap_to_hue_limit(float hue) // Wraps an arbitrary hue into the 0..hue_limit range
+{
+    float y = std::fmod(hue, hue_limit); // fmod keeps the sign of hue, so y may still be negative here
+    return _wrap_to_hue_limit(y); // negative remainders get hue_limit added by the helper
+}
 inline constexpr float to_degrees(const float v) { return v; }
 inline float from_degrees(const float v) { return wrap_to_hue_limit(v); }
 inline constexpr float to_radians(const float v) { return PI * v / 180.0f; };
+inline float _from_radians(const float v) { return _wrap_to_hue_limit(180.0f * v / PI); }; // No fmod step: only a single +hue_limit shift is applied, so v must already lie within [-2*PI, 2*PI), e.g. an atan2 result
 inline float from_radians(const float v) { return wrap_to_hue_limit(180.0f * v / PI); };
 /*
 inline constexpr float to_degrees(const float v) { return 180.0f * v / PI; }
 inline float from_degrees(const float v) { return wrap_to_hue_limit(PI * v / 180.0f); }
 inline constexpr float to_radians(const float v) { return v; }
+inline float _from_radians(const float v) { return _wrap_to_hue_limit(v); };
 inline float from_radians(const float v) { return wrap_to_hue_limit(v); };
 */
 
@@ -170,7 +177,6 @@ constexpr float gammaMaximum = 5.0f;
 constexpr float gammaSearchStep = 0.4f;
 constexpr float gammaAccuracy = 1e-5f;
 
-
 } // namespace ACES2
 
 } // OCIO namespace
diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
index 2b470d3ae4..30271eff6b 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
@@ -174,12 +174,14 @@ inline f3 RGB_to_Aab(const f3 &RGB, const JMhParams &p)
 
 inline f3 Aab_to_JMh(const f3 &Aab, const JMhParams &p)
 {
+    if (Aab[0] <= 0.f)
+    {
+        return {0.f, 0.f, 0.f};
+    }
     const float J = Achromatic_n_to_J(Aab[0], p.cz);
-
-    const float M = J == 0.f ? 0.f : sqrt(Aab[1] * Aab[1] + Aab[2] * Aab[2]);
-
+    const float M = sqrt(Aab[1] * Aab[1] + Aab[2] * Aab[2]);
     const float h_rad = std::atan2(Aab[2], Aab[1]);
-    float h = from_radians(h_rad);
+    float h = _from_radians(h_rad); // Call to unwrapped hue version due to atan2 limits
 
     return {J, M, h};
 }
@@ -298,7 +300,7 @@ inline float aces_tonescale(const float Y_in, const JMhParams &p, const ToneScal
 }
 
 template <bool inverse>
-float tonescale(const float J, const JMhParams &p, const ToneScaleParams &pt) // TODO: consider computing tonescale from and to A rather than J to avoid extra pow() calls
+inline float tonescale(const float J, const JMhParams &p, const ToneScaleParams &pt) // TODO: consider computing tonescale from and to A rather than J to avoid extra pow() calls
 {
     // Tonescale applied in Y (convert to and from J)
     const float J_abs = std::abs(J);
@@ -308,16 +310,22 @@ float tonescale(const float J, const JMhParams &p, const ToneScaleParams &pt) //
     return std::copysign(J_out, J);
 }
 
-f3 tonescale_chroma_compress_fwd(const f3 &JMh, const JMhParams &p, const ToneScaleParams &pt, const ResolvedSharedCompressionParameters &pr, const ChromaCompressParams &pc)
+float tonescale_fwd(const float J, const JMhParams &p, const ToneScaleParams &pt) // Forward tonescale of J; non-template entry point declared in Transform.h
+{
+    return tonescale<false>(J, p, pt); // template argument is `inverse`: false selects the forward direction
+}
+
+float tonescale_inv(const float J, const JMhParams &p, const ToneScaleParams &pt) // Inverse tonescale of J; non-template entry point declared in Transform.h
+{
+    return tonescale<true>(J, p, pt); // template argument is `inverse`: true selects the inverse direction
+}
+
+f3 chroma_compress_fwd(const f3 &JMh, const float J_ts, const JMhParams &p, const ResolvedSharedCompressionParameters &pr, const ChromaCompressParams &pc)
 {
     const float J = JMh[0];
     const float M = JMh[1];
     const float h = JMh[2];
 
-    constexpr bool inverse = false;
-    const float J_ts = tonescale<inverse>(J, p, pt);
-
-    // ChromaCompress
     float M_cp = M;
 
     if (M != 0.0)
@@ -337,16 +345,11 @@ f3 tonescale_chroma_compress_fwd(const f3 &JMh, const JMhParams &p, const ToneSc
     return {J_ts, M_cp, h};
 }
 
-f3 tonescale_chroma_compress_inv(const f3 &JMh, const JMhParams &p, const ToneScaleParams &pt, const ResolvedSharedCompressionParameters &pr, const ChromaCompressParams &pc)
+f3 chroma_compress_inv(const f3 &JMh, const float J, const JMhParams &p, const ResolvedSharedCompressionParameters &pr, const ChromaCompressParams &pc)
 {
     const float J_ts = JMh[0];
     const float M_cp = JMh[1];
     const float h    = JMh[2];
-
-    constexpr bool inverse = true;
-    const float J = tonescale<inverse>(J_ts, p, pt);
-
-    // Inverse ChromaCompress
     float M = M_cp;
 
     if (M_cp != 0.0)
diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.h b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.h
index 9c2314c5b3..c6d33798aa 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.h
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.h
@@ -23,8 +23,11 @@ GamutCompressParams init_GamutCompressParams(float peakLuminance, const JMhParam
 f3 RGB_to_JMh(const f3 &RGB, const JMhParams &p);
 f3 JMh_to_RGB(const f3 &JMh, const JMhParams &p);
 
-f3 tonescale_chroma_compress_fwd(const f3 &JMh, const JMhParams &p, const ToneScaleParams &pt, const ResolvedSharedCompressionParameters &ps, const ChromaCompressParams &pc);
-f3 tonescale_chroma_compress_inv(const f3 &JMh, const JMhParams &p, const ToneScaleParams &pt, const ResolvedSharedCompressionParameters &ps, const ChromaCompressParams &pc);
+float tonescale_fwd(const float J, const JMhParams &p, const ToneScaleParams &pt);
+float tonescale_inv(const float J, const JMhParams &p, const ToneScaleParams &pt);
+
+f3 chroma_compress_fwd(const f3 &JMh, const float J_ts, const JMhParams &p, const ResolvedSharedCompressionParameters &ps, const ChromaCompressParams &pc);
+f3 chroma_compress_inv(const f3 &JMh, const float J, const JMhParams &p, const ResolvedSharedCompressionParameters &ps, const ChromaCompressParams &pc);
 
 f3 gamut_compress_fwd(const f3 &JMh, const ResolvedSharedCompressionParameters &ps, const GamutCompressParams &p);
 f3 gamut_compress_inv(const f3 &JMh, const ResolvedSharedCompressionParameters &ps, const GamutCompressParams &p);
diff --git a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpCPU.cpp b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpCPU.cpp
index 626f379d0b..3dc389b2ee 100644
--- a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpCPU.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpCPU.cpp
@@ -1053,7 +1053,8 @@ void Renderer_ACES_OutputTransform20::fwd(const void * inImg, void * outImg, lon
         const ACES2::f3 RGBIn {in[0], in[1], in[2]};
         const ACES2::f3 JMh           = ACES2::RGB_to_JMh(RGBIn, m_pIn);
         const ACES2::ResolvedSharedCompressionParameters rp = resolve_CompressionParams(JMh[2], m_s);
-        const ACES2::f3 tonemappedJMh = ACES2::tonescale_chroma_compress_fwd(JMh, m_pIn, m_t, rp, m_c);
+        const float J_ts = ACES2::tonescale_fwd(JMh[0], m_pIn, m_t);
+        const ACES2::f3 tonemappedJMh = ACES2::chroma_compress_fwd(JMh, J_ts, m_pIn, rp, m_c);
         const ACES2::f3 compressedJMh = ACES2::gamut_compress_fwd(tonemappedJMh, rp, m_g);
         const ACES2::f3 RGBOut        = ACES2::JMh_to_RGB(compressedJMh, m_pOut);
 
@@ -1078,7 +1079,8 @@ void Renderer_ACES_OutputTransform20::inv(const void * inImg, void * outImg, lon
         const ACES2::f3 compressedJMh = ACES2::RGB_to_JMh(RGBout, m_pOut);
         const ACES2::ResolvedSharedCompressionParameters rp = resolve_CompressionParams(compressedJMh[2], m_s);
         const ACES2::f3 tonemappedJMh = ACES2::gamut_compress_inv(compressedJMh, rp, m_g);
-        const ACES2::f3 JMh           = ACES2::tonescale_chroma_compress_inv(tonemappedJMh, m_pIn, m_t, rp, m_c);
+        const float J = ACES2::tonescale_inv(tonemappedJMh[0], m_pIn, m_t);
+        const ACES2::f3 JMh           = ACES2::chroma_compress_inv(tonemappedJMh, J, m_pIn, rp, m_c);
         const ACES2::f3 RGBin         = ACES2::JMh_to_RGB(JMh, m_pIn);
 
         out[0] = RGBin[0];
@@ -1200,7 +1202,8 @@ void Renderer_ACES_TONESCALE_COMPRESS_20::fwd(const void * inImg, void * outImg,
     {
         const float normalised_hue = ACES2::from_degrees(in[2]);
         const ACES2::ResolvedSharedCompressionParameters rp = resolve_CompressionParams(normalised_hue, m_s);
-        const ACES2::f3 JMh = ACES2::tonescale_chroma_compress_fwd({in[0], in[1], normalised_hue}, m_p, m_t, rp, m_c);
+        const float J_ts = ACES2::tonescale_fwd(in[0], m_p, m_t);
+        const ACES2::f3 JMh = ACES2::chroma_compress_fwd({in[0], in[1], normalised_hue}, J_ts, m_p, rp, m_c);
 
         out[0] = JMh[0];
         out[1] = JMh[1];
@@ -1221,7 +1224,8 @@ void Renderer_ACES_TONESCALE_COMPRESS_20::inv(const void * inImg, void * outImg,
     {
         const float normalised_hue = ACES2::from_degrees(in[2]);
         const ACES2::ResolvedSharedCompressionParameters rp = resolve_CompressionParams(normalised_hue, m_s);
-        const ACES2::f3 JMh = ACES2::tonescale_chroma_compress_inv({in[0], in[1],  normalised_hue}, m_p, m_t, rp, m_c);
+        const float J = ACES2::tonescale_inv(in[0], m_p, m_t);
+        const ACES2::f3 JMh = ACES2::chroma_compress_inv({in[0], in[1],  normalised_hue}, J, m_p, rp, m_c);
 
         out[0] = JMh[0];
         out[1] = JMh[1];

From 3233ade093c971b8f15167f6a2d63c8a65c21e7c Mon Sep 17 00:00:00 2001
From: Kevin Wheatley <kevin.wheatley@framestore.com>
Date: Wed, 22 Jan 2025 15:45:53 +0000
Subject: [PATCH 43/70] Build tables more uniformly, needs some clean up and
 lots of testing

Signed-off-by: Kevin Wheatley <kevin.wheatley@framestore.com>
---
 .../ops/fixedfunction/ACES2/Common.h          |   7 +
 .../ops/fixedfunction/ACES2/Transform.cpp     | 376 +++++++++++++++++-
 .../ops/fixedfunction/ACES2/Transform.h       |   4 +-
 .../ops/fixedfunction/FixedFunctionOpCPU.cpp  |  13 +-
 .../ops/fixedfunction/FixedFunctionOpGPU.cpp  |  26 +-
 5 files changed, 388 insertions(+), 38 deletions(-)

diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h b/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
index 7cdd2bbce9..b4deebba6f 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
@@ -58,6 +58,7 @@ struct ToneScaleParams
     float s_2;
     float u_2;
     float m_2;
+    float forward_limit;
     float inverse_limit;
     float log_peak;
 };
@@ -177,6 +178,12 @@ constexpr float gammaMaximum = 5.0f;
 constexpr float gammaSearchStep = 0.4f;
 constexpr float gammaAccuracy = 1e-5f;
 
+constexpr int cuspCornerCount = 6;
+constexpr int totalCornerCount = cuspCornerCount + 2;
+constexpr int max_sorted_corners = 2 * cuspCornerCount;
+constexpr float reach_cusp_tolerance = 1e-3f;
+constexpr float display_cusp_tolerance = 1e-7f;
+
 } // namespace ACES2
 
 } // OCIO namespace
diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
index 30271eff6b..c72dfe37e2 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
@@ -16,6 +16,33 @@ namespace ACES2
 //
 // Table lookups
 //
+inline f3 lerp(const f3& lower, const f3& upper, const float t) // Component-wise lerpf between two f3 values using one shared weight t
+{
+    return {
+        lerpf(lower[0], upper[0], t),
+        lerpf(lower[1], upper[1], t),
+        lerpf(lower[2], upper[2], t)
+    };
+}
+
+inline f3 lerp(const float lower[3], const float upper[3], const float t) // Component-wise lerpf for raw float rows (e.g. one row of Table3D::table); the [3] is not enforced, parameters decay to pointers
+{
+    return {
+        lerpf(lower[0], upper[0], t),
+        lerpf(lower[1], upper[1], t),
+        lerpf(lower[2], upper[2], t)
+    };
+}
+
+inline int midpoint(int a, int b) // Integer midpoint, used by the hue interval binary search
+{
+    return (a + b) / 2; // integer division: truncates toward zero; a + b is not protected against overflow
+}
+
+inline float midpoint(float a, float b) // Floating-point overload: arithmetic mean of a and b
+{
+    return (a + b) / 2.f; // float literal keeps the whole expression in single precision
+}
 
 inline float base_hue_for_position(int i_lo, int table_size) 
 {
@@ -56,7 +83,7 @@ int lookup_hue_interval(float h, const Table1D &hues, const std::array<int, 2> &
         {
             i_hi = i;
         }
-        i = (i_lo + i_hi) / 2;
+        i = midpoint(i_lo, i_hi);
     }
 
     i_hi = std::max(1, i_hi);
@@ -76,10 +103,7 @@ inline float interpolation_weight(float h, int h_lo, int h_hi)
 
 inline f3 cusp_from_table(int i_hi, float t, const Table3D &gt)
 {
-    const float cuspJ = lerpf(gt.table[i_hi-1][0], gt.table[i_hi][0], t);
-    const float cuspM = lerpf(gt.table[i_hi-1][1], gt.table[i_hi][1], t);
-    const float upperGamma = lerpf(gt.table[i_hi-1][2], gt.table[i_hi][2], t);
-    return { cuspJ, cuspM, upperGamma };
+    return lerp(gt.table[i_hi-1], gt.table[i_hi], t);
 }
 
 float reach_m_from_table(float h, const ACES2::Table1D &rt)
@@ -448,6 +472,310 @@ JMhParams init_JMhParams(const Primaries &prims)
    return p;
 }
 
+inline f3 generate_unit_cube_cusp_corners(const int corner) // Returns a triple whose components are each 0.0f or 1.0f, selected by the corner index
+{
+    // Generation order R, Y, G, C, B, M to ensure hues rotate in correct order
+    return {float(int(((corner + 1) % cuspCornerCount) < 3)), // first channel: the bool result of the comparison becomes 0.0f or 1.0f
+            float(int(((corner + 5) % cuspCornerCount) < 3)), // second channel: same test with a different offset
+            float(int(((corner + 3) % cuspCornerCount) < 3))  // third channel; the R..M order above assumes cuspCornerCount == 6 - TODO confirm
+            };
+}
+
+void build_limiting_cusp_corners_tables(std::array<f3, totalCornerCount>& RGB_corners, std::array<f3, totalCornerCount>& JMh_corners,
+                               const JMhParams &params, const float peakLuminance)
+{
+    // We calculate the RGB and JMh values for the limiting gamut cusp corners
+    // They are then arranged into a cycle with the lowest hue (component [2] of JMh) at [1] to allow for hue wrapping
+    std::array<f3, cuspCornerCount> temp_RGB_corners;
+    std::array<f3, cuspCornerCount> temp_JMh_corners;
+    int min_index = 0; // index of the corner with the smallest hue found so far
+    for (int i = 0; i != cuspCornerCount; ++i)
+    {
+      temp_RGB_corners[i] = mult_f_f3(peakLuminance / reference_luminance, generate_unit_cube_cusp_corners(i)); // unit corner scaled by peakLuminance / reference_luminance
+      temp_JMh_corners[i] = RGB_to_JMh(temp_RGB_corners[i], params);
+      if (temp_JMh_corners[i][2] < temp_JMh_corners[min_index][2])
+        min_index = i;
+    }
+
+    // Rotate entries placing lowest at [1] (not [0])
+    for (int i = 0; i != cuspCornerCount; ++i)
+    {
+      RGB_corners[i + 1] = temp_RGB_corners[(i + min_index) % cuspCornerCount]; // modulo keeps the original cyclic order after rotation
+      JMh_corners[i + 1] = temp_JMh_corners[(i + min_index) % cuspCornerCount];
+    }
+
+    // Copy end elements to create a cycle
+    RGB_corners[0]                   = RGB_corners[cuspCornerCount]; // entry before the first real corner duplicates the last real corner
+    RGB_corners[cuspCornerCount + 1] = RGB_corners[1]; // entry after the last real corner duplicates the first real corner
+    JMh_corners[0]                   = JMh_corners[cuspCornerCount];
+    JMh_corners[cuspCornerCount + 1] = JMh_corners[1];
+
+    // Wrap the hues, to maintain monotonicity these entries will fall outside [0.0, hue_limit)
+    JMh_corners[0][2]                   = JMh_corners[0][2] - hue_limit; // only the hue is shifted; J and M stay as copied
+    JMh_corners[cuspCornerCount + 1][2] = JMh_corners[cuspCornerCount + 1][2] + hue_limit;
+}
+
+void find_reach_corners_table(std::array<f3, totalCornerCount>& JMh_corners, const JMhParams &params, const float limitJ, const float maximum_source) // Overwrites every entry of JMh_corners; maximum_source is the upper bound of the scale search
+{
+    // We need to find the value of JMh that corresponds to limitJ for each corner
+    // This is done by scaling the unit corners converting to JMh until the J value is near the limitJ
+    // Strictly speaking we should only need to find the R, G and  B "corners" as the reach is unbounded and
+    // as such does not form a cube, but is formed by the transformed 3 lower planes of the cube and
+    // the plane at J = limitJ
+    std::array<f3, cuspCornerCount> temp_JMh_corners;
+
+    int min_index = 0;
+    for (int i = 0; i != cuspCornerCount; ++i)
+    {
+        const f3 rgb_vector = generate_unit_cube_cusp_corners(i); // direction that the bisected scale factor multiplies
+
+        float lower = 0.0f;
+        float upper = maximum_source;
+        while ((upper - lower) > reach_cusp_tolerance)  // TODO: find the equivalent Achromatic and search for that rather than J
+        {
+            float       test        = midpoint(lower, upper);
+            const f3    test_corner = mult_f_f3(test, rgb_vector);
+            const float J           = Achromatic_n_to_J(RGB_to_Aab(test_corner, params)[0], params.cz);
+            if (J < limitJ)
+            {
+                lower = test;
+            }
+            else
+            {
+                upper = test;
+            }
+            if (J == limitJ)
+                break;
+        }
+        temp_JMh_corners[i] = RGB_to_JMh(mult_f_f3(upper, rgb_vector), params);
+
+        if (temp_JMh_corners[i][2] < temp_JMh_corners[min_index][2]) // track the corner with the smallest hue (component [2])
+            min_index = i;
+    }
+
+    // Rotate entries placing lowest at [1] (not [0])
+    for (int i = 0; i != cuspCornerCount; ++i)
+    {
+      JMh_corners[i + 1] = temp_JMh_corners[(i + min_index) % cuspCornerCount]; // modulo keeps the original cyclic order after rotation
+    }
+
+    // Copy end elements to create a cycle
+    JMh_corners[0]                   = JMh_corners[cuspCornerCount]; // entry before the first real corner duplicates the last real corner
+    JMh_corners[cuspCornerCount + 1] = JMh_corners[1]; // entry after the last real corner duplicates the first real corner
+
+    // Wrap the hues, to maintain monotonicity these entries will fall outside [0.0, hue_limit)
+    JMh_corners[0][2]                   = JMh_corners[0][2] - hue_limit; // only the hue is shifted; J and M stay as copied
+    JMh_corners[cuspCornerCount + 1][2] = JMh_corners[cuspCornerCount + 1][2] + hue_limit;
+}
+
+int extract_sorted_cube_hues(std::array<float, max_sorted_corners>& sorted_hues,
+                             const std::array<f3, totalCornerCount>& reach_JMh, const std::array<f3, totalCornerCount>& display_JMh)
+{
+    // Basic merge of 2 sorted arrays, extracting the unique hues.
+    // Return the count of the unique hues
+    //TODO: use STL for this and similar functions
+    int idx         = 0; // next write position in sorted_hues, also the running count
+    int reach_idx   = 1; // both inputs are read from [1]; entry [0] is skipped
+    int display_idx = 1;
+    while ((reach_idx < (cuspCornerCount + 1)) || (display_idx < (cuspCornerCount + 1))) // NOTE(review): an exhausted list is still read at [cuspCornerCount + 1]; presumably that entry holds a hue above every real corner hue - confirm with callers
+    {
+        const float reach_hue   = reach_JMh[reach_idx][2]; // component [2] is the hue being merged
+        const float display_hue = display_JMh[display_idx][2];
+        if (reach_hue == display_hue)
+        {
+            sorted_hues[idx] = reach_hue;
+            ++reach_idx;
+            ++display_idx; // When equal consume both
+        }
+        else
+        {
+            if (reach_hue < display_hue)
+            {
+                sorted_hues[idx] = reach_hue;
+                ++reach_idx;
+            }
+            else
+            {
+                sorted_hues[idx] = display_hue;
+                ++display_idx;
+            }
+      }
+      ++idx; // exactly one hue is stored per loop iteration
+    }
+    return idx;
+}
+
+void build_hue_sample_interval(const int samples, const float lower, const float upper, Table1D &hue_table, const int base) // Writes `samples` evenly spaced hues into hue_table.table[base .. base + samples - 1]
+{
+    const float delta = (upper - lower) / float(samples); // spacing between consecutive samples
+    for (int i = 0; i != samples; ++i)
+    {
+        hue_table.table[base + i] = lower + float(i) * delta; // first sample equals lower; upper itself is never written
+    }
+}
+
+void build_hue_table(Table1D &hue_table, const std::array<float, max_sorted_corners>& sorted_hues, const int unique_hues) // Fills hue_table so that each of the first unique_hues entries of sorted_hues starts an interval of evenly spaced samples
+{
+    const float ideal_spacing = hue_table.size / hue_limit; // table entries per unit of hue
+    std::array<int, 2 * cuspCornerCount + 2> samples_count; // per hue: the sample offset assigned to it; only the first unique_hues entries are written and read
+    int         last_idx  = -1;
+    int         min_index = sorted_hues[0] == 0.0f ? 0 : 1; // Ensure we can always sample at 0.0 hue
+    for (int hue_idx = 0; hue_idx != unique_hues; ++hue_idx)
+    {
+        // BUG: "hue_table.size - 1" will fail if we have multiple hues mapping near the top of the table
+        int nominal_idx = std::min(std::max(int(std::round(sorted_hues[hue_idx] * ideal_spacing)), min_index), hue_table.size - 1); // rounded ideal offset clamped to [min_index, size - 1]
+        if (last_idx == nominal_idx)
+        {
+            // Last two hues should sample at same index, need to adjust them
+            // Adjust previous sample down if we can
+            if (hue_idx > 1 && samples_count[hue_idx - 2] != (samples_count[hue_idx - 1] - 1)) // a free slot exists just below the previous hue
+            {
+                samples_count[hue_idx - 1] = samples_count[hue_idx - 1] - 1;
+            }
+            else
+            {
+                nominal_idx = nominal_idx + 1; // otherwise move the current hue up one slot
+            }
+        }
+        samples_count[hue_idx] = std::min(nominal_idx, hue_table.size - 1);
+        last_idx = min_index = nominal_idx; // later hues may not be placed below this one
+    }
+
+    int total_samples = 0;
+    // Special cases for ends
+    int i = 0;
+    build_hue_sample_interval(samples_count[i], 0.0f, sorted_hues[i], hue_table, total_samples + 1); // NOTE(review): the "+ 1" presumably equals hue_table.base_index - confirm
+    total_samples += samples_count[i];
+    for (++i; i != unique_hues; ++i)
+    {
+        const int samples = samples_count[i] - samples_count[i - 1]; // number of samples between consecutive hues
+        build_hue_sample_interval(samples, sorted_hues[i - 1], sorted_hues[i], hue_table, total_samples + 1);
+        total_samples += samples;
+    }
+    // BUG: could break if we are unlucky with samples all being used up by this point
+    build_hue_sample_interval(hue_table.size - total_samples, sorted_hues[i - 1], hue_limit, hue_table, total_samples + 1); // remaining samples span the last hue up to hue_limit
+
+    hue_table.table[0]                  = hue_table.table[hue_table.base_index + hue_table.size - 1] - hue_limit; // leading wrap entry: last sample shifted down by hue_limit
+    hue_table.table[hue_table.total_size - 1] = hue_table.table[hue_table.base_index] + hue_limit; // trailing wrap entry: first sample shifted up by hue_limit
+}
+
+std::array<float, 2> find_display_cusp_for_hue(float hue, const std::array<f3, totalCornerCount>& RGB_corners, const std::array<f3, totalCornerCount>& JMh_corners,
+                                               const JMhParams &params, std::array<float, 2> & previous)
+{
+    // This works by finding the required line segment between two of the cusp corners held in RGB_corners, then binary searching
+    // along the line calculating the JMh of points along the line till we find the required value.
+    // All values on the line segments are valid cusp locations. Returns {J, M}; `previous` is updated with the search state.
+
+    int upper_corner = 1;
+    for (int i = upper_corner; i != totalCornerCount; ++i) // TODO: binary search?
+    {
+        if (JMh_corners[i][2] > hue) // first corner whose hue is strictly above the requested hue
+        {
+            upper_corner = i;
+            break;
+        }
+    }
+    const int lower_corner = upper_corner - 1;
+
+    // hue should now be within [lower_corner, upper_corner), handle exact match
+    if (JMh_corners[lower_corner][2] == hue)
+    {
+        return {JMh_corners[lower_corner][0], JMh_corners[lower_corner][1]}; // early return leaves `previous` untouched
+    }
+
+    // search by lerping between RGB corners for the hue
+    const f3 cusp_lower = RGB_corners[lower_corner];
+    const f3 cusp_upper = RGB_corners[upper_corner];
+    f3       sample;
+
+    float sample_t;
+    float lower_t = (upper_corner == previous[0]) ? previous[1] : 0.0f; // If we are still on the same segment start from where we left off
+    float upper_t = 1.0f;
+
+    f3 JMh;
+
+    // There is an edge case where we need to search towards the range when across the [0.0f, hue_limit) boundary
+    // each edge needs the directions swapped. This is handled by comparing against the appropriate corner to make
+    // sure we are still in the expected range between the lower and upper corner hue limits
+    while ((upper_t - lower_t) > display_cusp_tolerance)
+    {
+        sample_t = midpoint(lower_t, upper_t);
+        sample   = lerp(cusp_lower, cusp_upper, sample_t);
+        JMh      = RGB_to_JMh(sample, params);
+        if (JMh[2] < JMh_corners[lower_corner][2]) // sample hue is below the segment's lower corner hue
+        {
+            upper_t = sample_t;
+        }
+        else if (JMh[2] >= JMh_corners[upper_corner][2]) // sample hue is at or above the segment's upper corner hue
+        {
+            lower_t = sample_t;
+        }
+        else if (JMh[2] > hue) // ordinary bisection once the sample hue lies between the corner hues
+        {
+            upper_t = sample_t;
+        }
+        else
+        {
+            lower_t = sample_t;
+        }
+    }
+
+    // Use the midpoint of the final interval for the actual samples
+    sample_t = midpoint(lower_t, upper_t);
+    sample   = lerp(cusp_lower, cusp_upper, sample_t);
+    JMh      = RGB_to_JMh(sample, params);
+
+    previous[0] = float(upper_corner); // remember the segment (stored as float) for the next call
+    previous[1] = sample_t; // remember how far along the segment the search finished
+
+    return {JMh[0], JMh[1]};
+}
+
+Table3D build_cusp_table(const Table1D& hue_table, const std::array<f3, totalCornerCount>& RGB_corners, const std::array<f3, totalCornerCount>& JMh_corners, const JMhParams &params) // Builds one {J, M, hue} row per hue_table sample and returns the table
+{
+    std::array<float, 2> previous = {0.0f, 0.0f}; // search state handed to every find_display_cusp_for_hue call
+    Table3D output_table;
+    for (int i = output_table.base_index; i != output_table.base_index + output_table.size; ++i)
+    {
+      const float hue = hue_table.table[i]; // hue_table is read at the same index that is written in output_table
+      const std::array<float, 2> JM = find_display_cusp_for_hue(hue, RGB_corners, JMh_corners, params, previous);
+      output_table.table[i][0] = JM[0];
+      output_table.table[i][1] = JM[1] * (1.f + smooth_m * smooth_cusps);
+      output_table.table[i][2] = hue;
+    }
+
+    // Copy extra entries to ease the code to handle hues wrapping around
+    // TODO: consider adding operator[] to Table3D to handle this table.table array access more cleanly - subclass std::array?
+    output_table.table[0][0]                                           = output_table.table[output_table.base_index + output_table.size - 1][0]; // row [0] duplicates the last computed row
+    output_table.table[0][1]                                           = output_table.table[output_table.base_index + output_table.size - 1][1];
+    output_table.table[0][2]                                           = output_table.table[output_table.base_index + output_table.size - 1][2];
+    output_table.table[output_table.base_index + output_table.size][0] = output_table.table[output_table.base_index][0]; // row after the last computed row duplicates the first computed row
+    output_table.table[output_table.base_index + output_table.size][1] = output_table.table[output_table.base_index][1];
+    output_table.table[output_table.base_index + output_table.size][2] = output_table.table[output_table.base_index][2];
+    return output_table;
+}
+
+Table3D make_uniform_hue_gamut_table(const JMhParams &reach_params, const JMhParams &params, float peakLuminance, float forward_limit, const SharedCompressionParameters& sp, Table1D& hue_table)
+{
+    // The principle here is to sample the hues as uniformly as possible, whilst ensuring we sample the corners
+    // of the limiting gamut and the reach primaries at limit J Max
+    //
+    // The corners are calculated then the hues are extracted and merged to form a unique sorted hue list
+    // We then build the hue table from the list, those hues are then used to compute the JMh of the limiting
+    // gamut cusp.
+
+    std::array<f3, totalCornerCount> reach_JMh_corners;
+    std::array<f3, totalCornerCount> limiting_RGB_corners;
+    std::array<f3, totalCornerCount> limiting_JMh_corners;
+    std::array<float, max_sorted_corners> sorted_hues;
+
+    find_reach_corners_table(reach_JMh_corners, reach_params, sp.limit_J_max, forward_limit); // reach corners searched up to forward_limit at J = sp.limit_J_max
+    build_limiting_cusp_corners_tables(limiting_RGB_corners, limiting_JMh_corners, params, peakLuminance);
+    const int unique_hues = extract_sorted_cube_hues(sorted_hues, reach_JMh_corners, limiting_JMh_corners); // merged hue list; only the first unique_hues entries are set
+    build_hue_table(hue_table, sorted_hues, unique_hues); // hue_table is an output filled here and then sampled below
+    return build_cusp_table(hue_table, limiting_RGB_corners, limiting_JMh_corners, params);
+}
+
 Table3D make_gamut_table(const JMhParams &params, float peakLuminance, Table1D& hue_table)
 {
     Table3D gamutCuspTableUnsorted{};
@@ -487,7 +815,7 @@ Table3D make_gamut_table(const JMhParams &params, float peakLuminance, Table1D&
 
     // Wrap the hues, to maintain monotonicity. These entries will fall outside [0.0, hue_limit)
     gamutCuspTable.table[0][2] = gamutCuspTable.table[0][2] - hue_limit;
-    gamutCuspTable.table[gamutCuspTable.size+1][2] = gamutCuspTable.table[gamutCuspTable.size+1][2] + hue_limit;
+    gamutCuspTable.table[gamutCuspTable.base_index + gamutCuspTable.size][2] = gamutCuspTable.table[gamutCuspTable.base_index + gamutCuspTable.size][2] + hue_limit;
 
     // Extract hue table
     for (int i = 0; i < gamutCuspTable.total_size; i++)
@@ -509,12 +837,13 @@ Table1D make_reach_m_table(const JMhParams &params, const float limit_J_max)
     for (int i = 0; i < gamutReachTable.size; i++) {
         const float hue = base_hue_for_position(i, gamutReachTable.size);
 
-        const float search_range = 50.f;
+        constexpr float search_range = 50.f;
+        constexpr float search_maximum = 1300.f; // TODO: magic limit
         float low = 0.f;
         float high = low + search_range;
         bool outside = false;
 
-        while ((outside != true) & (high < 1300.f))
+        while ((outside != true) & (high < search_maximum))
         {
             const f3 searchJMh = {limit_J_max, high, hue};
             const f3 newLimitRGB = JMh_to_RGB(searchJMh, params);
@@ -526,7 +855,7 @@ Table1D make_reach_m_table(const JMhParams &params, const float limit_J_max)
             }
         }
 
-        while (high-low > 1e-2)
+        while (high-low > 1e-2) // TODO:tolerance as constexpr
         {
             const float sampleM = (high + low) / 2.f;
             const f3 searchJMh = {limit_J_max, sampleM, hue};
@@ -551,7 +880,7 @@ Table1D make_reach_m_table(const JMhParams &params, const float limit_J_max)
 bool outside_hull(const f3 &rgb)
 {
     // limit value, once we cross this value, we are outside of the top gamut shell
-    const float maxRGBtestVal = 1.0;
+    constexpr float maxRGBtestVal = 1.0f;
     return rgb[0] > maxRGBtestVal || rgb[1] > maxRGBtestVal || rgb[2] > maxRGBtestVal;
 }
 
@@ -883,7 +1212,7 @@ void make_upper_hull_gamma(
         float testGamma = -1.f;
         while ( (high-low) > gammaAccuracy)
         {
-            testGamma = (high + low) / 2.f;
+            testGamma = midpoint(high, low);
             const bool gammaFound = gamma_fit_predicate(testGamma);
             if (gammaFound)
             {
@@ -934,6 +1263,7 @@ ToneScaleParams init_ToneScaleParams(float peakLuminance)
     const float u_2 = powf((r_hit/m_1)/((r_hit/m_1) + w_2), g);
     const float m_2 = m_1 / u_2;
     const float inverse_limit = n / (u_2 * n_r);
+    const float forward_limit = 8.0f * r_hit;
     const float log_peak = log10( n / n_r);
 
     ToneScaleParams TonescaleParams = {
@@ -945,6 +1275,7 @@ ToneScaleParams init_ToneScaleParams(float peakLuminance)
         s_2,
         u_2,
         m_2,
+        forward_limit,
         inverse_limit,
         log_peak
     };
@@ -952,16 +1283,15 @@ ToneScaleParams init_ToneScaleParams(float peakLuminance)
     return TonescaleParams;
 }
 
-SharedCompressionParameters init_SharedCompressionParams(float peakLuminance, const JMhParams &inputJMhParams)
+SharedCompressionParameters init_SharedCompressionParams(float peakLuminance, const JMhParams &inputJMhParams, const JMhParams &reachParams)
 {
     const float limit_J_max = Y_to_J(peakLuminance, inputJMhParams);
     const float model_gamma_inv = 1.f / model_gamma();
-    const JMhParams compressionGamut = init_JMhParams(ACES_AP1::primaries);
 
     SharedCompressionParameters params = {
         limit_J_max,
         model_gamma_inv,
-        make_reach_m_table(compressionGamut, limit_J_max)
+        make_reach_m_table(reachParams, limit_J_max)
     };
     return params;
 }
@@ -1004,21 +1334,24 @@ std::array<int, 2> determine_hue_linearity_search_range(const Table3D &gamutCusp
     // TODO: Padding values are a quick hack to ensure the range encloses the needed range, left as an exersize
     // for the reader to fully reason if these values could be smaller, probably best done the closer the hue
     // distribution comes to linear as the overhead becomes a potentially greater issue
-    constexpr int lower_padding = -2;
+    constexpr int lower_padding = 0;
     constexpr int upper_padding = 1;
     std::array<int, 2> hue_linearity_search_range = {lower_padding, upper_padding};
     for (int i = gamutCuspTable.base_index; i < gamutCuspTable.base_index + gamutCuspTable.size; i++)
     {
-      const int pos = hue_position_in_uniform_table(gamutCuspTable.table[i][2], gamutCuspTable.size) + gamutCuspTable.base_index;
-      const int delta = i - pos;
-      hue_linearity_search_range[0] = std::min(hue_linearity_search_range[0], delta + lower_padding);
-      hue_linearity_search_range[1] = std::max(hue_linearity_search_range[1], delta + upper_padding);
+        const int pos = hue_position_in_uniform_table(gamutCuspTable.table[i][2], gamutCuspTable.size) + gamutCuspTable.base_index;
+        const int delta = i - pos;
+        hue_linearity_search_range[0] = std::min(hue_linearity_search_range[0], delta + lower_padding);
+        hue_linearity_search_range[1] = std::max(hue_linearity_search_range[1], delta + upper_padding);
+
+        std::cout << i << " " << pos << " " << delta << " " << gamutCuspTable.table[i][0] << " " << gamutCuspTable.table[i][1] << " " << gamutCuspTable.table[i][2] << "\n";
     }
+    std::cout << "search range " << hue_linearity_search_range[0] << " " << hue_linearity_search_range[1] << "\n";
     return hue_linearity_search_range;
 }
 
 GamutCompressParams init_GamutCompressParams(float peakLuminance, const JMhParams &inputJMhParams, const JMhParams &limitJMhParams,
-                                             const ToneScaleParams &tsParams, const SharedCompressionParameters &shParams)
+                                             const ToneScaleParams &tsParams, const SharedCompressionParameters &shParams, const JMhParams &reachParams)
 {
     float mid_J = Y_to_J(tsParams.c_t * reference_luminance, inputJMhParams);
 
@@ -1030,7 +1363,8 @@ GamutCompressParams init_GamutCompressParams(float peakLuminance, const JMhParam
     params.mid_J = mid_J;
     params.focus_dist = focus_dist;
     params.lower_hull_gamma_inv = lower_hull_gamma_inv;
-    params.gamut_cusp_table = make_gamut_table(limitJMhParams, peakLuminance, params.hue_table);
+    params.gamut_cusp_table = make_uniform_hue_gamut_table(reachParams, limitJMhParams, peakLuminance, tsParams.forward_limit, shParams, params.hue_table);
+    //params.gamut_cusp_table = make_gamut_table(limitJMhParams, peakLuminance, params.hue_table);
     params.hue_linearity_search_range = determine_hue_linearity_search_range(params.gamut_cusp_table);
     make_upper_hull_gamma(params.hue_table, params.gamut_cusp_table, peakLuminance, shParams.limit_J_max, mid_J, focus_dist,
                           lower_hull_gamma_inv, limitJMhParams); //TODO: mess of parameters
diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.h b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.h
index c6d33798aa..b2f92359b2 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.h
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.h
@@ -14,11 +14,11 @@ namespace ACES2
 
 JMhParams init_JMhParams(const Primaries &P);
 ToneScaleParams init_ToneScaleParams(float peakLuminance);
-SharedCompressionParameters init_SharedCompressionParams(float peakLuminance, const JMhParams &inputJMhParams);
+SharedCompressionParameters init_SharedCompressionParams(float peakLuminance, const JMhParams &inputJMhParams, const JMhParams &reachParams);
 ResolvedSharedCompressionParameters resolve_CompressionParams(float hue, const SharedCompressionParameters &p);
 ChromaCompressParams init_ChromaCompressParams(float peakLuminance, const ToneScaleParams &tsParams);
 GamutCompressParams init_GamutCompressParams(float peakLuminance, const JMhParams &inputJMhParams, const JMhParams &limitJMhParams,
-                                             const ToneScaleParams &tsParams, const SharedCompressionParameters &shParams);
+                                             const ToneScaleParams &tsParams, const SharedCompressionParameters &shParams, const JMhParams &reachParams);
 
 f3 RGB_to_JMh(const f3 &RGB, const JMhParams &p);
 f3 JMh_to_RGB(const f3 &JMh, const JMhParams &p);
diff --git a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpCPU.cpp b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpCPU.cpp
index 3dc389b2ee..a4edc17a32 100644
--- a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpCPU.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpCPU.cpp
@@ -1026,9 +1026,10 @@ Renderer_ACES_OutputTransform20::Renderer_ACES_OutputTransform20(ConstFixedFunct
     m_pIn = ACES2::init_JMhParams(ACES_AP0::primaries);
     m_pOut = ACES2::init_JMhParams(lim_primaries);
     m_t = ACES2::init_ToneScaleParams(peak_luminance);
-    m_s = ACES2::init_SharedCompressionParams(peak_luminance, m_pIn);
+    const ACES2::JMhParams reachGamut = ACES2::init_JMhParams(ACES_AP1::primaries);
+    m_s = ACES2::init_SharedCompressionParams(peak_luminance, m_pIn, reachGamut);
     m_c = ACES2::init_ChromaCompressParams(peak_luminance, m_t);
-    m_g = ACES2::init_GamutCompressParams(peak_luminance, m_pIn, m_pOut, m_t, m_s);
+    m_g = ACES2::init_GamutCompressParams(peak_luminance, m_pIn, m_pOut, m_t, m_s, reachGamut);
 }
 
 void Renderer_ACES_OutputTransform20::apply(const void * inImg, void * outImg, long numPixels) const
@@ -1177,7 +1178,8 @@ Renderer_ACES_TONESCALE_COMPRESS_20::Renderer_ACES_TONESCALE_COMPRESS_20(ConstFi
 
     m_p = ACES2::init_JMhParams(ACES_AP0::primaries);
     m_t = ACES2::init_ToneScaleParams(peak_luminance);
-    m_s = ACES2::init_SharedCompressionParams(peak_luminance, m_p);
+    const ACES2::JMhParams reachGamut = ACES2::init_JMhParams(ACES_AP1::primaries);
+    m_s = ACES2::init_SharedCompressionParams(peak_luminance, m_p, reachGamut);
     m_c = ACES2::init_ChromaCompressParams(peak_luminance, m_t);
 }
 
@@ -1263,8 +1265,9 @@ Renderer_ACES_GAMUT_COMPRESS_20::Renderer_ACES_GAMUT_COMPRESS_20(ConstFixedFunct
     const ACES2::JMhParams pIn = ACES2::init_JMhParams(ACES_AP0::primaries);
     const ACES2::JMhParams pLim = ACES2::init_JMhParams(limitingPrimaries);
     const ACES2::ToneScaleParams t = ACES2::init_ToneScaleParams(peakLuminance);
-    m_s = ACES2::init_SharedCompressionParams(peakLuminance, pIn);
-    m_g = ACES2::init_GamutCompressParams(peakLuminance, pIn, pLim, t, m_s);
+    const ACES2::JMhParams reachGamut = ACES2::init_JMhParams(ACES_AP1::primaries);
+    m_s = ACES2::init_SharedCompressionParams(peakLuminance, pIn, reachGamut);
+    m_g = ACES2::init_GamutCompressParams(peakLuminance, pIn, pLim, t, m_s, reachGamut);
 }
 
 void Renderer_ACES_GAMUT_COMPRESS_20::apply(const void * inImg, void * outImg, long numPixels) const
diff --git a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
index ca03d55621..8da4d85a3a 100644
--- a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
@@ -1283,9 +1283,10 @@ void Add_ACES_OutputTransform_Fwd_Shader(
     const ACES2::JMhParams pIn = ACES2::init_JMhParams(ACES_AP0::primaries);
     const ACES2::JMhParams pLim = ACES2::init_JMhParams(lim_primaries);
     const ACES2::ToneScaleParams t = ACES2::init_ToneScaleParams(peak_luminance);
-    const ACES2::SharedCompressionParameters s = ACES2::init_SharedCompressionParams(peak_luminance, pIn);
+    const ACES2::JMhParams reachGamut = ACES2::init_JMhParams(ACES_AP1::primaries);
+    const ACES2::SharedCompressionParameters s = ACES2::init_SharedCompressionParams(peak_luminance, pIn, reachGamut);
     const ACES2::ChromaCompressParams c = ACES2::init_ChromaCompressParams(peak_luminance, t);
-    const ACES2::GamutCompressParams g = ACES2::init_GamutCompressParams(peak_luminance, pIn, pLim, t, s);
+    const ACES2::GamutCompressParams g = ACES2::init_GamutCompressParams(peak_luminance, pIn, pLim, t, s, reachGamut);
 
     unsigned resourceIndex = shaderCreator->getNextResourceIndex();
 
@@ -1355,9 +1356,10 @@ void Add_ACES_OutputTransform_Inv_Shader(
     const ACES2::JMhParams pIn = ACES2::init_JMhParams(ACES_AP0::primaries);
     const ACES2::JMhParams pLim = ACES2::init_JMhParams(lim_primaries);
     const ACES2::ToneScaleParams t = ACES2::init_ToneScaleParams(peak_luminance);
-    const ACES2::SharedCompressionParameters s = ACES2::init_SharedCompressionParams(peak_luminance, pIn);
+    const ACES2::JMhParams reachGamut = ACES2::init_JMhParams(ACES_AP1::primaries);
+    const ACES2::SharedCompressionParameters s = ACES2::init_SharedCompressionParams(peak_luminance, pIn, reachGamut);
     const ACES2::ChromaCompressParams c = ACES2::init_ChromaCompressParams(peak_luminance, t);
-    const ACES2::GamutCompressParams g = ACES2::init_GamutCompressParams(peak_luminance, pIn, pLim, t, s);
+    const ACES2::GamutCompressParams g = ACES2::init_GamutCompressParams(peak_luminance, pIn, pLim, t, s, reachGamut);
 
     unsigned resourceIndex = shaderCreator->getNextResourceIndex();
 
@@ -1460,7 +1462,8 @@ void Add_Tonescale_Compress_Fwd_Shader(
 
     const ACES2::JMhParams p = ACES2::init_JMhParams(ACES_AP0::primaries);
     const ACES2::ToneScaleParams t = ACES2::init_ToneScaleParams(peak_luminance);
-    const ACES2::SharedCompressionParameters s = ACES2::init_SharedCompressionParams(peak_luminance, p);
+    const ACES2::JMhParams reachGamut = ACES2::init_JMhParams(ACES_AP1::primaries);
+    const ACES2::SharedCompressionParameters s = ACES2::init_SharedCompressionParams(peak_luminance, p, reachGamut);
     const ACES2::ChromaCompressParams c = ACES2::init_ChromaCompressParams(peak_luminance, t);
 
     unsigned resourceIndex = shaderCreator->getNextResourceIndex();
@@ -1479,7 +1482,8 @@ void Add_Tonescale_Compress_Inv_Shader(
 
     const ACES2::JMhParams p = ACES2::init_JMhParams(ACES_AP0::primaries);
     const ACES2::ToneScaleParams t = ACES2::init_ToneScaleParams(peak_luminance);
-    const ACES2::SharedCompressionParameters s = ACES2::init_SharedCompressionParams(peak_luminance, p);
+    const ACES2::JMhParams reachGamut = ACES2::init_JMhParams(ACES_AP1::primaries);
+    const ACES2::SharedCompressionParameters s = ACES2::init_SharedCompressionParams(peak_luminance, p, reachGamut);
     const ACES2::ChromaCompressParams c = ACES2::init_ChromaCompressParams(peak_luminance, t);
 
     unsigned resourceIndex = shaderCreator->getNextResourceIndex();
@@ -1515,8 +1519,9 @@ void Add_Gamut_Compress_Fwd_Shader(
     const ACES2::JMhParams pIn = ACES2::init_JMhParams(ACES_AP0::primaries);
     const ACES2::JMhParams pLim = ACES2::init_JMhParams(primaries);
     const ACES2::ToneScaleParams t = ACES2::init_ToneScaleParams(peak_luminance);
-    const ACES2::SharedCompressionParameters s = ACES2::init_SharedCompressionParams(peak_luminance, pIn);
-    const ACES2::GamutCompressParams g = ACES2::init_GamutCompressParams(peak_luminance, pIn, pLim, t, s); 
+    const ACES2::JMhParams reachGamut = ACES2::init_JMhParams(ACES_AP1::primaries);
+    const ACES2::SharedCompressionParameters s = ACES2::init_SharedCompressionParams(peak_luminance, pIn, reachGamut);
+    const ACES2::GamutCompressParams g = ACES2::init_GamutCompressParams(peak_luminance, pIn, pLim, t, s, reachGamut); 
 
     unsigned resourceIndex = shaderCreator->getNextResourceIndex();
 
@@ -1551,8 +1556,9 @@ void Add_Gamut_Compress_Inv_Shader(
     const ACES2::JMhParams pIn = ACES2::init_JMhParams(ACES_AP0::primaries);
     const ACES2::JMhParams pLim = ACES2::init_JMhParams(primaries);
     const ACES2::ToneScaleParams t = ACES2::init_ToneScaleParams(peak_luminance);
-    const ACES2::SharedCompressionParameters s = ACES2::init_SharedCompressionParams(peak_luminance, pIn);
-    const ACES2::GamutCompressParams g = ACES2::init_GamutCompressParams(peak_luminance, pIn, pLim, t, s);
+    const ACES2::JMhParams reachGamut = ACES2::init_JMhParams(ACES_AP1::primaries);
+    const ACES2::SharedCompressionParameters s = ACES2::init_SharedCompressionParams(peak_luminance, pIn, reachGamut);
+    const ACES2::GamutCompressParams g = ACES2::init_GamutCompressParams(peak_luminance, pIn, pLim, t, s, reachGamut);
 
     unsigned resourceIndex = shaderCreator->getNextResourceIndex();
 

From 04a867a75f4e508becb18ea6dacbe13d56c5cbf6 Mon Sep 17 00:00:00 2001
From: Kevin Wheatley <kevin.wheatley@framestore.com>
Date: Thu, 23 Jan 2025 09:54:59 +0000
Subject: [PATCH 44/70] Speed up reach corner finding by switching to testing
 against the Achromatic rather than J limit

Signed-off-by: Kevin Wheatley <kevin.wheatley@framestore.com>
---
 .../ops/fixedfunction/ACES2/Transform.cpp         | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
index c72dfe37e2..aeafd7cbb3 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
@@ -519,10 +519,13 @@ void find_reach_corners_table(std::array<f3, totalCornerCount>& JMh_corners, con
 {
     // We need to find the value of JMh that corresponds to limitJ for each corner
     // This is done by scaling the unit corners converting to JMh until the J value is near the limitJ
+    // As an optimisation we use the equivalent Achromatic value to search for the J value and avoid the
+    // non-linear transform during the search
     // Strictly speaking we should only need to find the R, G and  B "corners" as the reach is unbounded and
     // as such does not form a cube, but is formed by the transformed 3 lower planes of the cube and
     // the plane at J = limitJ
     std::array<f3, cuspCornerCount> temp_JMh_corners;
+    const float limitA = J_to_Achromatic_n(limitJ, params.cz);
 
     int min_index = 0;
     for (int i = 0; i != cuspCornerCount; ++i)
@@ -531,12 +534,12 @@ void find_reach_corners_table(std::array<f3, totalCornerCount>& JMh_corners, con
 
         float lower = 0.0f;
         float upper = maximum_source;
-        while ((upper - lower) > reach_cusp_tolerance)  // TODO: find the equivalent Achromatic and search for that rather than J
+        while ((upper - lower) > reach_cusp_tolerance)
         {
             float       test        = midpoint(lower, upper);
             const f3    test_corner = mult_f_f3(test, rgb_vector);
-            const float J           = Achromatic_n_to_J(RGB_to_Aab(test_corner, params)[0], params.cz);
-            if (J < limitJ)
+            const float A           = RGB_to_Aab(test_corner, params)[0];
+            if (A < limitA)
             {
                 lower = test;
             }
@@ -544,7 +547,7 @@ void find_reach_corners_table(std::array<f3, totalCornerCount>& JMh_corners, con
             {
                 upper = test;
             }
-            if (J == limitJ)
+            if (A == limitA)
                 break;
         }
         temp_JMh_corners[i] = RGB_to_JMh(mult_f_f3(upper, rgb_vector), params);
@@ -1344,9 +1347,9 @@ std::array<int, 2> determine_hue_linearity_search_range(const Table3D &gamutCusp
         hue_linearity_search_range[0] = std::min(hue_linearity_search_range[0], delta + lower_padding);
         hue_linearity_search_range[1] = std::max(hue_linearity_search_range[1], delta + upper_padding);
 
-        std::cout << i << " " << pos << " " << delta << " " << gamutCuspTable.table[i][0] << " " << gamutCuspTable.table[i][1] << " " << gamutCuspTable.table[i][2] << "\n";
+        //std::cout << i << " " << pos << " " << delta << " " << gamutCuspTable.table[i][0] << " " << gamutCuspTable.table[i][1] << " " << gamutCuspTable.table[i][2] << "\n";
     }
-    std::cout << "search range " << hue_linearity_search_range[0] << " " << hue_linearity_search_range[1] << "\n";
+    //std::cout << "search range " << hue_linearity_search_range[0] << " " << hue_linearity_search_range[1] << "\n";
     return hue_linearity_search_range;
 }
 

From 426e3e798293f965ad9899ee885e08c206780c31 Mon Sep 17 00:00:00 2001
From: Kevin Wheatley <kevin.wheatley@framestore.com>
Date: Thu, 23 Jan 2025 13:02:56 +0000
Subject: [PATCH 45/70] Speed up hull gamma finding by computing values which
 depend only on the test points and not the gamma values themselves

Signed-off-by: Kevin Wheatley <kevin.wheatley@framestore.com>
---
 .../ops/fixedfunction/ACES2/Transform.cpp     | 72 +++++++++++--------
 1 file changed, 41 insertions(+), 31 deletions(-)

diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
index aeafd7cbb3..0a443dcd71 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
@@ -880,10 +880,9 @@ Table1D make_reach_m_table(const JMhParams &params, const float limit_J_max)
     return gamutReachTable;
 }
 
-bool outside_hull(const f3 &rgb)
+inline bool outside_hull(const f3 &rgb, float maxRGBtestVal)
 {
     // limit value, once we cross this value, we are outside of the top gamut shell
-    constexpr float maxRGBtestVal = 1.0f;
     return rgb[0] > maxRGBtestVal || rgb[1] > maxRGBtestVal || rgb[2] > maxRGBtestVal;
 }
 
@@ -1123,36 +1122,55 @@ f3 gamut_compress_inv(const f3 &JMh, const ResolvedSharedCompressionParameters &
 }
 
 static constexpr int gamma_test_count = 5;
+struct testData {
+    f3 testJMh;
+    float J_intersect_source;
+    float slope;
+    float J_intersect_cusp;
+};
+
+std::array<testData, gamma_test_count> generate_gamma_test_data(const f2 &JMcusp, float hue, float limit_J_max, float mid_J, float focus_dist)
+{
+    const std::array<float, gamma_test_count> testPositions = {0.01f, 0.1f, 0.5f, 0.8f, 0.99f};
+    std::array<testData, gamma_test_count> data;
+
+    std::generate(data.begin(), data.end(), [JMcusp, limit_J_max, mid_J, focus_dist, testPositions, hue, testIndex = 0]() mutable {
+        const float testJ = lerpf(JMcusp[0], limit_J_max, testPositions[testIndex]);
+        const float focusJ = compute_focusJ(JMcusp[0], mid_J, limit_J_max);
+        const float slope_gain = get_focus_gain(testJ, JMcusp[0], limit_J_max, focus_dist);
+        const float J_intersect_source = solve_J_intersect(testJ, JMcusp[1], focusJ, limit_J_max, slope_gain);
+        testData result = {
+            { testJ, JMcusp[1], hue },
+            J_intersect_source,
+            compute_compression_vector_slope(J_intersect_source, focusJ, limit_J_max, slope_gain),
+            solve_J_intersect(JMcusp[0], JMcusp[1], focusJ, limit_J_max, slope_gain)
+        };
+        testIndex++;
+        return result;
+    });
+    return data;
+}
 
 bool evaluate_gamma_fit(
     const f2 &JMcusp,
-    const std::array<f3, gamma_test_count> JMh_values,
+    const std::array<testData, gamma_test_count> data,
     float topGamma_inv,
     float peakLuminance,
     float limit_J_max,
-    float mid_J,
-    float focus_dist,
     float lower_hull_gamma_inv,
     const JMhParams &limitJMhParams)
 {
-
-    const float focusJ = compute_focusJ(JMcusp[0], mid_J, limit_J_max);
-
-    for (auto testJMh: JMh_values)
+    const float luminance_limit = peakLuminance / reference_luminance;
+    for (auto test_data: data)
     {
-        const float slope_gain = get_focus_gain(testJMh[0], JMcusp[0], limit_J_max, focus_dist);
-        const float J_intersect_source = solve_J_intersect(testJMh[0], testJMh[1], focusJ, limit_J_max, slope_gain);
-        const float slope = compute_compression_vector_slope(J_intersect_source, focusJ, limit_J_max, slope_gain);
-        const float J_intersect_cusp = solve_J_intersect(JMcusp[0], JMcusp[1], focusJ, limit_J_max, slope_gain);
+        const float approxLimit_M = find_gamut_boundary_intersection(JMcusp, limit_J_max, topGamma_inv, lower_hull_gamma_inv,
+                                                    test_data.J_intersect_source, test_data.slope, test_data.J_intersect_cusp);
+        const float approxLimit_J = test_data.J_intersect_source + test_data.slope * approxLimit_M;
 
-        const float approxLimit_M = find_gamut_boundary_intersection(JMcusp, limit_J_max, topGamma_inv, lower_hull_gamma_inv, J_intersect_source, slope, J_intersect_cusp);
-        const float approxLimit_J = J_intersect_source + slope * approxLimit_M;
-
-        const f3 approximate_JMh = {approxLimit_J, approxLimit_M, testJMh[2]};
+        const f3 approximate_JMh = {approxLimit_J, approxLimit_M, test_data.testJMh[2]};
         const f3 newLimitRGB = JMh_to_RGB(approximate_JMh, limitJMhParams);
-        const f3 newLimitRGBScaled = mult_f_f3(reference_luminance / peakLuminance, newLimitRGB); // TODO: can this be avoided by amending outside_hull to account for peak?
 
-        if (!outside_hull(newLimitRGBScaled))
+        if (!outside_hull(newLimitRGB, luminance_limit))
         {
             return false;
         }
@@ -1171,20 +1189,12 @@ void make_upper_hull_gamma(
     float lower_hull_gamma_inv,
     const JMhParams &limitJMhParams)
 {
-    const std::array<float, gamma_test_count> testPositions = {0.01f, 0.1f, 0.5f, 0.8f, 0.99f};
-
     for (int i = gamutCuspTable.base_index; i < gamutCuspTable.base_index + gamutCuspTable.size; i++)
     {
         const float hue = hue_table.table[i];
         const f2 JMcusp = { gamutCuspTable.table[i][0], gamutCuspTable.table[i][1] };
 
-        std::array<f3, gamma_test_count> testJMh;
-        std::generate(testJMh.begin(), testJMh.end(), [JMcusp, limit_J_max, testPositions, hue, testIndex = 0]() mutable {
-            const float testJ = lerpf(JMcusp[0], limit_J_max, testPositions[testIndex]);
-            f3 result = { testJ, JMcusp[1], hue };
-            testIndex++;
-            return result;
-        });
+        std::array<testData, gamma_test_count> data = generate_gamma_test_data(JMcusp, hue, limit_J_max, mid_J, focus_dist);
 
         // TODO: calculate best fitting inverse gamma directly rather than reciprocating it in the loop
         // TODO: adjacent found gamma values typically fall close to each other could initialise the search range
@@ -1194,9 +1204,9 @@ void make_upper_hull_gamma(
         float high = low + search_range;
         bool outside = false;
 
-        auto gamma_fit_predicate = [JMcusp, &testJMh, peakLuminance, limit_J_max, mid_J, focus_dist, lower_hull_gamma_inv, limitJMhParams](float gamma)
+        auto gamma_fit_predicate = [JMcusp, data, peakLuminance, limit_J_max, lower_hull_gamma_inv, limitJMhParams](float gamma)
         {
-            return evaluate_gamma_fit(JMcusp, testJMh, 1.0f / gamma, peakLuminance, limit_J_max, mid_J, focus_dist, lower_hull_gamma_inv, limitJMhParams);
+            return evaluate_gamma_fit(JMcusp, data, 1.0f / gamma, peakLuminance, limit_J_max, lower_hull_gamma_inv, limitJMhParams);
         };
         while (!(outside) && (high < gammaMaximum))
         {
@@ -1220,13 +1230,13 @@ void make_upper_hull_gamma(
             if (gammaFound)
             {
                 high = testGamma;
-                gamutCuspTable.table[i][2] = 1.0f / high;
             }
             else
             {
                 low = testGamma;
             }
         }
+        gamutCuspTable.table[i][2] = 1.0f / high;
     }
 
     // Copy last populated entries to empty spot 'wrapping' entries

From f1937a17cfebcd9b29dc06a2d33a3ecdee8caed2 Mon Sep 17 00:00:00 2001
From: Kevin Wheatley <kevin.wheatley@framestore.com>
Date: Mon, 27 Jan 2025 13:47:28 +0000
Subject: [PATCH 46/70] Adjust GPU hue lookup to take advantage of more uniform
 distribution

Signed-off-by: Kevin Wheatley <kevin.wheatley@framestore.com>
---
 src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
index 8da4d85a3a..d16fc286ab 100644
--- a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
@@ -728,14 +728,17 @@ std::string _Add_Cusp_table(
     ss.newLine() << "{";
     ss.indent();
 
-    ss.newLine() << ss.intDecl("i_lo") << " = 0;";
-    ss.newLine() << ss.intDecl("i_hi") << " = " << g.gamut_cusp_table.base_index + g.gamut_cusp_table.size << ";";
-
     ss.newLine() << ss.floatDecl("hwrap") << " = h;";
     ss.newLine() << "hwrap = hwrap - floor(hwrap / 360.0) * 360.0;";
     ss.newLine() << "hwrap = (hwrap < 0.0) ? hwrap + 360.0 : hwrap;";
     ss.newLine() << ss.intDecl("i") << " = " << ss.intKeyword() << "(hwrap + " << g.gamut_cusp_table.base_index << ");";
 
+    ss.newLine() << ss.intDecl("i_lo") << " = " << ss.intKeyword() << "(max(0, "
+                 << ss.floatKeyword() << "(i + " << g.hue_linearity_search_range[0] << ")));";
+    ss.newLine() << ss.intDecl("i_hi") << " = " << ss.intKeyword() << "(min("
+                 << ss.floatKeyword() << "(" << g.gamut_cusp_table.base_index + g.gamut_cusp_table.size << "), "
+                 << ss.floatKeyword() << "(i + " << g.hue_linearity_search_range[1] << ")));";
+
     ss.newLine() << "while (i_lo + 1 < i_hi)";
     ss.newLine() << "{";
     ss.indent();

From debd9792c2fdf7286bd9cad5ad350c7b93f4d07c Mon Sep 17 00:00:00 2001
From: Kevin Wheatley <kevin.wheatley@framestore.com>
Date: Mon, 27 Jan 2025 13:55:46 +0000
Subject: [PATCH 47/70] Fix GLSL compatibility with hue lookup; remove compiler
 warnings for unused parameters

Signed-off-by: Kevin Wheatley <kevin.wheatley@framestore.com>
---
 .../ops/fixedfunction/ACES2/Transform.cpp     |  8 +++----
 .../ops/fixedfunction/ACES2/Transform.h       |  4 ++--
 .../ops/fixedfunction/FixedFunctionOpCPU.cpp  |  8 +++----
 .../ops/fixedfunction/FixedFunctionOpGPU.cpp  | 24 ++++++++-----------
 4 files changed, 20 insertions(+), 24 deletions(-)

diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
index 0a443dcd71..c34cc223b5 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
@@ -307,7 +307,7 @@ inline float toe_inv( float x, float limit, float k1_in, float k2_in)
 }
 
 template <bool inverse>
-inline float aces_tonescale(const float Y_in, const JMhParams &p, const ToneScaleParams &pt)
+inline float aces_tonescale(const float Y_in, const ToneScaleParams &pt)
 {
     if (inverse)
     {
@@ -329,7 +329,7 @@ inline float tonescale(const float J, const JMhParams &p, const ToneScaleParams
     // Tonescale applied in Y (convert to and from J)
     const float J_abs = std::abs(J);
     const float Y_in  = _J_to_Y(J_abs, p);
-    const float Y_out = aces_tonescale<inverse>(Y_in, p, pt);
+    const float Y_out = aces_tonescale<inverse>(Y_in, pt);
     const float J_out = _Y_to_J(Y_out, p);
     return std::copysign(J_out, J);
 }
@@ -344,7 +344,7 @@ float tonescale_inv(const float J, const JMhParams &p, const ToneScaleParams &pt
     return tonescale<true>(J, p, pt);
 }
 
-f3 chroma_compress_fwd(const f3 &JMh, const float J_ts, const JMhParams &p, const ResolvedSharedCompressionParameters &pr, const ChromaCompressParams &pc)
+f3 chroma_compress_fwd(const f3 &JMh, const float J_ts, const ResolvedSharedCompressionParameters &pr, const ChromaCompressParams &pc)
 {
     const float J = JMh[0];
     const float M = JMh[1];
@@ -369,7 +369,7 @@ f3 chroma_compress_fwd(const f3 &JMh, const float J_ts, const JMhParams &p, cons
     return {J_ts, M_cp, h};
 }
 
-f3 chroma_compress_inv(const f3 &JMh, const float J, const JMhParams &p, const ResolvedSharedCompressionParameters &pr, const ChromaCompressParams &pc)
+f3 chroma_compress_inv(const f3 &JMh, const float J, const ResolvedSharedCompressionParameters &pr, const ChromaCompressParams &pc)
 {
     const float J_ts = JMh[0];
     const float M_cp = JMh[1];
diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.h b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.h
index b2f92359b2..16693dfbd4 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.h
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.h
@@ -26,8 +26,8 @@ f3 JMh_to_RGB(const f3 &JMh, const JMhParams &p);
 float tonescale_fwd(const float J, const JMhParams &p, const ToneScaleParams &pt);
 float tonescale_inv(const float J, const JMhParams &p, const ToneScaleParams &pt);
 
-f3 chroma_compress_fwd(const f3 &JMh, const float J_ts, const JMhParams &p, const ResolvedSharedCompressionParameters &ps, const ChromaCompressParams &pc);
-f3 chroma_compress_inv(const f3 &JMh, const float J, const JMhParams &p, const ResolvedSharedCompressionParameters &ps, const ChromaCompressParams &pc);
+f3 chroma_compress_fwd(const f3 &JMh, const float J_ts, const ResolvedSharedCompressionParameters &ps, const ChromaCompressParams &pc);
+f3 chroma_compress_inv(const f3 &JMh, const float J, const ResolvedSharedCompressionParameters &ps, const ChromaCompressParams &pc);
 
 f3 gamut_compress_fwd(const f3 &JMh, const ResolvedSharedCompressionParameters &ps, const GamutCompressParams &p);
 f3 gamut_compress_inv(const f3 &JMh, const ResolvedSharedCompressionParameters &ps, const GamutCompressParams &p);
diff --git a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpCPU.cpp b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpCPU.cpp
index a4edc17a32..18bd5b8338 100644
--- a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpCPU.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpCPU.cpp
@@ -1055,7 +1055,7 @@ void Renderer_ACES_OutputTransform20::fwd(const void * inImg, void * outImg, lon
         const ACES2::f3 JMh           = ACES2::RGB_to_JMh(RGBIn, m_pIn);
         const ACES2::ResolvedSharedCompressionParameters rp = resolve_CompressionParams(JMh[2], m_s);
         const float J_ts = ACES2::tonescale_fwd(JMh[0], m_pIn, m_t);
-        const ACES2::f3 tonemappedJMh = ACES2::chroma_compress_fwd(JMh, J_ts, m_pIn, rp, m_c);
+        const ACES2::f3 tonemappedJMh = ACES2::chroma_compress_fwd(JMh, J_ts, rp, m_c);
         const ACES2::f3 compressedJMh = ACES2::gamut_compress_fwd(tonemappedJMh, rp, m_g);
         const ACES2::f3 RGBOut        = ACES2::JMh_to_RGB(compressedJMh, m_pOut);
 
@@ -1081,7 +1081,7 @@ void Renderer_ACES_OutputTransform20::inv(const void * inImg, void * outImg, lon
         const ACES2::ResolvedSharedCompressionParameters rp = resolve_CompressionParams(compressedJMh[2], m_s);
         const ACES2::f3 tonemappedJMh = ACES2::gamut_compress_inv(compressedJMh, rp, m_g);
         const float J = ACES2::tonescale_inv(tonemappedJMh[0], m_pIn, m_t);
-        const ACES2::f3 JMh           = ACES2::chroma_compress_inv(tonemappedJMh, J, m_pIn, rp, m_c);
+        const ACES2::f3 JMh           = ACES2::chroma_compress_inv(tonemappedJMh, J, rp, m_c);
         const ACES2::f3 RGBin         = ACES2::JMh_to_RGB(JMh, m_pIn);
 
         out[0] = RGBin[0];
@@ -1205,7 +1205,7 @@ void Renderer_ACES_TONESCALE_COMPRESS_20::fwd(const void * inImg, void * outImg,
         const float normalised_hue = ACES2::from_degrees(in[2]);
         const ACES2::ResolvedSharedCompressionParameters rp = resolve_CompressionParams(normalised_hue, m_s);
         const float J_ts = ACES2::tonescale_fwd(in[0], m_p, m_t);
-        const ACES2::f3 JMh = ACES2::chroma_compress_fwd({in[0], in[1], normalised_hue}, J_ts, m_p, rp, m_c);
+        const ACES2::f3 JMh = ACES2::chroma_compress_fwd({in[0], in[1], normalised_hue}, J_ts, rp, m_c);
 
         out[0] = JMh[0];
         out[1] = JMh[1];
@@ -1227,7 +1227,7 @@ void Renderer_ACES_TONESCALE_COMPRESS_20::inv(const void * inImg, void * outImg,
         const float normalised_hue = ACES2::from_degrees(in[2]);
         const ACES2::ResolvedSharedCompressionParameters rp = resolve_CompressionParams(normalised_hue, m_s);
         const float J = ACES2::tonescale_inv(in[0], m_p, m_t);
-        const ACES2::f3 JMh = ACES2::chroma_compress_inv({in[0], in[1],  normalised_hue}, J, m_p, rp, m_c);
+        const ACES2::f3 JMh = ACES2::chroma_compress_inv({in[0], in[1],  normalised_hue}, J, rp, m_c);
 
         out[0] = JMh[0];
         out[1] = JMh[1];
diff --git a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
index d16fc286ab..a65fac0de3 100644
--- a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
@@ -788,8 +788,7 @@ std::string _Add_Cusp_table(
 std::string _Add_Focus_Gain_func(
     GpuShaderCreatorRcPtr & shaderCreator,
     unsigned resourceIndex,
-    const ACES2::SharedCompressionParameters & s,
-    const ACES2::GamutCompressParams & g)
+    const ACES2::SharedCompressionParameters & s)
 {
     // Reserve name
     std::ostringstream resName;
@@ -835,8 +834,7 @@ std::string _Add_Focus_Gain_func(
 std::string _Add_Solve_J_Intersect_func(
     GpuShaderCreatorRcPtr & shaderCreator,
     unsigned resourceIndex,
-    const ACES2::SharedCompressionParameters & s,
-    const ACES2::GamutCompressParams & g)
+    const ACES2::SharedCompressionParameters & s)
 {
     // Reserve name
     std::ostringstream resName;
@@ -915,7 +913,6 @@ std::string _Add_Find_Gamut_Boundary_Intersection_func(
     GpuShaderCreatorRcPtr & shaderCreator,
     unsigned resourceIndex,
     const ACES2::SharedCompressionParameters & s,
-    const ACES2::GamutCompressParams & g,
     const std::string & solveJIntersectName)
 {
     // Reserve name
@@ -1114,7 +1111,6 @@ std::string _Add_Compress_Gamut_func(
     unsigned resourceIndex,
     const ACES2::SharedCompressionParameters & s,
     const ACES2::GamutCompressParams & g,
-    const std::string & cuspName,
     const std::string & getFocusGainName,
     const std::string & findGamutBoundaryIntersectionName,
     const std::string & getReachBoundaryName,
@@ -1206,12 +1202,12 @@ void _Add_Gamut_Compress_Fwd_Shader(
     const std::string & reachName)
 {
     std::string cuspName = _Add_Cusp_table(shaderCreator, resourceIndex, g);
-    std::string getFocusGainName = _Add_Focus_Gain_func(shaderCreator, resourceIndex, s, g);
-    std::string solveJIntersectName = _Add_Solve_J_Intersect_func(shaderCreator, resourceIndex, s, g);
-    std::string findGamutBoundaryIntersectionName = _Add_Find_Gamut_Boundary_Intersection_func(shaderCreator, resourceIndex, s, g, solveJIntersectName);
+    std::string getFocusGainName = _Add_Focus_Gain_func(shaderCreator, resourceIndex, s);
+    std::string solveJIntersectName = _Add_Solve_J_Intersect_func(shaderCreator, resourceIndex, s);
+    std::string findGamutBoundaryIntersectionName = _Add_Find_Gamut_Boundary_Intersection_func(shaderCreator, resourceIndex, s, solveJIntersectName);
     std::string getReachBoundaryName = _Add_Reach_Boundary_func(shaderCreator, resourceIndex, s, g, reachName, getFocusGainName, solveJIntersectName);
     std::string compressionName = _Add_Compression_func(shaderCreator, resourceIndex, false);
-    std::string gamutCompressName = _Add_Compress_Gamut_func(shaderCreator, resourceIndex, s, g, cuspName, getFocusGainName, findGamutBoundaryIntersectionName, getReachBoundaryName, compressionName);
+    std::string gamutCompressName = _Add_Compress_Gamut_func(shaderCreator, resourceIndex, s, g, getFocusGainName, findGamutBoundaryIntersectionName, getReachBoundaryName, compressionName);
 
     const std::string pxl(shaderCreator->getPixelName());
 
@@ -1228,12 +1224,12 @@ void _Add_Gamut_Compress_Inv_Shader(
     const std::string & reachName)
 {
     std::string cuspName = _Add_Cusp_table(shaderCreator, resourceIndex, g);
-    std::string getFocusGainName = _Add_Focus_Gain_func(shaderCreator, resourceIndex, s, g);
-    std::string solveJIntersectName = _Add_Solve_J_Intersect_func(shaderCreator, resourceIndex,s, g);
-    std::string findGamutBoundaryIntersectionName = _Add_Find_Gamut_Boundary_Intersection_func(shaderCreator, resourceIndex, s, g, solveJIntersectName);
+    std::string getFocusGainName = _Add_Focus_Gain_func(shaderCreator, resourceIndex, s);
+    std::string solveJIntersectName = _Add_Solve_J_Intersect_func(shaderCreator, resourceIndex, s);
+    std::string findGamutBoundaryIntersectionName = _Add_Find_Gamut_Boundary_Intersection_func(shaderCreator, resourceIndex, s, solveJIntersectName);
     std::string getReachBoundaryName = _Add_Reach_Boundary_func(shaderCreator, resourceIndex, s, g, reachName, getFocusGainName, solveJIntersectName);
     std::string compressionName = _Add_Compression_func(shaderCreator, resourceIndex, true);
-    std::string gamutCompressName = _Add_Compress_Gamut_func(shaderCreator, resourceIndex, s, g, cuspName, getFocusGainName, findGamutBoundaryIntersectionName, getReachBoundaryName, compressionName);
+    std::string gamutCompressName = _Add_Compress_Gamut_func(shaderCreator, resourceIndex, s, g, getFocusGainName, findGamutBoundaryIntersectionName, getReachBoundaryName, compressionName);
 
     const std::string pxl(shaderCreator->getPixelName());
 

From 0fca9e64ccc938cdf6be3e943d50dba0b51aa2dc Mon Sep 17 00:00:00 2001
From: Kevin Wheatley <kevin.wheatley@framestore.com>
Date: Mon, 27 Jan 2025 14:06:36 +0000
Subject: [PATCH 48/70] Attempt to simplify table generation code

Signed-off-by: Kevin Wheatley <kevin.wheatley@framestore.com>
---
 .../ops/fixedfunction/ACES2/Common.h          | 114 ++++++++------
 .../ops/fixedfunction/ACES2/Transform.cpp     | 146 ++++++++----------
 .../ops/fixedfunction/FixedFunctionOpGPU.cpp  |  13 +-
 3 files changed, 140 insertions(+), 133 deletions(-)

diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h b/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
index b4deebba6f..2c2576937e 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
@@ -14,26 +14,82 @@ namespace OCIO_NAMESPACE
 
 namespace ACES2
 {
+constexpr float PI = 3.14159265358979f;
+
+constexpr float hue_limit = 360.0f;
+//constexpr float hue_limit = 2.0f * PI;
+inline float _wrap_to_hue_limit(float y)
+{
+    if ( y < 0.f)
+    {
+        y = y + hue_limit;
+    }
+    return y;
+}
+
+inline float wrap_to_hue_limit(float hue)
+{
+    float y = std::fmod(hue, hue_limit);
+    return _wrap_to_hue_limit(y);
+}
+inline constexpr float to_degrees(const float v) { return v; }
+inline float from_degrees(const float v) { return wrap_to_hue_limit(v); }
+inline constexpr float to_radians(const float v) { return PI * v / 180.0f; };
+inline float _from_radians(const float v) { return _wrap_to_hue_limit(180.0f * v / PI); }; // v needs to be wrapped already
+inline float from_radians(const float v) { return wrap_to_hue_limit(180.0f * v / PI); };
+/*
+inline constexpr float to_degrees(const float v) { return 180.0f * v / PI; }
+inline float from_degrees(const float v) { return wrap_to_hue_limit(PI * v / 180.0f); }
+inline constexpr float to_radians(const float v) { return v; }
+inline float _from_radians(const float v) { return _wrap_to_hue_limit(v); };
+inline float from_radians(const float v) { return wrap_to_hue_limit(v); };
+*/
+
+struct TableBase
+{
+    static constexpr int _TABLE_ADDITION_ENTRIES = 2;
+    static constexpr int base_index = 1;
+    static constexpr int nominal_size = 360;
+    static constexpr int total_size = nominal_size + _TABLE_ADDITION_ENTRIES;
+
+    static constexpr int lower_wrap_index = 0;
+    static constexpr int upper_wrap_index = base_index + nominal_size;
+    static constexpr int first_nominal_index = base_index;
+    static constexpr int last_nominal_index = upper_wrap_index - 1;
+
+    inline float base_hue_for_position(int i_lo) const
+    {
+        const float result = i_lo * hue_limit / nominal_size;
+        return result;
+    }
+
+    inline int hue_position_in_uniform_table(float wrapped_hue) const 
+    {
+        return int(wrapped_hue / hue_limit * float(nominal_size)); // TODO: can we use the 'lost' fraction for the lerps?
+    }
 
-constexpr int TABLE_SIZE = 360;
-constexpr int TABLE_ADDITION_ENTRIES = 2;
-constexpr int TABLE_TOTAL_SIZE = TABLE_SIZE + TABLE_ADDITION_ENTRIES;
-constexpr int TABLE_BASE_INDEX = 1;
+   inline int nominal_hue_position_in_uniform_table(float wrapped_hue) const 
+    {
+        return first_nominal_index + hue_position_in_uniform_table(wrapped_hue);
+    }
+
+    inline int next_position_in_table(int entry) const
+    {
+        return (entry + 1) % nominal_size;
+    }
 
-struct Table3D
+    inline int clamp_to_table_bounds(int entry) const // TODO: this should be removed if we can constrain the hue range properly
+    {
+        return std::min(nominal_size - 1, std::max(0, entry));
+    }
+};
+
+struct Table3D : public TableBase, std::array<float[3], TableBase::total_size>
 {
-    static constexpr int base_index = TABLE_BASE_INDEX;
-    static constexpr int size = TABLE_SIZE;
-    static constexpr int total_size = TABLE_TOTAL_SIZE;
-    float table[TABLE_TOTAL_SIZE][3];
 };
 
-struct Table1D
+struct Table1D : public TableBase, std::array<float, TableBase::total_size>
 {
-    static constexpr int base_index = TABLE_BASE_INDEX;
-    static constexpr int size = TABLE_SIZE;
-    static constexpr int total_size = TABLE_TOTAL_SIZE;
-    float table[TABLE_TOTAL_SIZE];
 };
 
 struct JMhParams
@@ -114,36 +170,6 @@ constexpr float J_scale = 100.0f;
 constexpr float cam_nl_Y_reference = 100.0f;
 constexpr float cam_nl_offset = 0.2713f * cam_nl_Y_reference;
 constexpr float cam_nl_scale = 4.0f * cam_nl_Y_reference;
-constexpr float PI = 3.14159265358979f;
-
-constexpr float hue_limit = 360.0f;
-//constexpr float hue_limit = 2.0f * PI;
-inline float _wrap_to_hue_limit(float y)
-{
-    if ( y < 0.f)
-    {
-        y = y + hue_limit;
-    }
-    return y;
-}
-
-inline float wrap_to_hue_limit(float hue)
-{
-    float y = std::fmod(hue, hue_limit);
-    return _wrap_to_hue_limit(y);
-}
-inline constexpr float to_degrees(const float v) { return v; }
-inline float from_degrees(const float v) { return wrap_to_hue_limit(v); }
-inline constexpr float to_radians(const float v) { return PI * v / 180.0f; };
-inline float _from_radians(const float v) { return _wrap_to_hue_limit(180.0f * v / PI); }; // v needs to be wrapped already
-inline float from_radians(const float v) { return wrap_to_hue_limit(180.0f * v / PI); };
-/*
-inline constexpr float to_degrees(const float v) { return 180.0f * v / PI; }
-inline float from_degrees(const float v) { return wrap_to_hue_limit(PI * v / 180.0f); }
-inline constexpr float to_radians(const float v) { return v; }
-inline float _from_radians(const float v) { return _wrap_to_hue_limit(v); };
-inline float from_radians(const float v) { return wrap_to_hue_limit(v); };
-*/
 
 // Chroma compression
 constexpr float chroma_compress = 2.4f;
diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
index c34cc223b5..fb23d6583a 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
@@ -44,38 +44,19 @@ inline float midpoint(float a, float b)
     return (a + b) / 2.f;
 }
 
-inline float base_hue_for_position(int i_lo, int table_size) 
-{
-    const float result = i_lo * hue_limit / table_size;
-    return result;
-}
-
-inline int hue_position_in_uniform_table(float wrapped_hue, int table_size)
-{
-    return int(wrapped_hue / hue_limit * float(table_size)); // TODO: can we use the 'lost' fraction for the lerps?
-}
-
-inline int next_position_in_table(int entry, int table_size)
-{
-    return (entry + 1) % table_size;
-}
-
-inline int clamp_to_table_bounds(int entry, int table_size) // TODO: this should be removed if we can constrain the hue range properly
-{
-    return std::min(table_size - 1, std::max(0, entry));
-}
-
 int lookup_hue_interval(float h, const Table1D &hues, const std::array<int, 2> & hue_linearity_search_range)
 {
     // Search the given Table for the interval containing the desired hue
     // Returns the upper index of the interval
-    int i = hue_position_in_uniform_table(h, hues.size) + hues.base_index;
-    int i_lo = std::max(0, i + hue_linearity_search_range[0]);
-    int i_hi = std::min(hues.base_index + hues.size, i + hue_linearity_search_range[1]);
+
+    // We can narrow the search range based on the hues being almost uniform
+    int i = hues.nominal_hue_position_in_uniform_table(h);
+    int i_lo = std::max(hues.lower_wrap_index, i + hue_linearity_search_range[0]); // Should be nominal not lower_wrap?
+    int i_hi = std::min(hues.upper_wrap_index, i + hue_linearity_search_range[1]);
 
     while (i_lo + 1 < i_hi)
     {
-        if (h > hues.table[i])
+        if (h > hues[i])
         {
             i_lo = i;
         }
@@ -86,7 +67,7 @@ int lookup_hue_interval(float h, const Table1D &hues, const std::array<int, 2> &
         i = midpoint(i_lo, i_hi);
     }
 
-    i_hi = std::max(1, i_hi);
+    i_hi = std::max(1, i_hi); // TODO: should not be needed if we initialise with the correct lo and hi
 
     return i_hi;
 }
@@ -103,16 +84,16 @@ inline float interpolation_weight(float h, int h_lo, int h_hi)
 
 inline f3 cusp_from_table(int i_hi, float t, const Table3D &gt)
 {
-    return lerp(gt.table[i_hi-1], gt.table[i_hi], t);
+    return lerp(gt[i_hi-1], gt[i_hi], t);
 }
 
 float reach_m_from_table(float h, const ACES2::Table1D &rt)
 {
-    const int i_lo = clamp_to_table_bounds(hue_position_in_uniform_table(h, rt.size), rt.total_size);  // TODO: this should be removed if we can constrain the hue range properly
-    const int i_hi = next_position_in_table(i_lo, rt.size);
+    const int i_lo = rt.clamp_to_table_bounds(rt.hue_position_in_uniform_table(h));  // TODO: this should be removed if we can constrain the hue range properly
+    const int i_hi = rt.next_position_in_table(i_lo);
 
     const float t = interpolation_weight(h, i_lo, i_hi);
-    return lerpf(rt.table[i_lo], rt.table[i_hi], t);
+    return lerpf(rt[i_lo], rt[i_hi], t);
 }
 
 //
@@ -556,7 +537,7 @@ void find_reach_corners_table(std::array<f3, totalCornerCount>& JMh_corners, con
             min_index = i;
     }
 
-    // Rotate entries placing lowest at [1] (not [0])
+    // Rotate entries placing lowest at [1] (not [0]) // TODO: could use std::rotate_copy or even the ranges vs in C++20
     for (int i = 0; i != cuspCornerCount; ++i)
     {
       JMh_corners[i + 1] = temp_JMh_corners[(i + min_index) % cuspCornerCount];
@@ -613,20 +594,20 @@ void build_hue_sample_interval(const int samples, const float lower, const float
     const float delta = (upper - lower) / float(samples);
     for (int i = 0; i != samples; ++i)
     {
-        hue_table.table[base + i] = lower + float(i) * delta;
+        hue_table[base + i] = lower + float(i) * delta;
     }
 }
 
 void build_hue_table(Table1D &hue_table, const std::array<float, max_sorted_corners>& sorted_hues, const int unique_hues)
 {
-    const float ideal_spacing = hue_table.size / hue_limit;
-    std::array<int, 2 * cuspCornerCount + 2> samples_count;
+    const float ideal_spacing = hue_table.nominal_size / hue_limit;
+    std::array<int, 2 * cuspCornerCount + 2> samples_count = {};
     int         last_idx  = -1;
     int         min_index = sorted_hues[0] == 0.0f ? 0 : 1; // Ensure we can always sample at 0.0 hue
     for (int hue_idx = 0; hue_idx != unique_hues; ++hue_idx)
     {
         // BUG: "hue_table.size - 1" will fail if we have multiple hues mapping near the top of the table
-        int nominal_idx = std::min(std::max(int(std::round(sorted_hues[hue_idx] * ideal_spacing)), min_index), hue_table.size - 1);
+        int nominal_idx = std::min(std::max(int(std::round(sorted_hues[hue_idx] * ideal_spacing)), min_index), hue_table.nominal_size - 1);
         if (last_idx == nominal_idx)
         {
             // Last two hues should sample at same index, need to adjust them
@@ -640,7 +621,7 @@ void build_hue_table(Table1D &hue_table, const std::array<float, max_sorted_corn
                 nominal_idx = nominal_idx + 1;
             }
         }
-        samples_count[hue_idx] = std::min(nominal_idx, hue_table.size - 1);
+        samples_count[hue_idx] = std::min(nominal_idx, hue_table.nominal_size - 1);
         last_idx = min_index = nominal_idx;
     }
 
@@ -656,10 +637,10 @@ void build_hue_table(Table1D &hue_table, const std::array<float, max_sorted_corn
         total_samples += samples;
     }
     // BUG: could break if we are unlucky with samples all being used up by this point
-    build_hue_sample_interval(hue_table.size - total_samples, sorted_hues[i - 1], hue_limit, hue_table, total_samples + 1);
+    build_hue_sample_interval(hue_table.nominal_size - total_samples, sorted_hues[i - 1], hue_limit, hue_table, total_samples + 1);
 
-    hue_table.table[0]                  = hue_table.table[hue_table.base_index + hue_table.size - 1] - hue_limit;
-    hue_table.table[hue_table.total_size - 1] = hue_table.table[hue_table.base_index] + hue_limit;
+    hue_table[hue_table.lower_wrap_index] = hue_table[hue_table.last_nominal_index] - hue_limit;
+    hue_table[hue_table.upper_wrap_index] = hue_table[hue_table.first_nominal_index] + hue_limit;
 }
 
 std::array<float, 2> find_display_cusp_for_hue(float hue, const std::array<f3, totalCornerCount>& RGB_corners, const std::array<f3, totalCornerCount>& JMh_corners,
@@ -738,23 +719,22 @@ Table3D build_cusp_table(const Table1D& hue_table, const std::array<f3, totalCor
 {
     std::array<float, 2> previous = {0.0f, 0.0f};
     Table3D output_table;
-    for (int i = output_table.base_index; i != output_table.base_index + output_table.size; ++i)
+    for (int i = output_table.first_nominal_index; i != output_table.upper_wrap_index; ++i)
     {
-      const float hue = hue_table.table[i];
+      const float hue = hue_table[i];
       const std::array<float, 2> JM = find_display_cusp_for_hue(hue, RGB_corners, JMh_corners, params, previous);
-      output_table.table[i][0] = JM[0];
-      output_table.table[i][1] = JM[1] * (1.f + smooth_m * smooth_cusps);;
-      output_table.table[i][2] = hue;
+      output_table[i][0] = JM[0];
+      output_table[i][1] = JM[1] * (1.f + smooth_m * smooth_cusps);;
+      output_table[i][2] = hue;
     }
 
     // Copy extra entries to ease the code to handle hues wrapping around
-    // TODO: consider adding operator[] to Table3D to handle this table.table array access more cleanly - subclass std::array?
-    output_table.table[0][0]                                           = output_table.table[output_table.base_index + output_table.size - 1][0];
-    output_table.table[0][1]                                           = output_table.table[output_table.base_index + output_table.size - 1][1];
-    output_table.table[0][2]                                           = output_table.table[output_table.base_index + output_table.size - 1][2];
-    output_table.table[output_table.base_index + output_table.size][0] = output_table.table[output_table.base_index][0];
-    output_table.table[output_table.base_index + output_table.size][1] = output_table.table[output_table.base_index][1];
-    output_table.table[output_table.base_index + output_table.size][2] = output_table.table[output_table.base_index][2];
+    output_table[output_table.lower_wrap_index][0] = output_table[output_table.last_nominal_index][0];
+    output_table[output_table.lower_wrap_index][1] = output_table[output_table.last_nominal_index][1];
+    output_table[output_table.lower_wrap_index][2] = hue_table[hue_table.lower_wrap_index];
+    output_table[output_table.upper_wrap_index][0] = output_table[output_table.first_nominal_index][0];
+    output_table[output_table.upper_wrap_index][1] = output_table[output_table.first_nominal_index][1];
+    output_table[output_table.upper_wrap_index][2] = hue_table[hue_table.upper_wrap_index];
     return output_table;
 }
 
@@ -783,47 +763,47 @@ Table3D make_gamut_table(const JMhParams &params, float peakLuminance, Table1D&
 {
     Table3D gamutCuspTableUnsorted{};
     int minhIndex = 0;
-    for (int i = 0; i < gamutCuspTableUnsorted.size; i++)
+    for (int i = 0; i < gamutCuspTableUnsorted.nominal_size; i++)
     {
-        const float hNorm = float(i) / float(gamutCuspTableUnsorted.size);
+        const float hNorm = float(i) / float(gamutCuspTableUnsorted.nominal_size);
         const f3 HSV = {hNorm, 1., 1.};
         const f3 RGB = HSV_to_RGB(HSV);
         const f3 scaledRGB = mult_f_f3(peakLuminance / reference_luminance, RGB);
         const f3 JMh = RGB_to_JMh(scaledRGB, params);
 
-        gamutCuspTableUnsorted.table[i][0] = JMh[0];
-        gamutCuspTableUnsorted.table[i][1] = JMh[1]  * (1.f + smooth_m * smooth_cusps);
-        gamutCuspTableUnsorted.table[i][2] = JMh[2];
-        if ( gamutCuspTableUnsorted.table[i][2] < gamutCuspTableUnsorted.table[minhIndex][2])
+        gamutCuspTableUnsorted[i][0] = JMh[0];
+        gamutCuspTableUnsorted[i][1] = JMh[1]  * (1.f + smooth_m * smooth_cusps);
+        gamutCuspTableUnsorted[i][2] = JMh[2];
+        if ( gamutCuspTableUnsorted[i][2] < gamutCuspTableUnsorted[minhIndex][2])
             minhIndex = i;
     }
 
     Table3D gamutCuspTable{};
-    for (int i = 0; i < gamutCuspTableUnsorted.size; i++)
+    for (int i = 0; i < gamutCuspTableUnsorted.nominal_size; i++)
     {
-        gamutCuspTable.table[i + gamutCuspTable.base_index][0] = gamutCuspTableUnsorted.table[(minhIndex+i) % gamutCuspTableUnsorted.size][0];
-        gamutCuspTable.table[i + gamutCuspTable.base_index][1] = gamutCuspTableUnsorted.table[(minhIndex+i) % gamutCuspTableUnsorted.size][1];
-        gamutCuspTable.table[i + gamutCuspTable.base_index][2] = gamutCuspTableUnsorted.table[(minhIndex+i) % gamutCuspTableUnsorted.size][2];
+        gamutCuspTable[i + gamutCuspTable.base_index][0] = gamutCuspTableUnsorted[(minhIndex+i) % gamutCuspTableUnsorted.nominal_size][0];
+        gamutCuspTable[i + gamutCuspTable.base_index][1] = gamutCuspTableUnsorted[(minhIndex+i) % gamutCuspTableUnsorted.nominal_size][1];
+        gamutCuspTable[i + gamutCuspTable.base_index][2] = gamutCuspTableUnsorted[(minhIndex+i) % gamutCuspTableUnsorted.nominal_size][2];
     }
 
     // Copy last populated entry to first empty spot
-    gamutCuspTable.table[0][0] = gamutCuspTable.table[gamutCuspTable.base_index + gamutCuspTable.size-1][0];
-    gamutCuspTable.table[0][1] = gamutCuspTable.table[gamutCuspTable.base_index + gamutCuspTable.size-1][1];
-    gamutCuspTable.table[0][2] = gamutCuspTable.table[gamutCuspTable.base_index + gamutCuspTable.size-1][2];
+    gamutCuspTable[gamutCuspTable.lower_wrap_index][0] = gamutCuspTable[gamutCuspTable.last_nominal_index][0];
+    gamutCuspTable[gamutCuspTable.lower_wrap_index][1] = gamutCuspTable[gamutCuspTable.last_nominal_index][1];
+    gamutCuspTable[gamutCuspTable.lower_wrap_index][2] = gamutCuspTable[gamutCuspTable.last_nominal_index][2];
 
     // Copy first populated entry to last empty spot
-    gamutCuspTable.table[gamutCuspTable.base_index + gamutCuspTable.size][0] = gamutCuspTable.table[gamutCuspTable.base_index][0];
-    gamutCuspTable.table[gamutCuspTable.base_index + gamutCuspTable.size][1] = gamutCuspTable.table[gamutCuspTable.base_index][1];
-    gamutCuspTable.table[gamutCuspTable.base_index + gamutCuspTable.size][2] = gamutCuspTable.table[gamutCuspTable.base_index][2];
+    gamutCuspTable[gamutCuspTable.upper_wrap_index][0] = gamutCuspTable[gamutCuspTable.first_nominal_index][0];
+    gamutCuspTable[gamutCuspTable.upper_wrap_index][1] = gamutCuspTable[gamutCuspTable.first_nominal_index][1];
+    gamutCuspTable[gamutCuspTable.upper_wrap_index][2] = gamutCuspTable[gamutCuspTable.first_nominal_index][2];
 
     // Wrap the hues, to maintain monotonicity. These entries will fall outside [0.0, hue_limit)
-    gamutCuspTable.table[0][2] = gamutCuspTable.table[0][2] - hue_limit;
-    gamutCuspTable.table[gamutCuspTable.base_index + gamutCuspTable.size][2] = gamutCuspTable.table[gamutCuspTable.base_index + gamutCuspTable.size][2] + hue_limit;
+    gamutCuspTable[gamutCuspTable.lower_wrap_index][2] -= hue_limit;
+    gamutCuspTable[gamutCuspTable.upper_wrap_index][2] += hue_limit;
 
     // Extract hue table
     for (int i = 0; i < gamutCuspTable.total_size; i++)
     {
-        hue_table.table[i] = gamutCuspTable.table[i][2];
+        hue_table[i] = gamutCuspTable[i][2];
     }
     return gamutCuspTable;
 }
@@ -837,8 +817,8 @@ Table1D make_reach_m_table(const JMhParams &params, const float limit_J_max)
 {
     Table1D gamutReachTable{};
 
-    for (int i = 0; i < gamutReachTable.size; i++) {
-        const float hue = base_hue_for_position(i, gamutReachTable.size);
+    for (int i = 0; i < gamutReachTable.nominal_size; i++) {
+        const float hue = gamutReachTable.base_hue_for_position(i);
 
         constexpr float search_range = 50.f;
         constexpr float search_maximum = 1300.f; // TODO: magic limit
@@ -874,7 +854,7 @@ Table1D make_reach_m_table(const JMhParams &params, const float limit_J_max)
             }
         }
 
-        gamutReachTable.table[i] = high;
+        gamutReachTable[i] = high;
     }
 
     return gamutReachTable;
@@ -1091,7 +1071,7 @@ HueDependantGamutParams init_HueDependantGamutParams(const float hue, const Reso
     hdp.gamma_bottom_inv = p.lower_hull_gamma_inv;
 
     const int i_hi = lookup_hue_interval(hue, p.hue_table, p.hue_linearity_search_range);
-    const float t = interpolation_weight(hue, p.hue_table.table[i_hi-1], p.hue_table.table[i_hi]);
+    const float t = interpolation_weight(hue, p.hue_table[i_hi-1], p.hue_table[i_hi]);
     const f3 cusp = cusp_from_table(i_hi, t, p.gamut_cusp_table);
 
     hdp.JMcusp = { cusp[0], cusp[1] };
@@ -1189,10 +1169,10 @@ void make_upper_hull_gamma(
     float lower_hull_gamma_inv,
     const JMhParams &limitJMhParams)
 {
-    for (int i = gamutCuspTable.base_index; i < gamutCuspTable.base_index + gamutCuspTable.size; i++)
+    for (int i = gamutCuspTable.first_nominal_index; i != gamutCuspTable.upper_wrap_index; ++i)
     {
-        const float hue = hue_table.table[i];
-        const f2 JMcusp = { gamutCuspTable.table[i][0], gamutCuspTable.table[i][1] };
+        const float hue = hue_table[i];
+        const f2 JMcusp = { gamutCuspTable[i][0], gamutCuspTable[i][1] };
 
         std::array<testData, gamma_test_count> data = generate_gamma_test_data(JMcusp, hue, limit_J_max, mid_J, focus_dist);
 
@@ -1236,12 +1216,12 @@ void make_upper_hull_gamma(
                 low = testGamma;
             }
         }
-        gamutCuspTable.table[i][2] = 1.0f / high;
+        gamutCuspTable[i][2] = 1.0f / high;
     }
 
     // Copy last populated entries to empty spot 'wrapping' entries
-    gamutCuspTable.table[0][2] = gamutCuspTable.table[gamutCuspTable.base_index + gamutCuspTable.size - 1][2];
-    gamutCuspTable.table[gamutCuspTable.base_index + gamutCuspTable.size][2] = gamutCuspTable.table[gamutCuspTable.base_index][2];
+    gamutCuspTable[gamutCuspTable.lower_wrap_index][2] = gamutCuspTable[gamutCuspTable.last_nominal_index][2];
+    gamutCuspTable[gamutCuspTable.upper_wrap_index][2] = gamutCuspTable[gamutCuspTable.first_nominal_index][2];
 }
 
 // Tonescale pre-calculations
@@ -1350,14 +1330,14 @@ std::array<int, 2> determine_hue_linearity_search_range(const Table3D &gamutCusp
     constexpr int lower_padding = 0;
     constexpr int upper_padding = 1;
     std::array<int, 2> hue_linearity_search_range = {lower_padding, upper_padding};
-    for (int i = gamutCuspTable.base_index; i < gamutCuspTable.base_index + gamutCuspTable.size; i++)
+    for (int i = gamutCuspTable.first_nominal_index; i != gamutCuspTable.upper_wrap_index; ++i)
     {
-        const int pos = hue_position_in_uniform_table(gamutCuspTable.table[i][2], gamutCuspTable.size) + gamutCuspTable.base_index;
+        const int pos = gamutCuspTable.nominal_hue_position_in_uniform_table(gamutCuspTable[i][2]); // TODO: compute from hue table less cache pressure?
         const int delta = i - pos;
         hue_linearity_search_range[0] = std::min(hue_linearity_search_range[0], delta + lower_padding);
         hue_linearity_search_range[1] = std::max(hue_linearity_search_range[1], delta + upper_padding);
 
-        //std::cout << i << " " << pos << " " << delta << " " << gamutCuspTable.table[i][0] << " " << gamutCuspTable.table[i][1] << " " << gamutCuspTable.table[i][2] << "\n";
+        //std::cout << i << " " << pos << " " << delta << " " << gamutCuspTable[i][0] << " " << gamutCuspTable[i][1] << " " << gamutCuspTable[i][2] << "\n";
     }
     //std::cout << "search range " << hue_linearity_search_range[0] << " " << hue_linearity_search_range[1] << "\n";
     return hue_linearity_search_range;
diff --git a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
index a65fac0de3..6b5cd75bac 100644
--- a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
@@ -423,12 +423,12 @@ std::string _Add_Reach_table(
     shaderCreator->addTexture(
         name.c_str(),
         GpuShaderText::getSamplerName(name).c_str(),
-        table.size,
+        table.nominal_size, // TODO: be careful if reach ever has a base_offset added to it
         1,
         GpuShaderCreator::TEXTURE_RED_CHANNEL,
         dimensions,
         INTERP_NEAREST,
-        &(table.table[0]));
+        &(table[0]));
 
 
     if (dimensions == GpuShaderDesc::TEXTURE_1D)
@@ -703,7 +703,7 @@ std::string _Add_Cusp_table(
         GpuShaderCreator::TEXTURE_RGB_CHANNEL,
         dimensions,
         INTERP_NEAREST,
-        &(g.gamut_cusp_table.table[0][0]));
+        &(g.gamut_cusp_table[0][0]));
 
     if (dimensions == GpuShaderDesc::TEXTURE_1D)
     {
@@ -722,7 +722,7 @@ std::string _Add_Cusp_table(
     GpuShaderText ss(shaderCreator->getLanguage());
 
     const std::string hues_array_name = name + "_hues_array";
-    ss.declareFloatArrayConst(hues_array_name, (int) g.hue_table.total_size, g.hue_table.table);
+    ss.declareFloatArrayConst(hues_array_name, (int) g.hue_table.total_size, g.hue_table.data());
 
     ss.newLine() << ss.float3Keyword() << " " << name << "_sample(float h)";
     ss.newLine() << "{";
@@ -733,10 +733,11 @@ std::string _Add_Cusp_table(
     ss.newLine() << "hwrap = (hwrap < 0.0) ? hwrap + 360.0 : hwrap;";
     ss.newLine() << ss.intDecl("i") << " = " << ss.intKeyword() << "(hwrap + " << g.gamut_cusp_table.base_index << ");";
 
-    ss.newLine() << ss.intDecl("i_lo") << " = " << ss.intKeyword() << "(max(0, "
+    ss.newLine() << ss.intDecl("i_lo") << " = " << ss.intKeyword() << "(max("
+                 << ss.floatKeyword() << "(" << g.gamut_cusp_table.lower_wrap_index << "), "
                  << ss.floatKeyword() << "(i + " << g.hue_linearity_search_range[0] << ")));";
     ss.newLine() << ss.intDecl("i_hi") << " = " << ss.intKeyword() << "(min("
-                 << ss.floatKeyword() << "(" << g.gamut_cusp_table.base_index + g.gamut_cusp_table.size << "), "
+                 << ss.floatKeyword() << "(" << g.gamut_cusp_table.upper_wrap_index << "), "
                  << ss.floatKeyword() << "(i + " << g.hue_linearity_search_range[1] << ")));";
 
     ss.newLine() << "while (i_lo + 1 < i_hi)";

From 2c69445d24c8c42af78b17873c6fe0fdfc20e108 Mon Sep 17 00:00:00 2001
From: Kevin Wheatley <kevin.wheatley@framestore.com>
Date: Mon, 27 Jan 2025 14:56:36 +0000
Subject: [PATCH 49/70] Explicitly allow GCC to perform additional
 optimisations - Needs some discussion

Signed-off-by: Kevin Wheatley <kevin.wheatley@framestore.com>
---
 src/OpenColorIO/CMakeLists.txt | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/OpenColorIO/CMakeLists.txt b/src/OpenColorIO/CMakeLists.txt
index 7d2894da6b..f77a0896c4 100755
--- a/src/OpenColorIO/CMakeLists.txt
+++ b/src/OpenColorIO/CMakeLists.txt
@@ -222,6 +222,12 @@ if(OCIO_USE_SIMD AND (OCIO_ARCH_X86 OR OCIO_USE_SSE2NEON))
     set_property(SOURCE ops/lut3d/Lut3DOpCPU_AVX512.cpp APPEND PROPERTY COMPILE_OPTIONS ${OCIO_AVX512_ARGS})
 endif()
 
+# TODO: What to do for other compilers?
+if(USE_GCC)
+    set_property(SOURCE ops/fixedfunction/ACES2/Transform.cpp APPEND PROPERTY COMPILE_OPTIONS
+                -fno-math-errno -fno-signed-zeros -fno-trapping-math -fno-signaling-nans -ffinite-math-only -freciprocal-math)
+endif()
+
 configure_file(CPUInfoConfig.h.in CPUInfoConfig.h)
 
 add_library(OpenColorIO ${SOURCES})

From fdde6525e8459874eb4888c3124a8e022f53d98f Mon Sep 17 00:00:00 2001
From: Kevin Wheatley <kevin.wheatley@framestore.com>
Date: Mon, 27 Jan 2025 17:43:26 +0000
Subject: [PATCH 50/70] Add extra entries to reach table to avoid needing to
 clamp to range during pixel processing

Signed-off-by: Kevin Wheatley <kevin.wheatley@framestore.com>
---
 .../ops/fixedfunction/ACES2/Common.h            | 13 +++++++------
 .../ops/fixedfunction/ACES2/Transform.cpp       | 10 ++++++----
 .../ops/fixedfunction/FixedFunctionOpGPU.cpp    | 17 ++++++++---------
 3 files changed, 21 insertions(+), 19 deletions(-)

diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h b/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
index 2c2576937e..2c5952585a 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
@@ -59,13 +59,19 @@ struct TableBase
 
     inline float base_hue_for_position(int i_lo) const
     {
+        if (hue_limit == float(nominal_size)) // TODO C++ 17 if constexpr
+            return float(i_lo);
+
         const float result = i_lo * hue_limit / nominal_size;
         return result;
     }
 
     inline int hue_position_in_uniform_table(float wrapped_hue) const 
     {
-        return int(wrapped_hue / hue_limit * float(nominal_size)); // TODO: can we use the 'lost' fraction for the lerps?
+        if (hue_limit == float(nominal_size)) // TODO C++ 17 if constexpr
+            return int(wrapped_hue);
+        else
+            return int(wrapped_hue / hue_limit * float(nominal_size)); // TODO: can we use the 'lost' fraction for the lerps?
     }
 
    inline int nominal_hue_position_in_uniform_table(float wrapped_hue) const 
@@ -73,11 +79,6 @@ struct TableBase
         return first_nominal_index + hue_position_in_uniform_table(wrapped_hue);
     }
 
-    inline int next_position_in_table(int entry) const
-    {
-        return (entry + 1) % nominal_size;
-    }
-
     inline int clamp_to_table_bounds(int entry) const // TODO: this should be removed if we can constrain the hue range properly
     {
         return std::min(nominal_size - 1, std::max(0, entry));
diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
index fb23d6583a..14cfef4839 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
@@ -89,10 +89,10 @@ inline f3 cusp_from_table(int i_hi, float t, const Table3D &gt)
 
 float reach_m_from_table(float h, const ACES2::Table1D &rt)
 {
-    const int i_lo = rt.clamp_to_table_bounds(rt.hue_position_in_uniform_table(h));  // TODO: this should be removed if we can constrain the hue range properly
-    const int i_hi = rt.next_position_in_table(i_lo);
+    const int i_lo = rt.nominal_hue_position_in_uniform_table(h); 
+    const int i_hi = i_lo + 1; // NOTE assumes uniform 1 degree 360 spacing 
 
-    const float t = interpolation_weight(h, i_lo, i_hi);
+    const float t = h - i_lo; // NOTE assumes uniform 1 degree 360 spacing // interpolation_weight(h, i_lo, i_hi);
     return lerpf(rt[i_lo], rt[i_hi], t);
 }
 
@@ -854,8 +854,10 @@ Table1D make_reach_m_table(const JMhParams &params, const float limit_J_max)
             }
         }
 
-        gamutReachTable[i] = high;
+        gamutReachTable[i + gamutReachTable.base_index] = high;
     }
+    gamutReachTable[gamutReachTable.lower_wrap_index] = gamutReachTable[gamutReachTable.last_nominal_index];
+    gamutReachTable[gamutReachTable.upper_wrap_index] = gamutReachTable[gamutReachTable.first_nominal_index];
 
     return gamutReachTable;
 }
diff --git a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
index 6b5cd75bac..f81adb2323 100644
--- a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
@@ -423,7 +423,7 @@ std::string _Add_Reach_table(
     shaderCreator->addTexture(
         name.c_str(),
         GpuShaderText::getSamplerName(name).c_str(),
-        table.nominal_size, // TODO: be careful if reach ever has a base_offset added to it
+        table.total_size,
         1,
         GpuShaderCreator::TEXTURE_RED_CHANNEL,
         dimensions,
@@ -455,22 +455,21 @@ std::string _Add_Reach_table(
     ss.newLine() << "hwrap = hwrap - floor(hwrap / 360.0) * 360.0;";
     ss.newLine() << "hwrap = (hwrap < 0.0) ? hwrap + 360.0 : hwrap;";
 
-    ss.newLine() << ss.floatDecl("i_lo") << " = floor(hwrap);";
-    ss.newLine() << ss.floatDecl("i_hi") << " = (i_lo + 1);";
-    ss.newLine() << "i_hi = i_hi - floor(i_hi / 360.0) * 360.0;";
+    ss.newLine() << ss.floatDecl("i_lo") << " = floor(hwrap) + " << table.base_index << ";";
+    ss.newLine() << ss.floatDecl("i_hi") << " = i_lo + 1;";
 
     if (dimensions == GpuShaderDesc::TEXTURE_1D)
     {
-        ss.newLine() << ss.floatDecl("lo") << " = " << ss.sampleTex1D(name, "(i_lo + 0.5) / 360.0") << ".r;";
-        ss.newLine() << ss.floatDecl("hi") << " = " << ss.sampleTex1D(name, "(i_hi + 0.5) / 360.0") << ".r;";
+        ss.newLine() << ss.floatDecl("lo") << " = " << ss.sampleTex1D(name, "(i_lo + 0.5) / " + std::to_string(table.total_size)) << ".r;";
+        ss.newLine() << ss.floatDecl("hi") << " = " << ss.sampleTex1D(name, "(i_hi + 0.5) / " + std::to_string(table.total_size)) << ".r;";
     }
     else
     {
-        ss.newLine() << ss.floatDecl("lo") << " = " << ss.sampleTex2D(name, ss.float2Const("(i_lo + 0.5) / 360.0", "0.0")) << ".r;";
-        ss.newLine() << ss.floatDecl("hi") << " = " << ss.sampleTex2D(name, ss.float2Const("(i_hi + 0.5) / 360.0", "0.5")) << ".r;";
+        ss.newLine() << ss.floatDecl("lo") << " = " << ss.sampleTex2D(name, ss.float2Const("(i_lo + 0.5) / " + std::to_string(table.total_size), "0.0")) << ".r;";
+        ss.newLine() << ss.floatDecl("hi") << " = " << ss.sampleTex2D(name, ss.float2Const("(i_hi + 0.5) / " + std::to_string(table.total_size), "0.5")) << ".r;";
     }
 
-    ss.newLine() << ss.floatDecl("t") << " = (h - i_lo) / (i_hi - i_lo);";
+    ss.newLine() << ss.floatDecl("t") << " = h - i_lo;"; // Hardcoded single degree spacing
     ss.newLine() << "return " << ss.lerp("lo", "hi", "t") << ";";
 
     ss.dedent();

From a40be0cf65f4a22505019b2d5ee05ff6f5304d48 Mon Sep 17 00:00:00 2001
From: Kevin Wheatley <kevin.wheatley@framestore.com>
Date: Tue, 28 Jan 2025 17:55:08 +0000
Subject: [PATCH 51/70] GPU move reach Max M sampling to avoid looking it up
 multiple times per pixel

Signed-off-by: Kevin Wheatley <kevin.wheatley@framestore.com>
---
 .../ops/fixedfunction/FixedFunctionOpGPU.cpp  | 89 +++++++++++--------
 tests/gpu/FixedFunctionOp_test.cpp            |  3 +
 2 files changed, 54 insertions(+), 38 deletions(-)

diff --git a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
index f81adb2323..5d0ec22d54 100644
--- a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
@@ -531,8 +531,7 @@ void _Add_Tonescale_Compress_Fwd_Shader(
     const ACES2::JMhParams & p,
     const ACES2::ToneScaleParams & t,
     const ACES2::SharedCompressionParameters & s,
-    const ACES2::ChromaCompressParams & c,
-    const std::string & reachName)
+    const ACES2::ChromaCompressParams & c)
 {
     std::string toeName = _Add_Toe_func(shaderCreator, resourceIndex, false);
 
@@ -581,8 +580,7 @@ void _Add_Tonescale_Compress_Fwd_Shader(
     ss.dedent();
     ss.newLine() << "}";
 
-    ss.newLine() << ss.floatDecl("reachM") << " = " << reachName << "_sample(h);";
-    ss.newLine() << ss.floatDecl("limit") << " = pow(nJ, " << s.model_gamma_inv << ") * reachM / Mnorm;";
+    ss.newLine() << ss.floatDecl("limit") << " = pow(nJ, " << s.model_gamma_inv << ") * reachMaxM / Mnorm;";
     ss.newLine() << "M_cp = M * pow(J_ts / J, " << s.model_gamma_inv << ");";
     ss.newLine() << "M_cp = M_cp / Mnorm;";
 
@@ -603,13 +601,12 @@ void _Add_Tonescale_Compress_Inv_Shader(
     const ACES2::JMhParams & p,
     const ACES2::ToneScaleParams & t,
     const ACES2::SharedCompressionParameters & s,
-    const ACES2::ChromaCompressParams & c,
-    const std::string & reachName)
+    const ACES2::ChromaCompressParams & c)
 {
     std::string toeName = _Add_Toe_func(shaderCreator, resourceIndex, true);
 
     const std::string pxl(shaderCreator->getPixelName());
-
+    
     ss.newLine() << ss.floatDecl("J_ts") << " = " << pxl << ".r;";
     ss.newLine() << ss.floatDecl("M_cp") << " = " << pxl << ".g;";
     ss.newLine() << ss.floatDecl("h") << " = " << pxl << ".b;";
@@ -654,8 +651,7 @@ void _Add_Tonescale_Compress_Inv_Shader(
     ss.dedent();
     ss.newLine() << "}";
 
-    ss.newLine() << ss.floatDecl("reachM") << " = " << reachName << "_sample(h);";
-    ss.newLine() << ss.floatDecl("limit") << " = pow(nJ, " << s.model_gamma_inv << ") * reachM / Mnorm;";
+    ss.newLine() << ss.floatDecl("limit") << " = pow(nJ, " << s.model_gamma_inv << ") * reachMaxM / Mnorm;";
 
     ss.newLine() << "M = M_cp / Mnorm;";
     ss.newLine() << "M = " << toeName << "(M, limit, nJ * " << c.compr << ", snJ);";
@@ -985,7 +981,6 @@ std::string _Add_Reach_Boundary_func(
     unsigned resourceIndex,
     const ACES2::SharedCompressionParameters & s,
     const ACES2::GamutCompressParams & g,
-    const std::string & reachName,
     const std::string & getFocusGainName,
     const std::string & solveJIntersectName)
 {
@@ -1002,11 +997,10 @@ std::string _Add_Reach_Boundary_func(
 
     GpuShaderText ss(shaderCreator->getLanguage());
 
-    ss.newLine() << ss.float3Keyword() << " " << name << "(float J, float M, float h, " << ss.float2Keyword() << " JMcusp, float focusJ)";
+    ss.newLine() << ss.float3Keyword() << " " << name << "(float J, float M, float h, " << ss.float2Keyword() << " JMcusp, float focusJ, float reachMaxM)";
     ss.newLine() << "{";
     ss.indent();
 
-    ss.newLine() << ss.floatDecl("reachMaxM") << " = " << reachName << "_sample(h);";
     ss.newLine() << ss.floatDecl("slope_gain") << " = " << s.limit_J_max << " * " << g.focus_dist << " * " << getFocusGainName << "(J, JMcusp.r);";
     ss.newLine() << ss.floatDecl("intersectJ") << " = " << solveJIntersectName << "(J, M, focusJ, slope_gain);";
 
@@ -1129,7 +1123,7 @@ std::string _Add_Compress_Gamut_func(
 
     GpuShaderText ss(shaderCreator->getLanguage());
 
-    ss.newLine() << ss.float3Keyword() << " " << name << "(" << ss.float3Keyword() << " JMh, float Jx, " << ss.float3Keyword() << " JMGcusp)";
+    ss.newLine() << ss.float3Keyword() << " " << name << "(" << ss.float3Keyword() << " JMh, float Jx, " << ss.float3Keyword() << " JMGcusp, float reachMaxM)";
     ss.newLine() << "{";
     ss.indent();
 
@@ -1167,7 +1161,7 @@ std::string _Add_Compress_Gamut_func(
     ss.dedent();
     ss.newLine() << "}";
 
-    ss.newLine() << ss.float3Decl("reachBoundary") << " = " << getReachBoundaryName << "(JMboundary.r, JMboundary.g, h, JMcusp, focusJ);";
+    ss.newLine() << ss.float3Decl("reachBoundary") << " = " << getReachBoundaryName << "(JMboundary.r, JMboundary.g, h, JMcusp, focusJ, reachMaxM);";
 
     ss.newLine() << ss.floatDecl("difference") << " = max(1.0001, reachBoundary.g / JMboundary.g);";
     ss.newLine() << ss.floatDecl("threshold") << " = max(" << ACES2::compression_threshold << ", 1.0 / difference);";
@@ -1198,21 +1192,20 @@ void _Add_Gamut_Compress_Fwd_Shader(
     GpuShaderText & ss,
     unsigned int resourceIndex,
     const ACES2::SharedCompressionParameters & s,
-    const ACES2::GamutCompressParams & g,
-    const std::string & reachName)
+    const ACES2::GamutCompressParams & g)
 {
     std::string cuspName = _Add_Cusp_table(shaderCreator, resourceIndex, g);
     std::string getFocusGainName = _Add_Focus_Gain_func(shaderCreator, resourceIndex, s);
     std::string solveJIntersectName = _Add_Solve_J_Intersect_func(shaderCreator, resourceIndex, s);
     std::string findGamutBoundaryIntersectionName = _Add_Find_Gamut_Boundary_Intersection_func(shaderCreator, resourceIndex, s, solveJIntersectName);
-    std::string getReachBoundaryName = _Add_Reach_Boundary_func(shaderCreator, resourceIndex, s, g, reachName, getFocusGainName, solveJIntersectName);
+    std::string getReachBoundaryName = _Add_Reach_Boundary_func(shaderCreator, resourceIndex, s, g, getFocusGainName, solveJIntersectName);
     std::string compressionName = _Add_Compression_func(shaderCreator, resourceIndex, false);
     std::string gamutCompressName = _Add_Compress_Gamut_func(shaderCreator, resourceIndex, s, g, getFocusGainName, findGamutBoundaryIntersectionName, getReachBoundaryName, compressionName);
 
     const std::string pxl(shaderCreator->getPixelName());
 
     ss.newLine() << ss.float3Decl("JMGcusp") << " = " << cuspName << "_sample(" << pxl << ".b);";
-    ss.newLine() << pxl << ".rgb = " << gamutCompressName << "(" << pxl << ".rgb, " << pxl << ".r, JMGcusp);";
+    ss.newLine() << pxl << ".rgb = " << gamutCompressName << "(" << pxl << ".rgb, " << pxl << ".r, JMGcusp, reachMaxM);";
 }
 
 void _Add_Gamut_Compress_Inv_Shader(
@@ -1220,20 +1213,20 @@ void _Add_Gamut_Compress_Inv_Shader(
     GpuShaderText & ss,
     unsigned int resourceIndex,
     const ACES2::SharedCompressionParameters & s,
-    const ACES2::GamutCompressParams & g,
-    const std::string & reachName)
+    const ACES2::GamutCompressParams & g)
 {
     std::string cuspName = _Add_Cusp_table(shaderCreator, resourceIndex, g);
     std::string getFocusGainName = _Add_Focus_Gain_func(shaderCreator, resourceIndex, s);
     std::string solveJIntersectName = _Add_Solve_J_Intersect_func(shaderCreator, resourceIndex, s);
     std::string findGamutBoundaryIntersectionName = _Add_Find_Gamut_Boundary_Intersection_func(shaderCreator, resourceIndex, s, solveJIntersectName);
-    std::string getReachBoundaryName = _Add_Reach_Boundary_func(shaderCreator, resourceIndex, s, g, reachName, getFocusGainName, solveJIntersectName);
+    std::string getReachBoundaryName = _Add_Reach_Boundary_func(shaderCreator, resourceIndex, s, g, getFocusGainName, solveJIntersectName);
     std::string compressionName = _Add_Compression_func(shaderCreator, resourceIndex, true);
     std::string gamutCompressName = _Add_Compress_Gamut_func(shaderCreator, resourceIndex, s, g, getFocusGainName, findGamutBoundaryIntersectionName, getReachBoundaryName, compressionName);
 
     const std::string pxl(shaderCreator->getPixelName());
 
     ss.newLine() << ss.float3Decl("JMGcusp") << " = " << cuspName << "_sample(" << pxl << ".b);";
+
     ss.newLine() << ss.floatDecl("Jx") << " = " << pxl << ".r;";
     ss.newLine() << ss.float3Decl("unCompressedJMh") << ";";
 
@@ -1241,15 +1234,15 @@ void _Add_Gamut_Compress_Inv_Shader(
     ss.newLine() << "if (Jx <= " << ss.lerp("JMGcusp.r", std::to_string(s.limit_J_max), std::to_string(ACES2::focus_gain_blend)) << ")";
     ss.newLine() << "{";
     ss.indent();
-    ss.newLine() << "unCompressedJMh = " << gamutCompressName << "(" << pxl << ".rgb, Jx, JMGcusp);";
+    ss.newLine() << "unCompressedJMh = " << gamutCompressName << "(" << pxl << ".rgb, Jx, JMGcusp, reachMaxM);";
     ss.dedent();
     ss.newLine() << "}";
     // Approximation above threshold
     ss.newLine() << "else";
     ss.newLine() << "{";
     ss.indent();
-    ss.newLine() << "Jx = " << gamutCompressName << "(" << pxl << ".rgb, Jx, JMGcusp).r;";
-    ss.newLine() << "unCompressedJMh = " << gamutCompressName << "(" << pxl << ".rgb, Jx, JMGcusp);";
+    ss.newLine() << "Jx = " << gamutCompressName << "(" << pxl << ".rgb, Jx, JMGcusp, reachMaxM).r;";
+    ss.newLine() << "unCompressedJMh = " << gamutCompressName << "(" << pxl << ".rgb, Jx, JMGcusp, reachMaxM);";
     ss.dedent();
     ss.newLine() << "}";
 
@@ -1289,7 +1282,8 @@ void Add_ACES_OutputTransform_Fwd_Shader(
 
     unsigned resourceIndex = shaderCreator->getNextResourceIndex();
 
-    std::string reachName = _Add_Reach_table(shaderCreator, resourceIndex, s.reach_m_table);
+    const std::string reachName = _Add_Reach_table(shaderCreator, resourceIndex, s.reach_m_table);
+    const std::string pxl(shaderCreator->getPixelName());
 
     ss.newLine() << "";
     ss.newLine() << "// Add RGB to JMh";
@@ -1303,9 +1297,14 @@ void Add_ACES_OutputTransform_Fwd_Shader(
     ss.newLine() << "";
     ss.newLine() << "// Add ToneScale and ChromaCompress (fwd)";
     ss.newLine() << "";
+
+    ss.newLine() << "// Sample tables (fwd)";
+    ss.newLine() << ss.floatDecl("reachMaxM") << " = " << reachName << "_sample(" << pxl << ".b);";
+    ss.newLine() << "";
+
     ss.newLine() << "{";
     ss.indent();
-        _Add_Tonescale_Compress_Fwd_Shader(shaderCreator, ss, resourceIndex, pIn, t, s, c, reachName);
+        _Add_Tonescale_Compress_Fwd_Shader(shaderCreator, ss, resourceIndex, pIn, t, s, c);
     ss.dedent();
     ss.newLine() << "}";
 
@@ -1314,7 +1313,7 @@ void Add_ACES_OutputTransform_Fwd_Shader(
     ss.newLine() << "";
     ss.newLine() << "{";
     ss.indent();
-        _Add_Gamut_Compress_Fwd_Shader(shaderCreator, ss, resourceIndex, s, g, reachName);
+        _Add_Gamut_Compress_Fwd_Shader(shaderCreator, ss, resourceIndex, s, g);
     ss.dedent();
     ss.newLine() << "}";
 
@@ -1361,7 +1360,8 @@ void Add_ACES_OutputTransform_Inv_Shader(
     const ACES2::GamutCompressParams g = ACES2::init_GamutCompressParams(peak_luminance, pIn, pLim, t, s, reachGamut);
 
     unsigned resourceIndex = shaderCreator->getNextResourceIndex();
-
+    const std::string pxl(shaderCreator->getPixelName());
+    
     std::string reachName = _Add_Reach_table(shaderCreator, resourceIndex, s.reach_m_table);
 
     ss.newLine() << "";
@@ -1373,12 +1373,13 @@ void Add_ACES_OutputTransform_Inv_Shader(
     ss.dedent();
     ss.newLine() << "}";
 
+    ss.newLine() << ss.floatDecl("reachMaxM") << " = " << reachName << "_sample(" << pxl << ".b);";
     ss.newLine() << "";
     ss.newLine() << "// Add GamutCompress (inv)";
     ss.newLine() << "";
     ss.newLine() << "{";
     ss.indent();
-        _Add_Gamut_Compress_Inv_Shader(shaderCreator, ss, resourceIndex, s, g, reachName);
+        _Add_Gamut_Compress_Inv_Shader(shaderCreator, ss, resourceIndex, s, g);
     ss.dedent();
     ss.newLine() << "}";
 
@@ -1387,7 +1388,7 @@ void Add_ACES_OutputTransform_Inv_Shader(
     ss.newLine() << "";
     ss.newLine() << "{";
     ss.indent();
-        _Add_Tonescale_Compress_Inv_Shader(shaderCreator, ss, resourceIndex, pIn, t, s, c, reachName);
+        _Add_Tonescale_Compress_Inv_Shader(shaderCreator, ss, resourceIndex, pIn, t, s, c);
     ss.dedent();
     ss.newLine() << "}";
 
@@ -1466,10 +1467,13 @@ void Add_Tonescale_Compress_Fwd_Shader(
     const ACES2::ChromaCompressParams c = ACES2::init_ChromaCompressParams(peak_luminance, t);
 
     unsigned resourceIndex = shaderCreator->getNextResourceIndex();
+    const std::string pxl(shaderCreator->getPixelName());
 
-    std::string reachName = _Add_Reach_table(shaderCreator, resourceIndex, s.reach_m_table);
+    const std::string reachName = _Add_Reach_table(shaderCreator, resourceIndex, s.reach_m_table);
+
+    ss.newLine() << ss.floatDecl("reachMaxM") << " = " << reachName << "_sample(" << pxl << ".b);";
 
-    _Add_Tonescale_Compress_Fwd_Shader(shaderCreator, ss, resourceIndex, p, t, s, c, reachName);
+    _Add_Tonescale_Compress_Fwd_Shader(shaderCreator, ss, resourceIndex, p, t, s, c);
 }
 
 void Add_Tonescale_Compress_Inv_Shader(
@@ -1486,10 +1490,13 @@ void Add_Tonescale_Compress_Inv_Shader(
     const ACES2::ChromaCompressParams c = ACES2::init_ChromaCompressParams(peak_luminance, t);
 
     unsigned resourceIndex = shaderCreator->getNextResourceIndex();
+    const std::string pxl(shaderCreator->getPixelName());
 
-    std::string reachName = _Add_Reach_table(shaderCreator, resourceIndex, s.reach_m_table);
+    const std::string reachName = _Add_Reach_table(shaderCreator, resourceIndex, s.reach_m_table);
+
+    ss.newLine() << ss.floatDecl("reachMaxM") << " = " << reachName << "_sample(" << pxl << ".b);";
 
-    _Add_Tonescale_Compress_Inv_Shader(shaderCreator, ss, resourceIndex, p, t, s, c, reachName);
+    _Add_Tonescale_Compress_Inv_Shader(shaderCreator, ss, resourceIndex, p, t, s, c);
 }
 
 void Add_Gamut_Compress_Fwd_Shader(
@@ -1523,10 +1530,13 @@ void Add_Gamut_Compress_Fwd_Shader(
     const ACES2::GamutCompressParams g = ACES2::init_GamutCompressParams(peak_luminance, pIn, pLim, t, s, reachGamut); 
 
     unsigned resourceIndex = shaderCreator->getNextResourceIndex();
+    const std::string pxl(shaderCreator->getPixelName());
 
-    std::string reachName = _Add_Reach_table(shaderCreator, resourceIndex, s.reach_m_table);
+    const std::string reachName = _Add_Reach_table(shaderCreator, resourceIndex, s.reach_m_table);
+
+    ss.newLine() << ss.floatDecl("reachMaxM") << " = " << reachName << "_sample(" << pxl << ".b);";
 
-    _Add_Gamut_Compress_Fwd_Shader(shaderCreator, ss, resourceIndex, s, g, reachName);
+    _Add_Gamut_Compress_Fwd_Shader(shaderCreator, ss, resourceIndex, s, g);
 }
 
 void Add_Gamut_Compress_Inv_Shader(
@@ -1560,10 +1570,13 @@ void Add_Gamut_Compress_Inv_Shader(
     const ACES2::GamutCompressParams g = ACES2::init_GamutCompressParams(peak_luminance, pIn, pLim, t, s, reachGamut);
 
     unsigned resourceIndex = shaderCreator->getNextResourceIndex();
+    const std::string pxl(shaderCreator->getPixelName());
 
-    std::string reachName = _Add_Reach_table(shaderCreator, resourceIndex, s.reach_m_table);
+    const std::string reachName = _Add_Reach_table(shaderCreator, resourceIndex, s.reach_m_table);
+
+    ss.newLine() << ss.floatDecl("reachMaxM") << " = " << reachName << "_sample(" << pxl << ".b);";
 
-    _Add_Gamut_Compress_Inv_Shader(shaderCreator, ss, resourceIndex, s, g, reachName);
+    _Add_Gamut_Compress_Inv_Shader(shaderCreator, ss, resourceIndex, s, g);
 }
 
 void Add_Surround_10_Fwd_Shader(GpuShaderCreatorRcPtr & shaderCreator, GpuShaderText & ss, float gamma)
diff --git a/tests/gpu/FixedFunctionOp_test.cpp b/tests/gpu/FixedFunctionOp_test.cpp
index a7e77bc2d2..2c5d7cade4 100644
--- a/tests/gpu/FixedFunctionOp_test.cpp
+++ b/tests/gpu/FixedFunctionOp_test.cpp
@@ -458,6 +458,9 @@ OCIO_ADD_GPU_TEST(FixedFunction, style_aces2_output_transform_invfwd)
     grp->appendTransform(func_inv);
     grp->appendTransform(func_fwd);
 
+    test.setTestNaN(false); // TODO: Partially running the output transform without the clamp so do not test Nan or Inf values
+    test.setTestInfinity(false);
+
     test.setProcessor(grp);
 
     test.setErrorThreshold(7e-4f);

From a77c9f28edcb87bfccf349b52a3fd66a3bf46cb7 Mon Sep 17 00:00:00 2001
From: Kevin Wheatley <kevin.wheatley@framestore.com>
Date: Tue, 28 Jan 2025 19:58:47 +0000
Subject: [PATCH 52/70] Remove smoothing from GPU path, it is baked into the
 cusp

Signed-off-by: Kevin Wheatley <kevin.wheatley@framestore.com>
---
 src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
index 5d0ec22d54..ae5c64b2a8 100644
--- a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
@@ -924,12 +924,10 @@ std::string _Add_Find_Gamut_Boundary_Intersection_func(
 
     GpuShaderText ss(shaderCreator->getLanguage());
 
-    ss.newLine() << ss.float3Keyword() << " " << name << "(" << ss.float3Keyword() << " JMh_s, " << ss.float2Keyword() << " JM_cusp_in, float J_focus, float slope_gain, float gamma_top_inv, float gamma_bottom_inv)";
+    ss.newLine() << ss.float3Keyword() << " " << name << "(" << ss.float3Keyword() << " JMh_s, " << ss.float2Keyword() << " JM_cusp, float J_focus, float slope_gain, float gamma_top_inv, float gamma_bottom_inv)";
     ss.newLine() << "{";
     ss.indent();
 
-    ss.newLine() << ss.float2Decl("JM_cusp") << " = " << ss.float2Const("JM_cusp_in.r", std::string("JM_cusp_in.g * (1.0 + ") + std::to_string(ACES2::smooth_m) + " * " + std::to_string(ACES2::smooth_cusps) + ")") << ";";
-
     ss.newLine() << ss.floatDecl("J_intersect_source") << " = " << solveJIntersectName << "(JMh_s.r, JMh_s.g, J_focus, slope_gain);";
     ss.newLine() << ss.floatDecl("J_intersect_cusp") << " = " << solveJIntersectName << "(JM_cusp.r, JM_cusp.g, J_focus, slope_gain);";
 

From f4d0641ad39e50be9ad7cde20dc54051efb99c66 Mon Sep 17 00:00:00 2001
From: Kevin Wheatley <kevin.wheatley@framestore.com>
Date: Wed, 29 Jan 2025 13:54:13 +0000
Subject: [PATCH 53/70] Fix bug with reach lookup

Signed-off-by: Kevin Wheatley <kevin.wheatley@framestore.com>
---
 src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp    | 5 +++--
 src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp | 5 +++--
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
index 14cfef4839..9eeb29199a 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
@@ -89,10 +89,11 @@ inline f3 cusp_from_table(int i_hi, float t, const Table3D &gt)
 
 float reach_m_from_table(float h, const ACES2::Table1D &rt)
 {
-    const int i_lo = rt.nominal_hue_position_in_uniform_table(h); 
+    const int base = rt.hue_position_in_uniform_table(h);
+    const float t = h - base; // NOTE assumes uniform 1 degree 360 spacing
+    const int i_lo = base + rt.first_nominal_index; 
     const int i_hi = i_lo + 1; // NOTE assumes uniform 1 degree 360 spacing 
 
-    const float t = h - i_lo; // NOTE assumes uniform 1 degree 360 spacing // interpolation_weight(h, i_lo, i_hi);
     return lerpf(rt[i_lo], rt[i_hi], t);
 }
 
diff --git a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
index ae5c64b2a8..129375f8ba 100644
--- a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
@@ -455,7 +455,8 @@ std::string _Add_Reach_table(
     ss.newLine() << "hwrap = hwrap - floor(hwrap / 360.0) * 360.0;";
     ss.newLine() << "hwrap = (hwrap < 0.0) ? hwrap + 360.0 : hwrap;";
 
-    ss.newLine() << ss.floatDecl("i_lo") << " = floor(hwrap) + " << table.base_index << ";";
+    ss.newLine() << ss.floatDecl("i_base") << " = floor(hwrap);";
+    ss.newLine() << ss.floatDecl("i_lo") << " = i_base + " << table.base_index << ";";
     ss.newLine() << ss.floatDecl("i_hi") << " = i_lo + 1;";
 
     if (dimensions == GpuShaderDesc::TEXTURE_1D)
@@ -469,7 +470,7 @@ std::string _Add_Reach_table(
         ss.newLine() << ss.floatDecl("hi") << " = " << ss.sampleTex2D(name, ss.float2Const("(i_hi + 0.5) / " + std::to_string(table.total_size), "0.5")) << ".r;";
     }
 
-    ss.newLine() << ss.floatDecl("t") << " = h - i_lo;"; // Hardcoded single degree spacing
+    ss.newLine() << ss.floatDecl("t") << " = h - i_base;"; // Hardcoded single degree spacing
     ss.newLine() << "return " << ss.lerp("lo", "hi", "t") << ";";
 
     ss.dedent();

From caf7b6c7ba8e3be7083ca62a08911da01f642ba2 Mon Sep 17 00:00:00 2001
From: Kevin Wheatley <kevin.wheatley@framestore.com>
Date: Wed, 29 Jan 2025 15:16:18 +0000
Subject: [PATCH 54/70] Try only wrap hues on input to the shaders

Signed-off-by: Kevin Wheatley <kevin.wheatley@framestore.com>
---
 .../ops/fixedfunction/FixedFunctionOpGPU.cpp  | 28 ++++++++++++-------
 1 file changed, 18 insertions(+), 10 deletions(-)

diff --git a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
index 129375f8ba..59283f10d5 100644
--- a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
@@ -349,6 +349,17 @@ void Add_GamutComp_13_Inv_Shader(GpuShaderText & ss,
     );
 }
 
+void _Add_WrapHueChannel_Shader(
+    GpuShaderCreatorRcPtr & shaderCreator,
+    GpuShaderText & ss)
+{
+    const std::string pxl(shaderCreator->getPixelName());
+    ss.newLine() << ss.floatDecl("hwrap") << " = " << pxl << ".b;";
+    ss.newLine() << "hwrap = hwrap - floor(hwrap / 360.0) * 360.0;";
+    ss.newLine() << "hwrap = (hwrap < 0.0) ? hwrap + 360.0 : hwrap;";
+    ss.newLine() << pxl << ".b = hwrap;";
+}
+
 void _Add_RGB_to_JMh_Shader(
     GpuShaderCreatorRcPtr & shaderCreator,
     GpuShaderText & ss,
@@ -451,11 +462,7 @@ std::string _Add_Reach_table(
     ss.newLine() << "{";
     ss.indent();
 
-    ss.newLine() << ss.floatDecl("hwrap") << " = h;";
-    ss.newLine() << "hwrap = hwrap - floor(hwrap / 360.0) * 360.0;";
-    ss.newLine() << "hwrap = (hwrap < 0.0) ? hwrap + 360.0 : hwrap;";
-
-    ss.newLine() << ss.floatDecl("i_base") << " = floor(hwrap);";
+    ss.newLine() << ss.floatDecl("i_base") << " = floor(h);";
     ss.newLine() << ss.floatDecl("i_lo") << " = i_base + " << table.base_index << ";";
     ss.newLine() << ss.floatDecl("i_hi") << " = i_lo + 1;";
 
@@ -724,10 +731,7 @@ std::string _Add_Cusp_table(
     ss.newLine() << "{";
     ss.indent();
 
-    ss.newLine() << ss.floatDecl("hwrap") << " = h;";
-    ss.newLine() << "hwrap = hwrap - floor(hwrap / 360.0) * 360.0;";
-    ss.newLine() << "hwrap = (hwrap < 0.0) ? hwrap + 360.0 : hwrap;";
-    ss.newLine() << ss.intDecl("i") << " = " << ss.intKeyword() << "(hwrap + " << g.gamut_cusp_table.base_index << ");";
+    ss.newLine() << ss.intDecl("i") << " = " << ss.intKeyword() << "(h + " << g.gamut_cusp_table.base_index << ");";
 
     ss.newLine() << ss.intDecl("i_lo") << " = " << ss.intKeyword() << "(max("
                  << ss.floatKeyword() << "(" << g.gamut_cusp_table.lower_wrap_index << "), "
@@ -1448,7 +1452,7 @@ void Add_JMh_to_RGB_Shader(
     };
 
     ACES2::JMhParams p = ACES2::init_JMhParams(primaries);
-
+    _Add_WrapHueChannel_Shader(shaderCreator, ss);
     _Add_JMh_to_RGB_Shader(shaderCreator, ss, p);
 }
 
@@ -1470,6 +1474,7 @@ void Add_Tonescale_Compress_Fwd_Shader(
 
     const std::string reachName = _Add_Reach_table(shaderCreator, resourceIndex, s.reach_m_table);
 
+    _Add_WrapHueChannel_Shader(shaderCreator, ss);
     ss.newLine() << ss.floatDecl("reachMaxM") << " = " << reachName << "_sample(" << pxl << ".b);";
 
     _Add_Tonescale_Compress_Fwd_Shader(shaderCreator, ss, resourceIndex, p, t, s, c);
@@ -1493,6 +1498,7 @@ void Add_Tonescale_Compress_Inv_Shader(
 
     const std::string reachName = _Add_Reach_table(shaderCreator, resourceIndex, s.reach_m_table);
 
+    _Add_WrapHueChannel_Shader(shaderCreator, ss);
     ss.newLine() << ss.floatDecl("reachMaxM") << " = " << reachName << "_sample(" << pxl << ".b);";
 
     _Add_Tonescale_Compress_Inv_Shader(shaderCreator, ss, resourceIndex, p, t, s, c);
@@ -1533,6 +1539,7 @@ void Add_Gamut_Compress_Fwd_Shader(
 
     const std::string reachName = _Add_Reach_table(shaderCreator, resourceIndex, s.reach_m_table);
 
+    _Add_WrapHueChannel_Shader(shaderCreator, ss);
     ss.newLine() << ss.floatDecl("reachMaxM") << " = " << reachName << "_sample(" << pxl << ".b);";
 
     _Add_Gamut_Compress_Fwd_Shader(shaderCreator, ss, resourceIndex, s, g);
@@ -1573,6 +1580,7 @@ void Add_Gamut_Compress_Inv_Shader(
 
     const std::string reachName = _Add_Reach_table(shaderCreator, resourceIndex, s.reach_m_table);
 
+    _Add_WrapHueChannel_Shader(shaderCreator, ss);
     ss.newLine() << ss.floatDecl("reachMaxM") << " = " << reachName << "_sample(" << pxl << ".b);";
 
     _Add_Gamut_Compress_Inv_Shader(shaderCreator, ss, resourceIndex, s, g);

From a4f45962c1ab35da9d4c7f35955af7d6a017ee57 Mon Sep 17 00:00:00 2001
From: Kevin Wheatley <kevin.wheatley@framestore.com>
Date: Fri, 31 Jan 2025 12:09:38 +0000
Subject: [PATCH 55/70] Rework GPU gamut compressor to follow the same
 algorithm as CPU. Not 100% the same: GPU still recalculates some values

Signed-off-by: Kevin Wheatley <kevin.wheatley@framestore.com>
---
 .../ops/fixedfunction/FixedFunctionOpGPU.cpp  | 115 +++++++++---------
 1 file changed, 57 insertions(+), 58 deletions(-)

diff --git a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
index 59283f10d5..63b540f6bd 100644
--- a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
@@ -913,8 +913,7 @@ std::string _Add_Solve_J_Intersect_func(
 std::string _Add_Find_Gamut_Boundary_Intersection_func(
     GpuShaderCreatorRcPtr & shaderCreator,
     unsigned resourceIndex,
-    const ACES2::SharedCompressionParameters & s,
-    const std::string & solveJIntersectName)
+    const ACES2::SharedCompressionParameters & s)
 {
     // Reserve name
     std::ostringstream resName;
@@ -929,13 +928,11 @@ std::string _Add_Find_Gamut_Boundary_Intersection_func(
 
     GpuShaderText ss(shaderCreator->getLanguage());
 
-    ss.newLine() << ss.float3Keyword() << " " << name << "(" << ss.float3Keyword() << " JMh_s, " << ss.float2Keyword() << " JM_cusp, float J_focus, float slope_gain, float gamma_top_inv, float gamma_bottom_inv)";
+    ss.newLine() << ss.floatKeyword() << " " << name << "(" << ss.float3Keyword() << " JMh_s, " << ss.float2Keyword()
+                 << " JM_cusp, float J_focus, float slope_gain, float gamma_top_inv, float gamma_bottom_inv, float J_intersect_source, float J_intersect_cusp)";
     ss.newLine() << "{";
     ss.indent();
 
-    ss.newLine() << ss.floatDecl("J_intersect_source") << " = " << solveJIntersectName << "(JMh_s.r, JMh_s.g, J_focus, slope_gain);";
-    ss.newLine() << ss.floatDecl("J_intersect_cusp") << " = " << solveJIntersectName << "(JM_cusp.r, JM_cusp.g, J_focus, slope_gain);";
-
     ss.newLine() << ss.floatDecl("slope") << " = 0.0;";
     ss.newLine() << "if (J_intersect_source < J_focus)";
     ss.newLine() << "{";
@@ -950,15 +947,15 @@ std::string _Add_Find_Gamut_Boundary_Intersection_func(
     ss.dedent();
     ss.newLine() << "}";
 
-    ss.newLine() << ss.floatDecl("M_boundary_lower ") << " = J_intersect_cusp * pow(J_intersect_source / J_intersect_cusp, gamma_bottom_inv) / (JM_cusp.r / JM_cusp.g - slope);";
+    ss.newLine() << ss.floatDecl("M_boundary_lower") << " = J_intersect_cusp * pow(J_intersect_source / J_intersect_cusp, gamma_bottom_inv) / (JM_cusp.r / JM_cusp.g - slope);";
     ss.newLine() << ss.floatDecl("M_boundary_upper") << " = JM_cusp.g * (" << s.limit_J_max << " - J_intersect_cusp) * pow((" << s.limit_J_max << " - J_intersect_source) / (" << s.limit_J_max << " - J_intersect_cusp), gamma_top_inv) / (slope * JM_cusp.g + " << s.limit_J_max << " - JM_cusp.r);";
 
     ss.newLine() << ss.floatDecl("smin") << " = 0.0;";
     ss.newLine() << "{";
     ss.indent();
-    ss.newLine() << ss.floatDecl("a") << " = M_boundary_lower / JM_cusp.g;";
-    ss.newLine() << ss.floatDecl("b") << " = M_boundary_upper / JM_cusp.g;";
-    ss.newLine() << ss.floatDecl("s") << " = " << ACES2::smooth_cusps << ";";
+    ss.newLine() << ss.floatDecl("a") << " = M_boundary_lower;";
+    ss.newLine() << ss.floatDecl("b") << " = M_boundary_upper;";
+    ss.newLine() << ss.floatDecl("s") << " = " << ACES2::smooth_cusps << " * JM_cusp.g;";
 
     ss.newLine() << ss.floatDecl("h") << " = max(s - abs(a - b), 0.0) / s;";
     ss.newLine() << "smin = min(a, b) - h * h * h * s * (1.0 / 6.0);";
@@ -966,10 +963,7 @@ std::string _Add_Find_Gamut_Boundary_Intersection_func(
     ss.dedent();
     ss.newLine() << "}";
 
-    ss.newLine() << ss.floatDecl("M_boundary") << " = JM_cusp.g * smin;";
-    ss.newLine() << ss.floatDecl("J_boundary") << "= J_intersect_source + slope * M_boundary;";
-
-    ss.newLine() << "return " << ss.float3Const("J_boundary", "M_boundary", "J_intersect_source") << ";";
+    ss.newLine() << "return smin;";
 
     ss.dedent();
     ss.newLine() << "}";
@@ -1042,7 +1036,7 @@ std::string _Add_Compression_func(
     std::ostringstream resName;
     resName << shaderCreator->getResourcePrefix()
             << std::string("_")
-            << std::string("compression")
+            << std::string("remap_M")
             << (invert ? std::string("_inv") : std::string("_fwd"))
             << resourceIndex;
 
@@ -1052,49 +1046,47 @@ std::string _Add_Compression_func(
 
     GpuShaderText ss(shaderCreator->getLanguage());
 
-    ss.newLine() << ss.floatKeyword() << " " << name << "(float v, float thr, float lim)";
+    ss.newLine() << ss.floatKeyword() << " " << name << "(float M, float gamut_boundary_M, float reach_boundary_M)";
     ss.newLine() << "{";
     ss.indent();
 
-    ss.newLine() << ss.floatDecl("s") << " = (lim - thr) * (1.0 - thr) / (lim - 1.0);";
-    ss.newLine() << ss.floatDecl("nd") << " = (v - thr) / s;";
+    ss.newLine() << ss.floatDecl("boundary_ratio") << " = gamut_boundary_M / reach_boundary_M;";
+    ss.newLine() << ss.floatDecl("proportion") << " = max(boundary_ratio, " << ACES2::compression_threshold << ");";
+    ss.newLine() << ss.floatDecl("threshold") << " = proportion * gamut_boundary_M;";
 
+    ss.newLine() << "if (proportion >= 1.0f || M <= threshold)";
+    ss.newLine() << "{";
+    ss.indent();
+    ss.newLine() << "return M;";
+    ss.dedent();
+    ss.newLine() << "}";
+    ss.newLine() << ss.floatDecl("m_offset") << " = M - threshold;";
+    ss.newLine() << ss.floatDecl("gamut_offset") << " = gamut_boundary_M - threshold;";
+    ss.newLine() << ss.floatDecl("reach_offset") << " = reach_boundary_M - threshold;";
 
-    ss.newLine() << ss.floatDecl("vCompressed") << " = 0.0;";
+    ss.newLine() << ss.floatDecl("scale") << " = reach_offset / ((reach_offset / gamut_offset) - 1.0f);";
+    ss.newLine() << ss.floatDecl("nd") << " = m_offset / scale;";
 
     if (invert)
     {
-        ss.newLine() << "if (v < thr || lim <= 1.0001 || v > thr + s)";
+        ss.newLine() << "if (nd >= 1.0f)"; // TODO: could be done branchless?
         ss.newLine() << "{";
         ss.indent();
-        ss.newLine() << "vCompressed = v;";
+        ss.newLine() << "return threshold + scale;";
         ss.dedent();
         ss.newLine() << "}";
         ss.newLine() << "else";
         ss.newLine() << "{";
         ss.indent();
-        ss.newLine() << "vCompressed = thr + s * (-nd / (nd - 1));";
+        ss.newLine() << "return threshold + scale * -(nd / (nd - 1.0f));";
         ss.dedent();
         ss.newLine() << "}";
     }
     else
     {
-        ss.newLine() << "if (v < thr || lim <= 1.0001)";
-        ss.newLine() << "{";
-        ss.indent();
-        ss.newLine() << "vCompressed = v;";
-        ss.dedent();
-        ss.newLine() << "}";
-        ss.newLine() << "else";
-        ss.newLine() << "{";
-        ss.indent();
-        ss.newLine() << "vCompressed = thr + s * nd / (1.0 + nd);";
-        ss.dedent();
-        ss.newLine() << "}";
+        ss.newLine() << "return threshold + scale * nd / (1.0f + nd);";
     }
 
-    ss.newLine() << "return vCompressed;";
-
     ss.dedent();
     ss.newLine() << "}";
 
@@ -1111,7 +1103,8 @@ std::string _Add_Compress_Gamut_func(
     const std::string & getFocusGainName,
     const std::string & findGamutBoundaryIntersectionName,
     const std::string & getReachBoundaryName,
-    const std::string & compressionName)
+    const std::string & compressionName,
+    const std::string & solveJIntersectName)
 {
     // Reserve name
     std::ostringstream resName;
@@ -1134,7 +1127,7 @@ std::string _Add_Compress_Gamut_func(
     ss.newLine() << ss.floatDecl("M") << " = JMh.g;";
     ss.newLine() << ss.floatDecl("h") << " = JMh.b;";
 
-    ss.newLine() << "if (M < 0.0001 || J > " << s.limit_J_max << ")";
+    ss.newLine() << "if (M <= 0.0 || J > " << s.limit_J_max << ")";
     ss.newLine() << "{";
     ss.indent();
     ss.newLine() << "return " << ss.float3Const("J", "0.0", "h") << ";";
@@ -1144,40 +1137,46 @@ std::string _Add_Compress_Gamut_func(
     ss.newLine() << "{";
     ss.indent();
 
-    ss.newLine() << ss.float2Decl("project_from") << " = " << ss.float2Const("J", "M") << ";";
     ss.newLine() << ss.float2Decl("JMcusp") << " = JMGcusp.rg;";
 
     ss.newLine() << ss.floatDecl("focusJ") << " = " << ss.lerp("JMcusp.r", std::to_string(g.mid_J), std::string("min(1.0, ") + std::to_string(ACES2::cusp_mid_blend) + " - (JMcusp.r / " + std::to_string(s.limit_J_max)) << "));";
     ss.newLine() << ss.floatDecl("slope_gain") << " = " << s.limit_J_max << " * " << g.focus_dist << " * " << getFocusGainName << "(Jx, JMcusp.r);";
+    ss.newLine() << ss.floatDecl("J_intersect_source") << " = " << solveJIntersectName << "(JMh.r, JMh.g, focusJ, slope_gain);";
+    ss.newLine() << ss.floatDecl("gamut_slope") << " = 0.0;";
+    ss.newLine() << "if (J_intersect_source < focusJ)";
+    ss.newLine() << "{";
+    ss.indent();
+    ss.newLine() << "gamut_slope = J_intersect_source * (J_intersect_source - focusJ) / (focusJ * slope_gain);";
+    ss.dedent();
+    ss.newLine() << "}";
+    ss.newLine() << "else";
+    ss.newLine() << "{";
+    ss.indent();
+    ss.newLine() << "gamut_slope = (" << s.limit_J_max << " - J_intersect_source) * (J_intersect_source - focusJ) / (focusJ * slope_gain);";
+    ss.dedent();
+    ss.newLine() << "}";
 
     ss.newLine() << ss.floatDecl("gamma_top_inv") << " = JMGcusp.b;";
     ss.newLine() << ss.floatDecl("gamma_bottom_inv") << " = " << g.lower_hull_gamma_inv << ";";
 
-    ss.newLine() << ss.float3Decl("boundaryReturn") << " = " << findGamutBoundaryIntersectionName << "(" << ss.float3Const("J", "M", "h") << ", JMcusp, focusJ, slope_gain, gamma_top_inv, gamma_bottom_inv);";
-    ss.newLine() << ss.float2Decl("JMboundary") << " = " << ss.float2Const("boundaryReturn.r", "boundaryReturn.g") << ";";
-    ss.newLine() << ss.float2Decl("project_to") << " = " << ss.float2Const("boundaryReturn.b", "0.0") << ";";
+    ss.newLine() << ss.floatDecl("J_intersect_cusp") << " = " << solveJIntersectName << "(JMcusp.r, JMcusp.g, focusJ, slope_gain);";
+    ss.newLine() << ss.floatDecl("gamutBoundaryM") << " = " << findGamutBoundaryIntersectionName << "(JMh, JMcusp, focusJ, slope_gain, gamma_top_inv, gamma_bottom_inv, J_intersect_source, J_intersect_cusp);";
 
-    ss.newLine() << "if (JMboundary.g <= 0.0)";
+    ss.newLine() << "if (gamutBoundaryM <= 0.0)";
     ss.newLine() << "{";
     ss.indent();
     ss.newLine() << "return " << ss.float3Const("J", "0.0", "h") << ";";
     ss.dedent();
     ss.newLine() << "}";
 
-    ss.newLine() << ss.float3Decl("reachBoundary") << " = " << getReachBoundaryName << "(JMboundary.r, JMboundary.g, h, JMcusp, focusJ, reachMaxM);";
-
-    ss.newLine() << ss.floatDecl("difference") << " = max(1.0001, reachBoundary.g / JMboundary.g);";
-    ss.newLine() << ss.floatDecl("threshold") << " = max(" << ACES2::compression_threshold << ", 1.0 / difference);";
+    ss.newLine() << ss.floatDecl("reachBoundaryM") << " = " << s.limit_J_max << " * pow(J_intersect_source / " << s.limit_J_max << ",  " << s.model_gamma_inv << ");";
+    ss.newLine() << "reachBoundaryM = reachBoundaryM / ((" << s.limit_J_max << " / reachMaxM) - gamut_slope);";
 
-    ss.newLine() << ss.floatDecl("v") << " = project_from.g / JMboundary.g;";
-    ss.newLine() << "v = " << compressionName << "(v, threshold, difference);";
+    ss.newLine() << ss.floatDecl("remapped_M") << " = " << compressionName << "(M, gamutBoundaryM, reachBoundaryM);";
+    ss.newLine() << ss.floatDecl("remapped_J") << " = J_intersect_source + remapped_M * gamut_slope;";
 
-    ss.newLine() << ss.float2Decl("JMcompressed") << " = " << ss.float2Const(
-        "project_to.r + v * (JMboundary.r - project_to.r)",
-        "project_to.g + v * (JMboundary.g - project_to.g)"
-    ) << ";";
 
-    ss.newLine() << "return " << ss.float3Const("JMcompressed.r", "JMcompressed.g", "h") << ";";
+    ss.newLine() << "return " << ss.float3Const("remapped_J", "remapped_M", "h") << ";";
 
     ss.dedent();
     ss.newLine() << "}";
@@ -1200,10 +1199,10 @@ void _Add_Gamut_Compress_Fwd_Shader(
     std::string cuspName = _Add_Cusp_table(shaderCreator, resourceIndex, g);
     std::string getFocusGainName = _Add_Focus_Gain_func(shaderCreator, resourceIndex, s);
     std::string solveJIntersectName = _Add_Solve_J_Intersect_func(shaderCreator, resourceIndex, s);
-    std::string findGamutBoundaryIntersectionName = _Add_Find_Gamut_Boundary_Intersection_func(shaderCreator, resourceIndex, s, solveJIntersectName);
+    std::string findGamutBoundaryIntersectionName = _Add_Find_Gamut_Boundary_Intersection_func(shaderCreator, resourceIndex, s);
     std::string getReachBoundaryName = _Add_Reach_Boundary_func(shaderCreator, resourceIndex, s, g, getFocusGainName, solveJIntersectName);
     std::string compressionName = _Add_Compression_func(shaderCreator, resourceIndex, false);
-    std::string gamutCompressName = _Add_Compress_Gamut_func(shaderCreator, resourceIndex, s, g, getFocusGainName, findGamutBoundaryIntersectionName, getReachBoundaryName, compressionName);
+    std::string gamutCompressName = _Add_Compress_Gamut_func(shaderCreator, resourceIndex, s, g, getFocusGainName, findGamutBoundaryIntersectionName, getReachBoundaryName, compressionName, solveJIntersectName);
 
     const std::string pxl(shaderCreator->getPixelName());
 
@@ -1221,10 +1220,10 @@ void _Add_Gamut_Compress_Inv_Shader(
     std::string cuspName = _Add_Cusp_table(shaderCreator, resourceIndex, g);
     std::string getFocusGainName = _Add_Focus_Gain_func(shaderCreator, resourceIndex, s);
     std::string solveJIntersectName = _Add_Solve_J_Intersect_func(shaderCreator, resourceIndex, s);
-    std::string findGamutBoundaryIntersectionName = _Add_Find_Gamut_Boundary_Intersection_func(shaderCreator, resourceIndex, s, solveJIntersectName);
+    std::string findGamutBoundaryIntersectionName = _Add_Find_Gamut_Boundary_Intersection_func(shaderCreator, resourceIndex, s);
     std::string getReachBoundaryName = _Add_Reach_Boundary_func(shaderCreator, resourceIndex, s, g, getFocusGainName, solveJIntersectName);
     std::string compressionName = _Add_Compression_func(shaderCreator, resourceIndex, true);
-    std::string gamutCompressName = _Add_Compress_Gamut_func(shaderCreator, resourceIndex, s, g, getFocusGainName, findGamutBoundaryIntersectionName, getReachBoundaryName, compressionName);
+    std::string gamutCompressName = _Add_Compress_Gamut_func(shaderCreator, resourceIndex, s, g, getFocusGainName, findGamutBoundaryIntersectionName, getReachBoundaryName, compressionName, solveJIntersectName);
 
     const std::string pxl(shaderCreator->getPixelName());
 

From fd981f8b72ed80548a7a509cb5cac9b47550e889 Mon Sep 17 00:00:00 2001
From: Kevin Wheatley <kevin.wheatley@framestore.com>
Date: Fri, 31 Jan 2025 17:34:36 +0000
Subject: [PATCH 56/70] Rework solve_J_intersect to have fewer div instructions

Signed-off-by: Kevin Wheatley <kevin.wheatley@framestore.com>
---
 .../ops/fixedfunction/ACES2/Transform.cpp     | 40 ++++++-------------
 1 file changed, 12 insertions(+), 28 deletions(-)

diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
index 9eeb29199a..0e89489c46 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
@@ -897,41 +897,25 @@ inline float get_focus_gain(float J, float cuspJ, float limit_J_max, float focus
 
 float solve_J_intersect(float J, float M, float focusJ, float maxJ, float slope_gain)
 {
-    const float a = M / (focusJ * slope_gain);
-    float b = 0.f;
-    float c = 0.f;
-    float intersectJ = 0.f;
+    const float M_scaled = M / slope_gain;
+    const float a = M_scaled / focusJ;
 
     if (J < focusJ)
     {
-        b = 1.f - M / slope_gain;
+        const float b = 1.f - M_scaled;
+        const float c = -J;
+        const float det = b * b - 4.f * a * c;
+        const float root = sqrt(det);
+        return -2.f * c / (b + root);
     }
     else
     {
-        b = - (1.f + M / slope_gain + maxJ * M / (focusJ * slope_gain));
+        const float b = -(1.f + M_scaled + maxJ * a);
+        const float c = maxJ * M_scaled + J;
+        const float det = b * b - 4.f * a * c;
+        const float root = sqrt(det);
+        return -2.f * c / (b - root);
     }
-
-    if (J < focusJ)
-    {
-        c = -J;
-    }
-    else
-    {
-        c = maxJ * M / slope_gain + J;
-    }
-
-    const float root = sqrt(b * b - 4.f * a * c);
-
-    if (J < focusJ)
-    {
-        intersectJ = 2.f * c / (-b - root);
-    }
-    else
-    {
-        intersectJ = 2.f * c / (-b + root);
-    }
-
-    return intersectJ;
 }
 
 // Smooth minimum about the scaled reference, based upon a cubic polynomial

From 903a98d104b7edee543a7d919ed8cf086f07ab0f Mon Sep 17 00:00:00 2001
From: Kevin Wheatley <kevin.wheatley@framestore.com>
Date: Fri, 31 Jan 2025 17:36:45 +0000
Subject: [PATCH 57/70] Adjust GPU code to better align with the CPU code's
 structure; some additional precomputation is now applied during shader
 generation

Signed-off-by: Kevin Wheatley <kevin.wheatley@framestore.com>
---
 .../ops/fixedfunction/FixedFunctionOpGPU.cpp  | 175 ++++--------------
 1 file changed, 34 insertions(+), 141 deletions(-)

diff --git a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
index 63b540f6bd..5ebd9fa008 100644
--- a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
@@ -378,7 +378,7 @@ void _Add_RGB_to_JMh_Shader(
 
     ss.newLine() << ss.floatDecl("M") << " = (J == 0.0) ? 0.0 : sqrt(Aab.g * Aab.g + Aab.b * Aab.b);";
 
-    ss.newLine() << ss.floatDecl("h") << " = (Aab.g == 0.0) ? 0.0 : " << ss.atan2("Aab.b", "Aab.g") << " * 180.0 / 3.14159265358979;";
+    ss.newLine() << ss.floatDecl("h") << " = (Aab.g == 0.0) ? 0.0 : " << ss.atan2("Aab.b", "Aab.g") << " * " << 180.0 / 3.14159265358979 << ";";
     ss.newLine() << "h = h - floor(h / 360.0) * 360.0;";
     ss.newLine() << "h = (h < 0.0) ? h + 360.0 : h;";
 
@@ -392,10 +392,10 @@ void _Add_JMh_to_RGB_Shader(
 {
     const std::string pxl(shaderCreator->getPixelName());
 
-    ss.newLine() << ss.floatDecl("h") << " = " << pxl << ".b * 3.14159265358979 / 180.0;";
+    ss.newLine() << ss.floatDecl("h") << " = " << pxl << ".b * " << 3.14159265358979 / 180.0 << ";";
 
     ss.newLine() << ss.float3Decl("Aab") << ";";
-    ss.newLine() << "Aab.r = pow(" << pxl << ".r / 100.0, 1.0 / (" << p.cz << "));";
+    ss.newLine() << "Aab.r = pow(" << pxl << ".r / 100.0, " << 1.0 / p.cz << ");";
     ss.newLine() << "Aab.g = " << pxl << ".g * cos(h);";
     ss.newLine() << "Aab.b = " << pxl << ".g * sin(h);";
 
@@ -550,8 +550,8 @@ void _Add_Tonescale_Compress_Fwd_Shader(
     ss.newLine() << ss.floatDecl("h") << " = " << pxl << ".b;";
 
     // Tonescale applied in Y (convert to and from J)
-    ss.newLine() << ss.floatDecl("A") << " = " << p.A_w_J << " * pow(abs(J) / 100.0, 1.0 / (" << p.cz << "));";
-    ss.newLine() << ss.floatDecl("Y") << " = sign(J) * pow(( " << ACES2::cam_nl_offset << " * A) / ( " << ACES2::cam_nl_scale << " - A), 1.0 / 0.42) / " << p.F_L_n << ";";
+    ss.newLine() << ss.floatDecl("A") << " = " << p.A_w_J << " * pow(abs(J) / 100.0, " << 1.0 / p.cz << ");";
+    ss.newLine() << ss.floatDecl("Y") << " = sign(J) * pow(( " << ACES2::cam_nl_offset << " * A) / ( " << ACES2::cam_nl_scale << " - A), " << 1.0 / 0.42 << ") / " << p.F_L_n << ";";
 
     ss.newLine() << ss.floatDecl("f") << " = " << t.m_2  << " * pow(max(0.0, Y) / (Y + " << t.s_2 << "), " << t.g << ");";
     ss.newLine() << ss.floatDecl("Y_ts") << " = max(0.0, f * f / (f + " << t.t_1 << ")) * " << t.n_r << ";";
@@ -574,8 +574,7 @@ void _Add_Tonescale_Compress_Fwd_Shader(
     ss.newLine() << "{";
     ss.indent();
 
-    ss.newLine() << ss.floatDecl("PI") << " = 3.14159265358979;";
-    ss.newLine() << ss.floatDecl("h_rad") << " = h / 180.0 * PI;";
+    ss.newLine() << ss.floatDecl("h_rad") << " = h * " << 3.14159265358979f / 180.0f << ";";
     ss.newLine() << ss.floatDecl("a") << " = cos(h_rad);";
     ss.newLine() << ss.floatDecl("b") << " = sin(h_rad);";
     ss.newLine() << ss.floatDecl("cos_hr2") << " = a * a - b * b;";
@@ -583,7 +582,7 @@ void _Add_Tonescale_Compress_Fwd_Shader(
     ss.newLine() << ss.floatDecl("cos_hr3") << " = 4.0 * a * a * a - 3.0 * a;";
     ss.newLine() << ss.floatDecl("sin_hr3") << " = 3.0 * b - 4.0 * b * b * b;";
     ss.newLine() << ss.floatDecl("M") << " = 11.34072 * a + 16.46899 * cos_hr2 + 7.88380 * cos_hr3 + 14.66441 * b + -6.37224 * sin_hr2 + 9.19364 * sin_hr3 + 77.12896;";
-    ss.newLine() << "Mnorm = M * " << c.chroma_compress_scale << ";";
+    ss.newLine() << "Mnorm = M * " << c.chroma_compress_scale << ";"; // TODO precompute
 
     ss.dedent();
     ss.newLine() << "}";
@@ -620,8 +619,8 @@ void _Add_Tonescale_Compress_Inv_Shader(
     ss.newLine() << ss.floatDecl("h") << " = " << pxl << ".b;";
 
     // Inverse Tonescale applied in Y (convert to and from J)
-    ss.newLine() << ss.floatDecl("A") << " = " << p.A_w_J << " * pow(abs(J_ts) / 100.0, 1.0 / (" << p.cz << "));";
-    ss.newLine() << ss.floatDecl("Y_ts") << " = sign(J_ts) * pow(( " << ACES2::cam_nl_offset << " * A) / ( " << ACES2::cam_nl_scale << " - A), 1.0 / 0.42) / " << p.F_L_n << " / 100.0;";
+    ss.newLine() << ss.floatDecl("A") << " = " << p.A_w_J << " * pow(abs(J_ts) / 100.0, " << 1.0f / p.cz << ");";
+    ss.newLine() << ss.floatDecl("Y_ts") << " = sign(J_ts) * pow(( " << ACES2::cam_nl_offset << " * A) / ( " << ACES2::cam_nl_scale << " - A), " << 1.0 / 0.42 << ") / " << p.F_L_n << " / 100.0;";
 
     ss.newLine() << ss.floatDecl("Z") << " = max(0.0, min(" << t.n << " / (" << t.u_2 * t.n_r << "), Y_ts));";
     ss.newLine() << ss.floatDecl("ht") << " = (Z + sqrt(Z * (4.0 * " << t.t_1 << " + Z))) / 2.0;";
@@ -645,8 +644,7 @@ void _Add_Tonescale_Compress_Inv_Shader(
     ss.newLine() << "{";
     ss.indent();
 
-    ss.newLine() << ss.floatDecl("PI") << " = 3.14159265358979;";
-    ss.newLine() << ss.floatDecl("h_rad") << " = h / 180.0 * PI;";
+    ss.newLine() << ss.floatDecl("h_rad") << " = h * " << 3.14159265358979 / 180.0 << ";";
     ss.newLine() << ss.floatDecl("a") << " = cos(h_rad);";
     ss.newLine() << ss.floatDecl("b") << " = sin(h_rad);";
     ss.newLine() << ss.floatDecl("cos_hr2") << " = a * a - b * b;";
@@ -814,7 +812,7 @@ std::string _Add_Focus_Gain_func(
     ss.newLine() << "{";
     ss.indent();
     ss.newLine() << ss.floatDecl("gain") << " = ( " << s.limit_J_max << " - thr) / max(0.0001, (" << s.limit_J_max << " - min(" << s.limit_J_max << ", J)));";
-    ss.newLine() << "return pow(log(gain)/log(10.0)," << ACES2::focus_adjust_gain_inv << ") + 1.0;";
+    ss.newLine() << "return pow(log(gain)/log(10.0), " << ACES2::focus_adjust_gain_inv << ") + 1.0;";
     ss.dedent();
     ss.newLine() << "}";
     ss.newLine() << "else";
@@ -854,54 +852,30 @@ std::string _Add_Solve_J_Intersect_func(
     ss.newLine() << "{";
     ss.indent();
 
-    ss.newLine() << ss.floatDecl("a") << " = " << "M / (focusJ * slope_gain);";
-    ss.newLine() << ss.floatDecl("b") << " = 0.0;";
-    ss.newLine() << ss.floatDecl("c") << " = 0.0;";
-    ss.newLine() << ss.floatDecl("intersectJ") << " = 0.0;";
-
-    ss.newLine() << "if (J < focusJ)";
-    ss.newLine() << "{";
-    ss.indent();
-    ss.newLine() << "b = 1.0 - M / slope_gain;";
-    ss.dedent();
-    ss.newLine() << "}";
-    ss.newLine() << "else";
-    ss.newLine() << "{";
-    ss.indent();
-    ss.newLine() << "b = - (1.0 + M / slope_gain + " << s.limit_J_max << " * M / (focusJ * slope_gain));";
-    ss.dedent();
-    ss.newLine() << "}";
-
-    ss.newLine() << "if (J < focusJ)";
-    ss.newLine() << "{";
-    ss.indent();
-    ss.newLine() << "c = -J;";
-    ss.dedent();
-    ss.newLine() << "}";
-    ss.newLine() << "else";
-    ss.newLine() << "{";
-    ss.indent();
-    ss.newLine() << "c = " << s.limit_J_max << " * M / slope_gain + J;";
-    ss.dedent();
-    ss.newLine() << "}";
-
-    ss.newLine() << ss.floatDecl("root") << " = sqrt(b * b - 4.0 * a * c);";
+    ss.newLine() << ss.floatDecl("M_scaled") << " = M / slope_gain;";
+    ss.newLine() << ss.floatDecl("a") << " = M_scaled / focusJ;";
 
     ss.newLine() << "if (J < focusJ)";
     ss.newLine() << "{";
     ss.indent();
-    ss.newLine() << "intersectJ = 2.0 * c / (-b - root);";
+    ss.newLine() << ss.floatDecl("b") << " = 1.0 - M_scaled;";
+    ss.newLine() << ss.floatDecl("c") << " = -J;";
+    ss.newLine() << ss.floatDecl("det") << " =  b * b - 4.f * a * c;";
+    ss.newLine() << ss.floatDecl("root") << " =  sqrt(det);";
+    ss.newLine() << "return -2.0 * c / (b + root);";
     ss.dedent();
     ss.newLine() << "}";
     ss.newLine() << "else";
     ss.newLine() << "{";
     ss.indent();
-    ss.newLine() << "intersectJ = 2.0 * c / (-b + root);";
+    ss.newLine() << ss.floatDecl("b") << " = - (1.0 + M_scaled + " << s.limit_J_max << " * a);";
+    ss.newLine() << ss.floatDecl("c") << " = " << s.limit_J_max << " * M_scaled + J;";
+    ss.newLine() << ss.floatDecl("det") << " =  b * b - 4.f * a * c;";
+    ss.newLine() << ss.floatDecl("root") << " =  sqrt(det);";
+    ss.newLine() << "return -2.0 * c / (b - root);";
     ss.dedent();
     ss.newLine() << "}";
 
-    ss.newLine() << "return intersectJ;";
-
     ss.dedent();
     ss.newLine() << "}";
 
@@ -928,24 +902,10 @@ std::string _Add_Find_Gamut_Boundary_Intersection_func(
 
     GpuShaderText ss(shaderCreator->getLanguage());
 
-    ss.newLine() << ss.floatKeyword() << " " << name << "(" << ss.float3Keyword() << " JMh_s, " << ss.float2Keyword()
-                 << " JM_cusp, float J_focus, float slope_gain, float gamma_top_inv, float gamma_bottom_inv, float J_intersect_source, float J_intersect_cusp)";
-    ss.newLine() << "{";
-    ss.indent();
-
-    ss.newLine() << ss.floatDecl("slope") << " = 0.0;";
-    ss.newLine() << "if (J_intersect_source < J_focus)";
-    ss.newLine() << "{";
-    ss.indent();
-    ss.newLine() << "slope = J_intersect_source * (J_intersect_source - J_focus) / (J_focus * slope_gain);";
-    ss.dedent();
-    ss.newLine() << "}";
-    ss.newLine() << "else";
+    ss.newLine() << ss.floatKeyword() << " " << name << "(" << ss.float2Keyword()
+                 << " JM_cusp, float gamma_top_inv, float gamma_bottom_inv, float J_intersect_source, float J_intersect_cusp, float slope)";
     ss.newLine() << "{";
     ss.indent();
-    ss.newLine() << "slope = (" << s.limit_J_max << " - J_intersect_source) * (J_intersect_source - J_focus) / (J_focus * slope_gain);";
-    ss.dedent();
-    ss.newLine() << "}";
 
     ss.newLine() << ss.floatDecl("M_boundary_lower") << " = J_intersect_cusp * pow(J_intersect_source / J_intersect_cusp, gamma_bottom_inv) / (JM_cusp.r / JM_cusp.g - slope);";
     ss.newLine() << ss.floatDecl("M_boundary_upper") << " = JM_cusp.g * (" << s.limit_J_max << " - J_intersect_cusp) * pow((" << s.limit_J_max << " - J_intersect_source) / (" << s.limit_J_max << " - J_intersect_cusp), gamma_top_inv) / (slope * JM_cusp.g + " << s.limit_J_max << " - JM_cusp.r);";
@@ -958,7 +918,7 @@ std::string _Add_Find_Gamut_Boundary_Intersection_func(
     ss.newLine() << ss.floatDecl("s") << " = " << ACES2::smooth_cusps << " * JM_cusp.g;";
 
     ss.newLine() << ss.floatDecl("h") << " = max(s - abs(a - b), 0.0) / s;";
-    ss.newLine() << "smin = min(a, b) - h * h * h * s * (1.0 / 6.0);";
+    ss.newLine() << "smin = min(a, b) - h * h * h * s * " << (1.0 / 6.0) << ";";
 
     ss.dedent();
     ss.newLine() << "}";
@@ -973,60 +933,6 @@ std::string _Add_Find_Gamut_Boundary_Intersection_func(
     return name;
 }
 
-std::string _Add_Reach_Boundary_func(
-    GpuShaderCreatorRcPtr & shaderCreator,
-    unsigned resourceIndex,
-    const ACES2::SharedCompressionParameters & s,
-    const ACES2::GamutCompressParams & g,
-    const std::string & getFocusGainName,
-    const std::string & solveJIntersectName)
-{
-    // Reserve name
-    std::ostringstream resName;
-    resName << shaderCreator->getResourcePrefix()
-            << std::string("_")
-            << std::string("get_reach_boundary")
-            << resourceIndex;
-
-    // Note: Remove potentially problematic double underscores from GLSL resource names.
-    std::string name(resName.str());
-    StringUtils::ReplaceInPlace(name, "__", "_");
-
-    GpuShaderText ss(shaderCreator->getLanguage());
-
-    ss.newLine() << ss.float3Keyword() << " " << name << "(float J, float M, float h, " << ss.float2Keyword() << " JMcusp, float focusJ, float reachMaxM)";
-    ss.newLine() << "{";
-    ss.indent();
-
-    ss.newLine() << ss.floatDecl("slope_gain") << " = " << s.limit_J_max << " * " << g.focus_dist << " * " << getFocusGainName << "(J, JMcusp.r);";
-    ss.newLine() << ss.floatDecl("intersectJ") << " = " << solveJIntersectName << "(J, M, focusJ, slope_gain);";
-
-    ss.newLine() << ss.floatDecl("slope") << " = 0.0;";
-    ss.newLine() << "if (intersectJ < focusJ)";
-    ss.newLine() << "{";
-    ss.indent();
-    ss.newLine() << "slope = intersectJ * (intersectJ - focusJ) / (focusJ * slope_gain);";
-    ss.dedent();
-    ss.newLine() << "}";
-    ss.newLine() << "else";
-    ss.newLine() << "{";
-    ss.indent();
-    ss.newLine() << "slope = (" << s.limit_J_max << " - intersectJ) * (intersectJ - focusJ) / (focusJ * slope_gain);";
-    ss.dedent();
-    ss.newLine() << "}";
-
-    ss.newLine() << ss.floatDecl("boundary") << " = " << s.limit_J_max << " * pow(intersectJ / " << s.limit_J_max << ", " << s.model_gamma_inv << ") * reachMaxM / (" << s.limit_J_max << " - slope * reachMaxM);";
-
-    ss.newLine() << "return " << ss.float3Const("J", "boundary", "h") << ";";
-
-    ss.dedent();
-    ss.newLine() << "}";
-
-    shaderCreator->addToHelperShaderCode(ss.string().c_str());
-
-    return name;
-}
-
 std::string _Add_Compression_func(
     GpuShaderCreatorRcPtr & shaderCreator,
     unsigned resourceIndex,
@@ -1102,7 +1008,6 @@ std::string _Add_Compress_Gamut_func(
     const ACES2::GamutCompressParams & g,
     const std::string & getFocusGainName,
     const std::string & findGamutBoundaryIntersectionName,
-    const std::string & getReachBoundaryName,
     const std::string & compressionName,
     const std::string & solveJIntersectName)
 {
@@ -1140,27 +1045,17 @@ std::string _Add_Compress_Gamut_func(
     ss.newLine() << ss.float2Decl("JMcusp") << " = JMGcusp.rg;";
 
     ss.newLine() << ss.floatDecl("focusJ") << " = " << ss.lerp("JMcusp.r", std::to_string(g.mid_J), std::string("min(1.0, ") + std::to_string(ACES2::cusp_mid_blend) + " - (JMcusp.r / " + std::to_string(s.limit_J_max)) << "));";
-    ss.newLine() << ss.floatDecl("slope_gain") << " = " << s.limit_J_max << " * " << g.focus_dist << " * " << getFocusGainName << "(Jx, JMcusp.r);";
+    ss.newLine() << ss.floatDecl("slope_gain") << " = " << s.limit_J_max * g.focus_dist << " * " << getFocusGainName << "(Jx, JMcusp.r);";
     ss.newLine() << ss.floatDecl("J_intersect_source") << " = " << solveJIntersectName << "(JMh.r, JMh.g, focusJ, slope_gain);";
-    ss.newLine() << ss.floatDecl("gamut_slope") << " = 0.0;";
-    ss.newLine() << "if (J_intersect_source < focusJ)";
-    ss.newLine() << "{";
-    ss.indent();
-    ss.newLine() << "gamut_slope = J_intersect_source * (J_intersect_source - focusJ) / (focusJ * slope_gain);";
-    ss.dedent();
-    ss.newLine() << "}";
-    ss.newLine() << "else";
-    ss.newLine() << "{";
-    ss.indent();
-    ss.newLine() << "gamut_slope = (" << s.limit_J_max << " - J_intersect_source) * (J_intersect_source - focusJ) / (focusJ * slope_gain);";
-    ss.dedent();
-    ss.newLine() << "}";
+    ss.newLine() << ss.floatDecl("gamut_slope") << " = (J_intersect_source < focusJ) ? J_intersect_source : (" << s.limit_J_max << " - J_intersect_source);";
+    ss.newLine() << "gamut_slope = gamut_slope * (J_intersect_source - focusJ) / (focusJ * slope_gain);";
 
     ss.newLine() << ss.floatDecl("gamma_top_inv") << " = JMGcusp.b;";
-    ss.newLine() << ss.floatDecl("gamma_bottom_inv") << " = " << g.lower_hull_gamma_inv << ";";
+    ss.newLine() << ss.floatDecl("gamma_bottom_inv") << " = " << g.lower_hull_gamma_inv << ";"; // TODO move to where it is used
 
     ss.newLine() << ss.floatDecl("J_intersect_cusp") << " = " << solveJIntersectName << "(JMcusp.r, JMcusp.g, focusJ, slope_gain);";
-    ss.newLine() << ss.floatDecl("gamutBoundaryM") << " = " << findGamutBoundaryIntersectionName << "(JMh, JMcusp, focusJ, slope_gain, gamma_top_inv, gamma_bottom_inv, J_intersect_source, J_intersect_cusp);";
+    ss.newLine() << ss.floatDecl("gamutBoundaryM") << " = " << findGamutBoundaryIntersectionName
+                 << "(JMcusp, gamma_top_inv, gamma_bottom_inv, J_intersect_source, J_intersect_cusp, gamut_slope);";
 
     ss.newLine() << "if (gamutBoundaryM <= 0.0)";
     ss.newLine() << "{";
@@ -1200,9 +1095,8 @@ void _Add_Gamut_Compress_Fwd_Shader(
     std::string getFocusGainName = _Add_Focus_Gain_func(shaderCreator, resourceIndex, s);
     std::string solveJIntersectName = _Add_Solve_J_Intersect_func(shaderCreator, resourceIndex, s);
     std::string findGamutBoundaryIntersectionName = _Add_Find_Gamut_Boundary_Intersection_func(shaderCreator, resourceIndex, s);
-    std::string getReachBoundaryName = _Add_Reach_Boundary_func(shaderCreator, resourceIndex, s, g, getFocusGainName, solveJIntersectName);
     std::string compressionName = _Add_Compression_func(shaderCreator, resourceIndex, false);
-    std::string gamutCompressName = _Add_Compress_Gamut_func(shaderCreator, resourceIndex, s, g, getFocusGainName, findGamutBoundaryIntersectionName, getReachBoundaryName, compressionName, solveJIntersectName);
+    std::string gamutCompressName = _Add_Compress_Gamut_func(shaderCreator, resourceIndex, s, g, getFocusGainName, findGamutBoundaryIntersectionName, compressionName, solveJIntersectName);
 
     const std::string pxl(shaderCreator->getPixelName());
 
@@ -1221,9 +1115,8 @@ void _Add_Gamut_Compress_Inv_Shader(
     std::string getFocusGainName = _Add_Focus_Gain_func(shaderCreator, resourceIndex, s);
     std::string solveJIntersectName = _Add_Solve_J_Intersect_func(shaderCreator, resourceIndex, s);
     std::string findGamutBoundaryIntersectionName = _Add_Find_Gamut_Boundary_Intersection_func(shaderCreator, resourceIndex, s);
-    std::string getReachBoundaryName = _Add_Reach_Boundary_func(shaderCreator, resourceIndex, s, g, getFocusGainName, solveJIntersectName);
     std::string compressionName = _Add_Compression_func(shaderCreator, resourceIndex, true);
-    std::string gamutCompressName = _Add_Compress_Gamut_func(shaderCreator, resourceIndex, s, g, getFocusGainName, findGamutBoundaryIntersectionName, getReachBoundaryName, compressionName, solveJIntersectName);
+    std::string gamutCompressName = _Add_Compress_Gamut_func(shaderCreator, resourceIndex, s, g, getFocusGainName, findGamutBoundaryIntersectionName, compressionName, solveJIntersectName);
 
     const std::string pxl(shaderCreator->getPixelName());
 

From 8213ba31d70104ac08c1285a366f6245326f0027 Mon Sep 17 00:00:00 2001
From: Kevin Wheatley <kevin.wheatley@framestore.com>
Date: Mon, 3 Feb 2025 18:43:02 +0000
Subject: [PATCH 58/70] Precompute more scaling factors into matrices and
 nonlinear functions

Signed-off-by: Kevin Wheatley <kevin.wheatley@framestore.com>
---
 .../ops/fixedfunction/ACES2/Transform.cpp     | 73 +++++++--------
 .../ops/fixedfunction/FixedFunctionOpGPU.cpp  | 88 +++++++++----------
 2 files changed, 80 insertions(+), 81 deletions(-)

diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
index 0e89489c46..1ae89f1c62 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
@@ -101,34 +101,34 @@ float reach_m_from_table(float h, const ACES2::Table1D &rt)
 // CAM
 //
 
-inline float _post_adaptation_cone_response_compression_fwd(float Rc, const float F_L_n)
+inline float _post_adaptation_cone_response_compression_fwd(float Rc)
 {
-    const float F_L_Y = powf(Rc * F_L_n, 0.42f);
-    const float Ra    = (cam_nl_scale * F_L_Y) / (cam_nl_offset + F_L_Y);
+    const float F_L_Y = powf(Rc, 0.42f);
+    const float Ra    = (F_L_Y) / (cam_nl_offset + F_L_Y);
     return Ra;
 }
 
-inline float _post_adaptation_cone_response_compression_inv(float Ra, const float F_L_n)
+inline float _post_adaptation_cone_response_compression_inv(float Ra)
 {
-    const float F_L_Y = (cam_nl_offset * Ra) / (cam_nl_scale - Ra); // TODO: what happens when Ra >= cam_nl_scale (400.0f)
-    const float Rc    = powf(F_L_Y, 1.f / 0.42f) / F_L_n;
+    const float F_L_Y = (cam_nl_offset * Ra) / (1.0f - Ra); // TODO: what happens when Ra >= 1.0
+    const float Rc    = powf(F_L_Y, 1.f / 0.42f);
     return Rc;
 }
 
 
-float post_adaptation_cone_response_compression_fwd(float v, float F_L)
+float post_adaptation_cone_response_compression_fwd(float v)
 {
     const float abs_v = std::abs(v);
-    const float Ra =  _post_adaptation_cone_response_compression_fwd(abs_v, F_L);
+    const float Ra =  _post_adaptation_cone_response_compression_fwd(abs_v);
     // Note that std::copysign(1.f, 0.f) returns 1 but the CTL copysign(1.,0.) returns 0.
     // TODO: Should we change the behaviour?
     return std::copysign(Ra, v);
 }
 
-float post_adaptation_cone_response_compression_inv(float v, float F_L)
+float post_adaptation_cone_response_compression_inv(float v)
 {
     const float abs_v = std::abs(v);
-    const float Rc    = _post_adaptation_cone_response_compression_inv(abs_v, F_L);
+    const float Rc    = _post_adaptation_cone_response_compression_inv(abs_v);
     return std::copysign(Rc, v);
 }
 
@@ -147,13 +147,13 @@ inline float J_to_Achromatic_n(float J, const float cz)
 inline float _J_to_Y(float abs_J, const JMhParams &p)
 {
     const float Ra = p.A_w_J * J_to_Achromatic_n(abs_J, p.cz);
-    const float Y  = _post_adaptation_cone_response_compression_inv(Ra, p.F_L_n);
+    const float Y  = _post_adaptation_cone_response_compression_inv(Ra) / p.F_L_n;
     return Y;
 }
 inline float _Y_to_J(float abs_Y, const JMhParams &p)
 {
-    const float Ra    = _post_adaptation_cone_response_compression_fwd(abs_Y, p.F_L_n);
-    const float J     = Achromatic_n_to_J(Ra / p.A_w_J, p.cz);
+    const float Ra = _post_adaptation_cone_response_compression_fwd(abs_Y * p.F_L_n);
+    const float J  = Achromatic_n_to_J(Ra / p.A_w_J, p.cz);
     return J;
 }
 
@@ -169,9 +169,9 @@ inline f3 RGB_to_Aab(const f3 &RGB, const JMhParams &p)
     const f3 rgb_m = mult_f3_f33(RGB, p.MATRIX_RGB_to_CAM16_c);
 
     const f3 rgb_a = {
-        post_adaptation_cone_response_compression_fwd(rgb_m[0], p.F_L_n),
-        post_adaptation_cone_response_compression_fwd(rgb_m[1], p.F_L_n),
-        post_adaptation_cone_response_compression_fwd(rgb_m[2], p.F_L_n)
+        post_adaptation_cone_response_compression_fwd(rgb_m[0]),
+        post_adaptation_cone_response_compression_fwd(rgb_m[1]),
+        post_adaptation_cone_response_compression_fwd(rgb_m[2])
     };
 
     const f3 Aab = mult_f3_f33(rgb_a, p.MATRIX_cone_response_to_Aab);
@@ -218,9 +218,9 @@ inline f3 Aab_to_RGB(const f3 &Aab, const JMhParams &p)
     const f3 rgb_a = mult_f3_f33(Aab, p.MATRIX_Aab_to_cone_response);
 
     const f3 rgb_m = {
-        post_adaptation_cone_response_compression_inv(rgb_a[0], p.F_L_n),
-        post_adaptation_cone_response_compression_inv(rgb_a[1], p.F_L_n),
-        post_adaptation_cone_response_compression_inv(rgb_a[2], p.F_L_n)
+        post_adaptation_cone_response_compression_inv(rgb_a[0]),
+        post_adaptation_cone_response_compression_inv(rgb_a[1]),
+        post_adaptation_cone_response_compression_inv(rgb_a[2])
     };
 
     const f3 rgb = mult_f3_f33(rgb_m, p.MATRIX_CAM16_c_to_RGB);
@@ -241,17 +241,17 @@ f3 JMh_to_RGB(const f3 &JMh, const JMhParams &p)
 inline float chroma_compress_norm(float h, float chroma_compress_scale)
 {
     const float h_rad = to_radians(h);
-    const float a = cos(h_rad);
-    const float b = sin(h_rad);
-    const float cos_hr2 = a * a - b * b;
-    const float sin_hr2 = 2.0f * a * b;
-    const float cos_hr3 = 4.0f * a * a * a - 3.0f * a;
-    const float sin_hr3 = 3.0f * b - 4.0f * b * b * b;
-
-    const float M = 11.34072f * a +
+    const float cos_hr1 = cos(h_rad);
+    const float sin_hr1 = sin(h_rad);
+    const float cos_hr2 = 2.0f * cos_hr1 * cos_hr1 - 1.0f;
+    const float sin_hr2 = 2.0f * cos_hr1 * sin_hr1;
+    const float cos_hr3 = 4.0f * cos_hr1 * cos_hr1 * cos_hr1 - 3.0f * cos_hr1;
+    const float sin_hr3 = 3.0f * sin_hr1 - 4.0f * sin_hr1 * sin_hr1 * sin_hr1;
+
+    const float M = 11.34072f * cos_hr1 +
               16.46899f * cos_hr2 +
                7.88380f * cos_hr3 +
-              14.66441f * b +
+              14.66441f * sin_hr1 +
               -6.37224f * sin_hr2 +
                9.19364f * sin_hr3 +
               77.12896f;
@@ -383,7 +383,7 @@ inline float model_gamma(void)
 
 JMhParams init_JMhParams(const Primaries &prims)
 {
-    const m33f cone_response_to_Aab = {
+    const m33f base_cone_response_to_Aab = {
         2.0f, 1.0f, 1.0f / 20.0f,
         1.0f, -12.0f / 11.0f, 1.0f / 11.0f,
         1.0f / 9.0f, 1.0f / 9.0f, -2.0f / 9.0f
@@ -406,9 +406,9 @@ JMhParams init_JMhParams(const Primaries &prims)
     const float cz    = model_gamma();
 
     const f3 D_RGB = {
-        Y_W / RGB_w[0],
-        Y_W / RGB_w[1],
-        Y_W / RGB_w[2]
+        F_L_n * Y_W / RGB_w[0],
+        F_L_n * Y_W / RGB_w[1],
+        F_L_n * Y_W / RGB_w[2]
     };
 
     const f3 RGB_WC {
@@ -418,13 +418,14 @@ JMhParams init_JMhParams(const Primaries &prims)
     };
 
     const f3 RGB_AW = {
-        post_adaptation_cone_response_compression_fwd(RGB_WC[0], F_L_n),
-        post_adaptation_cone_response_compression_fwd(RGB_WC[1], F_L_n),
-        post_adaptation_cone_response_compression_fwd(RGB_WC[2], F_L_n)
+        post_adaptation_cone_response_compression_fwd(RGB_WC[0]),
+        post_adaptation_cone_response_compression_fwd(RGB_WC[1]),
+        post_adaptation_cone_response_compression_fwd(RGB_WC[2])
     };
 
+    const m33f cone_response_to_Aab = mult_f33_f33(scale_f33(Identity_M33, f3_from_f(cam_nl_scale)), base_cone_response_to_Aab); // TODO: this scale maybe ill conditioning the matrix
     const float A_w   = cone_response_to_Aab[0] * RGB_AW[0] + cone_response_to_Aab[1] * RGB_AW[1] + cone_response_to_Aab[2] * RGB_AW[2];
-    const float A_w_J = _post_adaptation_cone_response_compression_fwd(reference_luminance, F_L_n);
+    const float A_w_J = _post_adaptation_cone_response_compression_fwd(F_L);
 
     // TODO: evaluate the condition number of the below matrices and consider extracting a power of 2 scale out of them may improve noise behaviour
     //       power of 2 to make cost of extra scaling limited vs generalised multiply/divide ?
diff --git a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
index 5ebd9fa008..c6587d355b 100644
--- a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
@@ -369,8 +369,8 @@ void _Add_RGB_to_JMh_Shader(
 
     ss.newLine() << ss.float3Decl("lms") << " = " << ss.mat3fMul(&p.MATRIX_RGB_to_CAM16_c[0], pxl + ".rgb") << ";";
 
-    ss.newLine() << ss.float3Decl("F_L_v") << " = pow(" << p.F_L_n << " * abs(lms), " << ss.float3Const(0.42f) << ");";
-    ss.newLine() << ss.float3Decl("rgb_a") << " = ( " << ACES2::cam_nl_scale << " * sign(lms) * F_L_v) / ( " << ACES2::cam_nl_offset << " + F_L_v);";
+    ss.newLine() << ss.float3Decl("F_L_v") << " = pow(abs(lms), " << ss.float3Const(0.42f) << ");";
+    ss.newLine() << ss.float3Decl("rgb_a") << " = (sign(lms) * F_L_v) / ( " << ACES2::cam_nl_offset << " + F_L_v);";
 
     ss.newLine() << ss.float3Decl("Aab") << " = " << ss.mat3fMul(&p.MATRIX_cone_response_to_Aab[0], "rgb_a.rgb") << ";";
 
@@ -401,7 +401,7 @@ void _Add_JMh_to_RGB_Shader(
 
     ss.newLine() << ss.float3Decl("rgb_a") << " = " << ss.mat3fMul(&p.MATRIX_Aab_to_cone_response[0], "Aab.rgb") << ";";
 
-    ss.newLine() << ss.float3Decl("lms") << " = sign(rgb_a) * pow( " << ACES2::cam_nl_offset << " * abs(rgb_a) / ( " << ACES2::cam_nl_scale << " - abs(rgb_a)), " << ss.float3Const(1.f / 0.42f) << ")/ " << p.F_L_n << ";";
+    ss.newLine() << ss.float3Decl("lms") << " = sign(rgb_a) * pow( " << ACES2::cam_nl_offset << " * abs(rgb_a) / (1.0f - abs(rgb_a)), " << ss.float3Const(1.f / 0.42f) << ");";
 
     ss.newLine() << pxl << ".rgb = " << ss.mat3fMul(&p.MATRIX_CAM16_c_to_RGB[0], "lms") << ";";
 }
@@ -532,6 +532,36 @@ std::string _Add_Toe_func(
     return name;
 }
 
+void _Add_ChromaCompressionNorm_Shader(
+    GpuShaderText & ss,
+    const ACES2::ChromaCompressParams & c)
+{
+    // Mnorm
+    ss.newLine() << ss.floatDecl("Mnorm") << ";";
+    ss.newLine() << "{";
+    ss.indent();
+
+    ss.newLine() << ss.floatDecl("h_rad") << " = h * " << 3.14159265358979f / 180.0f << ";";
+    ss.newLine() << ss.floatDecl("a") << " = cos(h_rad);";
+    ss.newLine() << ss.floatDecl("b") << " = sin(h_rad);";
+    ss.newLine() << ss.floatDecl("cos_hr2") << " = a * a - b * b;";
+    ss.newLine() << ss.floatDecl("sin_hr2") << " = 2.0 * a * b;";
+    ss.newLine() << ss.floatDecl("cos_hr3") << " = 4.0 * a * a * a - 3.0 * a;";
+    ss.newLine() << ss.floatDecl("sin_hr3") << " = 3.0 * b - 4.0 * b * b * b;";
+    ss.newLine() << ss.float3Decl("cosines") << " = " <<  ss.float3Const("a", "cos_hr2", "cos_hr3") <<";";
+    ss.newLine() << ss.float3Decl("cosine_weights") << " = " <<  ss.float3Const(11.34072 * c.chroma_compress_scale,
+                                                                                16.46899 * c.chroma_compress_scale,
+                                                                                7.88380 * c.chroma_compress_scale) <<";";
+    ss.newLine() << ss.float3Decl("sines") << " = " <<  ss.float3Const("b", "sin_hr2", "sin_hr3") <<";";
+    ss.newLine() << ss.float3Decl("sine_weights") << " = " <<  ss.float3Const(14.66441 * c.chroma_compress_scale,
+                                                                              -6.37224 * c.chroma_compress_scale,
+                                                                               9.19364 * c.chroma_compress_scale) <<";";
+    ss.newLine() << "Mnorm = dot(cosines, cosine_weights) + dot(sines, sine_weights) + " << 77.12896 * c.chroma_compress_scale<< ";";
+
+    ss.dedent();
+    ss.newLine() << "}";
+}
+
 void _Add_Tonescale_Compress_Fwd_Shader(
     GpuShaderCreatorRcPtr & shaderCreator,
     GpuShaderText & ss,
@@ -550,14 +580,14 @@ void _Add_Tonescale_Compress_Fwd_Shader(
     ss.newLine() << ss.floatDecl("h") << " = " << pxl << ".b;";
 
     // Tonescale applied in Y (convert to and from J)
-    ss.newLine() << ss.floatDecl("A") << " = " << p.A_w_J << " * pow(abs(J) / 100.0, " << 1.0 / p.cz << ");";
-    ss.newLine() << ss.floatDecl("Y") << " = sign(J) * pow(( " << ACES2::cam_nl_offset << " * A) / ( " << ACES2::cam_nl_scale << " - A), " << 1.0 / 0.42 << ") / " << p.F_L_n << ";";
+    ss.newLine() << ss.floatDecl("A") << " = " << p.A_w_J << " * pow(abs(J) * " << 1.0 / ACES2::J_scale << ", " << 1.0 / p.cz << ");";
+    ss.newLine() << ss.floatDecl("Y") << " = pow(( " << ACES2::cam_nl_offset << " * A) / (1.0f - A), " << 1.0 / 0.42 << ");";
 
-    ss.newLine() << ss.floatDecl("f") << " = " << t.m_2  << " * pow(max(0.0, Y) / (Y + " << t.s_2 << "), " << t.g << ");";
-    ss.newLine() << ss.floatDecl("Y_ts") << " = max(0.0, f * f / (f + " << t.t_1 << ")) * " << t.n_r << ";";
+    ss.newLine() << ss.floatDecl("f") << " = " << t.m_2  << " * pow(Y / (Y + " << t.s_2 * p.F_L_n << "), " << t.g << ");";
+    ss.newLine() << ss.floatDecl("Y_ts") << " = max(0.0, f * f / (f + " << t.t_1 << "));";
 
-    ss.newLine() << ss.floatDecl("F_L_Y") << " = pow(" << p.F_L_n << " * abs(Y_ts), 0.42);";
-    ss.newLine() << ss.floatDecl("J_ts") << " = sign(Y_ts) * 100.0 * pow((( " << ACES2::cam_nl_scale << " * F_L_Y) / ( " << ACES2::cam_nl_offset << " + F_L_Y)) / " << p.A_w_J << ", " << p.cz << ");";
+    ss.newLine() << ss.floatDecl("F_L_Y") << " = pow(" << p.F_L_n  *  t.n_r << " * Y_ts, 0.42);";
+    ss.newLine() << ss.floatDecl("J_ts") << " = " << ACES2::J_scale << " * pow((F_L_Y / ( " << ACES2::cam_nl_offset << " + F_L_Y)) * " << 1.0f / p.A_w_J << ", " << p.cz << ");";
 
     // ChromaCompress
     ss.newLine() << ss.floatDecl("M_cp") << " = M;";
@@ -569,23 +599,7 @@ void _Add_Tonescale_Compress_Fwd_Shader(
     ss.newLine() << ss.floatDecl("nJ") << " = J_ts / " << s.limit_J_max << ";";
     ss.newLine() << ss.floatDecl("snJ") << " = max(0.0, 1.0 - nJ);";
 
-    // Mnorm
-    ss.newLine() << ss.floatDecl("Mnorm") << ";";
-    ss.newLine() << "{";
-    ss.indent();
-
-    ss.newLine() << ss.floatDecl("h_rad") << " = h * " << 3.14159265358979f / 180.0f << ";";
-    ss.newLine() << ss.floatDecl("a") << " = cos(h_rad);";
-    ss.newLine() << ss.floatDecl("b") << " = sin(h_rad);";
-    ss.newLine() << ss.floatDecl("cos_hr2") << " = a * a - b * b;";
-    ss.newLine() << ss.floatDecl("sin_hr2") << " = 2.0 * a * b;";
-    ss.newLine() << ss.floatDecl("cos_hr3") << " = 4.0 * a * a * a - 3.0 * a;";
-    ss.newLine() << ss.floatDecl("sin_hr3") << " = 3.0 * b - 4.0 * b * b * b;";
-    ss.newLine() << ss.floatDecl("M") << " = 11.34072 * a + 16.46899 * cos_hr2 + 7.88380 * cos_hr3 + 14.66441 * b + -6.37224 * sin_hr2 + 9.19364 * sin_hr3 + 77.12896;";
-    ss.newLine() << "Mnorm = M * " << c.chroma_compress_scale << ";"; // TODO precompute
-
-    ss.dedent();
-    ss.newLine() << "}";
+    _Add_ChromaCompressionNorm_Shader(ss, c);
 
     ss.newLine() << ss.floatDecl("limit") << " = pow(nJ, " << s.model_gamma_inv << ") * reachMaxM / Mnorm;";
     ss.newLine() << "M_cp = M * pow(J_ts / J, " << s.model_gamma_inv << ");";
@@ -620,14 +634,14 @@ void _Add_Tonescale_Compress_Inv_Shader(
 
     // Inverse Tonescale applied in Y (convert to and from J)
     ss.newLine() << ss.floatDecl("A") << " = " << p.A_w_J << " * pow(abs(J_ts) / 100.0, " << 1.0f / p.cz << ");";
-    ss.newLine() << ss.floatDecl("Y_ts") << " = sign(J_ts) * pow(( " << ACES2::cam_nl_offset << " * A) / ( " << ACES2::cam_nl_scale << " - A), " << 1.0 / 0.42 << ") / " << p.F_L_n << " / 100.0;";
+    ss.newLine() << ss.floatDecl("Y_ts") << " = sign(J_ts) * pow(( " << ACES2::cam_nl_offset << " * A) / (1.0f - A), " << 1.0 / 0.42 << ") / " << p.F_L_n << " / 100.0;";
 
     ss.newLine() << ss.floatDecl("Z") << " = max(0.0, min(" << t.n << " / (" << t.u_2 * t.n_r << "), Y_ts));";
     ss.newLine() << ss.floatDecl("ht") << " = (Z + sqrt(Z * (4.0 * " << t.t_1 << " + Z))) / 2.0;";
     ss.newLine() << ss.floatDecl("Y") << " = " << t.s_2 << " / (pow((" << t.m_2 << " / ht), (1.0 / " << t.g << ")) - 1.0);";
 
     ss.newLine() << ss.floatDecl("F_L_Y") << " = pow(" << p.F_L_n << " * abs(Y), 0.42);";
-    ss.newLine() << ss.floatDecl("J") << " = sign(Y) * 100.0 * pow((( " << ACES2::cam_nl_scale << " * F_L_Y) / ( " << ACES2::cam_nl_offset << " + F_L_Y)) / " << p.A_w_J << ", " << p.cz << ");";
+    ss.newLine() << ss.floatDecl("J") << " = sign(Y) * 100.0 * pow((F_L_Y / ( " << ACES2::cam_nl_offset << " + F_L_Y)) / " << p.A_w_J << ", " << p.cz << ");";
 
     // ChromaCompress
     ss.newLine() << ss.floatDecl("M") << " = M_cp;";
@@ -639,23 +653,7 @@ void _Add_Tonescale_Compress_Inv_Shader(
     ss.newLine() << ss.floatDecl("nJ") << " = J_ts / " << s.limit_J_max << ";";
     ss.newLine() << ss.floatDecl("snJ") << " = max(0.0, 1.0 - nJ);";
 
-    // Mnorm
-    ss.newLine() << ss.floatDecl("Mnorm") << ";";
-    ss.newLine() << "{";
-    ss.indent();
-
-    ss.newLine() << ss.floatDecl("h_rad") << " = h * " << 3.14159265358979 / 180.0 << ";";
-    ss.newLine() << ss.floatDecl("a") << " = cos(h_rad);";
-    ss.newLine() << ss.floatDecl("b") << " = sin(h_rad);";
-    ss.newLine() << ss.floatDecl("cos_hr2") << " = a * a - b * b;";
-    ss.newLine() << ss.floatDecl("sin_hr2") << " = 2.0 * a * b;";
-    ss.newLine() << ss.floatDecl("cos_hr3") << " = 4.0 * a * a * a - 3.0 * a;";
-    ss.newLine() << ss.floatDecl("sin_hr3") << " = 3.0 * b - 4.0 * b * b * b;";
-    ss.newLine() << ss.floatDecl("M") << " = 11.34072 * a + 16.46899 * cos_hr2 + 7.88380 * cos_hr3 + 14.66441 * b + -6.37224 * sin_hr2 + 9.19364 * sin_hr3 + 77.12896;";
-    ss.newLine() << "Mnorm = M * " << c.chroma_compress_scale << ";";
-
-    ss.dedent();
-    ss.newLine() << "}";
+    _Add_ChromaCompressionNorm_Shader(ss, c);
 
     ss.newLine() << ss.floatDecl("limit") << " = pow(nJ, " << s.model_gamma_inv << ") * reachMaxM / Mnorm;";
 

From 4a13c362489e16115162c606d58d216459651eaf Mon Sep 17 00:00:00 2001
From: Kevin Wheatley <kevin.wheatley@framestore.com>
Date: Tue, 4 Feb 2025 16:00:53 +0000
Subject: [PATCH 59/70] Experiment with unsigned integers for array access

Signed-off-by: Kevin Wheatley <kevin.wheatley@framestore.com>
---
 .../ops/fixedfunction/ACES2/Common.h          |  30 ++---
 .../ops/fixedfunction/ACES2/Transform.cpp     | 107 +++++++++---------
 2 files changed, 69 insertions(+), 68 deletions(-)

diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h b/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
index 2c5952585a..b01bde93de 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
@@ -47,17 +47,17 @@ inline float from_radians(const float v) { return wrap_to_hue_limit(v); };
 
 struct TableBase
 {
-    static constexpr int _TABLE_ADDITION_ENTRIES = 2;
-    static constexpr int base_index = 1;
-    static constexpr int nominal_size = 360;
-    static constexpr int total_size = nominal_size + _TABLE_ADDITION_ENTRIES;
+    static constexpr unsigned int _TABLE_ADDITION_ENTRIES = 2;
+    static constexpr unsigned int base_index = 1;
+    static constexpr unsigned int nominal_size = 360;
+    static constexpr unsigned int total_size = nominal_size + _TABLE_ADDITION_ENTRIES;
 
-    static constexpr int lower_wrap_index = 0;
-    static constexpr int upper_wrap_index = base_index + nominal_size;
-    static constexpr int first_nominal_index = base_index;
-    static constexpr int last_nominal_index = upper_wrap_index - 1;
+    static constexpr unsigned int lower_wrap_index = 0;
+    static constexpr unsigned int upper_wrap_index = base_index + nominal_size;
+    static constexpr unsigned int first_nominal_index = base_index;
+    static constexpr unsigned int last_nominal_index = upper_wrap_index - 1;
 
-    inline float base_hue_for_position(int i_lo) const
+    inline float base_hue_for_position(unsigned int i_lo) const
     {
         if (hue_limit == float(nominal_size)) // TODO C++ 17 if constexpr
             return float(i_lo);
@@ -66,22 +66,22 @@ struct TableBase
         return result;
     }
 
-    inline int hue_position_in_uniform_table(float wrapped_hue) const 
+    inline unsigned int hue_position_in_uniform_table(float wrapped_hue) const 
     {
         if (hue_limit == float(nominal_size)) // TODO C++ 17 if constexpr
-            return int(wrapped_hue);
+            return static_cast<unsigned int>(wrapped_hue);
         else
-            return int(wrapped_hue / hue_limit * float(nominal_size)); // TODO: can we use the 'lost' fraction for the lerps?
+            return static_cast<unsigned int>(wrapped_hue / hue_limit * float(nominal_size)); // TODO: can we use the 'lost' fraction for the lerps?
     }
 
-   inline int nominal_hue_position_in_uniform_table(float wrapped_hue) const 
+   inline unsigned int nominal_hue_position_in_uniform_table(float wrapped_hue) const 
     {
         return first_nominal_index + hue_position_in_uniform_table(wrapped_hue);
     }
 
-    inline int clamp_to_table_bounds(int entry) const // TODO: this should be removed if we can constrain the hue range properly
+    inline unsigned int clamp_to_table_bounds(unsigned int entry) const // TODO: this should be removed if we can constrain the hue range properly
     {
-        return std::min(nominal_size - 1, std::max(0, entry));
+        return std::min(nominal_size - 1U, std::max(0U, entry));
     }
 };
 
diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
index 1ae89f1c62..55746c3007 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
@@ -6,6 +6,7 @@
 #include <array>
 #include <algorithm>
 #include <cmath>
+#include <limits>
 
 namespace OCIO_NAMESPACE
 {
@@ -34,7 +35,7 @@ inline f3 lerp(const float lower[3], const float upper[3], const float t)
     };
 }
 
-inline int midpoint(int a, int b)
+inline unsigned int midpoint(unsigned int a, unsigned int b)
 {
     return (a + b) / 2;
 }
@@ -44,15 +45,15 @@ inline float midpoint(float a, float b)
     return (a + b) / 2.f;
 }
 
-int lookup_hue_interval(float h, const Table1D &hues, const std::array<int, 2> & hue_linearity_search_range)
+unsigned int lookup_hue_interval(float h, const Table1D &hues, const std::array<int, 2> & hue_linearity_search_range)
 {
     // Search the given Table for the interval containing the desired hue
     // Returns the upper index of the interval
 
     // We can narrow the search range based on the hues being almost uniform
-    int i = hues.nominal_hue_position_in_uniform_table(h);
-    int i_lo = std::max(hues.lower_wrap_index, i + hue_linearity_search_range[0]); // Should be nominal not lower_wrap?
-    int i_hi = std::min(hues.upper_wrap_index, i + hue_linearity_search_range[1]);
+    unsigned int i = hues.nominal_hue_position_in_uniform_table(h);
+    unsigned int i_lo = std::max(int(hues.lower_wrap_index), int(i) + hue_linearity_search_range[0]); // Should be nominal not lower_wrap?
+    unsigned int i_hi = std::min(int(hues.upper_wrap_index), int(i) + hue_linearity_search_range[1]);
 
     while (i_lo + 1 < i_hi)
     {
@@ -67,7 +68,7 @@ int lookup_hue_interval(float h, const Table1D &hues, const std::array<int, 2> &
         i = midpoint(i_lo, i_hi);
     }
 
-    i_hi = std::max(1, i_hi); // TODO: should not be needed if we initialise with the correct lo and hi
+    i_hi = std::max(1U, i_hi); // TODO: should not be needed if we initialise with the correct lo and hi
 
     return i_hi;
 }
@@ -77,22 +78,22 @@ inline float interpolation_weight(float h, float h_lo, float h_hi)
     return (h - h_lo) / (h_hi - h_lo);
 }
 
-inline float interpolation_weight(float h, int h_lo, int h_hi)
+inline float interpolation_weight(float h, unsigned int h_lo, unsigned int h_hi)
 {
     return (h - h_lo) / (h_hi - h_lo);
 }
 
-inline f3 cusp_from_table(int i_hi, float t, const Table3D &gt)
+inline f3 cusp_from_table(unsigned int i_hi, float t, const Table3D &gt)
 {
     return lerp(gt[i_hi-1], gt[i_hi], t);
 }
 
 float reach_m_from_table(float h, const ACES2::Table1D &rt)
 {
-    const int base = rt.hue_position_in_uniform_table(h);
+    const unsigned int base = rt.hue_position_in_uniform_table(h);
     const float t = h - base; // NOTE assumes uniform 1 degree 360 spacing
-    const int i_lo = base + rt.first_nominal_index; 
-    const int i_hi = i_lo + 1; // NOTE assumes uniform 1 degree 360 spacing 
+    const unsigned int i_lo = base + rt.first_nominal_index; 
+    const unsigned int i_hi = i_lo + 1; // NOTE assumes uniform 1 degree 360 spacing 
 
     return lerpf(rt[i_lo], rt[i_hi], t);
 }
@@ -455,12 +456,12 @@ JMhParams init_JMhParams(const Primaries &prims)
    return p;
 }
 
-inline f3 generate_unit_cube_cusp_corners(const int corner)
+inline f3 generate_unit_cube_cusp_corners(const unsigned int corner)
 {
     // Generation order R, Y, G, C, B, M to ensure hues rotate in correct order
-    return {float(int(((corner + 1) % cuspCornerCount) < 3)),
-            float(int(((corner + 5) % cuspCornerCount) < 3)),
-            float(int(((corner + 3) % cuspCornerCount) < 3))
+    return {float(static_cast<unsigned int>(((corner + 1) % cuspCornerCount) < 3)),
+            float(static_cast<unsigned int>(((corner + 5) % cuspCornerCount) < 3)),
+            float(static_cast<unsigned int>(((corner + 3) % cuspCornerCount) < 3))
             };
 }
 
@@ -471,8 +472,8 @@ void build_limiting_cusp_corners_tables(std::array<f3, totalCornerCount>& RGB_co
     // They are then arranged into a cycle with the lowest JMh value at [1] to allow for hue wrapping
     std::array<f3, cuspCornerCount> temp_RGB_corners;
     std::array<f3, cuspCornerCount> temp_JMh_corners;
-    int min_index = 0;
-    for (int i = 0; i != cuspCornerCount; ++i)
+    unsigned int min_index = 0;
+    for (unsigned int i = 0; i != cuspCornerCount; ++i)
     {
       temp_RGB_corners[i] = mult_f_f3(peakLuminance / reference_luminance, generate_unit_cube_cusp_corners(i));
       temp_JMh_corners[i] = RGB_to_JMh(temp_RGB_corners[i], params);
@@ -481,7 +482,7 @@ void build_limiting_cusp_corners_tables(std::array<f3, totalCornerCount>& RGB_co
     }
 
     // Rotate entries placing lowest at [1] (not [0])
-    for (int i = 0; i != cuspCornerCount; ++i)
+    for (unsigned int i = 0; i != cuspCornerCount; ++i)
     {
       RGB_corners[i + 1] = temp_RGB_corners[(i + min_index) % cuspCornerCount];
       JMh_corners[i + 1] = temp_JMh_corners[(i + min_index) % cuspCornerCount];
@@ -510,8 +511,8 @@ void find_reach_corners_table(std::array<f3, totalCornerCount>& JMh_corners, con
     std::array<f3, cuspCornerCount> temp_JMh_corners;
     const float limitA = J_to_Achromatic_n(limitJ, params.cz);
 
-    int min_index = 0;
-    for (int i = 0; i != cuspCornerCount; ++i)
+    unsigned int min_index = 0;
+    for (unsigned int i = 0; i != cuspCornerCount; ++i)
     {
         const f3 rgb_vector = generate_unit_cube_cusp_corners(i);
 
@@ -540,7 +541,7 @@ void find_reach_corners_table(std::array<f3, totalCornerCount>& JMh_corners, con
     }
 
     // Rotate entries placing lowest at [1] (not [0]) // TODO: could use std::rotate_copy or even the ranges vs in C++20
-    for (int i = 0; i != cuspCornerCount; ++i)
+    for (unsigned int i = 0; i != cuspCornerCount; ++i)
     {
       JMh_corners[i + 1] = temp_JMh_corners[(i + min_index) % cuspCornerCount];
     }
@@ -554,15 +555,15 @@ void find_reach_corners_table(std::array<f3, totalCornerCount>& JMh_corners, con
     JMh_corners[cuspCornerCount + 1][2] = JMh_corners[cuspCornerCount + 1][2] + hue_limit;
 }
 
-int extract_sorted_cube_hues(std::array<float, max_sorted_corners>& sorted_hues,
+unsigned int extract_sorted_cube_hues(std::array<float, max_sorted_corners>& sorted_hues,
                              const std::array<f3, totalCornerCount>& reach_JMh, const std::array<f3, totalCornerCount>& display_JMh)
 {
     // Basic merge of 2 sorted arrays, extracting the unique hues.
     // Return the count of the unique hues
     //TODO: use STL for this and similar functions
-    int idx         = 0;
-    int reach_idx   = 1;
-    int display_idx = 1;
+    unsigned int idx         = 0;
+    unsigned int reach_idx   = 1;
+    unsigned int display_idx = 1;
     while ((reach_idx < (cuspCornerCount + 1)) || (display_idx < (cuspCornerCount + 1)))
     {
         const float reach_hue   = reach_JMh[reach_idx][2];
@@ -591,25 +592,25 @@ int extract_sorted_cube_hues(std::array<float, max_sorted_corners>& sorted_hues,
     return idx;
 }
 
-void build_hue_sample_interval(const int samples, const float lower, const float upper, Table1D &hue_table, const int base)
+void build_hue_sample_interval(const unsigned int samples, const float lower, const float upper, Table1D &hue_table, const unsigned int base)
 {
     const float delta = (upper - lower) / float(samples);
-    for (int i = 0; i != samples; ++i)
+    for (unsigned int i = 0; i != samples; ++i)
     {
         hue_table[base + i] = lower + float(i) * delta;
     }
 }
 
-void build_hue_table(Table1D &hue_table, const std::array<float, max_sorted_corners>& sorted_hues, const int unique_hues)
+void build_hue_table(Table1D &hue_table, const std::array<float, max_sorted_corners>& sorted_hues, const unsigned int unique_hues)
 {
     const float ideal_spacing = hue_table.nominal_size / hue_limit;
-    std::array<int, 2 * cuspCornerCount + 2> samples_count = {};
-    int         last_idx  = -1;
-    int         min_index = sorted_hues[0] == 0.0f ? 0 : 1; // Ensure we can always sample at 0.0 hue
-    for (int hue_idx = 0; hue_idx != unique_hues; ++hue_idx)
+    std::array<unsigned int, 2 * cuspCornerCount + 2> samples_count = {};
+    unsigned int last_idx  = std::numeric_limits<unsigned int>::max();
+    unsigned int min_index = sorted_hues[0] == 0.0f ? 0 : 1; // Ensure we can always sample at 0.0 hue
+    for (unsigned int hue_idx = 0; hue_idx != unique_hues; ++hue_idx)
     {
         // BUG: "hue_table.size - 1" will fail if we have multiple hues mapping near the top of the table
-        int nominal_idx = std::min(std::max(int(std::round(sorted_hues[hue_idx] * ideal_spacing)), min_index), hue_table.nominal_size - 1);
+        unsigned int nominal_idx = std::min(std::max(static_cast<unsigned int>(std::round(sorted_hues[hue_idx] * ideal_spacing)), min_index), hue_table.nominal_size - 1);
         if (last_idx == nominal_idx)
         {
             // Last two hues should sample at same index, need to adjust them
@@ -623,18 +624,18 @@ void build_hue_table(Table1D &hue_table, const std::array<float, max_sorted_corn
                 nominal_idx = nominal_idx + 1;
             }
         }
-        samples_count[hue_idx] = std::min(nominal_idx, hue_table.nominal_size - 1);
+        samples_count[hue_idx] = std::min(nominal_idx, hue_table.nominal_size - 1U);
         last_idx = min_index = nominal_idx;
     }
 
-    int total_samples = 0;
+    unsigned int total_samples = 0;
     // Special cases for ends
-    int i = 0;
+    unsigned int i = 0;
     build_hue_sample_interval(samples_count[i], 0.0f, sorted_hues[i], hue_table, total_samples + 1);
     total_samples += samples_count[i];
     for (++i; i != unique_hues; ++i)
     {
-        const int samples = samples_count[i] - samples_count[i - 1];
+        const unsigned int samples = samples_count[i] - samples_count[i - 1];
         build_hue_sample_interval(samples, sorted_hues[i - 1], sorted_hues[i], hue_table, total_samples + 1);
         total_samples += samples;
     }
@@ -652,8 +653,8 @@ std::array<float, 2> find_display_cusp_for_hue(float hue, const std::array<f3, t
     // along the line calculating the JMh of points along the line till we find the required value.
     // All values on the line segments are valid cusp locations.
 
-    int upper_corner = 1;
-    for (int i = upper_corner; i != totalCornerCount; ++i) // TODO: binary search?
+    unsigned int upper_corner = 1;
+    for (unsigned int i = upper_corner; i != totalCornerCount; ++i) // TODO: binary search?
     {
         if (JMh_corners[i][2] > hue)
         {
@@ -661,7 +662,7 @@ std::array<float, 2> find_display_cusp_for_hue(float hue, const std::array<f3, t
             break;
         }
     }
-    const int lower_corner = upper_corner - 1;
+    const unsigned int lower_corner = upper_corner - 1;
 
     // hue should now be within [lower_corner, upper_corner), handle exact match
     if (JMh_corners[lower_corner][2] == hue)
@@ -721,7 +722,7 @@ Table3D build_cusp_table(const Table1D& hue_table, const std::array<f3, totalCor
 {
     std::array<float, 2> previous = {0.0f, 0.0f};
     Table3D output_table;
-    for (int i = output_table.first_nominal_index; i != output_table.upper_wrap_index; ++i)
+    for (unsigned int i = output_table.first_nominal_index; i != output_table.upper_wrap_index; ++i)
     {
       const float hue = hue_table[i];
       const std::array<float, 2> JM = find_display_cusp_for_hue(hue, RGB_corners, JMh_corners, params, previous);
@@ -756,7 +757,7 @@ Table3D make_uniform_hue_gamut_table(const JMhParams &reach_params, const JMhPar
 
     find_reach_corners_table(reach_JMh_corners, reach_params, sp.limit_J_max, forward_limit);
     build_limiting_cusp_corners_tables(limiting_RGB_corners, limiting_JMh_corners, params, peakLuminance);
-    const int unique_hues = extract_sorted_cube_hues(sorted_hues, reach_JMh_corners, limiting_JMh_corners);
+    const unsigned int unique_hues = extract_sorted_cube_hues(sorted_hues, reach_JMh_corners, limiting_JMh_corners);
     build_hue_table(hue_table, sorted_hues, unique_hues);
     return build_cusp_table(hue_table, limiting_RGB_corners, limiting_JMh_corners, params);
 }
@@ -764,8 +765,8 @@ Table3D make_uniform_hue_gamut_table(const JMhParams &reach_params, const JMhPar
 Table3D make_gamut_table(const JMhParams &params, float peakLuminance, Table1D& hue_table)
 {
     Table3D gamutCuspTableUnsorted{};
-    int minhIndex = 0;
-    for (int i = 0; i < gamutCuspTableUnsorted.nominal_size; i++)
+    unsigned int minhIndex = 0;
+    for (unsigned int i = 0; i < gamutCuspTableUnsorted.nominal_size; i++)
     {
         const float hNorm = float(i) / float(gamutCuspTableUnsorted.nominal_size);
         const f3 HSV = {hNorm, 1., 1.};
@@ -781,7 +782,7 @@ Table3D make_gamut_table(const JMhParams &params, float peakLuminance, Table1D&
     }
 
     Table3D gamutCuspTable{};
-    for (int i = 0; i < gamutCuspTableUnsorted.nominal_size; i++)
+    for (unsigned int i = 0; i < gamutCuspTableUnsorted.nominal_size; i++)
     {
         gamutCuspTable[i + gamutCuspTable.base_index][0] = gamutCuspTableUnsorted[(minhIndex+i) % gamutCuspTableUnsorted.nominal_size][0];
         gamutCuspTable[i + gamutCuspTable.base_index][1] = gamutCuspTableUnsorted[(minhIndex+i) % gamutCuspTableUnsorted.nominal_size][1];
@@ -803,7 +804,7 @@ Table3D make_gamut_table(const JMhParams &params, float peakLuminance, Table1D&
     gamutCuspTable[gamutCuspTable.upper_wrap_index][2] += hue_limit;
 
     // Extract hue table
-    for (int i = 0; i < gamutCuspTable.total_size; i++)
+    for (unsigned int i = 0; i < gamutCuspTable.total_size; i++)
     {
         hue_table[i] = gamutCuspTable[i][2];
     }
@@ -819,7 +820,7 @@ Table1D make_reach_m_table(const JMhParams &params, const float limit_J_max)
 {
     Table1D gamutReachTable{};
 
-    for (int i = 0; i < gamutReachTable.nominal_size; i++) {
+    for (unsigned int i = 0; i < gamutReachTable.nominal_size; i++) {
         const float hue = gamutReachTable.base_hue_for_position(i);
 
         constexpr float search_range = 50.f;
@@ -1058,7 +1059,7 @@ HueDependantGamutParams init_HueDependantGamutParams(const float hue, const Reso
     HueDependantGamutParams hdp;
     hdp.gamma_bottom_inv = p.lower_hull_gamma_inv;
 
-    const int i_hi = lookup_hue_interval(hue, p.hue_table, p.hue_linearity_search_range);
+    const unsigned int i_hi = lookup_hue_interval(hue, p.hue_table, p.hue_linearity_search_range);
     const float t = interpolation_weight(hue, p.hue_table[i_hi-1], p.hue_table[i_hi]);
     const f3 cusp = cusp_from_table(i_hi, t, p.gamut_cusp_table);
 
@@ -1089,7 +1090,7 @@ f3 gamut_compress_inv(const f3 &JMh, const ResolvedSharedCompressionParameters &
     return compressGamut<true>(JMh, Jx, sr, p, hdp);
 }
 
-static constexpr int gamma_test_count = 5;
+static constexpr unsigned int gamma_test_count = 5;
 struct testData {
     f3 testJMh;
     float J_intersect_source;
@@ -1157,7 +1158,7 @@ void make_upper_hull_gamma(
     float lower_hull_gamma_inv,
     const JMhParams &limitJMhParams)
 {
-    for (int i = gamutCuspTable.first_nominal_index; i != gamutCuspTable.upper_wrap_index; ++i)
+    for (unsigned int i = gamutCuspTable.first_nominal_index; i != gamutCuspTable.upper_wrap_index; ++i)
     {
         const float hue = hue_table[i];
         const f2 JMcusp = { gamutCuspTable[i][0], gamutCuspTable[i][1] };
@@ -1318,10 +1319,10 @@ std::array<int, 2> determine_hue_linearity_search_range(const Table3D &gamutCusp
     constexpr int lower_padding = 0;
     constexpr int upper_padding = 1;
     std::array<int, 2> hue_linearity_search_range = {lower_padding, upper_padding};
-    for (int i = gamutCuspTable.first_nominal_index; i != gamutCuspTable.upper_wrap_index; ++i)
+    for (unsigned int i = gamutCuspTable.first_nominal_index; i != gamutCuspTable.upper_wrap_index; ++i)
     {
-        const int pos = gamutCuspTable.nominal_hue_position_in_uniform_table(gamutCuspTable[i][2]); // TODO: compute from hue table less cache pressure?
-        const int delta = i - pos;
+        const unsigned int pos = gamutCuspTable.nominal_hue_position_in_uniform_table(gamutCuspTable[i][2]); // TODO: compute from hue table less cache pressure?
+        const int delta = int(i) - int(pos);
         hue_linearity_search_range[0] = std::min(hue_linearity_search_range[0], delta + lower_padding);
         hue_linearity_search_range[1] = std::max(hue_linearity_search_range[1], delta + upper_padding);
 

From 6d609bc216f3e6e3daf41decc0f1adaf5961580a Mon Sep 17 00:00:00 2001
From: Kevin Wheatley <kevin.wheatley@framestore.com>
Date: Wed, 5 Feb 2025 17:44:16 +0000
Subject: [PATCH 60/70] Bypass one J -> A conversion by saving the Aab computed
 earlier

Signed-off-by: Kevin Wheatley <kevin.wheatley@framestore.com>
---
 .../ops/fixedfunction/ACES2/Transform.cpp     | 29 ++++++++++++++++---
 .../ops/fixedfunction/ACES2/Transform.h       |  3 ++
 .../ops/fixedfunction/FixedFunctionOpCPU.cpp  |  5 ++--
 3 files changed, 31 insertions(+), 6 deletions(-)

diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
index 55746c3007..715bac8b8c 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
@@ -145,12 +145,18 @@ inline float J_to_Achromatic_n(float J, const float cz)
 
 // Optimization for achromatic values
 
-inline float _J_to_Y(float abs_J, const JMhParams &p)
+inline float _A_to_Y(float A, const JMhParams &p)
 {
-    const float Ra = p.A_w_J * J_to_Achromatic_n(abs_J, p.cz);
+    const float Ra = p.A_w_J * A;
     const float Y  = _post_adaptation_cone_response_compression_inv(Ra) / p.F_L_n;
     return Y;
 }
+
+inline float _J_to_Y(float abs_J, const JMhParams &p)
+{
+    return _A_to_Y(J_to_Achromatic_n(abs_J, p.cz), p);
+}
+
 inline float _Y_to_J(float abs_Y, const JMhParams &p)
 {
     const float Ra = _post_adaptation_cone_response_compression_fwd(abs_Y * p.F_L_n);
@@ -165,7 +171,7 @@ float Y_to_J(float Y, const JMhParams &p)
     return std::copysign(J, Y);
 }
 
-inline f3 RGB_to_Aab(const f3 &RGB, const JMhParams &p)
+f3 RGB_to_Aab(const f3 &RGB, const JMhParams &p)
 {
     const f3 rgb_m = mult_f3_f33(RGB, p.MATRIX_RGB_to_CAM16_c);
 
@@ -179,7 +185,7 @@ inline f3 RGB_to_Aab(const f3 &RGB, const JMhParams &p)
     return Aab;
 }
 
-inline f3 Aab_to_JMh(const f3 &Aab, const JMhParams &p)
+f3 Aab_to_JMh(const f3 &Aab, const JMhParams &p)
 {
     if (Aab[0] <= 0.f)
     {
@@ -327,6 +333,21 @@ float tonescale_inv(const float J, const JMhParams &p, const ToneScaleParams &pt
     return tonescale<true>(J, p, pt);
 }
 
+
+template <bool inverse>
+inline float tonescale_A_to_J(const float A, const JMhParams &p, const ToneScaleParams &pt) // TODO: consider computing tonescale from and to A rather than J to avoid extra pow() calls
+{
+    const float Y_in  = _A_to_Y(A, p);
+    const float Y_out = aces_tonescale<inverse>(Y_in, pt);
+    const float J_out = _Y_to_J(Y_out, p);
+    return std::copysign(J_out, A);
+}
+
+float tonescale_A_to_J_fwd(const float A, const JMhParams &p, const ToneScaleParams &pt)
+{
+    return tonescale_A_to_J<false>(A, p, pt);
+}
+
 f3 chroma_compress_fwd(const f3 &JMh, const float J_ts, const ResolvedSharedCompressionParameters &pr, const ChromaCompressParams &pc)
 {
     const float J = JMh[0];
diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.h b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.h
index 16693dfbd4..6d87224ba4 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.h
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.h
@@ -20,11 +20,14 @@ ChromaCompressParams init_ChromaCompressParams(float peakLuminance, const ToneSc
 GamutCompressParams init_GamutCompressParams(float peakLuminance, const JMhParams &inputJMhParams, const JMhParams &limitJMhParams,
                                              const ToneScaleParams &tsParams, const SharedCompressionParameters &shParams, const JMhParams &reachParams);
 
+f3 RGB_to_Aab(const f3 &RGB, const JMhParams &p);
+f3 Aab_to_JMh(const f3 &Aab, const JMhParams &p);
 f3 RGB_to_JMh(const f3 &RGB, const JMhParams &p);
 f3 JMh_to_RGB(const f3 &JMh, const JMhParams &p);
 
 float tonescale_fwd(const float J, const JMhParams &p, const ToneScaleParams &pt);
 float tonescale_inv(const float J, const JMhParams &p, const ToneScaleParams &pt);
+float tonescale_A_to_J_fwd(const float A, const JMhParams &p, const ToneScaleParams &pt);
 
 f3 chroma_compress_fwd(const f3 &JMh, const float J_ts, const ResolvedSharedCompressionParameters &ps, const ChromaCompressParams &pc);
 f3 chroma_compress_inv(const f3 &JMh, const float J, const ResolvedSharedCompressionParameters &ps, const ChromaCompressParams &pc);
diff --git a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpCPU.cpp b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpCPU.cpp
index 18bd5b8338..0ed39faeee 100644
--- a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpCPU.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpCPU.cpp
@@ -1052,9 +1052,10 @@ void Renderer_ACES_OutputTransform20::fwd(const void * inImg, void * outImg, lon
     for(long idx=0; idx<numPixels; ++idx)
     {
         const ACES2::f3 RGBIn {in[0], in[1], in[2]};
-        const ACES2::f3 JMh           = ACES2::RGB_to_JMh(RGBIn, m_pIn);
+        const ACES2::f3 Aab           = ACES2::RGB_to_Aab(RGBIn, m_pIn);
+        const ACES2::f3 JMh           = ACES2::Aab_to_JMh(Aab, m_pIn);
         const ACES2::ResolvedSharedCompressionParameters rp = resolve_CompressionParams(JMh[2], m_s);
-        const float J_ts = ACES2::tonescale_fwd(JMh[0], m_pIn, m_t);
+        const float J_ts = ACES2::tonescale_A_to_J_fwd(Aab[0], m_pIn, m_t);
         const ACES2::f3 tonemappedJMh = ACES2::chroma_compress_fwd(JMh, J_ts, rp, m_c);
         const ACES2::f3 compressedJMh = ACES2::gamut_compress_fwd(tonemappedJMh, rp, m_g);
         const ACES2::f3 RGBOut        = ACES2::JMh_to_RGB(compressedJMh, m_pOut);

From da17d7c1432aa1fe84582703275a7bf79e7cb89b Mon Sep 17 00:00:00 2001
From: Kevin Wheatley <kevin.wheatley@framestore.com>
Date: Wed, 5 Feb 2025 17:46:16 +0000
Subject: [PATCH 61/70] Test intrinsics for compression Norm calculation

Signed-off-by: Kevin Wheatley <kevin.wheatley@framestore.com>
---
 .../ops/fixedfunction/ACES2/Transform.cpp     | 52 +++++++++++++++++++
 1 file changed, 52 insertions(+)

diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
index 715bac8b8c..51f13b357f 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
@@ -8,6 +8,9 @@
 #include <cmath>
 #include <limits>
 
+#include "SSE2.h"
+#include "AVX.h"
+
 namespace OCIO_NAMESPACE
 {
 
@@ -245,6 +248,30 @@ f3 JMh_to_RGB(const f3 &JMh, const JMhParams &p)
 // Tonescale / Chroma compress
 //
 
+//TODO: move to header
+// https://stackoverflow.com/questions/6996764/fastest-way-to-do-horizontal-sse-vector-sum-or-other-reduction/35270026#35270026
+// https://gcc.godbolt.org/#compilers:!((compiler:g6,options:%27-xc+-O3+-Wall+-fverbose-asm+-march%3Dhaswell+-mno-avx%27,sourcez:MQSwdgxgNgrgJgUwAQB4QFt3gC4CdwB0AFgHwBQoksiqAztnDseWQPStLZEi1I9JQQAa2QBZAIa5x2AOS0ANEgBGMbJwDuCcSLicA9kgh70ABxBRk2A0oTZsCXIb2ICbDtHFgA5khhgAZnq42H7SCFAAnkiIECCIvFa%2BtMjoegBu4ia0rLREMP4muuD0Wrp6/kipaURQWWT%2BUHrSSES0MOgA%2BlkdtMkAjAAUHR3ofQBMABxIaQCUSADeSEvLK6tr62vs00gAvEgA2kgAIkgAwkgAPkgAQkgAgkgAumQrw6OTSLn5y3sjnV/%2BBoILq0AZpRTgpAdUSiDoAZQAEgBVABiKIAMgBRAZjRQAZkUAAZFH0ZjMANxLLaHc4nK63B7PV4jcZTNroXhLX6YDriOBwEFgxQAimbDjs3h7Q5HADU51OMrpNxlDzuMtuTOWAI2XKhPKqCBqgoBwvatFFqy2B2W5yWStl8sVTypHAsai4yCMpnMDiQ4jSejifsq6WUUVwCBgtHAPgBLy1Zp1uz1nT5At6Awlwry/gp8aWEZCuDArx5EDS2F6HX8eLGmbNeYAvmR6o1mq12iCesk8UMWR9Zgt81D%2B2yc8m/iN0oa4DATILZpT1lslLgmnAIOJ6Ehwgh0AgwJWkAS%2BvokLjCcO3qzPonuan%2BQvs/k8yttet71O0oaoMac6aOVFLZuC8IgWnEKAKgAWhIAQ9HUcDIOHCUNk/NNu3rDln1zclh0LGBiw2Sdy0rWhq1rTDzVw5sWy2Wh0AgixHCMGhowALwQRQVDUKMYEYqJaEaTRHD8CxeicRAoPY5B%2BHEJATDXJQLHQVsmjUDtOm6Ig0z7d4pkHeYr1HW8OQnHltMfLIhWmV8E1Mj8Uw6CyBSsrMJVsgtbAIkt1mIisqxrOt3OoltQH8RAKmGO4ADUAA1hlU9t2TGABWAA2Lt/QAD101K0psodmT06YhN1P48o6Td6CyCqslZMEPJHYrqhAUCzPQCqECyvBxAgbB/FZJ8kFJSlgNasDWSM5qb3a3lLNBNIhIhECiAtZYtjTTgiGQUrJpWfDCI0rtegQXs0lZNakw2sAijAQQwEsbbPh7aYHGjPQwEUdRuAgMD%2BD0ExsAwCCkECRwYti4ctgGMADHULd7Fu%2BhcBgPqQA%2BhQ/Ruv0oCgJByi2vc/QjQmkAAFluLAwAwdoZjIZtgAPRh/BbNx2HZrY4D0GAlIQMguZ5iwWnZLp02SOtr0mXQ0jgOZFiTBWtjSZNDluK5GSmm8/AisqeW1hB/HABAXNBS6FY2TmPpkNR1CCXAIi%2B5APsiJJkDTOEjkUB7v0cKxUe4bxSa8SQlHELxkCUEAj3UKOwM8JAUQABT9flNY%2BAFsFMT8DSNKz9f8RRiIRwoF1li0tg9En%2BFhkNqlqOA04mXR30/KrsG6QpMxzTOTEag6fL8ysBX8NKyaGHl0NoOAwTgbCyRC2iOY5vhD1rMgcGFzsEDMWtuwQCXRxAJAsrligQHCg2RwhhKitZI/uFH3XOj8ExeqEbgOm3kBR4GLLFBPpcOoth4iggDBwnhdCwzAFBeIeBUZA2/PcOKAhbC8CjJ8f0bswz2CQDHLg3M1APWNjGYMVQKDhGSI3e%2B38yZPx6DmIEn8d51j/lCGE8JkRoixAMPoRJ8SKDGPPchN1z5UJMo/NCj4v4UQfmTf%2BjVJYTGobWOhAIgSNCYSAPoaVMKj0LuwxEqIMTYl4UgYkx4BHzxWFsOE8MTCk1KtgW2O5lIHkrGI9kKjJECmkUFdoeiWggFrH3LyhFB7RkGtGCinjBGAOsRwOEcJMRjFrg3eJ/dHJdR6n1TRMioBEjN
jYpJcidzfhLPgmAbUo5OG9GJM8VQkb2D5PjCoclBD2CkHjEwWTcC6FwLWeQWVMDyEvDRDeR1fHdh7D0ISPQGK4xYggXSd9j4zEMisLKs1nK5JYfI3CGytnoV8b/PZeFQklnCVoyYPQgksKbKzReKdMiI1BmudASAJBSFkLwCGLQ7AmAAFzsHoG/acuAGjwQIF6VgABHGACB6DozANkPotY%2BgAE4%2BhkzSqwIg8EoJWCku0KCHV0pQTxfgNiH1sD8VYKisYDKyZ9EJMAeljLmWJXUiLboDEvnLI%2BCfQqywjDIrUIokyABFPiug9hZX2cKjGYrjKNCOHxPGex2RSr5PKl0gTVUgz2AMJAUFFAmuPniJAMpj4AHZ/4pKtVlfKdMVgiu3OK7g%2Br1WORzr%2BVy7QtWz0ldKxqdF2ieuTEas1ZqsqngdRah1KVLU2v/oSJNWV7XH1oQ6p1w5XVKuKuycN3jBQqrVYoD1aqQ2ugMIa41pr63H1TQ6jNWUs3HxzS6xVTUbyNGTIWtVOqkBjQjXW0d0bY3mrTYmh11qkDOoVaK7tHxuCzTURYY0Yay0mU9USGNVaTIjqjQ2rKTbj4TvTWm%2BNmap1pvyjOuduau3ivZLNSeoJGjlpAOXDg6EsjRANlob5hgoCeC8HIT4DChYAyBlgDiuAzlFguWWfyZFAqYXuUAA%3D%3D)),filterAsm:(binary:!t,commentOnly:!t,directives:!t,intel:!t,labels:!t),version:3
+// Alternate https://github.com/vectorclass/version2/blob/master/vectorf128.h#L1048
+
+#if OCIO_USE_SSE2 || OCIO_USE_AVX
+inline float hsum_ps_sse1(__m128 v) {                              // v    = [   D   C |   B   A ]
+    __m128 shuf   = _mm_shuffle_ps(v, v, _MM_SHUFFLE(2, 3, 0, 1)); // shuf = [   C   D |   A   B ]
+    __m128 sums   = _mm_add_ps(v, shuf);                           // sums = [ D+C C+D | B+A A+B ]
+    shuf          = _mm_movehl_ps(shuf, sums);                     // shuf = [   C   D | D+C C+D ]
+    sums          = _mm_add_ss(sums, shuf);                        // sums = [ D+C C+D | B+A A+B+C+D ]
+    return    _mm_cvtss_f32(sums);                                 // A+B+C+D
+}
+#endif
+#ifdef OCIO_USE_AVX
+inline float hsum256_ps_avx(__m256 v) {
+    __m128 vlow  = _mm256_castps256_ps128(v);
+    __m128 vhigh = _mm256_extractf128_ps(v, 1); // high 128
+    __m128 v128  = _mm_add_ps(vlow, vhigh);     // add the low 128
+    return hsum_ps_sse1(v128);
+}
+#endif
+
+
 inline float chroma_compress_norm(float h, float chroma_compress_scale)
 {
     const float h_rad = to_radians(h);
@@ -255,6 +282,30 @@ inline float chroma_compress_norm(float h, float chroma_compress_scale)
     const float cos_hr3 = 4.0f * cos_hr1 * cos_hr1 * cos_hr1 - 3.0f * cos_hr1;
     const float sin_hr3 = 3.0f * sin_hr1 - 4.0f * sin_hr1 * sin_hr1 * sin_hr1;
 
+// TODO: benchmark this across multiple platforms to justify the multiple code paths.
+#if OCIO_USE_SSE2
+#if OCIO_USE_AVX
+    alignas(AVX_SIMD_BYTES) float trig_angles_hr[8] = {cos_hr1, cos_hr2, cos_hr3, sin_hr1, sin_hr2, sin_hr3, 1.0f, 0.0f};
+    __m256 scaling = _mm256_broadcast_ss(&chroma_compress_scale);
+    __m256 trigs = _mm256_load_ps(trig_angles_hr);
+    __m256 trig_weights = _mm256_mul_ps(_mm256_set_ps(11.34072f, 16.46899f, 14.66441f, 4.66441f, -6.37224f, 9.19364f, 77.12896f, 0.0f), scaling);
+    __m256 t1 = _mm256_mul_ps(trigs, trig_weights);
+    return hsum256_ps_avx(t1);
+#else
+    alignas(SSE2_SIMD_BYTES) float cosine_hr[4] = {cos_hr1, cos_hr2, cos_hr3, 0.0f};
+    alignas(SSE2_SIMD_BYTES) float sine_hr[4] = {sin_hr1, sin_hr2, sin_hr3, 1.0f};
+    __m128 scaling = _mm_load1_ps(&chroma_compress_scale);
+    __m128 cosines = _mm_load_ps(cosine_hr);
+    __m128 cosine_weights = _mm_mul_ps(_mm_set_ps(11.34072f, 16.46899f, 14.66441f, 0.0f), scaling);
+    __m128 sines = _mm_load_ps(sine_hr);
+    __m128 sine_weights =  _mm_mul_ps(_mm_set_ps(4.66441f, -6.37224f, 9.19364f, 77.12896f), scaling);
+
+    __m128 t1 = _mm_mul_ps(cosines, cosine_weights);
+    __m128 t2 = _mm_mul_ps(sines, sine_weights);
+    __m128 t3 = _mm_add_ps(t1, t2); // TODO use fmadd_ps_sse2 from Lut1DOpCPU_SSE2.cpp ?
+    return hsum_ps_sse1(t3);
+#endif
+#else
     const float M = 11.34072f * cos_hr1 +
               16.46899f * cos_hr2 +
                7.88380f * cos_hr3 +
@@ -264,6 +315,7 @@ inline float chroma_compress_norm(float h, float chroma_compress_scale)
               77.12896f;
 
     return M * chroma_compress_scale; // TODO: is it worth prescaling the above M constants ?
+#endif
 }
 
 inline float toe_fwd( float x, float limit, float k1_in, float k2_in)

From 7e96bdf4dd91bf953dec5d420aed60574a2abeb0 Mon Sep 17 00:00:00 2001
From: Kevin Wheatley <kevin.wheatley@framestore.com>
Date: Wed, 19 Feb 2025 19:28:51 +0000
Subject: [PATCH 62/70] Attempt to calculate sin/cos only once per pixel. Some
 minor micro optimisations. Further alignment of GPU with CPU code. Test
 values need evaluating. Some GPU results are different - TBD

Signed-off-by: Kevin Wheatley <kevin.wheatley@framestore.com>
---
 .../ops/fixedfunction/ACES2/Common.h          |   9 +-
 .../ops/fixedfunction/ACES2/Transform.cpp     | 214 ++++++++-------
 .../ops/fixedfunction/ACES2/Transform.h       |   9 +-
 .../ops/fixedfunction/FixedFunctionOpCPU.cpp  |  40 ++-
 .../ops/fixedfunction/FixedFunctionOpGPU.cpp  | 246 ++++++++++++++----
 5 files changed, 348 insertions(+), 170 deletions(-)

diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h b/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
index b01bde93de..6357721928 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
@@ -95,14 +95,15 @@ struct Table1D : public TableBase, std::array<float, TableBase::total_size>
 
 struct JMhParams
 {
-    float F_L_n;    // F_L normalised
-    float cz;
-    float A_w;
-    float A_w_J;
     m33f MATRIX_RGB_to_CAM16_c;
     m33f MATRIX_CAM16_c_to_RGB;
     m33f MATRIX_cone_response_to_Aab;
     m33f MATRIX_Aab_to_cone_response;
+    float F_L_n;    // F_L normalised
+    float cz;
+    float inv_cz;   // 1/cz
+    float A_w_J;
+    float inv_A_w_J; // 1/A_w_J
 };
 
 struct ToneScaleParams
diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
index 51f13b357f..a91c316342 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
@@ -141,9 +141,9 @@ inline float Achromatic_n_to_J(float A, const float cz)
     return J_scale * powf(A, cz);
 }
 
-inline float J_to_Achromatic_n(float J, const float cz)
+inline float J_to_Achromatic_n(float J, const float inv_cz)
 {
-    return powf(J / J_scale, 1.f / cz);
+    return powf(J * (1.0f / J_scale), inv_cz);
 }
 
 // Optimization for achromatic values
@@ -157,13 +157,13 @@ inline float _A_to_Y(float A, const JMhParams &p)
 
 inline float _J_to_Y(float abs_J, const JMhParams &p)
 {
-    return _A_to_Y(J_to_Achromatic_n(abs_J, p.cz), p);
+    return _A_to_Y(J_to_Achromatic_n(abs_J, p.inv_cz), p);
 }
 
 inline float _Y_to_J(float abs_Y, const JMhParams &p)
 {
     const float Ra = _post_adaptation_cone_response_compression_fwd(abs_Y * p.F_L_n);
-    const float J  = Achromatic_n_to_J(Ra / p.A_w_J, p.cz);
+    const float J  = Achromatic_n_to_J(Ra * p.inv_A_w_J, p.cz);
     return J;
 }
 
@@ -209,21 +209,28 @@ f3 RGB_to_JMh(const f3 &RGB, const JMhParams &p)
     return JMh;
 }
 
-inline f3 JMh_to_Aab(const f3 &JMh, const JMhParams &p)
+f3 JMh_to_Aab(const f3 &JMh, const float &cos_hr, const float &sin_hr, const JMhParams &p)
 {
     const float J = JMh[0];
     const float M = JMh[1];
-    const float h = JMh[2];
 
+    const float A = J_to_Achromatic_n(J, p.inv_cz);
+    const float a = M * cos_hr;
+    const float b = M * sin_hr;
+    return {A, a, b};
+}
+
+f3 JMh_to_Aab(const f3 &JMh, const JMhParams &p)
+{
+    const float h = JMh[2];
     const float h_rad = to_radians(h);
+    const float cos_hr = cos(h_rad);
+    const float sin_hr = sin(h_rad);
 
-    const float A = J_to_Achromatic_n(J, p.cz);
-    const float a = M * cos(h_rad);
-    const float b = M * sin(h_rad);
-    return {A, a, b};
+    return JMh_to_Aab(JMh, cos_hr, sin_hr, p);
 }
 
-inline f3 Aab_to_RGB(const f3 &Aab, const JMhParams &p)
+f3 Aab_to_RGB(const f3 &Aab, const JMhParams &p)
 {
     const f3 rgb_a = mult_f3_f33(Aab, p.MATRIX_Aab_to_cone_response);
 
@@ -263,59 +270,60 @@ inline float hsum_ps_sse1(__m128 v) {                              // v    = [
 }
 #endif
 #ifdef OCIO_USE_AVX
-inline float hsum256_ps_avx(__m256 v) {
-    __m128 vlow  = _mm256_castps256_ps128(v);
-    __m128 vhigh = _mm256_extractf128_ps(v, 1); // high 128
-    __m128 v128  = _mm_add_ps(vlow, vhigh);     // add the low 128
+inline float hsum256_ps_avx(__m256 v) {         // v     = [   H   G |   F   E |   D   C |   B   A ]
+    __m128 vlow  = _mm256_castps256_ps128(v);   // vlow  = [   D   C |   B   A ]
+    __m128 vhigh = _mm256_extractf128_ps(v, 1); // vhigh = [   H   G |   F   E ]
+    __m128 v128  = _mm_add_ps(vlow, vhigh);     // v128  = [ H+D G+C | F+B E+A ]
     return hsum_ps_sse1(v128);
 }
 #endif
 
 
-inline float chroma_compress_norm(float h, float chroma_compress_scale)
+float chroma_compress_norm(float cos_hr1, float sin_hr1, float chroma_compress_scale)
 {
-    const float h_rad = to_radians(h);
-    const float cos_hr1 = cos(h_rad);
-    const float sin_hr1 = sin(h_rad);
     const float cos_hr2 = 2.0f * cos_hr1 * cos_hr1 - 1.0f;
     const float sin_hr2 = 2.0f * cos_hr1 * sin_hr1;
     const float cos_hr3 = 4.0f * cos_hr1 * cos_hr1 * cos_hr1 - 3.0f * cos_hr1;
     const float sin_hr3 = 3.0f * sin_hr1 - 4.0f * sin_hr1 * sin_hr1 * sin_hr1;
 
-// TODO: benchmark this across multiple platforms to justify the multiple code paths.
+    constexpr unsigned int AVX_ALIGNMENT = 32;
+    alignas(AVX_ALIGNMENT) const float trig_angles_hr[8] = {
+        cos_hr1, cos_hr2, cos_hr3, 0.0f,
+        sin_hr1, sin_hr2, sin_hr3, 1.0f
+    };
+    alignas(AVX_ALIGNMENT) static constexpr float weights[8] = { // TODO: investigate reordering of the entries so we are summing equal magnitude values first?
+        11.34072f, 16.46899f,  7.88380f,  0.0f,     // cos(h), cos(2h), cos(3h) weights; 4th lane pairs with the 0.0f trig entry
+        14.66441f, -6.37224f,  9.19364f, 77.12896f  // sin(h), sin(2h), sin(3h) weights; constant term pairs with the 1.0f trig entry
+    };
+    // TODO: benchmark this across multiple platforms to justify the multiple code paths.
 #if OCIO_USE_SSE2
 #if OCIO_USE_AVX
-    alignas(AVX_SIMD_BYTES) float trig_angles_hr[8] = {cos_hr1, cos_hr2, cos_hr3, sin_hr1, sin_hr2, sin_hr3, 1.0f, 0.0f};
-    __m256 scaling = _mm256_broadcast_ss(&chroma_compress_scale);
-    __m256 trigs = _mm256_load_ps(trig_angles_hr);
-    __m256 trig_weights = _mm256_mul_ps(_mm256_set_ps(11.34072f, 16.46899f, 14.66441f, 4.66441f, -6.37224f, 9.19364f, 77.12896f, 0.0f), scaling);
-    __m256 t1 = _mm256_mul_ps(trigs, trig_weights);
-    return hsum256_ps_avx(t1);
+    __m256 trigs        = _mm256_load_ps(trig_angles_hr);
+    __m256 trig_weights = _mm256_load_ps(weights);
+    __m256 t1           = _mm256_mul_ps(trigs, trig_weights);
+    const float M       = hsum256_ps_avx(t1);
 #else
-    alignas(SSE2_SIMD_BYTES) float cosine_hr[4] = {cos_hr1, cos_hr2, cos_hr3, 0.0f};
-    alignas(SSE2_SIMD_BYTES) float sine_hr[4] = {sin_hr1, sin_hr2, sin_hr3, 1.0f};
-    __m128 scaling = _mm_load1_ps(&chroma_compress_scale);
-    __m128 cosines = _mm_load_ps(cosine_hr);
-    __m128 cosine_weights = _mm_mul_ps(_mm_set_ps(11.34072f, 16.46899f, 14.66441f, 0.0f), scaling);
-    __m128 sines = _mm_load_ps(sine_hr);
-    __m128 sine_weights =  _mm_mul_ps(_mm_set_ps(4.66441f, -6.37224f, 9.19364f, 77.12896f), scaling);
-
-    __m128 t1 = _mm_mul_ps(cosines, cosine_weights);
-    __m128 t2 = _mm_mul_ps(sines, sine_weights);
-    __m128 t3 = _mm_add_ps(t1, t2); // TODO use fmadd_ps_sse2 from Lut1DOpCPU_SSE2.cpp ?
-    return hsum_ps_sse1(t3);
+    __m128 cosines        = _mm_load_ps(trig_angles_hr);
+    __m128 sines          = _mm_load_ps(&trig_angles_hr[4]);
+    __m128 cosine_weights = _mm_load_ps(weights);
+    __m128 sine_weights   = _mm_load_ps(&weights[4]);
+
+    __m128 t1     = _mm_mul_ps(cosines, cosine_weights);
+    __m128 t2     = _mm_mul_ps(sines, sine_weights);
+    __m128 t3     = _mm_add_ps(t1, t2); // TODO use fmadd_ps_sse2 from Lut1DOpCPU_SSE2.cpp ?
+    const float M = hsum_ps_sse1(t3);
 #endif
 #else
-    const float M = 11.34072f * cos_hr1 +
-              16.46899f * cos_hr2 +
-               7.88380f * cos_hr3 +
-              14.66441f * sin_hr1 +
-              -6.37224f * sin_hr2 +
-               9.19364f * sin_hr3 +
-              77.12896f;
-
-    return M * chroma_compress_scale; // TODO: is it worth prescaling the above M constants ?
+    const float M = weights[0] * trig_angles_hr[0] +
+                    weights[1] * trig_angles_hr[1] +
+                    weights[2] * trig_angles_hr[2] +
+                    weights[4] * trig_angles_hr[4] +
+                    weights[5] * trig_angles_hr[5] +
+                    weights[6] * trig_angles_hr[6] +
+                    weights[7];
 #endif
+
+    return M * chroma_compress_scale; // TODO: is it worth prescaling the above weights?
 }
 
 inline float toe_fwd( float x, float limit, float k1_in, float k2_in)
@@ -400,7 +408,7 @@ float tonescale_A_to_J_fwd(const float A, const JMhParams &p, const ToneScalePar
     return tonescale_A_to_J<false>(A, p, pt);
 }
 
-f3 chroma_compress_fwd(const f3 &JMh, const float J_ts, const ResolvedSharedCompressionParameters &pr, const ChromaCompressParams &pc)
+f3 chroma_compress_fwd(const f3 &JMh, const float J_ts, const float Mnorm, const ResolvedSharedCompressionParameters &pr, const ChromaCompressParams &pc)
 {
     const float J = JMh[0];
     const float M = JMh[1];
@@ -412,7 +420,6 @@ f3 chroma_compress_fwd(const f3 &JMh, const float J_ts, const ResolvedSharedComp
     {
         const float nJ = J_ts / pr.limit_J_max;
         const float snJ = std::max(0.f, 1.f - nJ);
-        const float Mnorm = chroma_compress_norm(h, pc.chroma_compress_scale);
         const float limit = powf(nJ, pr.model_gamma_inv) * pr.reachMaxM / Mnorm;
 
         M_cp = M * powf(J_ts / J, pr.model_gamma_inv);
@@ -425,7 +432,7 @@ f3 chroma_compress_fwd(const f3 &JMh, const float J_ts, const ResolvedSharedComp
     return {J_ts, M_cp, h};
 }
 
-f3 chroma_compress_inv(const f3 &JMh, const float J, const ResolvedSharedCompressionParameters &pr, const ChromaCompressParams &pc)
+f3 chroma_compress_inv(const f3 &JMh, const float J, const float Mnorm, const ResolvedSharedCompressionParameters &pr, const ChromaCompressParams &pc)
 {
     const float J_ts = JMh[0];
     const float M_cp = JMh[1];
@@ -436,7 +443,6 @@ f3 chroma_compress_inv(const f3 &JMh, const float J, const ResolvedSharedCompres
     {
         const float nJ = J_ts / pr.limit_J_max;
         const float snJ = std::max(0.f, 1.f - nJ);
-        const float Mnorm = chroma_compress_norm(h, pc.chroma_compress_scale);
         const float limit = powf(nJ, pr.model_gamma_inv) * pr.reachMaxM / Mnorm;
 
         M = M_cp / Mnorm;
@@ -478,6 +484,7 @@ JMhParams init_JMhParams(const Primaries &prims)
 
     const float F_L_n = F_L / reference_luminance;
     const float cz    = model_gamma();
+    const float inv_cz = 1.0f / cz;
 
     const f3 D_RGB = {
         F_L_n * Y_W / RGB_w[0],
@@ -500,6 +507,7 @@ JMhParams init_JMhParams(const Primaries &prims)
     const m33f cone_response_to_Aab = mult_f33_f33(scale_f33(Identity_M33, f3_from_f(cam_nl_scale)), base_cone_response_to_Aab); // TODO: this scale maybe ill conditioning the matrix
     const float A_w   = cone_response_to_Aab[0] * RGB_AW[0] + cone_response_to_Aab[1] * RGB_AW[1] + cone_response_to_Aab[2] * RGB_AW[2];
     const float A_w_J = _post_adaptation_cone_response_compression_fwd(F_L);
+    const float inv_A_w_J = 1.0f / A_w_J;
 
     // TODO: evaluate the condition number of the below matrices and consider extracting a power of 2 scale out of them may improve noise behaviour
     //       power of 2 to make cost of extra scaling limited vs generalised multiply/divide ?
@@ -517,14 +525,15 @@ JMhParams init_JMhParams(const Primaries &prims)
     const m33f MATRIX_Aab_to_cone_response = invert_f33(MATRIX_cone_response_to_Aab);
 
     JMhParams p = {
-        F_L_n,
-        cz,
-        A_w,
-        A_w_J,
         MATRIX_RGB_to_CAM16_c,
         MATRIX_CAM16_c_to_RGB,
         MATRIX_cone_response_to_Aab,
-        MATRIX_Aab_to_cone_response
+        MATRIX_Aab_to_cone_response,
+        F_L_n,
+        cz,
+        inv_cz,
+        A_w_J,
+        inv_A_w_J
     };
    return p;
 }
@@ -582,7 +591,7 @@ void find_reach_corners_table(std::array<f3, totalCornerCount>& JMh_corners, con
     // as such does not form a cube, but is formed by the transformed 3 lower planes of the cube and
     // the plane at J = limitJ
     std::array<f3, cuspCornerCount> temp_JMh_corners;
-    const float limitA = J_to_Achromatic_n(limitJ, params.cz);
+    const float limitA = J_to_Achromatic_n(limitJ, params.inv_cz);
 
     unsigned int min_index = 0;
     for (unsigned int i = 0; i != cuspCornerCount; ++i)
@@ -944,33 +953,21 @@ inline bool outside_hull(const f3 &rgb, float maxRGBtestVal)
     return rgb[0] > maxRGBtestVal || rgb[1] > maxRGBtestVal || rgb[2] > maxRGBtestVal;
 }
 
-inline float get_focus_gain(float J, float cuspJ, float limit_J_max, float focus_dist)
+inline float get_focus_gain(float J, float analytical_threshold, float limit_J_max, float focus_dist)
 {
-    const float thr = lerpf(cuspJ, limit_J_max, focus_gain_blend);
-
-    /* Note from Pekka
-
-    If we’d be willing to make a tiny change in the pixel values, we can eliminate one pow() from getFocusGain() function by changing the focusAdjustGain value to 0.5 from the current 0.55. It then becomes:
-
-      float gain = log10((limitJmax - thr) / max(0.0001f, (limitJmax - min(limitJmax, J))));
-      return gain * gain + 1.0f;
-    from the current:
-
-      float gain = (limitJmax - thr) / max(0.0001f, (limitJmax - min(limitJmax, J)));
-      return pow(log10(gain), 1.0f / focusAdjustGain) + 1.0f;
-    
-    */
-    float gain = 1.0f;
-    if (J > thr)
+    float gain = limit_J_max * focus_dist;
+    if (J > analytical_threshold)
     {
-        // Approximate inverse required above threshold
-        gain = (limit_J_max - thr) / std::max(0.0001f, (limit_J_max - std::min(limit_J_max, J)));
-        gain = powf(log10(gain), focus_adjust_gain_inv) + 1.f;
+        // Approximate inverse required above threshold due to the introduction of J in the calculation
+        float gain_adjustment = log10((limit_J_max - analytical_threshold) / std::max(0.0001f, limit_J_max - J));
+        //gain = powf(gain, focus_adjust_gain_inv) + 1.f;
+        gain_adjustment = gain_adjustment * gain_adjustment + 1.f;
+        gain = gain * gain_adjustment;
     }
-    return limit_J_max * focus_dist * gain;
+    return gain;
 }
 
-float solve_J_intersect(float J, float M, float focusJ, float maxJ, float slope_gain)
+float solve_J_intersect(float J, float M, float focusJ, float maxJ, float slope_gain) // TODO: eval placing maxJ as last patrameter?
 {
     const float M_scaled = M / slope_gain;
     const float a = M_scaled / focusJ;
@@ -1023,8 +1020,8 @@ inline float estimate_line_and_boundary_intersection_M(const float J_axis_inters
     // line from origin to J,M Max       l1(x) = J/M * x
     // line from J Intersect' with slope l2(x) = slope * x + Intersect'
 
-    return shifted_intersection / ((J_max / M_max) - slope);
-    //return shifted_intersection * M_max / (J_max - slope * M_max);
+    //return shifted_intersection / ((J_max / M_max) - slope);
+    return shifted_intersection * M_max / (J_max - slope * M_max);
 }
 
 float find_gamut_boundary_intersection(const f2 &JM_cusp, float J_max, float gamma_top_inv, float gamma_bottom_inv, const float J_intersect_source, const float slope, const float J_intersect_cusp)
@@ -1064,7 +1061,7 @@ inline float remap_M(const float M, const float gamut_boundary_M, const float re
     const float proportion = std::max(boundary_ratio, compression_threshold);
     const float threshold  = proportion * gamut_boundary_M;
 
-    if (proportion >= 1.0f || M <= threshold)
+    if (M <= threshold || proportion >= 1.0f)
         return M;
 
     // Translate to place threshold at zero
@@ -1086,21 +1083,9 @@ f3 compressGamut(const f3 &JMh, float Jx, const ACES2::ResolvedSharedCompression
     const float M = JMh[1];
     const float h = JMh[2];
     
-    // TODO: test migrating these to calling function so they are only tested once for the inverse?
-    if (J <= 0.0f) // Limit to +ve J values // TODO test this is needed
-    {
-        return {0.0f, 0.f, h};
-    }
-    if (M <= 0.0f || J > sr.limit_J_max) // We compress M only so avoid mapping zero
-                                         // Above the expected maximum we explicitly map to 0 M
-    {
-        return {J, 0.f, h};
-    }
-
-    const float slope_gain = get_focus_gain(Jx, hdp.JMcusp[0], sr.limit_J_max, p.focus_dist);
+    const float slope_gain = get_focus_gain(Jx, hdp.analytical_threshold, sr.limit_J_max, p.focus_dist);
     const float J_intersect_source = solve_J_intersect(J, M, hdp.focusJ, sr.limit_J_max, slope_gain);
     const float gamut_slope = compute_compression_vector_slope(J_intersect_source, hdp.focusJ, sr.limit_J_max, slope_gain);
- 
 
     const float J_intersect_cusp = solve_J_intersect(hdp.JMcusp[0], hdp.JMcusp[1], hdp.focusJ, sr.limit_J_max, slope_gain);
     const float gamut_boundary_M = find_gamut_boundary_intersection(hdp.JMcusp, sr.limit_J_max, hdp.gamma_top_inv, hdp.gamma_bottom_inv, J_intersect_source, gamut_slope, J_intersect_cusp);
@@ -1145,16 +1130,42 @@ HueDependantGamutParams init_HueDependantGamutParams(const float hue, const Reso
 
 f3 gamut_compress_fwd(const f3 &JMh, const ResolvedSharedCompressionParameters &sr, const GamutCompressParams &p)
 {
-    const HueDependantGamutParams hdp = init_HueDependantGamutParams(JMh[2], sr, p);
+    const float J = JMh[0];
+    const float M = JMh[1];
+    const float h = JMh[2];
+
+    if (J <= 0.0f) // Limit to +ve J values // TODO test this is needed
+    {
+        return {0.0f, 0.f, h};
+    }
+    if (M <= 0.0f || J > sr.limit_J_max) // We compress M only so avoid mapping zero
+                                         // Above the expected maximum we explicitly map to 0 M
+    {
+        return {J, 0.f, h};
+    }
+    const HueDependantGamutParams hdp = init_HueDependantGamutParams(h, sr, p);
     
     return compressGamut<false>(JMh, JMh[0], sr, p, hdp);
 }
 
 f3 gamut_compress_inv(const f3 &JMh, const ResolvedSharedCompressionParameters &sr, const GamutCompressParams &p)
 {
-    const HueDependantGamutParams hdp = init_HueDependantGamutParams(JMh[2], sr, p);
+    const float J = JMh[0];
+    const float M = JMh[1];
+    const float h = JMh[2];
+
+    if (J <= 0.0f) // Limit to +ve J values // TODO test this is needed
+    {
+        return {0.0f, 0.f, h};
+    }
+    if (M <= 0.0f || J > sr.limit_J_max) // We compress M only so avoid mapping zero
+                                         // Above the expected maximum we explicitly map to 0 M
+    {
+        return {J, 0.f, h};
+    }
+    const HueDependantGamutParams hdp = init_HueDependantGamutParams(h, sr, p);
 
-    float Jx = JMh[0];
+    float Jx = J;
     if (Jx > hdp.analytical_threshold)
     {
         // Approximation above threshold
@@ -1175,11 +1186,12 @@ std::array<testData, gamma_test_count> generate_gamma_test_data(const f2 &JMcusp
 {
     const std::array<float, gamma_test_count> testPositions = {0.01f, 0.1f, 0.5f, 0.8f, 0.99f};
     std::array<testData, gamma_test_count> data;
+    const float analytical_threshold = lerpf(JMcusp[0], limit_J_max, focus_gain_blend);
+    const float focusJ = compute_focusJ(JMcusp[0], mid_J, limit_J_max);
 
-    std::generate(data.begin(), data.end(), [JMcusp, limit_J_max, mid_J, focus_dist, testPositions, hue, testIndex = 0]() mutable {
+    std::generate(data.begin(), data.end(), [JMcusp, limit_J_max, focus_dist, testPositions, hue, analytical_threshold, focusJ, testIndex = 0]() mutable {
         const float testJ = lerpf(JMcusp[0], limit_J_max, testPositions[testIndex]);
-        const float focusJ = compute_focusJ(JMcusp[0], mid_J, limit_J_max);
-        const float slope_gain = get_focus_gain(testJ, JMcusp[0], limit_J_max, focus_dist);
+        const float slope_gain = get_focus_gain(testJ, analytical_threshold, limit_J_max, focus_dist);
         const float J_intersect_source = solve_J_intersect(testJ, JMcusp[1], focusJ, limit_J_max, slope_gain);
         testData result = {
             { testJ, JMcusp[1], hue },
diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.h b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.h
index 6d87224ba4..11adc13fdb 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.h
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.h
@@ -23,14 +23,19 @@ GamutCompressParams init_GamutCompressParams(float peakLuminance, const JMhParam
 f3 RGB_to_Aab(const f3 &RGB, const JMhParams &p);
 f3 Aab_to_JMh(const f3 &Aab, const JMhParams &p);
 f3 RGB_to_JMh(const f3 &RGB, const JMhParams &p);
+f3 JMh_to_Aab(const f3 &JMh, const JMhParams &p);
+f3 JMh_to_Aab(const f3 &JMh, const float &cos_hr, const float &sin_hr, const JMhParams &p);
+f3 Aab_to_RGB(const f3 &Aab, const JMhParams &p);
 f3 JMh_to_RGB(const f3 &JMh, const JMhParams &p);
 
 float tonescale_fwd(const float J, const JMhParams &p, const ToneScaleParams &pt);
 float tonescale_inv(const float J, const JMhParams &p, const ToneScaleParams &pt);
 float tonescale_A_to_J_fwd(const float A, const JMhParams &p, const ToneScaleParams &pt);
 
-f3 chroma_compress_fwd(const f3 &JMh, const float J_ts, const ResolvedSharedCompressionParameters &ps, const ChromaCompressParams &pc);
-f3 chroma_compress_inv(const f3 &JMh, const float J, const ResolvedSharedCompressionParameters &ps, const ChromaCompressParams &pc);
+float chroma_compress_norm(float cos_hr, float sin_hr, float chroma_compress_scale);
+
+f3 chroma_compress_fwd(const f3 &JMh, const float J_ts, const float Mnorm, const ResolvedSharedCompressionParameters &ps, const ChromaCompressParams &pc);
+f3 chroma_compress_inv(const f3 &JMh, const float J, const float Mnorm, const ResolvedSharedCompressionParameters &ps, const ChromaCompressParams &pc);
 
 f3 gamut_compress_fwd(const f3 &JMh, const ResolvedSharedCompressionParameters &ps, const GamutCompressParams &p);
 f3 gamut_compress_inv(const f3 &JMh, const ResolvedSharedCompressionParameters &ps, const GamutCompressParams &p);
diff --git a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpCPU.cpp b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpCPU.cpp
index 0ed39faeee..feb387db5f 100644
--- a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpCPU.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpCPU.cpp
@@ -1054,11 +1054,19 @@ void Renderer_ACES_OutputTransform20::fwd(const void * inImg, void * outImg, lon
         const ACES2::f3 RGBIn {in[0], in[1], in[2]};
         const ACES2::f3 Aab           = ACES2::RGB_to_Aab(RGBIn, m_pIn);
         const ACES2::f3 JMh           = ACES2::Aab_to_JMh(Aab, m_pIn);
+
         const ACES2::ResolvedSharedCompressionParameters rp = resolve_CompressionParams(JMh[2], m_s);
+        const float h_rad = ACES2::to_radians(JMh[2]);
+        const float cos_hr1 = std::cos(h_rad);
+        const float sin_hr1 = std::sin(h_rad);
+        const float Mnorm = ACES2::chroma_compress_norm(cos_hr1, sin_hr1, m_c.chroma_compress_scale);
+
         const float J_ts = ACES2::tonescale_A_to_J_fwd(Aab[0], m_pIn, m_t);
-        const ACES2::f3 tonemappedJMh = ACES2::chroma_compress_fwd(JMh, J_ts, rp, m_c);
+        const ACES2::f3 tonemappedJMh = ACES2::chroma_compress_fwd(JMh, J_ts, Mnorm, rp, m_c);
         const ACES2::f3 compressedJMh = ACES2::gamut_compress_fwd(tonemappedJMh, rp, m_g);
-        const ACES2::f3 RGBOut        = ACES2::JMh_to_RGB(compressedJMh, m_pOut);
+
+        const ACES2::f3 Aabout        = ACES2::JMh_to_Aab(compressedJMh, cos_hr1, sin_hr1, m_pOut);
+        const ACES2::f3 RGBOut        = ACES2::Aab_to_RGB(Aabout, m_pOut);
 
         out[0] = RGBOut[0];
         out[1] = RGBOut[1];
@@ -1079,12 +1087,20 @@ void Renderer_ACES_OutputTransform20::inv(const void * inImg, void * outImg, lon
     {
         const ACES2::f3 RGBout {in[0], in[1], in[2]};
         const ACES2::f3 compressedJMh = ACES2::RGB_to_JMh(RGBout, m_pOut);
+
         const ACES2::ResolvedSharedCompressionParameters rp = resolve_CompressionParams(compressedJMh[2], m_s);
-        const ACES2::f3 tonemappedJMh = ACES2::gamut_compress_inv(compressedJMh, rp, m_g);
-        const float J = ACES2::tonescale_inv(tonemappedJMh[0], m_pIn, m_t);
-        const ACES2::f3 JMh           = ACES2::chroma_compress_inv(tonemappedJMh, J, rp, m_c);
-        const ACES2::f3 RGBin         = ACES2::JMh_to_RGB(JMh, m_pIn);
+        const float h_rad = ACES2::to_radians(compressedJMh[2]);
+        const float cos_hr1 = std::cos(h_rad);
+        const float sin_hr1 = std::sin(h_rad);
+        const float Mnorm = ACES2::chroma_compress_norm(cos_hr1, sin_hr1, m_c.chroma_compress_scale);
 
+        const ACES2::f3 tonemappedJMh = ACES2::gamut_compress_inv(compressedJMh, rp, m_g);
+        const float J         = ACES2::tonescale_inv(tonemappedJMh[0], m_pIn, m_t);
+        const ACES2::f3 JMh   = ACES2::chroma_compress_inv(tonemappedJMh, J, Mnorm, rp, m_c);
+    
+        const ACES2::f3 Aab   = ACES2::JMh_to_Aab(JMh, cos_hr1, sin_hr1, m_pIn);
+        const ACES2::f3 RGBin = ACES2::Aab_to_RGB(Aab, m_pIn);
+    
         out[0] = RGBin[0];
         out[1] = RGBin[1];
         out[2] = RGBin[2];
@@ -1204,9 +1220,13 @@ void Renderer_ACES_TONESCALE_COMPRESS_20::fwd(const void * inImg, void * outImg,
     for(long idx=0; idx<numPixels; ++idx)
     {
         const float normalised_hue = ACES2::from_degrees(in[2]);
+        const float h_rad = ACES2::to_radians(normalised_hue);
+        const float cos_hr1 = cos(h_rad);
+        const float sin_hr1 = sin(h_rad);
+        const float Mnorm = ACES2::chroma_compress_norm(cos_hr1, sin_hr1, m_c.chroma_compress_scale);
         const ACES2::ResolvedSharedCompressionParameters rp = resolve_CompressionParams(normalised_hue, m_s);
         const float J_ts = ACES2::tonescale_fwd(in[0], m_p, m_t);
-        const ACES2::f3 JMh = ACES2::chroma_compress_fwd({in[0], in[1], normalised_hue}, J_ts, rp, m_c);
+        const ACES2::f3 JMh = ACES2::chroma_compress_fwd({in[0], in[1], normalised_hue}, J_ts, Mnorm, rp, m_c);
 
         out[0] = JMh[0];
         out[1] = JMh[1];
@@ -1226,9 +1246,13 @@ void Renderer_ACES_TONESCALE_COMPRESS_20::inv(const void * inImg, void * outImg,
     for(long idx=0; idx<numPixels; ++idx)
     {
         const float normalised_hue = ACES2::from_degrees(in[2]);
+        const float h_rad = ACES2::to_radians(normalised_hue);
+        const float cos_hr1 = cos(h_rad);
+        const float sin_hr1 = sin(h_rad);
+        const float Mnorm = ACES2::chroma_compress_norm(cos_hr1, sin_hr1, m_c.chroma_compress_scale);
         const ACES2::ResolvedSharedCompressionParameters rp = resolve_CompressionParams(normalised_hue, m_s);
         const float J = ACES2::tonescale_inv(in[0], m_p, m_t);
-        const ACES2::f3 JMh = ACES2::chroma_compress_inv({in[0], in[1],  normalised_hue}, J, rp, m_c);
+        const ACES2::f3 JMh = ACES2::chroma_compress_inv({in[0], in[1],  normalised_hue}, J, Mnorm, rp, m_c);
 
         out[0] = JMh[0];
         out[1] = JMh[1];
diff --git a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
index c6587d355b..8af86e0638 100644
--- a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
@@ -360,21 +360,49 @@ void _Add_WrapHueChannel_Shader(
     ss.newLine() << pxl << ".b = hwrap;";
 }
 
-void _Add_RGB_to_JMh_Shader(
+void _Add_SinCos_Shader(
+    GpuShaderCreatorRcPtr & shaderCreator,
+    GpuShaderText & ss)
+{
+    const std::string pxl(shaderCreator->getPixelName());
+    ss.newLine() << ss.floatDecl("h_rad") << " = " << pxl << ".b * " << 3.14159265358979f / 180.0f << ";";
+    ss.newLine() << ss.floatDecl("cos_hr") << " = cos(h_rad);";
+    ss.newLine() << ss.floatDecl("sin_hr") << " = sin(h_rad);";
+}
+
+
+void _Add_RGB_to_Aab_Shader(
     GpuShaderCreatorRcPtr & shaderCreator,
     GpuShaderText & ss,
     const ACES2::JMhParams & p)
 {
     const std::string pxl(shaderCreator->getPixelName());
 
+    ss.newLine() << "{";
+    ss.indent();
+
     ss.newLine() << ss.float3Decl("lms") << " = " << ss.mat3fMul(&p.MATRIX_RGB_to_CAM16_c[0], pxl + ".rgb") << ";";
 
     ss.newLine() << ss.float3Decl("F_L_v") << " = pow(abs(lms), " << ss.float3Const(0.42f) << ");";
     ss.newLine() << ss.float3Decl("rgb_a") << " = (sign(lms) * F_L_v) / ( " << ACES2::cam_nl_offset << " + F_L_v);";
 
-    ss.newLine() << ss.float3Decl("Aab") << " = " << ss.mat3fMul(&p.MATRIX_cone_response_to_Aab[0], "rgb_a.rgb") << ";";
+    ss.newLine() << "Aab = " << ss.mat3fMul(&p.MATRIX_cone_response_to_Aab[0], "rgb_a.rgb") << ";";
 
-    ss.newLine() << ss.floatDecl("J") << " = 100.0 * pow(Aab.r, " << p.cz << ");";
+    ss.dedent();
+    ss.newLine() << "}";
+}
+
+void _Add_Aab_to_JMh_Shader(
+    GpuShaderCreatorRcPtr & shaderCreator,
+    GpuShaderText & ss,
+    const ACES2::JMhParams & p)
+{
+    const std::string pxl(shaderCreator->getPixelName());
+
+    ss.newLine() << "{";
+    ss.indent();
+
+    ss.newLine() << ss.floatDecl("J") << " = " << ACES2::J_scale << " * pow(Aab.r, " << p.cz << ");";
 
     ss.newLine() << ss.floatDecl("M") << " = (J == 0.0) ? 0.0 : sqrt(Aab.g * Aab.g + Aab.b * Aab.b);";
 
@@ -382,28 +410,83 @@ void _Add_RGB_to_JMh_Shader(
     ss.newLine() << "h = h - floor(h / 360.0) * 360.0;";
     ss.newLine() << "h = (h < 0.0) ? h + 360.0 : h;";
 
-    ss.newLine() << pxl << ".rgb = " << ss.float3Const("J", "M", "h") << ";";
+    ss.newLine() << "JMh.rgb = " << ss.float3Const("J", "M", "h") << ";";
+
+    ss.dedent();
+    ss.newLine() << "}";
 }
 
-void _Add_JMh_to_RGB_Shader(
+void _Add_RGB_to_JMh_Shader(
     GpuShaderCreatorRcPtr & shaderCreator,
     GpuShaderText & ss,
     const ACES2::JMhParams & p)
 {
     const std::string pxl(shaderCreator->getPixelName());
 
-    ss.newLine() << ss.floatDecl("h") << " = " << pxl << ".b * " << 3.14159265358979 / 180.0 << ";";
+    ss.newLine() << ss.float3Decl("JMh") << ";";    // TODO: leaky abstraction should really be explicit functions
+    ss.newLine() << ss.float3Decl("Aab") << ";";    // TODO: leaky abstraction should really be explicit functions
 
-    ss.newLine() << ss.float3Decl("Aab") << ";";
-    ss.newLine() << "Aab.r = pow(" << pxl << ".r / 100.0, " << 1.0 / p.cz << ");";
-    ss.newLine() << "Aab.g = " << pxl << ".g * cos(h);";
-    ss.newLine() << "Aab.b = " << pxl << ".g * sin(h);";
+    ss.newLine() << "{";
+    ss.indent();
 
-    ss.newLine() << ss.float3Decl("rgb_a") << " = " << ss.mat3fMul(&p.MATRIX_Aab_to_cone_response[0], "Aab.rgb") << ";";
+    _Add_RGB_to_Aab_Shader(shaderCreator, ss, p);
+    _Add_Aab_to_JMh_Shader(shaderCreator, ss, p);
+    
+    ss.newLine() << pxl << ".rgb = JMh;";
 
+    ss.dedent();
+    ss.newLine() << "}";
+}
+
+
+void _Add_JMh_to_Aab_Shader(
+    GpuShaderCreatorRcPtr & shaderCreator,
+    GpuShaderText & ss,
+    const ACES2::JMhParams & p)
+{
+    const std::string pxl(shaderCreator->getPixelName());
+
+    ss.newLine() << "{";
+    ss.indent();
+
+
+    ss.newLine() << "Aab.r = pow(JMh.r * " << 1.0f / ACES2::J_scale << ", " << p.inv_cz << ");";
+    ss.newLine() << "Aab.g = JMh.g * cos_hr;";
+    ss.newLine() << "Aab.b = JMh.g * sin_hr;";
+    
+    ss.dedent();
+    ss.newLine() << "}";
+}
+
+void _Add_Aab_to_RGB_Shader(
+    GpuShaderCreatorRcPtr & shaderCreator,
+    GpuShaderText & ss,
+    const ACES2::JMhParams & p)
+{
+    ss.newLine() << "{";
+    ss.indent();
+
+    ss.newLine() << ss.float3Decl("rgb_a") << " = " << ss.mat3fMul(&p.MATRIX_Aab_to_cone_response[0], "Aab.rgb") << ";";
     ss.newLine() << ss.float3Decl("lms") << " = sign(rgb_a) * pow( " << ACES2::cam_nl_offset << " * abs(rgb_a) / (1.0f - abs(rgb_a)), " << ss.float3Const(1.f / 0.42f) << ");";
+    ss.newLine() << "JMh.rgb = " << ss.mat3fMul(&p.MATRIX_CAM16_c_to_RGB[0], "lms") << ";";
+
+    ss.dedent();
+    ss.newLine() << "}";
+}
+
+void _Add_JMh_to_RGB_Shader(
+    GpuShaderCreatorRcPtr & shaderCreator,
+    GpuShaderText & ss,
+    const ACES2::JMhParams & p)
+{
+    const std::string pxl(shaderCreator->getPixelName());
 
-    ss.newLine() << pxl << ".rgb = " << ss.mat3fMul(&p.MATRIX_CAM16_c_to_RGB[0], "lms") << ";";
+    ss.newLine() << ss.float3Decl("JMh") << " = " << pxl << ".rgb;";
+    ss.newLine() << ss.float3Decl("Aab") << ";";
+    _Add_JMh_to_Aab_Shader(shaderCreator, ss, p);
+    _Add_Aab_to_RGB_Shader(shaderCreator, ss, p);
+
+    ss.newLine() << pxl << ".rgb = JMh;";
 }
 
 std::string _Add_Reach_table(
@@ -532,6 +615,65 @@ std::string _Add_Toe_func(
     return name;
 }
 
+std::string _Add_Tonescale_func(
+    GpuShaderCreatorRcPtr & shaderCreator,
+    unsigned resourceIndex,
+    bool invert,
+    const ACES2::JMhParams & p,
+    const ACES2::ToneScaleParams & t)
+{
+    // Reserve name
+    std::ostringstream resName;
+    resName << shaderCreator->getResourcePrefix()
+            << std::string("_")
+            << std::string("tonescale")
+            << (invert ? std::string("_inv") : std::string("_fwd"))
+            << resourceIndex;
+
+    // Note: Remove potentially problematic double underscores from GLSL resource names.
+    std::string name(resName.str());
+    StringUtils::ReplaceInPlace(name, "__", "_");
+
+    GpuShaderText ss(shaderCreator->getLanguage());
+
+    ss.newLine() << ss.floatKeyword() << " " << name << "(float J)";
+    ss.newLine() << "{";
+    ss.indent();
+
+    // Tonescale applied in Y (convert to and from J)
+    ss.newLine() << ss.floatDecl("A") << " = " << p.A_w_J << " * pow(abs(J) * " << 1.0f / ACES2::J_scale << ", " << p.inv_cz << ");";
+    ss.newLine() << ss.floatDecl("Y") << " = pow(( " << ACES2::cam_nl_offset << " * A) / (1.0f - A), " << 1.0 / 0.42 << ");";
+
+    if (invert)
+    {
+        // Inverse Tonescale applied in Y (convert to and from J)
+        ss.newLine() << ss.floatDecl("Y_i") << " = Y / " << double(p.F_L_n) * ACES2::reference_luminance << ";";
+ 
+        ss.newLine() << ss.floatDecl("Z") << " = max(0.0, min(" << t.inverse_limit << ", Y_i));";
+        ss.newLine() << ss.floatDecl("ht") << " = 0.5 * (Z + sqrt(Z * (" << 4.0 * t.t_1 << " + Z)));";
+        ss.newLine() << ss.floatDecl("Yo") << " = " << double(p.F_L_n) * t.s_2 << " / (pow((" << t.m_2 << " / ht), (" << 1.0 / t.g << ")) - 1.0);";
+
+        ss.newLine() << ss.floatDecl("F_L_Y") << " = pow(abs(Yo), 0.42);";
+    }
+    else
+    {
+        // Tonescale applied in Y (convert to and from J)
+        ss.newLine() << ss.floatDecl("f") << " = " << t.m_2  << " * pow(Y / (Y + " << double(t.s_2) * p.F_L_n << "), " << t.g << ");";
+        ss.newLine() << ss.floatDecl("Y_ts") << " = max(0.0, f * f / (f + " << t.t_1 << "));";
+        ss.newLine() << ss.floatDecl("F_L_Y") << " = pow(" << double(p.F_L_n) * ACES2::reference_luminance << " * Y_ts, 0.42);";
+    }
+
+    ss.newLine() << ss.floatDecl("J_ts") << " = " << ACES2::J_scale << " * pow((F_L_Y / ( " << ACES2::cam_nl_offset << " + F_L_Y)) * " << p.inv_A_w_J << ", " << p.cz << ");";
+    ss.newLine() << "return J_ts;";
+
+    ss.dedent();
+    ss.newLine() << "}";
+
+    shaderCreator->addToHelperShaderCode(ss.string().c_str());
+
+    return name;
+}
+
 void _Add_ChromaCompressionNorm_Shader(
     GpuShaderText & ss,
     const ACES2::ChromaCompressParams & c)
@@ -541,18 +683,15 @@ void _Add_ChromaCompressionNorm_Shader(
     ss.newLine() << "{";
     ss.indent();
 
-    ss.newLine() << ss.floatDecl("h_rad") << " = h * " << 3.14159265358979f / 180.0f << ";";
-    ss.newLine() << ss.floatDecl("a") << " = cos(h_rad);";
-    ss.newLine() << ss.floatDecl("b") << " = sin(h_rad);";
-    ss.newLine() << ss.floatDecl("cos_hr2") << " = a * a - b * b;";
-    ss.newLine() << ss.floatDecl("sin_hr2") << " = 2.0 * a * b;";
-    ss.newLine() << ss.floatDecl("cos_hr3") << " = 4.0 * a * a * a - 3.0 * a;";
-    ss.newLine() << ss.floatDecl("sin_hr3") << " = 3.0 * b - 4.0 * b * b * b;";
-    ss.newLine() << ss.float3Decl("cosines") << " = " <<  ss.float3Const("a", "cos_hr2", "cos_hr3") <<";";
+    ss.newLine() << ss.floatDecl("cos_hr2") << " = cos_hr * cos_hr - sin_hr * sin_hr;";
+    ss.newLine() << ss.floatDecl("sin_hr2") << " = 2.0 * cos_hr * sin_hr;";
+    ss.newLine() << ss.floatDecl("cos_hr3") << " = 4.0 * cos_hr * cos_hr * cos_hr - 3.0 * cos_hr;";
+    ss.newLine() << ss.floatDecl("sin_hr3") << " = 3.0 * sin_hr - 4.0 * sin_hr * sin_hr * sin_hr;";
+    ss.newLine() << ss.float3Decl("cosines") << " = " <<  ss.float3Const("cos_hr", "cos_hr2", "cos_hr3") <<";";
     ss.newLine() << ss.float3Decl("cosine_weights") << " = " <<  ss.float3Const(11.34072 * c.chroma_compress_scale,
                                                                                 16.46899 * c.chroma_compress_scale,
                                                                                 7.88380 * c.chroma_compress_scale) <<";";
-    ss.newLine() << ss.float3Decl("sines") << " = " <<  ss.float3Const("b", "sin_hr2", "sin_hr3") <<";";
+    ss.newLine() << ss.float3Decl("sines") << " = " <<  ss.float3Const("sin_hr", "sin_hr2", "sin_hr3") <<";";
     ss.newLine() << ss.float3Decl("sine_weights") << " = " <<  ss.float3Const(14.66441 * c.chroma_compress_scale,
                                                                               -6.37224 * c.chroma_compress_scale,
                                                                                9.19364 * c.chroma_compress_scale) <<";";
@@ -579,15 +718,6 @@ void _Add_Tonescale_Compress_Fwd_Shader(
     ss.newLine() << ss.floatDecl("M") << " = " << pxl << ".g;";
     ss.newLine() << ss.floatDecl("h") << " = " << pxl << ".b;";
 
-    // Tonescale applied in Y (convert to and from J)
-    ss.newLine() << ss.floatDecl("A") << " = " << p.A_w_J << " * pow(abs(J) * " << 1.0 / ACES2::J_scale << ", " << 1.0 / p.cz << ");";
-    ss.newLine() << ss.floatDecl("Y") << " = pow(( " << ACES2::cam_nl_offset << " * A) / (1.0f - A), " << 1.0 / 0.42 << ");";
-
-    ss.newLine() << ss.floatDecl("f") << " = " << t.m_2  << " * pow(Y / (Y + " << t.s_2 * p.F_L_n << "), " << t.g << ");";
-    ss.newLine() << ss.floatDecl("Y_ts") << " = max(0.0, f * f / (f + " << t.t_1 << "));";
-
-    ss.newLine() << ss.floatDecl("F_L_Y") << " = pow(" << p.F_L_n  *  t.n_r << " * Y_ts, 0.42);";
-    ss.newLine() << ss.floatDecl("J_ts") << " = " << ACES2::J_scale << " * pow((F_L_Y / ( " << ACES2::cam_nl_offset << " + F_L_Y)) * " << 1.0f / p.A_w_J << ", " << p.cz << ");";
 
     // ChromaCompress
     ss.newLine() << ss.floatDecl("M_cp") << " = M;";
@@ -632,17 +762,6 @@ void _Add_Tonescale_Compress_Inv_Shader(
     ss.newLine() << ss.floatDecl("M_cp") << " = " << pxl << ".g;";
     ss.newLine() << ss.floatDecl("h") << " = " << pxl << ".b;";
 
-    // Inverse Tonescale applied in Y (convert to and from J)
-    ss.newLine() << ss.floatDecl("A") << " = " << p.A_w_J << " * pow(abs(J_ts) / 100.0, " << 1.0f / p.cz << ");";
-    ss.newLine() << ss.floatDecl("Y_ts") << " = sign(J_ts) * pow(( " << ACES2::cam_nl_offset << " * A) / (1.0f - A), " << 1.0 / 0.42 << ") / " << p.F_L_n << " / 100.0;";
-
-    ss.newLine() << ss.floatDecl("Z") << " = max(0.0, min(" << t.n << " / (" << t.u_2 * t.n_r << "), Y_ts));";
-    ss.newLine() << ss.floatDecl("ht") << " = (Z + sqrt(Z * (4.0 * " << t.t_1 << " + Z))) / 2.0;";
-    ss.newLine() << ss.floatDecl("Y") << " = " << t.s_2 << " / (pow((" << t.m_2 << " / ht), (1.0 / " << t.g << ")) - 1.0);";
-
-    ss.newLine() << ss.floatDecl("F_L_Y") << " = pow(" << p.F_L_n << " * abs(Y), 0.42);";
-    ss.newLine() << ss.floatDecl("J") << " = sign(Y) * 100.0 * pow((F_L_Y / ( " << ACES2::cam_nl_offset << " + F_L_Y)) / " << p.A_w_J << ", " << p.cz << ");";
-
     // ChromaCompress
     ss.newLine() << ss.floatDecl("M") << " = M_cp;";
 
@@ -727,7 +846,7 @@ std::string _Add_Cusp_table(
     ss.newLine() << "{";
     ss.indent();
 
-    ss.newLine() << ss.intDecl("i") << " = " << ss.intKeyword() << "(h + " << g.gamut_cusp_table.base_index << ");";
+    ss.newLine() << ss.intDecl("i") << " = " << ss.intKeyword() << "(h) + " << g.gamut_cusp_table.base_index << ";";
 
     ss.newLine() << ss.intDecl("i_lo") << " = " << ss.intKeyword() << "(max("
                  << ss.floatKeyword() << "(" << g.gamut_cusp_table.lower_wrap_index << "), "
@@ -806,11 +925,13 @@ std::string _Add_Focus_Gain_func(
 
     ss.newLine() << ss.floatDecl("thr") << " = " << ss.lerp("cuspJ", std::to_string(s.limit_J_max), std::to_string(ACES2::focus_gain_blend)) << ";";
 
-    ss.newLine() << "if (J > thr)";
+    ss.newLine() << "if (J > thr)"; // TODO threshold
     ss.newLine() << "{";
     ss.indent();
-    ss.newLine() << ss.floatDecl("gain") << " = ( " << s.limit_J_max << " - thr) / max(0.0001, (" << s.limit_J_max << " - min(" << s.limit_J_max << ", J)));";
-    ss.newLine() << "return pow(log(gain)/log(10.0), " << ACES2::focus_adjust_gain_inv << ") + 1.0;";
+    ss.newLine() << ss.floatDecl("gain") << " = ( " << s.limit_J_max << " - thr) / max(0.0001, " << s.limit_J_max << " - J);";
+    ss.newLine() << "gain = log(gain)/log(10.0);";  // TODO log10(gain) but not all shading languages have log10() would log2(gain)/log2(10) be better? perhaps delegate to GpuShaderText?
+    //ss.newLine() << "return pow(gain, " << ACES2::focus_adjust_gain_inv << ") + 1.0;"; // TODO; remove once agreed on change
+    ss.newLine() << "return gain * gain + 1.0;";
     ss.dedent();
     ss.newLine() << "}";
     ss.newLine() << "else";
@@ -1176,23 +1297,25 @@ void Add_ACES_OutputTransform_Fwd_Shader(
     unsigned resourceIndex = shaderCreator->getNextResourceIndex();
 
     const std::string reachName = _Add_Reach_table(shaderCreator, resourceIndex, s.reach_m_table);
+    const std::string tonescaleName_Fwd = _Add_Tonescale_func(shaderCreator, resourceIndex, false, pIn, t);
     const std::string pxl(shaderCreator->getPixelName());
 
     ss.newLine() << "";
     ss.newLine() << "// Add RGB to JMh";
     ss.newLine() << "";
-    ss.newLine() << "{";
-    ss.indent();
-        _Add_RGB_to_JMh_Shader(shaderCreator, ss, pIn);
-    ss.dedent();
-    ss.newLine() << "}";
+    _Add_RGB_to_JMh_Shader(shaderCreator, ss, pIn);
+    _Add_SinCos_Shader(shaderCreator, ss);
+
 
     ss.newLine() << "";
     ss.newLine() << "// Add ToneScale and ChromaCompress (fwd)";
     ss.newLine() << "";
 
+    ss.newLine() << ss.floatDecl("J_ts") << " = " << tonescaleName_Fwd << "(" << pxl << ".r);";
+
     ss.newLine() << "// Sample tables (fwd)";
     ss.newLine() << ss.floatDecl("reachMaxM") << " = " << reachName << "_sample(" << pxl << ".b);";
+
     ss.newLine() << "";
 
     ss.newLine() << "{";
@@ -1256,15 +1379,14 @@ void Add_ACES_OutputTransform_Inv_Shader(
     const std::string pxl(shaderCreator->getPixelName());
     
     std::string reachName = _Add_Reach_table(shaderCreator, resourceIndex, s.reach_m_table);
+    std::string tonescaleName_Inv = _Add_Tonescale_func(shaderCreator, resourceIndex, true, pIn, t);
 
     ss.newLine() << "";
     ss.newLine() << "// Add RGB to JMh";
     ss.newLine() << "";
-    ss.newLine() << "{";
-    ss.indent();
-        _Add_RGB_to_JMh_Shader(shaderCreator, ss, pLim);
-    ss.dedent();
-    ss.newLine() << "}";
+    _Add_RGB_to_JMh_Shader(shaderCreator, ss, pLim);
+    _Add_SinCos_Shader(shaderCreator, ss);
+
 
     ss.newLine() << ss.floatDecl("reachMaxM") << " = " << reachName << "_sample(" << pxl << ".b);";
     ss.newLine() << "";
@@ -1279,6 +1401,7 @@ void Add_ACES_OutputTransform_Inv_Shader(
     ss.newLine() << "";
     ss.newLine() << "// Add ToneScale and ChromaCompress (inv)";
     ss.newLine() << "";
+    ss.newLine() << ss.floatDecl("J") << " = " << tonescaleName_Inv << "(" << pxl << ".b);";
     ss.newLine() << "{";
     ss.indent();
         _Add_Tonescale_Compress_Inv_Shader(shaderCreator, ss, resourceIndex, pIn, t, s, c);
@@ -1343,6 +1466,7 @@ void Add_JMh_to_RGB_Shader(
 
     ACES2::JMhParams p = ACES2::init_JMhParams(primaries);
     _Add_WrapHueChannel_Shader(shaderCreator, ss);
+    _Add_SinCos_Shader(shaderCreator, ss);
     _Add_JMh_to_RGB_Shader(shaderCreator, ss, p);
 }
 
@@ -1363,9 +1487,13 @@ void Add_Tonescale_Compress_Fwd_Shader(
     const std::string pxl(shaderCreator->getPixelName());
 
     const std::string reachName = _Add_Reach_table(shaderCreator, resourceIndex, s.reach_m_table);
+    const std::string tonescaleName_Fwd = _Add_Tonescale_func(shaderCreator, resourceIndex, false, p, t);
 
     _Add_WrapHueChannel_Shader(shaderCreator, ss);
+    _Add_SinCos_Shader(shaderCreator, ss);
+
     ss.newLine() << ss.floatDecl("reachMaxM") << " = " << reachName << "_sample(" << pxl << ".b);";
+    ss.newLine() << ss.floatDecl("J_ts") << " = " << tonescaleName_Fwd << "(" << pxl << ".r);";
 
     _Add_Tonescale_Compress_Fwd_Shader(shaderCreator, ss, resourceIndex, p, t, s, c);
 }
@@ -1387,9 +1515,13 @@ void Add_Tonescale_Compress_Inv_Shader(
     const std::string pxl(shaderCreator->getPixelName());
 
     const std::string reachName = _Add_Reach_table(shaderCreator, resourceIndex, s.reach_m_table);
+    std::string tonescaleName_Inv = _Add_Tonescale_func(shaderCreator, resourceIndex, true, p, t);
 
     _Add_WrapHueChannel_Shader(shaderCreator, ss);
+    _Add_SinCos_Shader(shaderCreator, ss);
+
     ss.newLine() << ss.floatDecl("reachMaxM") << " = " << reachName << "_sample(" << pxl << ".b);";
+    ss.newLine() << ss.floatDecl("J") << " = " << tonescaleName_Inv << "(" << pxl << ".r);";
 
     _Add_Tonescale_Compress_Inv_Shader(shaderCreator, ss, resourceIndex, p, t, s, c);
 }
@@ -1430,6 +1562,8 @@ void Add_Gamut_Compress_Fwd_Shader(
     const std::string reachName = _Add_Reach_table(shaderCreator, resourceIndex, s.reach_m_table);
 
     _Add_WrapHueChannel_Shader(shaderCreator, ss);
+    _Add_SinCos_Shader(shaderCreator, ss);
+
     ss.newLine() << ss.floatDecl("reachMaxM") << " = " << reachName << "_sample(" << pxl << ".b);";
 
     _Add_Gamut_Compress_Fwd_Shader(shaderCreator, ss, resourceIndex, s, g);
@@ -1471,6 +1605,8 @@ void Add_Gamut_Compress_Inv_Shader(
     const std::string reachName = _Add_Reach_table(shaderCreator, resourceIndex, s.reach_m_table);
 
     _Add_WrapHueChannel_Shader(shaderCreator, ss);
+    _Add_SinCos_Shader(shaderCreator, ss);
+
     ss.newLine() << ss.floatDecl("reachMaxM") << " = " << reachName << "_sample(" << pxl << ".b);";
 
     _Add_Gamut_Compress_Inv_Shader(shaderCreator, ss, resourceIndex, s, g);

From 36db2124f8e8d5e5c48032fd123bb00bc7cb82c4 Mon Sep 17 00:00:00 2001
From: Kevin Wheatley <kevin.wheatley@framestore.com>
Date: Wed, 19 Feb 2025 19:46:19 +0000
Subject: [PATCH 63/70] Remove unused parameters

Signed-off-by: Kevin Wheatley <kevin.wheatley@framestore.com>
---
 .../ops/fixedfunction/FixedFunctionOpGPU.cpp      | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
index 8af86e0638..ea6dced0c0 100644
--- a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
@@ -459,7 +459,6 @@ void _Add_JMh_to_Aab_Shader(
 }
 
 void _Add_Aab_to_RGB_Shader(
-    GpuShaderCreatorRcPtr & shaderCreator,
     GpuShaderText & ss,
     const ACES2::JMhParams & p)
 {
@@ -484,7 +483,7 @@ void _Add_JMh_to_RGB_Shader(
     ss.newLine() << ss.float3Decl("JMh") << " = " << pxl << ".rgb;";
     ss.newLine() << ss.float3Decl("Aab") << ";";
     _Add_JMh_to_Aab_Shader(shaderCreator, ss, p);
-    _Add_Aab_to_RGB_Shader(shaderCreator, ss, p);
+    _Add_Aab_to_RGB_Shader(ss, p);
 
     ss.newLine() << pxl << ".rgb = JMh;";
 }
@@ -705,8 +704,6 @@ void _Add_Tonescale_Compress_Fwd_Shader(
     GpuShaderCreatorRcPtr & shaderCreator,
     GpuShaderText & ss,
     unsigned resourceIndex,
-    const ACES2::JMhParams & p,
-    const ACES2::ToneScaleParams & t,
     const ACES2::SharedCompressionParameters & s,
     const ACES2::ChromaCompressParams & c)
 {
@@ -749,8 +746,6 @@ void _Add_Tonescale_Compress_Inv_Shader(
     GpuShaderCreatorRcPtr & shaderCreator,
     GpuShaderText & ss,
     unsigned resourceIndex,
-    const ACES2::JMhParams & p,
-    const ACES2::ToneScaleParams & t,
     const ACES2::SharedCompressionParameters & s,
     const ACES2::ChromaCompressParams & c)
 {
@@ -1320,7 +1315,7 @@ void Add_ACES_OutputTransform_Fwd_Shader(
 
     ss.newLine() << "{";
     ss.indent();
-        _Add_Tonescale_Compress_Fwd_Shader(shaderCreator, ss, resourceIndex, pIn, t, s, c);
+        _Add_Tonescale_Compress_Fwd_Shader(shaderCreator, ss, resourceIndex, s, c);
     ss.dedent();
     ss.newLine() << "}";
 
@@ -1404,7 +1399,7 @@ void Add_ACES_OutputTransform_Inv_Shader(
     ss.newLine() << ss.floatDecl("J") << " = " << tonescaleName_Inv << "(" << pxl << ".b);";
     ss.newLine() << "{";
     ss.indent();
-        _Add_Tonescale_Compress_Inv_Shader(shaderCreator, ss, resourceIndex, pIn, t, s, c);
+        _Add_Tonescale_Compress_Inv_Shader(shaderCreator, ss, resourceIndex, s, c);
     ss.dedent();
     ss.newLine() << "}";
 
@@ -1495,7 +1490,7 @@ void Add_Tonescale_Compress_Fwd_Shader(
     ss.newLine() << ss.floatDecl("reachMaxM") << " = " << reachName << "_sample(" << pxl << ".b);";
     ss.newLine() << ss.floatDecl("J_ts") << " = " << tonescaleName_Fwd << "(" << pxl << ".r);";
 
-    _Add_Tonescale_Compress_Fwd_Shader(shaderCreator, ss, resourceIndex, p, t, s, c);
+    _Add_Tonescale_Compress_Fwd_Shader(shaderCreator, ss, resourceIndex, s, c);
 }
 
 void Add_Tonescale_Compress_Inv_Shader(
@@ -1523,7 +1518,7 @@ void Add_Tonescale_Compress_Inv_Shader(
     ss.newLine() << ss.floatDecl("reachMaxM") << " = " << reachName << "_sample(" << pxl << ".b);";
     ss.newLine() << ss.floatDecl("J") << " = " << tonescaleName_Inv << "(" << pxl << ".r);";
 
-    _Add_Tonescale_Compress_Inv_Shader(shaderCreator, ss, resourceIndex, p, t, s, c);
+    _Add_Tonescale_Compress_Inv_Shader(shaderCreator, ss, resourceIndex, s, c);
 }
 
 void Add_Gamut_Compress_Fwd_Shader(

From e3aa39ffc5a61b92114d429e20f19e2eb0b8fb11 Mon Sep 17 00:00:00 2001
From: Kevin Wheatley <kevin.wheatley@framestore.com>
Date: Wed, 19 Feb 2025 20:03:47 +0000
Subject: [PATCH 64/70] Try tree vectoriser for gcc

Signed-off-by: Kevin Wheatley <kevin.wheatley@framestore.com>
---
 src/OpenColorIO/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/OpenColorIO/CMakeLists.txt b/src/OpenColorIO/CMakeLists.txt
index f77a0896c4..518de5203b 100755
--- a/src/OpenColorIO/CMakeLists.txt
+++ b/src/OpenColorIO/CMakeLists.txt
@@ -225,7 +225,7 @@ endif()
 # TODO: What to do for other compilers?
 if(USE_GCC)
     set_property(SOURCE ops/fixedfunction/ACES2/Transform.cpp APPEND PROPERTY COMPILE_OPTIONS
-                -fno-math-errno -fno-signed-zeros -fno-trapping-math -fno-signaling-nans -ffinite-math-only -freciprocal-math)
+                -fno-math-errno -fno-signed-zeros -fno-trapping-math -fno-signaling-nans -ffinite-math-only -freciprocal-math -ftree-vectorize)
 endif()
 
 configure_file(CPUInfoConfig.h.in CPUInfoConfig.h)

From 07abe0addc401dca860d3f3dcf0e79d8264a7d92 Mon Sep 17 00:00:00 2001
From: Kevin Wheatley <kevin.wheatley@framestore.com>
Date: Wed, 19 Feb 2025 20:56:58 +0000
Subject: [PATCH 65/70] Add Vectorise option for MSVC

Signed-off-by: Kevin Wheatley <kevin.wheatley@framestore.com>
---
 src/OpenColorIO/CMakeLists.txt | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/OpenColorIO/CMakeLists.txt b/src/OpenColorIO/CMakeLists.txt
index 518de5203b..2f280d34da 100755
--- a/src/OpenColorIO/CMakeLists.txt
+++ b/src/OpenColorIO/CMakeLists.txt
@@ -227,6 +227,9 @@ if(USE_GCC)
     set_property(SOURCE ops/fixedfunction/ACES2/Transform.cpp APPEND PROPERTY COMPILE_OPTIONS
                 -fno-math-errno -fno-signed-zeros -fno-trapping-math -fno-signaling-nans -ffinite-math-only -freciprocal-math -ftree-vectorize)
 endif()
+if(MSVC)
+    set_property(SOURCE ops/fixedfunction/ACES2/Transform.cpp APPEND PROPERTY COMPILE_OPTIONS /Qvec)
+endif()
 
 configure_file(CPUInfoConfig.h.in CPUInfoConfig.h)
 

From abe5658f1e8d761e032b99d2a78f1915c199b481 Mon Sep 17 00:00:00 2001
From: Kevin Wheatley <kevin.wheatley@framestore.com>
Date: Thu, 20 Feb 2025 10:06:10 +0000
Subject: [PATCH 66/70] Remove unused function

Signed-off-by: Kevin Wheatley <kevin.wheatley@framestore.com>
---
 src/OpenColorIO/ops/fixedfunction/ACES2/Common.h | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h b/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
index 6357721928..e55b78aeed 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
@@ -78,11 +78,6 @@ struct TableBase
     {
         return first_nominal_index + hue_position_in_uniform_table(wrapped_hue);
     }
-
-    inline unsigned int clamp_to_table_bounds(unsigned int entry) const // TODO: this should be removed if we can constrain the hue range properly
-    {
-        return std::min(nominal_size - 1U, std::max(0U, entry));
-    }
 };
 
 struct Table3D : public TableBase, std::array<float[3], TableBase::total_size>

From bb591542a55ad8b7e4cd9f5c12af0ec1da5f842e Mon Sep 17 00:00:00 2001
From: Kevin Wheatley <kevin.wheatley@framestore.com>
Date: Thu, 20 Feb 2025 10:26:34 +0000
Subject: [PATCH 67/70] Constexpr std::max is only available from C++14; for now
 avoid the call to it

Signed-off-by: Kevin Wheatley <kevin.wheatley@framestore.com>
---
 src/OpenColorIO/ops/fixedfunction/ACES2/Common.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h b/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
index e55b78aeed..18f0123636 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
@@ -176,7 +176,7 @@ constexpr float chroma_expand_fact = 0.69f;
 constexpr float chroma_expand_thr = 0.5f;
 
 // Gamut compression
-constexpr float smooth_cusps = std::max(0.000001f, 0.12f);
+constexpr float smooth_cusps =  0.12f; // C++ 14 required for constexpr std::max(0.000001f, 0.12f);
 constexpr float smooth_m = 0.27f;
 constexpr float cusp_mid_blend = 1.3f;
 constexpr float focus_gain_blend = 0.3f;

From e7c0dac6807200f639296c6a40896a15487dc6e3 Mon Sep 17 00:00:00 2001
From: Kevin Wheatley <kevin.wheatley@framestore.com>
Date: Thu, 20 Feb 2025 10:42:55 +0000
Subject: [PATCH 68/70] Try to fix intrinsic based errors on some build
 configurations

Signed-off-by: Kevin Wheatley <kevin.wheatley@framestore.com>
---
 src/OpenColorIO/CMakeLists.txt | 1 +
 tests/cpu/CMakeLists.txt       | 1 +
 2 files changed, 2 insertions(+)

diff --git a/src/OpenColorIO/CMakeLists.txt b/src/OpenColorIO/CMakeLists.txt
index 2f280d34da..5ae6add4ec 100755
--- a/src/OpenColorIO/CMakeLists.txt
+++ b/src/OpenColorIO/CMakeLists.txt
@@ -220,6 +220,7 @@ if(OCIO_USE_SIMD AND (OCIO_ARCH_X86 OR OCIO_USE_SSE2NEON))
     set_property(SOURCE ops/lut3d/Lut3DOpCPU_AVX.cpp APPEND PROPERTY COMPILE_OPTIONS ${OCIO_AVX_ARGS})
     set_property(SOURCE ops/lut3d/Lut3DOpCPU_AVX2.cpp APPEND PROPERTY COMPILE_OPTIONS ${OCIO_AVX2_ARGS})
     set_property(SOURCE ops/lut3d/Lut3DOpCPU_AVX512.cpp APPEND PROPERTY COMPILE_OPTIONS ${OCIO_AVX512_ARGS})
+    set_property(SOURCE ops/fixedfunction/ACES2/Transform.cpp APPEND PROPERTY COMPILE_OPTIONS ${OCIO_AVX_ARGS})
 endif()
 
 # TODO: What to do for other compilers?
diff --git a/tests/cpu/CMakeLists.txt b/tests/cpu/CMakeLists.txt
index 66c32cb8a7..84e0a12572 100755
--- a/tests/cpu/CMakeLists.txt
+++ b/tests/cpu/CMakeLists.txt
@@ -343,6 +343,7 @@ if(OCIO_USE_SIMD AND (OCIO_ARCH_X86 OR OCIO_USE_SSE2NEON))
     set_property(SOURCE "${CMAKE_SOURCE_DIR}/src/OpenColorIO/ops/lut3d/Lut3DOpCPU_AVX.cpp" APPEND PROPERTY COMPILE_OPTIONS ${OCIO_AVX_ARGS})
     set_property(SOURCE "${CMAKE_SOURCE_DIR}/src/OpenColorIO/ops/lut3d/Lut3DOpCPU_AVX2.cpp" APPEND PROPERTY COMPILE_OPTIONS ${OCIO_AVX2_ARGS})
     set_property(SOURCE "${CMAKE_SOURCE_DIR}/src/OpenColorIO/ops/lut3d/Lut3DOpCPU_AVX512.cpp" APPEND PROPERTY COMPILE_OPTIONS ${OCIO_AVX512_ARGS})
+    set_property(SOURCE "${CMAKE_SOURCE_DIR}/ops/fixedfunction/ACES2/Transform.cpp" APPEND PROPERTY COMPILE_OPTIONS ${OCIO_AVX_ARGS})
     set_property(SOURCE "SSE2_tests.cpp" APPEND PROPERTY COMPILE_OPTIONS ${OCIO_SSE2_ARGS})
     set_property(SOURCE "AVX_tests.cpp" APPEND PROPERTY COMPILE_OPTIONS ${OCIO_AVX_ARGS})
     set_property(SOURCE "AVX2_tests.cpp" APPEND PROPERTY COMPILE_OPTIONS ${OCIO_AVX2_ARGS})

From 430043bbf625f7e33886242f6971783d4073ff76 Mon Sep 17 00:00:00 2001
From: Kevin Wheatley <kevin.wheatley@framestore.com>
Date: Thu, 20 Feb 2025 11:24:41 +0000
Subject: [PATCH 69/70] Another C++ 14 usage fix

Signed-off-by: Kevin Wheatley <kevin.wheatley@framestore.com>
---
 src/OpenColorIO/CMakeLists.txt                        | 4 ++--
 src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp | 3 ++-
 tests/cpu/CMakeLists.txt                              | 2 +-
 3 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/OpenColorIO/CMakeLists.txt b/src/OpenColorIO/CMakeLists.txt
index 5ae6add4ec..d697855b72 100755
--- a/src/OpenColorIO/CMakeLists.txt
+++ b/src/OpenColorIO/CMakeLists.txt
@@ -220,11 +220,11 @@ if(OCIO_USE_SIMD AND (OCIO_ARCH_X86 OR OCIO_USE_SSE2NEON))
     set_property(SOURCE ops/lut3d/Lut3DOpCPU_AVX.cpp APPEND PROPERTY COMPILE_OPTIONS ${OCIO_AVX_ARGS})
     set_property(SOURCE ops/lut3d/Lut3DOpCPU_AVX2.cpp APPEND PROPERTY COMPILE_OPTIONS ${OCIO_AVX2_ARGS})
     set_property(SOURCE ops/lut3d/Lut3DOpCPU_AVX512.cpp APPEND PROPERTY COMPILE_OPTIONS ${OCIO_AVX512_ARGS})
-    set_property(SOURCE ops/fixedfunction/ACES2/Transform.cpp APPEND PROPERTY COMPILE_OPTIONS ${OCIO_AVX_ARGS})
+    set_property(SOURCE ops/fixedfunction/ACES2/Transform.cpp APPEND PROPERTY COMPILE_OPTIONS  ${OCIO_SSE2_ARGS} ${OCIO_AVX_ARGS})
 endif()
 
 # TODO: What to do for other compilers?
-if(USE_GCC)
+if(USE_GCC OR USE_CLANG)
     set_property(SOURCE ops/fixedfunction/ACES2/Transform.cpp APPEND PROPERTY COMPILE_OPTIONS
                 -fno-math-errno -fno-signed-zeros -fno-trapping-math -fno-signaling-nans -ffinite-math-only -freciprocal-math -ftree-vectorize)
 endif()
diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
index a91c316342..14461ce8fa 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
@@ -1189,7 +1189,8 @@ std::array<testData, gamma_test_count> generate_gamma_test_data(const f2 &JMcusp
     const float analytical_threshold = lerpf(JMcusp[0], limit_J_max, focus_gain_blend);
     const float focusJ = compute_focusJ(JMcusp[0], mid_J, limit_J_max);
 
-    std::generate(data.begin(), data.end(), [JMcusp, limit_J_max, focus_dist, testPositions, hue, analytical_threshold, focusJ, testIndex = 0]() mutable {
+    unsigned int testIndex = 0;
+    std::generate(data.begin(), data.end(), [JMcusp, limit_J_max, focus_dist, testPositions, hue, analytical_threshold, focusJ, testIndex]() mutable {
         const float testJ = lerpf(JMcusp[0], limit_J_max, testPositions[testIndex]);
         const float slope_gain = get_focus_gain(testJ, analytical_threshold, limit_J_max, focus_dist);
         const float J_intersect_source = solve_J_intersect(testJ, JMcusp[1], focusJ, limit_J_max, slope_gain);
diff --git a/tests/cpu/CMakeLists.txt b/tests/cpu/CMakeLists.txt
index 84e0a12572..7dd684659d 100755
--- a/tests/cpu/CMakeLists.txt
+++ b/tests/cpu/CMakeLists.txt
@@ -343,7 +343,7 @@ if(OCIO_USE_SIMD AND (OCIO_ARCH_X86 OR OCIO_USE_SSE2NEON))
     set_property(SOURCE "${CMAKE_SOURCE_DIR}/src/OpenColorIO/ops/lut3d/Lut3DOpCPU_AVX.cpp" APPEND PROPERTY COMPILE_OPTIONS ${OCIO_AVX_ARGS})
     set_property(SOURCE "${CMAKE_SOURCE_DIR}/src/OpenColorIO/ops/lut3d/Lut3DOpCPU_AVX2.cpp" APPEND PROPERTY COMPILE_OPTIONS ${OCIO_AVX2_ARGS})
     set_property(SOURCE "${CMAKE_SOURCE_DIR}/src/OpenColorIO/ops/lut3d/Lut3DOpCPU_AVX512.cpp" APPEND PROPERTY COMPILE_OPTIONS ${OCIO_AVX512_ARGS})
-    set_property(SOURCE "${CMAKE_SOURCE_DIR}/ops/fixedfunction/ACES2/Transform.cpp" APPEND PROPERTY COMPILE_OPTIONS ${OCIO_AVX_ARGS})
+    set_property(SOURCE "${CMAKE_SOURCE_DIR}/ops/fixedfunction/ACES2/Transform.cpp" APPEND PROPERTY COMPILE_OPTIONS  ${OCIO_SSE2_ARGS} ${OCIO_AVX_ARGS})
     set_property(SOURCE "SSE2_tests.cpp" APPEND PROPERTY COMPILE_OPTIONS ${OCIO_SSE2_ARGS})
     set_property(SOURCE "AVX_tests.cpp" APPEND PROPERTY COMPILE_OPTIONS ${OCIO_AVX_ARGS})
     set_property(SOURCE "AVX2_tests.cpp" APPEND PROPERTY COMPILE_OPTIONS ${OCIO_AVX2_ARGS})

From eab4a65f9d0eaa0357deb46f156a1b6ce5c1bff6 Mon Sep 17 00:00:00 2001
From: Kevin Wheatley <kevin.wheatley@framestore.com>
Date: Thu, 20 Feb 2025 11:31:30 +0000
Subject: [PATCH 70/70] Remove check for CLANG left over from testing

Signed-off-by: Kevin Wheatley <kevin.wheatley@framestore.com>
---
 src/OpenColorIO/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/OpenColorIO/CMakeLists.txt b/src/OpenColorIO/CMakeLists.txt
index d697855b72..857058696c 100755
--- a/src/OpenColorIO/CMakeLists.txt
+++ b/src/OpenColorIO/CMakeLists.txt
@@ -224,7 +224,7 @@ if(OCIO_USE_SIMD AND (OCIO_ARCH_X86 OR OCIO_USE_SSE2NEON))
 endif()
 
 # TODO: What to do for other compilers?
-if(USE_GCC OR USE_CLANG)
+if(USE_GCC)
     set_property(SOURCE ops/fixedfunction/ACES2/Transform.cpp APPEND PROPERTY COMPILE_OPTIONS
                 -fno-math-errno -fno-signed-zeros -fno-trapping-math -fno-signaling-nans -ffinite-math-only -freciprocal-math -ftree-vectorize)
 endif()