diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/ColorLib.h b/src/OpenColorIO/ops/fixedfunction/ACES2/ColorLib.h
index f10ddf3d56..426e31f271 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/ColorLib.h
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/ColorLib.h
@@ -7,6 +7,7 @@
 #include "transforms/builtins/ColorMatrixHelpers.h"
 #include "MatrixLib.h"
 
+#include <cmath>
 
 namespace OCIO_NAMESPACE
 {
@@ -14,30 +15,6 @@ namespace OCIO_NAMESPACE
 namespace ACES2
 {
 
-inline f3 HSV_to_RGB(const f3 &HSV)
-{
-    const float C = HSV[2] * HSV[1];
-    const float X = C * (1.f - std::abs(std::fmod(HSV[0] * 6.f, 2.f) - 1.f));
-    const float m = HSV[2] - C;
-
-    f3 RGB{};
-    if (HSV[0] < 1.f/6.f) {
-        RGB = {C, X, 0.f};
-    } else if (HSV[0] < 2./6.) {
-        RGB = {X, C, 0.f};
-    } else if (HSV[0] < 3./6.) {
-        RGB = {0.f, C, X};
-    } else if (HSV[0] < 4./6.) {
-        RGB = {0.f, X, C};
-    } else if (HSV[0] < 5./6.) {
-        RGB = {X, 0.f, C};
-    } else {
-        RGB = {C, 0.f, X};
-    }
-    RGB = add_f_f3(m, RGB);
-    return RGB;
-}
-
 inline m33f RGBtoXYZ_f33(const Primaries &C)
 {
     return m33_from_ocio_matrix_array(
diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h b/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
index aeb1524071..18f0123636 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Common.h
@@ -7,44 +7,98 @@
 #include "MatrixLib.h"
 #include "ColorLib.h"
 
+#include <cmath>
 
 namespace OCIO_NAMESPACE
 {
 
 namespace ACES2
 {
+constexpr float PI = 3.14159265358979f;
 
-constexpr int TABLE_SIZE = 360;
-constexpr int TABLE_ADDITION_ENTRIES = 2;
-constexpr int TABLE_TOTAL_SIZE = TABLE_SIZE + TABLE_ADDITION_ENTRIES;
-constexpr int GAMUT_TABLE_BASE_INDEX = 1;
+constexpr float hue_limit = 360.0f;
+//constexpr float hue_limit = 2.0f * PI;
+inline float _wrap_to_hue_limit(float y)
+{
+    if ( y < 0.f)
+    {
+        y = y + hue_limit;
+    }
+    return y;
+}
+
+inline float wrap_to_hue_limit(float hue)
+{
+    float y = std::fmod(hue, hue_limit);
+    return _wrap_to_hue_limit(y);
+}
+inline constexpr float to_degrees(const float v) { return v; }
+inline float from_degrees(const float v) { return wrap_to_hue_limit(v); }
+inline constexpr float to_radians(const float v) { return PI * v / 180.0f; };
+inline float _from_radians(const float v) { return _wrap_to_hue_limit(180.0f * v / PI); }; // v needs to be wrapped already
+inline float from_radians(const float v) { return wrap_to_hue_limit(180.0f * v / PI); };
+/*
+inline constexpr float to_degrees(const float v) { return 180.0f * v / PI; }
+inline float from_degrees(const float v) { return wrap_to_hue_limit(PI * v / 180.0f); }
+inline constexpr float to_radians(const float v) { return v; }
+inline float _from_radians(const float v) { return _wrap_to_hue_limit(v); };
+inline float from_radians(const float v) { return wrap_to_hue_limit(v); };
+*/
+
+struct TableBase
+{
+    static constexpr unsigned int _TABLE_ADDITION_ENTRIES = 2;
+    static constexpr unsigned int base_index = 1;
+    static constexpr unsigned int nominal_size = 360;
+    static constexpr unsigned int total_size = nominal_size + _TABLE_ADDITION_ENTRIES;
+
+    static constexpr unsigned int lower_wrap_index = 0;
+    static constexpr unsigned int upper_wrap_index = base_index + nominal_size;
+    static constexpr unsigned int first_nominal_index = base_index;
+    static constexpr unsigned int last_nominal_index = upper_wrap_index - 1;
+
+    inline float base_hue_for_position(unsigned int i_lo) const
+    {
+        if (hue_limit == float(nominal_size)) // TODO C++ 17 if constexpr
+            return float(i_lo);
+
+        const float result = i_lo * hue_limit / nominal_size;
+        return result;
+    }
+
+    inline unsigned int hue_position_in_uniform_table(float wrapped_hue) const 
+    {
+        if (hue_limit == float(nominal_size)) // TODO C++ 17 if constexpr
+            return static_cast<unsigned int>(wrapped_hue);
+        else
+            return static_cast<unsigned int>(wrapped_hue / hue_limit * float(nominal_size)); // TODO: can we use the 'lost' fraction for the lerps?
+    }
+
+   inline unsigned int nominal_hue_position_in_uniform_table(float wrapped_hue) const 
+    {
+        return first_nominal_index + hue_position_in_uniform_table(wrapped_hue);
+    }
+};
 
-struct Table3D
+struct Table3D : public TableBase, std::array<float[3], TableBase::total_size>
 {
-    static constexpr int base_index = GAMUT_TABLE_BASE_INDEX;
-    static constexpr int size = TABLE_SIZE;
-    static constexpr int total_size = TABLE_TOTAL_SIZE;
-    float table[TABLE_TOTAL_SIZE][3];
 };
 
-struct Table1D
+struct Table1D : public TableBase, std::array<float, TableBase::total_size>
 {
-    static constexpr int base_index = GAMUT_TABLE_BASE_INDEX;
-    static constexpr int size = TABLE_SIZE;
-    static constexpr int total_size = TABLE_TOTAL_SIZE;
-    float table[TABLE_TOTAL_SIZE];
 };
 
 struct JMhParams
 {
-    float F_L;
-    float z;
-    float A_w;
+    m33f MATRIX_RGB_to_CAM16_c;
+    m33f MATRIX_CAM16_c_to_RGB;
+    m33f MATRIX_cone_response_to_Aab;
+    m33f MATRIX_Aab_to_cone_response;
+    float F_L_n;    // F_L normalised
+    float cz;
+    float inv_cz;   // 1/cz
     float A_w_J;
-    f3 XYZ_w;
-    f3 D_RGB;
-    m33f MATRIX_RGB_to_CAM16;
-    m33f MATRIX_CAM16_to_RGB;
+    float inv_A_w_J; // 1/A_w_J
 };
 
 struct ToneScaleParams
@@ -57,41 +111,63 @@ struct ToneScaleParams
     float s_2;
     float u_2;
     float m_2;
+    float forward_limit;
+    float inverse_limit;
+    float log_peak;
 };
 
-struct ChromaCompressParams
+struct SharedCompressionParameters
 {
     float limit_J_max;
-    float model_gamma;
+    float model_gamma_inv;
+    Table1D reach_m_table;
+};
+
+struct ResolvedSharedCompressionParameters
+{
+    float limit_J_max;
+    float model_gamma_inv;
+    float reachMaxM;
+};
+
+struct ChromaCompressParams
+{
     float sat;
     float sat_thr;
     float compr;
-    Table1D reach_m_table;
     float chroma_compress_scale;
     static constexpr float cusp_mid_blend = 1.3f;
 };
 
+struct HueDependantGamutParams
+{
+    float gamma_bottom_inv;
+    f2 JMcusp;
+    float gamma_top_inv;
+    float focusJ;
+    float analytical_threshold;
+};
 struct GamutCompressParams
 {
-    float limit_J_max;
     float mid_J;
-    float model_gamma;
     float focus_dist;
-    float lower_hull_gamma;
-    Table1D reach_m_table;
+    float lower_hull_gamma_inv;
+    std::array<int, 2> hue_linearity_search_range;
+    Table1D hue_table;;
     Table3D gamut_cusp_table;
-    Table1D upper_hull_gamma_table;
 };
 
 // CAM
 constexpr float reference_luminance = 100.f;
 constexpr float L_A = 100.f;
 constexpr float Y_b = 20.f;
-constexpr float ac_resp = 1.f;
-constexpr float ra = 2.f * ac_resp;
-constexpr float ba = 0.05f + (2.f - ra);
 constexpr f3 surround = {0.9f, 0.59f, 0.9f}; // Dim surround
 
+constexpr float J_scale = 100.0f;
+constexpr float cam_nl_Y_reference = 100.0f;
+constexpr float cam_nl_offset = 0.2713f * cam_nl_Y_reference;
+constexpr float cam_nl_scale = 4.0f * cam_nl_Y_reference;
+
 // Chroma compression
 constexpr float chroma_compress = 2.4f;
 constexpr float chroma_compress_fact = 3.3f;
@@ -100,11 +176,11 @@ constexpr float chroma_expand_fact = 0.69f;
 constexpr float chroma_expand_thr = 0.5f;
 
 // Gamut compression
-constexpr float smooth_cusps = 0.12f;
+constexpr float smooth_cusps =  0.12f; // C++ 14 required for constexpr std::max(0.000001f, 0.12f);
 constexpr float smooth_m = 0.27f;
 constexpr float cusp_mid_blend = 1.3f;
 constexpr float focus_gain_blend = 0.3f;
-constexpr float focus_adjust_gain = 0.55f;
+constexpr float focus_adjust_gain_inv = 1.0f / 0.55f;
 constexpr float focus_distance = 1.35f;
 constexpr float focus_distance_scaling = 1.75f;
 constexpr float compression_threshold = 0.75f;
@@ -125,6 +201,11 @@ constexpr float gammaMaximum = 5.0f;
 constexpr float gammaSearchStep = 0.4f;
 constexpr float gammaAccuracy = 1e-5f;
 
+constexpr int cuspCornerCount = 6;
+constexpr int totalCornerCount = cuspCornerCount + 2;
+constexpr int max_sorted_corners = 2 * cuspCornerCount;
+constexpr float reach_cusp_tolerance = 1e-3f;
+constexpr float display_cusp_tolerance = 1e-7f;
 
 } // namespace ACES2
 
diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/MatrixLib.h b/src/OpenColorIO/ops/fixedfunction/ACES2/MatrixLib.h
index 27a24df0c9..d552742ccb 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/MatrixLib.h
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/MatrixLib.h
@@ -6,6 +6,7 @@
 
 #include "ops/matrix/MatrixOpData.h"
 
+#include <array>
 
 namespace OCIO_NAMESPACE
 {
diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
index 80b8ba5c72..3e23f59a27 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.cpp
@@ -3,6 +3,10 @@
 
 #include "Transform.h"
 
+#include <array>
+#include <algorithm>
+#include <cmath>
+#include <limits>
 
 namespace OCIO_NAMESPACE
 {
@@ -13,48 +17,47 @@ namespace ACES2
 //
 // Table lookups
 //
-
-float wrap_to_360(float hue)
+inline f3 lerp(const f3& lower, const f3& upper, const float t)
 {
-    float y = std::fmod(hue, 360.f);
-    if ( y < 0.f)
-    {
-        y = y + 360.f;
-    }
-    return y;
+    return {
+        lerpf(lower[0], upper[0], t),
+        lerpf(lower[1], upper[1], t),
+        lerpf(lower[2], upper[2], t)
+    };
 }
 
-float base_hue_for_position(int i_lo, int table_size) 
+inline f3 lerp(const float lower[3], const float upper[3], const float t)
 {
-    const float result = i_lo * 360.f / table_size;
-    return result;
+    return {
+        lerpf(lower[0], upper[0], t),
+        lerpf(lower[1], upper[1], t),
+        lerpf(lower[2], upper[2], t)
+    };
 }
 
-int hue_position_in_uniform_table(float hue, int table_size)
+inline unsigned int midpoint(unsigned int a, unsigned int b)
 {
-    const float wrapped_hue = wrap_to_360(hue);
-    return int(wrapped_hue / 360.f * (float) table_size);
+    return (a + b) / 2;
 }
 
-int next_position_in_table(int entry, int table_size)
+inline float midpoint(float a, float b)
 {
-    return (entry + 1) % table_size;
+    return (a + b) / 2.f;
 }
 
-int clamp_to_table_bounds(int entry, int table_size)
+unsigned int lookup_hue_interval(float h, const Table1D &hues, const std::array<int, 2> & hue_linearity_search_range)
 {
-    return std::min(table_size - 1, std::max(0, entry));
-}
+    // Search the given Table for the interval containing the desired hue
+    // Returns the upper index of the interval
 
-f2 cusp_from_table(float h, const Table3D &gt)
-{
-    int i_lo = 0;
-    int i_hi = gt.base_index + gt.size; // allowed as we have an extra entry in the table
-    int i = clamp_to_table_bounds(hue_position_in_uniform_table(h, gt.size) + gt.base_index, gt.total_size);
+    // We can narrow the search range based on the hues being almost uniform
+    unsigned int i = hues.nominal_hue_position_in_uniform_table(h);
+    unsigned int i_lo = std::max(int(hues.lower_wrap_index), int(i) + hue_linearity_search_range[0]); // Should be nominal not lower_wrap?
+    unsigned int i_hi = std::min(int(hues.upper_wrap_index), int(i) + hue_linearity_search_range[1]);
 
     while (i_lo + 1 < i_hi)
     {
-        if (h > gt.table[i][2])
+        if (h > hues[i])
         {
             i_lo = i;
         }
@@ -62,165 +65,273 @@ f2 cusp_from_table(float h, const Table3D &gt)
         {
             i_hi = i;
         }
-        i = clamp_to_table_bounds((i_lo + i_hi) / 2, gt.total_size);
+        i = midpoint(i_lo, i_hi);
     }
 
-    i_hi = std::max(1, i_hi);
-
-    const f3 lo {
-        gt.table[i_hi-1][0],
-        gt.table[i_hi-1][1],
-        gt.table[i_hi-1][2]
-    };
-
-    const f3 hi {
-        gt.table[i_hi][0],
-        gt.table[i_hi][1],
-        gt.table[i_hi][2]
-    };
-
-    const float t = (h - lo[2]) / (hi[2] - lo[2]);
-    const float cuspJ = lerpf(lo[0], hi[0], t);
-    const float cuspM = lerpf(lo[1], hi[1], t);
+    i_hi = std::max(1U, i_hi); // TODO: should not be needed if we initialise with the correct lo and hi
 
-    return { cuspJ, cuspM };
+    return i_hi;
 }
 
-float reach_m_from_table(float h, const ACES2::Table1D &gt)
+inline float interpolation_weight(float h, float h_lo, float h_hi)
 {
-    const int i_lo = clamp_to_table_bounds(hue_position_in_uniform_table(h, gt.size), gt.total_size);
-    const int i_hi = clamp_to_table_bounds(next_position_in_table(i_lo, gt.size), gt.total_size);
-
-    const float t = (h - i_lo) / (i_hi - i_lo);
-    return lerpf(gt.table[i_lo], gt.table[i_hi], t);
+    return (h - h_lo) / (h_hi - h_lo);
 }
 
-float hue_dependent_upper_hull_gamma(float h, const ACES2::Table1D &gt)
+inline float interpolation_weight(float h, unsigned int h_lo, unsigned int h_hi)
 {
-    const int i_lo = clamp_to_table_bounds(hue_position_in_uniform_table(h, gt.size) + gt.base_index, gt.total_size);
-    const int i_hi = clamp_to_table_bounds(next_position_in_table(i_lo, gt.size), gt.total_size);
+    return (h - h_lo) / (h_hi - h_lo);
+}
 
-    const float base_hue = (float) (i_lo - gt.base_index);
+inline f3 cusp_from_table(unsigned int i_hi, float t, const Table3D &gt)
+{
+    return lerp(gt[i_hi-1], gt[i_hi], t);
+}
 
-    const float t = wrap_to_360(h) - base_hue;
+float reach_m_from_table(float h, const ACES2::Table1D &rt)
+{
+    const unsigned int base = rt.hue_position_in_uniform_table(h);
+    const float t = h - base; // NOTE assumes uniform 1 degree 360 spacing
+    const unsigned int i_lo = base + rt.first_nominal_index; 
+    const unsigned int i_hi = i_lo + 1; // NOTE assumes uniform 1 degree 360 spacing 
 
-    return lerpf(gt.table[i_lo], gt.table[i_hi], t);
+    return lerpf(rt[i_lo], rt[i_hi], t);
 }
 
 //
 // CAM
 //
 
-// Post adaptation non linear response compression
-float panlrc_forward(float v, float F_L)
+inline float _post_adaptation_cone_response_compression_fwd(float Rc)
+{
+    const float F_L_Y = powf(Rc, 0.42f);
+    const float Ra    = (F_L_Y) / (cam_nl_offset + F_L_Y);
+    return Ra;
+}
+
+inline float _post_adaptation_cone_response_compression_inv(float Ra)
 {
-    const float F_L_v = powf(F_L * std::abs(v) / reference_luminance, 0.42f);
+    const float F_L_Y = (cam_nl_offset * Ra) / (1.0f - Ra); // TODO: what happens when Ra >= 1.0
+    const float Rc    = powf(F_L_Y, 1.f / 0.42f);
+    return Rc;
+}
+
+
+float post_adaptation_cone_response_compression_fwd(float v)
+{
+    const float abs_v = std::abs(v);
+    const float Ra =  _post_adaptation_cone_response_compression_fwd(abs_v);
     // Note that std::copysign(1.f, 0.f) returns 1 but the CTL copysign(1.,0.) returns 0.
     // TODO: Should we change the behaviour?
-    return (400.f * std::copysign(1.f, v) * F_L_v) / (27.13f + F_L_v);
+    return std::copysign(Ra, v);
 }
 
-float panlrc_inverse(float v, float F_L)
+float post_adaptation_cone_response_compression_inv(float v)
 {
-    return std::copysign(1.f, v) * reference_luminance / F_L * powf((27.13f * std::abs(v) / (400.f - std::abs(v))), 1.f / 0.42f);
+    const float abs_v = std::abs(v);
+    const float Rc    = _post_adaptation_cone_response_compression_inv(abs_v);
+    return std::copysign(Rc, v);
 }
 
-// Optimization used during initialization
-float Y_to_J(float Y, const JMhParams &params)
+inline float Achromatic_n_to_J(float A, const float cz)
 {
-    float F_L_Y = powf(params.F_L * std::abs(Y) / reference_luminance, 0.42f);
-    return std::copysign(1.f, Y) * reference_luminance * powf(((400.f * F_L_Y) / (27.13f + F_L_Y)) / params.A_w_J, surround[1] * params.z);
+    return J_scale * powf(A, cz);
 }
 
-f3 RGB_to_JMh(const f3 &RGB, const JMhParams &p)
+inline float J_to_Achromatic_n(float J, const float inv_cz)
 {
-    const float red = RGB[0];
-    const float grn = RGB[1];
-    const float blu = RGB[2];
+    return powf(J * (1.0f / J_scale), inv_cz);
+}
 
-    const float red_m = red * p.MATRIX_RGB_to_CAM16[0] + grn * p.MATRIX_RGB_to_CAM16[1] + blu * p.MATRIX_RGB_to_CAM16[2];
-    const float grn_m = red * p.MATRIX_RGB_to_CAM16[3] + grn * p.MATRIX_RGB_to_CAM16[4] + blu * p.MATRIX_RGB_to_CAM16[5];
-    const float blu_m = red * p.MATRIX_RGB_to_CAM16[6] + grn * p.MATRIX_RGB_to_CAM16[7] + blu * p.MATRIX_RGB_to_CAM16[8];
+// Optimization for achromatic values
 
-    const float red_a =  panlrc_forward(red_m * p.D_RGB[0], p.F_L);
-    const float grn_a =  panlrc_forward(grn_m * p.D_RGB[1], p.F_L);
-    const float blu_a =  panlrc_forward(blu_m * p.D_RGB[2], p.F_L);
+inline float _A_to_Y(float A, const JMhParams &p)
+{
+    const float Ra = p.A_w_J * A;
+    const float Y  = _post_adaptation_cone_response_compression_inv(Ra) / p.F_L_n;
+    return Y;
+}
 
-    const float A = 2.f * red_a + grn_a + 0.05f * blu_a;
-    const float a = red_a - 12.f * grn_a / 11.f + blu_a / 11.f;
-    const float b = (red_a + grn_a - 2.f * blu_a) / 9.f;
+inline float _J_to_Y(float abs_J, const JMhParams &p)
+{
+    return _A_to_Y(J_to_Achromatic_n(abs_J, p.inv_cz), p);
+}
 
-    const float J = 100.f * powf(A / p.A_w, surround[1] * p.z);
+inline float _Y_to_J(float abs_Y, const JMhParams &p)
+{
+    const float Ra = _post_adaptation_cone_response_compression_fwd(abs_Y * p.F_L_n);
+    const float J  = Achromatic_n_to_J(Ra * p.inv_A_w_J, p.cz);
+    return J;
+}
+
+float Y_to_J(float Y, const JMhParams &p)
+{
+    const float abs_Y = std::abs(Y);
+    const float J     = _Y_to_J(abs_Y, p);
+    return std::copysign(J, Y);
+}
+
+f3 RGB_to_Aab(const f3 &RGB, const JMhParams &p)
+{
+    const f3 rgb_m = mult_f3_f33(RGB, p.MATRIX_RGB_to_CAM16_c);
+
+    const f3 rgb_a = {
+        post_adaptation_cone_response_compression_fwd(rgb_m[0]),
+        post_adaptation_cone_response_compression_fwd(rgb_m[1]),
+        post_adaptation_cone_response_compression_fwd(rgb_m[2])
+    };
 
-    const float M = J == 0.f ? 0.f : 43.f * surround[2] * sqrt(a * a + b * b);
+    const f3 Aab = mult_f3_f33(rgb_a, p.MATRIX_cone_response_to_Aab);
+    return Aab;
+}
 
-    const float PI = 3.14159265358979f;
-    const float h_rad = std::atan2(b, a);
-    float h = std::fmod(h_rad * 180.f / PI, 360.f);
-    if (h < 0.f)
+f3 Aab_to_JMh(const f3 &Aab, const JMhParams &p)
+{
+    if (Aab[0] <= 0.f)
     {
-        h += 360.f;
+        return {0.f, 0.f, 0.f};
     }
+    const float J = Achromatic_n_to_J(Aab[0], p.cz);
+    const float M = sqrt(Aab[1] * Aab[1] + Aab[2] * Aab[2]);
+    const float h_rad = std::atan2(Aab[2], Aab[1]);
+    float h = _from_radians(h_rad); // Call to unwrapped hue version due to atan2 limits
 
     return {J, M, h};
 }
 
-f3 JMh_to_RGB(const f3 &JMh, const JMhParams &p)
+f3 RGB_to_JMh(const f3 &RGB, const JMhParams &p)
+{
+    const f3 Aab = RGB_to_Aab(RGB, p);
+    const f3 JMh = Aab_to_JMh(Aab, p);
+    return JMh;
+}
+
+f3 JMh_to_Aab(const f3 &JMh, const float &cos_hr, const float &sin_hr, const JMhParams &p)
 {
     const float J = JMh[0];
     const float M = JMh[1];
-    const float h = JMh[2];
 
-    const float PI = 3.14159265358979f;
-    const float h_rad = h * PI / 180.f;
+    const float A = J_to_Achromatic_n(J, p.inv_cz);
+    const float a = M * cos_hr;
+    const float b = M * sin_hr;
+    return {A, a, b};
+}
+
+f3 JMh_to_Aab(const f3 &JMh, const JMhParams &p)
+{
+    const float h = JMh[2];
+    const float h_rad = to_radians(h);
+    const float cos_hr = cos(h_rad);
+    const float sin_hr = sin(h_rad);
 
-    const float scale = M / (43.f * surround[2]);
-    const float A = p.A_w * powf(J / 100.f, 1.f / (surround[1] * p.z));
-    const float a = scale * cos(h_rad);
-    const float b = scale * sin(h_rad);
+    return JMh_to_Aab(JMh, cos_hr, sin_hr, p);
+}
 
-    const float red_a = (460.f * A + 451.f * a + 288.f * b) / 1403.f;
-    const float grn_a = (460.f * A - 891.f * a - 261.f * b) / 1403.f;
-    const float blu_a = (460.f * A - 220.f * a - 6300.f * b) / 1403.f;
+f3 Aab_to_RGB(const f3 &Aab, const JMhParams &p)
+{
+    const f3 rgb_a = mult_f3_f33(Aab, p.MATRIX_Aab_to_cone_response);
 
-    float red_m = panlrc_inverse(red_a, p.F_L) / p.D_RGB[0];
-    float grn_m = panlrc_inverse(grn_a, p.F_L) / p.D_RGB[1];
-    float blu_m = panlrc_inverse(blu_a, p.F_L) / p.D_RGB[2];
+    const f3 rgb_m = {
+        post_adaptation_cone_response_compression_inv(rgb_a[0]),
+        post_adaptation_cone_response_compression_inv(rgb_a[1]),
+        post_adaptation_cone_response_compression_inv(rgb_a[2])
+    };
 
-    const float red = red_m * p.MATRIX_CAM16_to_RGB[0] + grn_m * p.MATRIX_CAM16_to_RGB[1] + blu_m * p.MATRIX_CAM16_to_RGB[2];
-    const float grn = red_m * p.MATRIX_CAM16_to_RGB[3] + grn_m * p.MATRIX_CAM16_to_RGB[4] + blu_m * p.MATRIX_CAM16_to_RGB[5];
-    const float blu = red_m * p.MATRIX_CAM16_to_RGB[6] + grn_m * p.MATRIX_CAM16_to_RGB[7] + blu_m * p.MATRIX_CAM16_to_RGB[8];
+    const f3 rgb = mult_f3_f33(rgb_m, p.MATRIX_CAM16_c_to_RGB);
+    return rgb;
+}
 
-    return {red, grn, blu};
+f3 JMh_to_RGB(const f3 &JMh, const JMhParams &p)
+{
+    const f3 Aab = JMh_to_Aab(JMh, p);
+    const f3 rgb = Aab_to_RGB(Aab, p);
+    return rgb;
 }
 
 //
 // Tonescale / Chroma compress
 //
 
-float chroma_compress_norm(float h, float chroma_compress_scale)
-{
-    const float PI = 3.14159265358979f;
-    const float h_rad = h / 180.f * PI;
-    const float a = cos(h_rad);
-    const float b = sin(h_rad);
-    const float cos_hr2 = a * a - b * b;
-    const float sin_hr2 = 2.0f * a * b;
-    const float cos_hr3 = 4.0f * a * a * a - 3.0f * a;
-    const float sin_hr3 = 3.0f * b - 4.0f * b * b * b;
+/*
+//TODO: move to header
+// https://stackoverflow.com/questions/6996764/fastest-way-to-do-horizontal-sse-vector-sum-or-other-reduction/35270026#35270026
+// https://gcc.godbolt.org/#compilers:!((compiler:g6,options:%27-xc+-O3+-Wall+-fverbose-asm+-march%3Dhaswell+-mno-avx%27,sourcez:MQSwdgxgNgrgJgUwAQB4QFt3gC4CdwB0AFgHwBQoksiqAztnDseWQPStLZEi1I9JQQAa2QBZAIa5x2AOS0ANEgBGMbJwDuCcSLicA9kgh70ABxBRk2A0oTZsCXIb2ICbDtHFgA5khhgAZnq42H7SCFAAnkiIECCIvFa%2BtMjoegBu4ia0rLREMP4muuD0Wrp6/kipaURQWWT%2BUHrSSES0MOgA%2BlkdtMkAjAAUHR3ofQBMABxIaQCUSADeSEvLK6tr62vs00gAvEgA2kgAIkgAwkgAPkgAQkgAgkgAumQrw6OTSLn5y3sjnV/%2BBoILq0AZpRTgpAdUSiDoAZQAEgBVABiKIAMgBRAZjRQAZkUAAZFH0ZjMANxLLaHc4nK63B7PV4jcZTNroXhLX6YDriOBwEFgxQAimbDjs3h7Q5HADU51OMrpNxlDzuMtuTOWAI2XKhPKqCBqgoBwvatFFqy2B2W5yWStl8sVTypHAsai4yCMpnMDiQ4jSejifsq6WUUVwCBgtHAPgBLy1Zp1uz1nT5At6Awlwry/gp8aWEZCuDArx5EDS2F6HX8eLGmbNeYAvmR6o1mq12iCesk8UMWR9Zgt81D%2B2yc8m/iN0oa4DATILZpT1lslLgmnAIOJ6Ehwgh0AgwJWkAS%2BvokLjCcO3qzPonuan%2BQvs/k8yttet71O0oaoMac6aOVFLZuC8IgWnEKAKgAWhIAQ9HUcDIOHCUNk/NNu3rDln1zclh0LGBiw2Sdy0rWhq1rTDzVw5sWy2Wh0AgixHCMGhowALwQRQVDUKMYEYqJaEaTRHD8CxeicRAoPY5B%2BHEJATDXJQLHQVsmjUDtOm6Ig0z7d4pkHeYr1HW8OQnHltMfLIhWmV8E1Mj8Uw6CyBSsrMJVsgtbAIkt1mIisqxrOt3OoltQH8RAKmGO4ADUAA1hlU9t2TGABWAA2Lt/QAD101K0psodmT06YhN1P48o6Td6CyCqslZMEPJHYrqhAUCzPQCqECyvBxAgbB/FZJ8kFJSlgNasDWSM5qb3a3lLNBNIhIhECiAtZYtjTTgiGQUrJpWfDCI0rtegQXs0lZNakw2sAijAQQwEsbbPh7aYHGjPQwEUdRuAgMD%2BD0ExsAwCCkECRwYti4ctgGMADHULd7Fu%2BhcBgPqQA%2BhQ/Ruv0oCgJByi2vc/QjQmkAAFluLAwAwdoZjIZtgAPRh/BbNx2HZrY4D0GAlIQMguZ5iwWnZLp02SOtr0mXQ0jgOZFiTBWtjSZNDluK5GSmm8/AisqeW1hB/HABAXNBS6FY2TmPpkNR1CCXAIi%2B5APsiJJkDTOEjkUB7v0cKxUe4bxSa8SQlHELxkCUEAj3UKOwM8JAUQABT9flNY%2BAFsFMT8DSNKz9f8RRiIRwoF1li0tg9En%2BFhkNqlqOA04mXR30/KrsG6QpMxzTOTEag6fL8ysBX8NKyaGHl0NoOAwTgbCyRC2iOY5vhD1rMgcGFzsEDMWtuwQCXRxAJAsrligQHCg2RwhhKitZI/uFH3XOj8ExeqEbgOm3kBR4GLLFBPpcOoth4iggDBwnhdCwzAFBeIeBUZA2/PcOKAhbC8CjJ8f0bswz2CQDHLg3M1APWNjGYMVQKDhGSI3e%2B38yZPx6DmIEn8d51j/lCGE8JkRoixAMPoRJ8SKDGPPchN1z5UJMo/NCj4v4UQfmTf%2BjVJYTGobWOhAIgSNCYSAPoaVMKj0LuwxEqIMTYl4UgYkx4BHzxWFsOE8MTCk1KtgW2O5lIHkrGI9kKjJECmkUFdoeiWggFrH3LyhFB7RkGtGCinjBGAOsRwOEcJMRjFrg3eJ/dHJdR6n1TRMioBEjNjYpJcidzfhLPgmAbUo5OG9GJM8VQkb2D5PjCoclBD2CkHjEwWTcC6FwLWeQWVMDyEvDRDeR1fHdh7D0ISPQGK4xYggXSd9j4zEMisLKs1nK5JYfI3CGytnoV8b/PZeFQklnCVoyYPQgksKbKzReKdMiI1BmudASAJBSFkLwCGLQ7AmAAFzsHoG/acuAGjwQIF6VgABHGACB6DozANkPotY%2BgAE4%2BhkzSqwIg8EoJWCku0KCHV0pQTxfgNiH1sD8VYKisYDKyZ9EJMAeljLmWJXUiLboDEvnLI%2BCfQqywjDIrUIokyABFPiug9hZX2cKjGYrjKNCOHxPGex2RSr5PKl0gTVUgz2AMJAUFFAmuPniJAMpj4AHZ/4pKtVlfKdMVgiu3OK7g%2Br1WORzr%2BVy7QtWz0ldKxqdF2ieuTEas1ZqsqngdRah1KVLU2v/oSJNWV7XH1oQ6p1w5XVKuKuycN3jBQqrVYoD1aqQ2ugMIa41pr63H1TQ6jNWUs3HxzS6xVTUbyNGTIWtVOqkBjQjXW0d0bY3mrTYmh11qkDOoVaK7tHxuCzTURYY0Yay0mU9USGNVaTIjqjQ2rKTbj4TvTWm%2BNmap1pvyjOuduau3ivZLNSeoJGjlpAOXDg6EsjRANlob5hgoCeC8HIT4DChYAyBlgDiuAzlFguWWfyZFAqYXuUAA%3D%3D)),filterAsm:(binary:!t,commentOnly:!t,directives:!t,intel:!t,labels:!t),version:3
+// Alternate https://github.com/vectorclass/version2/blob/master/vectorf128.h#L1048
+
+#if OCIO_USE_SSE2 || OCIO_USE_AVX
+inline float hsum_ps_sse1(__m128 v) {                              // v    = [   D   C |   B   A ]
+    __m128 shuf   = _mm_shuffle_ps(v, v, _MM_SHUFFLE(2, 3, 0, 1)); // shuf = [   C   D |   A   B ]
+    __m128 sums   = _mm_add_ps(v, shuf);                           // sums = [ D+C C+D | B+A A+B ]
+    shuf          = _mm_movehl_ps(shuf, sums);                     // shuf = [   C   D | D+C C+D ]
+    sums          = _mm_add_ss(sums, shuf);                        // sums = [ D+C C+D | B+A A+B+C+D ]
+    return    _mm_cvtss_f32(sums);                                 // A+B+C+D
+}
+#endif
+
+#ifdef OCIO_USE_AVX
+inline float hsum256_ps_avx(__m256 v) {         // v     = [   H   G |   F   E |   D   C |   B   A ]
+    __m128 vlow  = _mm256_castps256_ps128(v);   // vlow  = [   D   C |   B   A ]
+    __m128 vhigh = _mm256_extractf128_ps(v, 1); // vhigh = [   H   G |   F   E ]
+    __m128 v128  = _mm_add_ps(vlow, vhigh);     // v128  = [ H+D G+C | F+B E+A ]
+    return hsum_ps_sse1(v128);
+}
+#endif
+#endif
+*/
 
-    const float M = 11.34072f * a +
-              16.46899f * cos_hr2 +
-               7.88380f * cos_hr3 +
-              14.66441f * b +
-              -6.37224f * sin_hr2 +
-               9.19364f * sin_hr3 +
-              77.12896f;
+float chroma_compress_norm(float cos_hr1, float sin_hr1, float chroma_compress_scale)
+{
+    const float cos_hr2 = 2.0f * cos_hr1 * cos_hr1 - 1.0f;
+    const float sin_hr2 = 2.0f * cos_hr1 * sin_hr1;
+    const float cos_hr3 = 4.0f * cos_hr1 * cos_hr1 * cos_hr1 - 3.0f * cos_hr1;
+    const float sin_hr3 = 3.0f * sin_hr1 - 4.0f * sin_hr1 * sin_hr1 * sin_hr1;
+
+    constexpr unsigned int AVX_ALIGNMENT = 32;
+    alignas(AVX_ALIGNMENT) const float trig_angles_hr[8] = {
+        cos_hr1, cos_hr2, cos_hr3, 0.0f,
+        sin_hr1, sin_hr2, sin_hr3, 1.0f
+    };
+    alignas(AVX_ALIGNMENT) static constexpr float weights[8] = { // TODO: investigate reordering of the entries so we are summing equal magnitude values first?
+        11.34072f, 16.46899f, 7.88380f, 0.0f,
+        14.66441f, -6.37224f, 9.19364f, 77.12896f
+    };
 
-    return M * chroma_compress_scale;
+    /*
+    // TODO: benchmark this across multiple platforms to justify the multiple code paths.
+#if OCIO_USE_SSE2
+#if OCIO_USE_AVX
+    __m256 trigs        = _mm256_load_ps(trig_angles_hr);
+    __m256 trig_weights = _mm256_load_ps(weights);
+    __m256 t1           = _mm256_mul_ps(trigs, trig_weights);
+    const float M       = hsum256_ps_avx(t1);
+#else
+    __m128 cosines        = _mm_load_ps(trig_angles_hr);
+    __m128 sines          = _mm_load_ps(&trig_angles_hr[4]);
+    __m128 cosine_weights = _mm_load_ps(weights);
+    __m128 sine_weights   = _mm_load_ps(&weights[4]);
+
+    __m128 t1     = _mm_mul_ps(cosines, cosine_weights);
+    __m128 t2     = _mm_mul_ps(sines, sine_weights);
+    __m128 t3     = _mm_add_ps(t1, t2); // TODO use fmadd_ps_sse2 from Lut1DOpCPU_SSE2.cpp ?
+    const float M = hsum_ps_sse1(t3);
+#endif
+#else
+    */
+    const float M = weights[0] * trig_angles_hr[0] +
+                    weights[1] * trig_angles_hr[1] +
+                    weights[2] * trig_angles_hr[2] +
+                    weights[4] * trig_angles_hr[4] +
+                    weights[5] * trig_angles_hr[5] +
+                    weights[6] * trig_angles_hr[6] +
+                    weights[7];
+    /*
+#endif
+    */
+
+    return M * chroma_compress_scale; // TODO: is it worth prescaling the above weights?
 }
 
-float toe_fwd( float x, float limit, float k1_in, float k2_in)
+inline float toe_fwd( float x, float limit, float k1_in, float k2_in)
 {
     if (x > limit)
     {
@@ -230,10 +341,13 @@ float toe_fwd( float x, float limit, float k1_in, float k2_in)
     const float k2 = std::max(k2_in, 0.001f);
     const float k1 = sqrt(k1_in * k1_in + k2 * k2);
     const float k3 = (limit + k1) / (limit + k2);
-    return 0.5f * (k3 * x - k1 + sqrt((k3 * x - k1) * (k3 * x - k1) + 4.f * k2 * k3 * x));
+
+    const float minus_b = k3 * x - k1;
+    const float minus_ac = k2 * k3 * x; // a is 1.0
+    return 0.5f * (minus_b + sqrt(minus_b * minus_b + 4.f * minus_ac)); // a is 1.0, mins_b squared == b^2
 }
 
-float toe_inv( float x, float limit, float k1_in, float k2_in)
+inline float toe_inv( float x, float limit, float k1_in, float k2_in)
 {
     if (x > limit)
     {
@@ -246,33 +360,74 @@ float toe_inv( float x, float limit, float k1_in, float k2_in)
     return (x * x + k1 * x) / (k3 * (x + k2));
 }
 
-f3 tonescale_chroma_compress_fwd(const f3 &JMh, const JMhParams &p, const ToneScaleParams &pt, const ChromaCompressParams &pc)
+template <bool inverse>
+inline float aces_tonescale(const float Y_in, const ToneScaleParams &pt)
 {
-    const float J = JMh[0];
-    const float M = JMh[1];
-    const float h = JMh[2];
+    if (inverse)
+    {
+        const float Y_ts_norm = Y_in / reference_luminance; // TODO
+        const float Z = std::max(0.f, std::min(pt.inverse_limit, Y_ts_norm)); // TODO: could eliminate max in the context of the full tonescale
+        const float f = (Z + sqrt(Z * (4.f * pt.t_1 + Z))) / 2.f;
+        const float Y = pt.s_2 / (powf((pt.m_2 / f), (1.f / pt.g)) - 1.f); // TODO: investigate precomputing reciprocal m_2  and a negative power? may swap a division for a multiply? powf(pt.m_2_recip * f, (-1.f / pt.g))
+        return Y;
+    }
 
+    const float f    = pt.m_2 * powf(Y_in / (Y_in + pt.s_2), pt.g);
+    const float Y_ts = std::max(0.f, f * f / (f + pt.t_1)) * pt.n_r;  // max prevents -ve values being output also handles division by zero possibility // TODO: can we eliminate the n_r scaling? should it be reference_luminance
+    return Y_ts;
+}
+
+template <bool inverse>
+inline float tonescale(const float J, const JMhParams &p, const ToneScaleParams &pt) // TODO: consider computing tonescale from and to A rather than J to avoid extra pow() calls
+{
     // Tonescale applied in Y (convert to and from J)
-    const float A = p.A_w_J * powf(std::abs(J) / 100.f, 1.f / (surround[1] * p.z));
-    const float Y = std::copysign(1.f, J) * 100.f / p.F_L * powf((27.13f * A) / (400.f - A), 1.f / 0.42f) / 100.f;
+    const float J_abs = std::abs(J);
+    const float Y_in  = _J_to_Y(J_abs, p);
+    const float Y_out = aces_tonescale<inverse>(Y_in, pt);
+    const float J_out = _Y_to_J(Y_out, p);
+    return std::copysign(J_out, J);
+}
 
-    const float f = pt.m_2 * powf(std::max(0.f, Y) / (Y + pt.s_2), pt.g);
-    const float Y_ts = std::max(0.f, f * f / (f + pt.t_1)) * pt.n_r;
+float tonescale_fwd(const float J, const JMhParams &p, const ToneScaleParams &pt)
+{
+    return tonescale<false>(J, p, pt);
+}
+
+float tonescale_inv(const float J, const JMhParams &p, const ToneScaleParams &pt)
+{
+    return tonescale<true>(J, p, pt);
+}
 
-    const float F_L_Y = powf(p.F_L * std::abs(Y_ts) / 100.f, 0.42f);
-    const float J_ts = std::copysign(1.f, Y_ts) * 100.f * powf(((400.f * F_L_Y) / (27.13f + F_L_Y)) / p.A_w_J, surround[1] * p.z);
 
-    // ChromaCompress
+template <bool inverse>
+inline float tonescale_A_to_J(const float A, const JMhParams &p, const ToneScaleParams &pt) // TODO: consider computing tonescale from and to A rather than J to avoid extra pow() calls
+{
+    const float Y_in  = _A_to_Y(A, p);
+    const float Y_out = aces_tonescale<inverse>(Y_in, pt);
+    const float J_out = _Y_to_J(Y_out, p);
+    return std::copysign(J_out, A);
+}
+
+float tonescale_A_to_J_fwd(const float A, const JMhParams &p, const ToneScaleParams &pt)
+{
+    return tonescale_A_to_J<false>(A, p, pt);
+}
+
+f3 chroma_compress_fwd(const f3 &JMh, const float J_ts, const float Mnorm, const ResolvedSharedCompressionParameters &pr, const ChromaCompressParams &pc)
+{
+    const float J = JMh[0];
+    const float M = JMh[1];
+    const float h = JMh[2];
+
     float M_cp = M;
 
     if (M != 0.0)
     {
-        const float nJ = J_ts / pc.limit_J_max;
+        const float nJ = J_ts / pr.limit_J_max;
         const float snJ = std::max(0.f, 1.f - nJ);
-        const float Mnorm = chroma_compress_norm(h, pc.chroma_compress_scale);
-        const float limit = powf(nJ, pc.model_gamma) * reach_m_from_table(h, pc.reach_m_table) / Mnorm;
+        const float limit = powf(nJ, pr.model_gamma_inv) * pr.reachMaxM / Mnorm;
 
-        M_cp = M * powf(J_ts / J, pc.model_gamma);
+        M_cp = M * powf(J_ts / J, pr.model_gamma_inv);
         M_cp = M_cp / Mnorm;
         M_cp = limit - toe_fwd(limit - M_cp, limit - 0.001f, snJ * pc.sat, sqrt(nJ * nJ + pc.sat_thr));
         M_cp = toe_fwd(M_cp, limit, nJ * pc.compr, snJ);
@@ -282,49 +437,45 @@ f3 tonescale_chroma_compress_fwd(const f3 &JMh, const JMhParams &p, const ToneSc
     return {J_ts, M_cp, h};
 }
 
-f3 tonescale_chroma_compress_inv(const f3 &JMh, const JMhParams &p, const ToneScaleParams &pt, const ChromaCompressParams &pc)
+f3 chroma_compress_inv(const f3 &JMh, const float J, const float Mnorm, const ResolvedSharedCompressionParameters &pr, const ChromaCompressParams &pc)
 {
     const float J_ts = JMh[0];
     const float M_cp = JMh[1];
     const float h    = JMh[2];
-
-    // Inverse Tonescale applied in Y (convert to and from J)
-    const float A = p.A_w_J * powf(std::abs(J_ts) / 100.f, 1.f / (surround[1] * p.z));
-    const float Y_ts = std::copysign(1.f, J_ts) * 100.f / p.F_L * powf((27.13f * A) / (400.f - A), 1.f / 0.42f) / 100.f;
-
-    const float Z = std::max(0.f, std::min(pt.n / (pt.u_2 * pt.n_r), Y_ts));
-    const float ht = (Z + sqrt(Z * (4.f * pt.t_1 + Z))) / 2.f;
-    const float Y = pt.s_2 / (powf((pt.m_2 / ht), (1.f / pt.g)) - 1.f) * pt.n_r;
-
-    const float F_L_Y = powf(p.F_L * std::abs(Y) / 100.f, 0.42f);
-    const float J = std::copysign(1.f, Y) * 100.f * powf(((400.f * F_L_Y) / (27.13f + F_L_Y)) / p.A_w_J, surround[1] * p.z);
-
-    // Inverse ChromaCompress
     float M = M_cp;
 
     if (M_cp != 0.0)
     {
-        const float nJ = J_ts / pc.limit_J_max;
+        const float nJ = J_ts / pr.limit_J_max;
         const float snJ = std::max(0.f, 1.f - nJ);
-        const float Mnorm = chroma_compress_norm(h, pc.chroma_compress_scale);
-        const float limit = powf(nJ, pc.model_gamma) * reach_m_from_table(h, pc.reach_m_table) / Mnorm;
+        const float limit = powf(nJ, pr.model_gamma_inv) * pr.reachMaxM / Mnorm;
 
         M = M_cp / Mnorm;
         M = toe_inv(M, limit, nJ * pc.compr, snJ);
         M = limit - toe_inv(limit - M, limit - 0.001f, snJ * pc.sat, sqrt(nJ * nJ + pc.sat_thr));
         M = M * Mnorm;
-        M = M * powf(J_ts / J, -pc.model_gamma);
+        M = M * powf(J_ts / J, -pr.model_gamma_inv);
     }
 
     return {J, M, h};
 }
 
-JMhParams init_JMhParams(const Primaries &P)
+inline float model_gamma(void)
 {
-    JMhParams p;
+    // c * z nonlinearity
+    return surround[1] * (1.48f + sqrt(Y_b / reference_luminance));
+}
+
+JMhParams init_JMhParams(const Primaries &prims)
+{
+    const m33f base_cone_response_to_Aab = {
+        2.0f, 1.0f, 1.0f / 20.0f,
+        1.0f, -12.0f / 11.0f, 1.0f / 11.0f,
+        1.0f / 9.0f, 1.0f / 9.0f, -2.0f / 9.0f
+    };
 
     const m33f MATRIX_16 = XYZtoRGB_f33(CAM16::primaries);
-    const m33f RGB_to_XYZ = RGBtoXYZ_f33(P);
+    const m33f RGB_to_XYZ = RGBtoXYZ_f33(prims);
     const f3 XYZ_w = mult_f3_f33(f3_from_f(reference_luminance), RGB_to_XYZ);
 
     const float Y_W = XYZ_w[1];
@@ -332,16 +483,18 @@ JMhParams init_JMhParams(const Primaries &P)
     const f3 RGB_w = mult_f3_f33(XYZ_w, MATRIX_16);
 
     // Viewing condition dependent parameters
-    const float K = 1.f / (5.f * L_A + 1.f);
-    const float K4 = powf(K, 4.f);
-    const float N = Y_b / Y_W;
+    constexpr float K = 1.f / (5.f * L_A + 1.f);
+    constexpr float K4 = K * K * K * K;
     const float F_L = 0.2f * K4 * (5.f * L_A) + 0.1f * powf((1.f - K4), 2.f) * powf(5.f * L_A, 1.f/3.f);
-    const float z = 1.48f + sqrt(N);
+
+    const float F_L_n = F_L / reference_luminance;
+    const float cz    = model_gamma();
+    const float inv_cz = 1.0f / cz;
 
     const f3 D_RGB = {
-        Y_W / RGB_w[0],
-        Y_W / RGB_w[1],
-        Y_W / RGB_w[2]
+        F_L_n * Y_W / RGB_w[0],
+        F_L_n * Y_W / RGB_w[1],
+        F_L_n * Y_W / RGB_w[2]
     };
 
     const f3 RGB_WC {
@@ -351,77 +504,349 @@ JMhParams init_JMhParams(const Primaries &P)
     };
 
     const f3 RGB_AW = {
-        panlrc_forward(RGB_WC[0], F_L),
-        panlrc_forward(RGB_WC[1], F_L),
-        panlrc_forward(RGB_WC[2], F_L)
+        post_adaptation_cone_response_compression_fwd(RGB_WC[0]),
+        post_adaptation_cone_response_compression_fwd(RGB_WC[1]),
+        post_adaptation_cone_response_compression_fwd(RGB_WC[2])
     };
 
-    const float A_w = ra * RGB_AW[0] + RGB_AW[1] + ba * RGB_AW[2];
+    const m33f cone_response_to_Aab = mult_f33_f33(scale_f33(Identity_M33, f3_from_f(cam_nl_scale)), base_cone_response_to_Aab); // TODO: this scale maybe ill conditioning the matrix
+    const float A_w   = cone_response_to_Aab[0] * RGB_AW[0] + cone_response_to_Aab[1] * RGB_AW[1] + cone_response_to_Aab[2] * RGB_AW[2];
+    const float A_w_J = _post_adaptation_cone_response_compression_fwd(F_L);
+    const float inv_A_w_J = 1.0f / A_w_J;
 
-    const float F_L_W = powf(F_L, 0.42f);
-    const float A_w_J   = (400.f * F_L_W) / (27.13f + F_L_W);
+    // TODO: evaluate the condition number of the below matrices and consider extracting a power of 2 scale out of them may improve noise behaviour
+    //       power of 2 to make cost of extra scaling limited vs generalised multiply/divide ?
 
-    p.XYZ_w = XYZ_w;
-    p.F_L = F_L;
-    p.z = z;
-    p.D_RGB = D_RGB;
-    p.A_w = A_w;
-    p.A_w_J = A_w_J;
+    // Note we are prescaling the CAM16 LMS responses to directly provide for chromatic adaptation.
+    const m33f MATRIX_RGB_to_CAM16 = mult_f33_f33(RGBtoRGB_f33(prims, CAM16::primaries), scale_f33(Identity_M33, f3_from_f(reference_luminance)));
+    const m33f MATRIX_RGB_to_CAM16_c = mult_f33_f33(scale_f33(Identity_M33, D_RGB), MATRIX_RGB_to_CAM16);
+    const m33f MATRIX_CAM16_c_to_RGB = invert_f33(MATRIX_RGB_to_CAM16_c);
 
-    p.MATRIX_RGB_to_CAM16 = mult_f33_f33(RGBtoRGB_f33(P, CAM16::primaries), scale_f33(Identity_M33, f3_from_f(100.f)));
-    p.MATRIX_CAM16_to_RGB = invert_f33(p.MATRIX_RGB_to_CAM16);
+    const m33f MATRIX_cone_response_to_Aab = {
+        cone_response_to_Aab[0] / A_w,                cone_response_to_Aab[1] / A_w,                cone_response_to_Aab[2] / A_w,
+        cone_response_to_Aab[3] * 43.f * surround[2], cone_response_to_Aab[4] * 43.f * surround[2], cone_response_to_Aab[5] * 43.f * surround[2],
+        cone_response_to_Aab[6] * 43.f * surround[2], cone_response_to_Aab[7] * 43.f * surround[2], cone_response_to_Aab[8] * 43.f * surround[2],
+    };
+    const m33f MATRIX_Aab_to_cone_response = invert_f33(MATRIX_cone_response_to_Aab);
+
+    JMhParams p = {
+        MATRIX_RGB_to_CAM16_c,
+        MATRIX_CAM16_c_to_RGB,
+        MATRIX_cone_response_to_Aab,
+        MATRIX_Aab_to_cone_response,
+        F_L_n,
+        cz,
+        inv_cz,
+        A_w_J,
+        inv_A_w_J
+    };
+   return p;
+}
 
-    return p;
+inline f3 generate_unit_cube_cusp_corners(const unsigned int corner)
+{
+    // Generation order R, Y, G, C, B, M to ensure hues rotate in correct order
+    return {float(static_cast<unsigned int>(((corner + 1) % cuspCornerCount) < 3)),
+            float(static_cast<unsigned int>(((corner + 5) % cuspCornerCount) < 3)),
+            float(static_cast<unsigned int>(((corner + 3) % cuspCornerCount) < 3))
+            };
 }
 
-Table3D make_gamut_table(const Primaries &P, float peakLuminance)
+void build_limiting_cusp_corners_tables(std::array<f3, totalCornerCount>& RGB_corners, std::array<f3, totalCornerCount>& JMh_corners,
+                               const JMhParams &params, const float peakLuminance)
 {
-    const JMhParams params = init_JMhParams(P);
+    // We calculate the RGB and JMh values for the limiting gamut cusp corners
+    // They are then arranged into a cycle with the lowest JMh value at [1] to allow for hue wrapping
+    std::array<f3, cuspCornerCount> temp_RGB_corners;
+    std::array<f3, cuspCornerCount> temp_JMh_corners;
+    unsigned int min_index = 0;
+    for (unsigned int i = 0; i != cuspCornerCount; ++i)
+    {
+      temp_RGB_corners[i] = mult_f_f3(peakLuminance / reference_luminance, generate_unit_cube_cusp_corners(i));
+      temp_JMh_corners[i] = RGB_to_JMh(temp_RGB_corners[i], params);
+      if (temp_JMh_corners[i][2] < temp_JMh_corners[min_index][2])
+        min_index = i;
+    }
 
-    Table3D gamutCuspTableUnsorted{};
-    for (int i = 0; i < gamutCuspTableUnsorted.size; i++)
+    // Rotate entries placing lowest at [1] (not [0])
+    for (unsigned int i = 0; i != cuspCornerCount; ++i)
     {
-        const float hNorm = (float) i / gamutCuspTableUnsorted.size;
-        const f3 HSV = {hNorm, 1., 1.};
-        const f3 RGB = HSV_to_RGB(HSV);
-        const f3 scaledRGB = mult_f_f3(peakLuminance / reference_luminance, RGB);
-        const f3 JMh = RGB_to_JMh(scaledRGB, params);
+      RGB_corners[i + 1] = temp_RGB_corners[(i + min_index) % cuspCornerCount];
+      JMh_corners[i + 1] = temp_JMh_corners[(i + min_index) % cuspCornerCount];
+    }
 
-        gamutCuspTableUnsorted.table[i][0] = JMh[0];
-        gamutCuspTableUnsorted.table[i][1] = JMh[1];
-        gamutCuspTableUnsorted.table[i][2] = JMh[2];
+    // Copy end elements to create a cycle
+    RGB_corners[0]                   = RGB_corners[cuspCornerCount];
+    RGB_corners[cuspCornerCount + 1] = RGB_corners[1];
+    JMh_corners[0]                   = JMh_corners[cuspCornerCount];
+    JMh_corners[cuspCornerCount + 1] = JMh_corners[1];
+
+    // Wrap the hues, to maintain monotonicity these entries will fall outside [0.0, hue_limit)
+    JMh_corners[0][2]                   = JMh_corners[0][2] - hue_limit;
+    JMh_corners[cuspCornerCount + 1][2] = JMh_corners[cuspCornerCount + 1][2] + hue_limit;
+}
+
+void find_reach_corners_table(std::array<f3, totalCornerCount>& JMh_corners, const JMhParams &params, const float limitJ, const float maximum_source)
+{
+    // We need to find the value of JMh that corresponds to limitJ for each corner
+    // This is done by scaling the unit corners converting to JMh until the J value is near the limitJ
+    // As an optimisation we use the equivalent Achromatic value to search for the J value and avoid the
+    // non-linear transform during the search
+    // Strictly speaking we should only need to find the R, G and  B "corners" as the reach is unbounded and
+    // as such does not form a cube, but is formed by the transformed 3 lower planes of the cube and
+    // the plane at J = limitJ
+    std::array<f3, cuspCornerCount> temp_JMh_corners;
+    const float limitA = J_to_Achromatic_n(limitJ, params.inv_cz);
+
+    unsigned int min_index = 0;
+    for (unsigned int i = 0; i != cuspCornerCount; ++i)
+    {
+        const f3 rgb_vector = generate_unit_cube_cusp_corners(i);
+
+        float lower = 0.0f;
+        float upper = maximum_source;
+        while ((upper - lower) > reach_cusp_tolerance)
+        {
+            float       test        = midpoint(lower, upper);
+            const f3    test_corner = mult_f_f3(test, rgb_vector);
+            const float A           = RGB_to_Aab(test_corner, params)[0];
+            if (A < limitA)
+            {
+                lower = test;
+            }
+            else
+            {
+                upper = test;
+            }
+            if (A == limitA)
+                break;
+        }
+        temp_JMh_corners[i] = RGB_to_JMh(mult_f_f3(upper, rgb_vector), params);
+
+        if (temp_JMh_corners[i][2] < temp_JMh_corners[min_index][2])
+            min_index = i;
     }
 
-    int minhIndex = 0;
-    for (int i = 0; i < gamutCuspTableUnsorted.size; i++)
+    // Rotate entries placing lowest at [1] (not [0]) // TODO: could use std::rotate_copy or even the ranges vs in C++20
+    for (unsigned int i = 0; i != cuspCornerCount; ++i)
     {
-        if ( gamutCuspTableUnsorted.table[i][2] < gamutCuspTableUnsorted.table[minhIndex][2])
-            minhIndex = i;
+      JMh_corners[i + 1] = temp_JMh_corners[(i + min_index) % cuspCornerCount];
     }
 
-    Table3D gamutCuspTable{};
-    for (int i = 0; i < gamutCuspTableUnsorted.size; i++)
+    // Copy end elements to create a cycle
+    JMh_corners[0]                   = JMh_corners[cuspCornerCount];
+    JMh_corners[cuspCornerCount + 1] = JMh_corners[1];
+
+    // Wrap the hues, to maintain monotonicity these entries will fall outside [0.0, hue_limit)
+    JMh_corners[0][2]                   = JMh_corners[0][2] - hue_limit;
+    JMh_corners[cuspCornerCount + 1][2] = JMh_corners[cuspCornerCount + 1][2] + hue_limit;
+}
+
+unsigned int extract_sorted_cube_hues(std::array<float, max_sorted_corners>& sorted_hues,
+                             const std::array<f3, totalCornerCount>& reach_JMh, const std::array<f3, totalCornerCount>& display_JMh)
+{
+    // Basic merge of 2 sorted arrays, extracting the unique hues.
+    // Return the count of the unique hues
+    //TODO: use STL for this and similar functions
+    unsigned int idx         = 0;
+    unsigned int reach_idx   = 1;
+    unsigned int display_idx = 1;
+    while ((reach_idx < (cuspCornerCount + 1)) || (display_idx < (cuspCornerCount + 1)))
     {
-        gamutCuspTable.table[i + gamutCuspTable.base_index][0] = gamutCuspTableUnsorted.table[(minhIndex+i) % gamutCuspTableUnsorted.size][0];
-        gamutCuspTable.table[i + gamutCuspTable.base_index][1] = gamutCuspTableUnsorted.table[(minhIndex+i) % gamutCuspTableUnsorted.size][1];
-        gamutCuspTable.table[i + gamutCuspTable.base_index][2] = gamutCuspTableUnsorted.table[(minhIndex+i) % gamutCuspTableUnsorted.size][2];
+        const float reach_hue   = reach_JMh[reach_idx][2];
+        const float display_hue = display_JMh[display_idx][2];
+        if (reach_hue == display_hue)
+        {
+            sorted_hues[idx] = reach_hue;
+            ++reach_idx;
+            ++display_idx; // When equal consume both
+        }
+        else
+        {
+            if (reach_hue < display_hue)
+            {
+                sorted_hues[idx] = reach_hue;
+                ++reach_idx;
+            }
+            else
+            {
+                sorted_hues[idx] = display_hue;
+                ++display_idx;
+            }
+      }
+      ++idx;
     }
+    return idx;
+}
 
-    // Copy last populated entry to first empty spot
-    gamutCuspTable.table[0][0] = gamutCuspTable.table[gamutCuspTable.base_index + gamutCuspTable.size-1][0];
-    gamutCuspTable.table[0][1] = gamutCuspTable.table[gamutCuspTable.base_index + gamutCuspTable.size-1][1];
-    gamutCuspTable.table[0][2] = gamutCuspTable.table[gamutCuspTable.base_index + gamutCuspTable.size-1][2];
+void build_hue_sample_interval(const unsigned int samples, const float lower, const float upper, Table1D &hue_table, const unsigned int base)
+{
+    const float delta = (upper - lower) / float(samples);
+    for (unsigned int i = 0; i != samples; ++i)
+    {
+        hue_table[base + i] = lower + float(i) * delta;
+    }
+}
 
-    // Copy first populated entry to last empty spot
-    gamutCuspTable.table[gamutCuspTable.base_index + gamutCuspTable.size][0] = gamutCuspTable.table[gamutCuspTable.base_index][0];
-    gamutCuspTable.table[gamutCuspTable.base_index + gamutCuspTable.size][1] = gamutCuspTable.table[gamutCuspTable.base_index][1];
-    gamutCuspTable.table[gamutCuspTable.base_index + gamutCuspTable.size][2] = gamutCuspTable.table[gamutCuspTable.base_index][2];
+void build_hue_table(Table1D &hue_table, const std::array<float, max_sorted_corners>& sorted_hues, const unsigned int unique_hues)
+{
+    const float ideal_spacing = hue_table.nominal_size / hue_limit;
+    std::array<unsigned int, 2 * cuspCornerCount + 2> samples_count = {};
+    unsigned int last_idx  = std::numeric_limits<unsigned int>::max();
+    unsigned int min_index = sorted_hues[0] == 0.0f ? 0 : 1; // Ensure we can always sample at 0.0 hue
+    for (unsigned int hue_idx = 0; hue_idx != unique_hues; ++hue_idx)
+    {
+        // BUG: "hue_table.size - 1" will fail if we have multiple hues mapping near the top of the table
+        unsigned int nominal_idx = std::min(std::max(static_cast<unsigned int>(std::round(sorted_hues[hue_idx] * ideal_spacing)), min_index), hue_table.nominal_size - 1);
+        if (last_idx == nominal_idx)
+        {
+            // Last two hues should sample at same index, need to adjust them
+            // Adjust previous sample down if we can
+            if (hue_idx > 1 && samples_count[hue_idx - 2] != (samples_count[hue_idx - 1] - 1))
+            {
+                samples_count[hue_idx - 1] = samples_count[hue_idx - 1] - 1;
+            }
+            else
+            {
+                nominal_idx = nominal_idx + 1;
+            }
+        }
+        samples_count[hue_idx] = std::min(nominal_idx, hue_table.nominal_size - 1U);
+        last_idx = min_index = nominal_idx;
+    }
 
-    // Wrap the hues, to maintain monotonicity. These entries will fall outside [0.0, 360.0]
-    gamutCuspTable.table[0][2] = gamutCuspTable.table[0][2] - 360.f;
-    gamutCuspTable.table[gamutCuspTable.size+1][2] = gamutCuspTable.table[gamutCuspTable.size+1][2] + 360.f;
+    unsigned int total_samples = 0;
+    // Special cases for ends
+    unsigned int i = 0;
+    build_hue_sample_interval(samples_count[i], 0.0f, sorted_hues[i], hue_table, total_samples + 1);
+    total_samples += samples_count[i];
+    for (++i; i != unique_hues; ++i)
+    {
+        const unsigned int samples = samples_count[i] - samples_count[i - 1];
+        build_hue_sample_interval(samples, sorted_hues[i - 1], sorted_hues[i], hue_table, total_samples + 1);
+        total_samples += samples;
+    }
+    // BUG: could break if we are unlucky with samples all being used up by this point
+    build_hue_sample_interval(hue_table.nominal_size - total_samples, sorted_hues[i - 1], hue_limit, hue_table, total_samples + 1);
 
-    return gamutCuspTable;
+    hue_table[hue_table.lower_wrap_index] = hue_table[hue_table.last_nominal_index] - hue_limit;
+    hue_table[hue_table.upper_wrap_index] = hue_table[hue_table.first_nominal_index] + hue_limit;
+}
+
+std::array<float, 2> find_display_cusp_for_hue(float hue, const std::array<f3, totalCornerCount>& RGB_corners, const std::array<f3, totalCornerCount>& JMh_corners,
+                                               const JMhParams &params, std::array<float, 2> & previous)
+{
+    // This works by finding the required line segment between two of the XYZ cusp corners, then binary searching
+    // along the line calculating the JMh of points along the line till we find the required value.
+    // All values on the line segments are valid cusp locations.
+
+    unsigned int upper_corner = 1;
+    for (unsigned int i = upper_corner; i != totalCornerCount; ++i) // TODO: binary search?
+    {
+        if (JMh_corners[i][2] > hue)
+        {
+            upper_corner = i;
+            break;
+        }
+    }
+    const unsigned int lower_corner = upper_corner - 1;
+
+    // hue should now be within [lower_corner, upper_corner), handle exact match
+    if (JMh_corners[lower_corner][2] == hue)
+    {
+        return {JMh_corners[lower_corner][0], JMh_corners[lower_corner][1]};
+    }
+
+    // search by lerping between RGB corners for the hue
+    const f3 cusp_lower = RGB_corners[lower_corner];
+    const f3 cusp_upper = RGB_corners[upper_corner];
+    f3       sample;
+
+    float sample_t;
+    float lower_t = (upper_corner == previous[0]) ? previous[1] : 0.0f; // If we are still on the same segment start from where we left off
+    float upper_t = 1.0f;
+
+    f3 JMh;
+
+    // There is an edge case where we need to search towards the range when across the [0.0f, hue_limit) boundary
+    // each edge needs the directions swapped. This is handled by comparing against the appropriate corner to make
+    // sure we are still in the expected range between the lower and upper corner hue limits
+    while ((upper_t - lower_t) > display_cusp_tolerance)
+    {
+        sample_t = midpoint(lower_t, upper_t);
+        sample   = lerp(cusp_lower, cusp_upper, sample_t);
+        JMh      = RGB_to_JMh(sample, params);
+        if (JMh[2] < JMh_corners[lower_corner][2])
+        {
+            upper_t = sample_t;
+        }
+        else if (JMh[2] >= JMh_corners[upper_corner][2])
+        {
+            lower_t = sample_t;
+        }
+        else if (JMh[2] > hue)
+        {
+            upper_t = sample_t;
+        }
+        else
+        {
+            lower_t = sample_t;
+        }
+    }
+
+    // Use the midpoint of the final interval for the actual samples
+    sample_t = midpoint(lower_t, upper_t);
+    sample   = lerp(cusp_lower, cusp_upper, sample_t);
+    JMh      = RGB_to_JMh(sample, params);
+
+    previous[0] = float(upper_corner);
+    previous[1] = sample_t;
+
+    return {JMh[0], JMh[1]};
+}
+
+Table3D build_cusp_table(const Table1D& hue_table, const std::array<f3, totalCornerCount>& RGB_corners, const std::array<f3, totalCornerCount>& JMh_corners, const JMhParams &params)
+{
+    std::array<float, 2> previous = {0.0f, 0.0f};
+    Table3D output_table;
+    for (unsigned int i = output_table.first_nominal_index; i != output_table.upper_wrap_index; ++i)
+    {
+      const float hue = hue_table[i];
+      const std::array<float, 2> JM = find_display_cusp_for_hue(hue, RGB_corners, JMh_corners, params, previous);
+      output_table[i][0] = JM[0];
+      output_table[i][1] = JM[1] * (1.f + smooth_m * smooth_cusps);;
+      output_table[i][2] = hue;
+    }
+
+    // Copy extra entries to ease the code to handle hues wrapping around
+    output_table[output_table.lower_wrap_index][0] = output_table[output_table.last_nominal_index][0];
+    output_table[output_table.lower_wrap_index][1] = output_table[output_table.last_nominal_index][1];
+    output_table[output_table.lower_wrap_index][2] = hue_table[hue_table.lower_wrap_index];
+    output_table[output_table.upper_wrap_index][0] = output_table[output_table.first_nominal_index][0];
+    output_table[output_table.upper_wrap_index][1] = output_table[output_table.first_nominal_index][1];
+    output_table[output_table.upper_wrap_index][2] = hue_table[hue_table.upper_wrap_index];
+    return output_table;
+}
+
+Table3D make_uniform_hue_gamut_table(const JMhParams &reach_params, const JMhParams &params, float peakLuminance, float forward_limit, const SharedCompressionParameters& sp, Table1D& hue_table)
+{
+    // The principal here is to sample the hues as uniformly as possible, whilst ensuring we sample the corners
+    // of the limiting gamut and the reach primaries at limit J Max
+    //
+    // The corners are calculated then the hues are extracted and merged to form a unique sorted hue list
+    // We then build the hue table from the list, those hues are then used to compute the JMh od the limiting
+    // gamut cusp.
+
+    std::array<f3, totalCornerCount> reach_JMh_corners;
+    std::array<f3, totalCornerCount> limiting_RGB_corners;
+    std::array<f3, totalCornerCount> limiting_JMh_corners;
+    std::array<float, max_sorted_corners> sorted_hues;
+
+    find_reach_corners_table(reach_JMh_corners, reach_params, sp.limit_J_max, forward_limit);
+    build_limiting_cusp_corners_tables(limiting_RGB_corners, limiting_JMh_corners, params, peakLuminance);
+    const unsigned int unique_hues = extract_sorted_cube_hues(sorted_hues, reach_JMh_corners, limiting_JMh_corners);
+    build_hue_table(hue_table, sorted_hues, unique_hues);
+    return build_cusp_table(hue_table, limiting_RGB_corners, limiting_JMh_corners, params);
 }
 
 bool any_below_zero(const f3 &rgb)
@@ -429,22 +854,20 @@ bool any_below_zero(const f3 &rgb)
     return (rgb[0] < 0. || rgb[1] < 0. || rgb[2] < 0.);
 }
 
-Table1D make_reach_m_table(const Primaries &P, float peakLuminance)
+Table1D make_reach_m_table(const JMhParams &params, const float limit_J_max)
 {
-    const JMhParams params = init_JMhParams(P);
-    const float limit_J_max = Y_to_J(peakLuminance, params);
-
     Table1D gamutReachTable{};
 
-    for (int i = 0; i < gamutReachTable.size; i++) {
-        const float hue = (float) i;
+    for (unsigned int i = 0; i < gamutReachTable.nominal_size; i++) {
+        const float hue = gamutReachTable.base_hue_for_position(i);
 
-        const float search_range = 50.f;
-        float low = 0.;
+        constexpr float search_range = 50.f;
+        constexpr float search_maximum = 1300.f; // TODO: magic limit
+        float low = 0.f;
         float high = low + search_range;
         bool outside = false;
 
-        while ((outside != true) & (high < 1300.f))
+        while ((outside != true) & (high < search_maximum))
         {
             const f3 searchJMh = {limit_J_max, high, hue};
             const f3 newLimitRGB = JMh_to_RGB(searchJMh, params);
@@ -456,7 +879,7 @@ Table1D make_reach_m_table(const Primaries &P, float peakLuminance)
             }
         }
 
-        while (high-low > 1e-2)
+        while (high-low > 1e-2) // TODO:tolerance as constexpr
         {
             const float sampleM = (high + low) / 2.f;
             const f3 searchJMh = {limit_J_max, sampleM, hue};
@@ -472,265 +895,293 @@ Table1D make_reach_m_table(const Primaries &P, float peakLuminance)
             }
         }
 
-        gamutReachTable.table[i] = high;
+        gamutReachTable[i + gamutReachTable.base_index] = high;
     }
+    gamutReachTable[gamutReachTable.lower_wrap_index] = gamutReachTable[gamutReachTable.last_nominal_index];
+    gamutReachTable[gamutReachTable.upper_wrap_index] = gamutReachTable[gamutReachTable.first_nominal_index];
 
     return gamutReachTable;
 }
 
-bool outside_hull(const f3 &rgb)
+inline bool outside_hull(const f3 &rgb, float maxRGBtestVal)
 {
     // limit value, once we cross this value, we are outside of the top gamut shell
-    const float maxRGBtestVal = 1.0;
     return rgb[0] > maxRGBtestVal || rgb[1] > maxRGBtestVal || rgb[2] > maxRGBtestVal;
 }
 
-float get_focus_gain(float J, float cuspJ, float limit_J_max)
+inline float get_focus_gain(float J, float analytical_threshold, float limit_J_max, float focus_dist)
 {
-    const float thr = lerpf(cuspJ, limit_J_max, focus_gain_blend);
-
-    if (J > thr)
+    float gain = limit_J_max * focus_dist;
+    if (J > analytical_threshold)
     {
-        // Approximate inverse required above threshold
-        float gain = (limit_J_max - thr) / std::max(0.0001f, (limit_J_max - std::min(limit_J_max, J)));
-        return powf(log10(gain), 1.f / focus_adjust_gain) + 1.f;
-    }
-    else
-    {
-        // Analytic inverse possible below cusp
-        return 1.f;
+        // Approximate inverse required above threshold due to the introduction of J in the calculation
+        float gain_adjustment = log10((limit_J_max - analytical_threshold) / std::max(0.0001f, limit_J_max - J));
+        //gain = powf(gain, focus_adjust_gain_inv) + 1.f;
+        gain_adjustment = gain_adjustment * gain_adjustment + 1.f;
+        gain = gain * gain_adjustment;
     }
+    return gain;
 }
 
-float solve_J_intersect(float J, float M, float focusJ, float maxJ, float slope_gain)
+float solve_J_intersect(float J, float M, float focusJ, float maxJ, float slope_gain) // TODO: eval placing maxJ as last patrameter?
 {
-    const float a = M / (focusJ * slope_gain);
-    float b = 0.f;
-    float c = 0.f;
-    float intersectJ = 0.f;
+    const float M_scaled = M / slope_gain;
+    const float a = M_scaled / focusJ;
 
     if (J < focusJ)
     {
-        b = 1.f - M / slope_gain;
+        const float b = 1.f - M_scaled;
+        const float c = -J;
+        const float det = b * b - 4.f * a * c;
+        const float root = sqrt(det);
+        return -2.f * c / (b + root);
     }
     else
     {
-        b = - (1.f + M / slope_gain + maxJ * M / (focusJ * slope_gain));
+        const float b = -(1.f + M_scaled + maxJ * a);
+        const float c = maxJ * M_scaled + J;
+        const float det = b * b - 4.f * a * c;
+        const float root = sqrt(det);
+        return -2.f * c / (b - root);
     }
-
-    if (J < focusJ)
-    {
-        c = -J;
-    }
-    else
-    {
-        c = maxJ * M / slope_gain + J;
-    }
-
-    const float root = sqrt(b * b - 4.f * a * c);
-
-    if (J < focusJ)
-    {
-        intersectJ = 2.f * c / (-b - root);
-    }
-    else
-    {
-        intersectJ = 2.f * c / (-b + root);
-    }
-
-    return intersectJ;
 }
 
-float smin(float a, float b, float s)
+// Smooth minimum about the scaled reference, based upon a cubic polynomial
+inline float smin_scaled(float a, float b, float scale_reference)
 {
-    const float h = std::max(s - std::abs(a - b), 0.f) / s;
-    return std::min(a, b) - h * h * h * s * (1.f / 6.f);
+    const float s_scaled = smooth_cusps * scale_reference;
+    const float h = std::max(s_scaled - std::abs(a - b), 0.0f) / s_scaled;
+    return std::min(a, b) - h * h * h * s_scaled * (1.f / 6.f);
 }
 
-f3 find_gamut_boundary_intersection(const f3 &JMh_s, const f2 &JM_cusp_in, float J_focus, float J_max, float slope_gain, float gamma_top, float gamma_bottom)
+inline float compute_compression_vector_slope(const float intersectJ, const float focusJ, const float limitJmax, const float slope_gain)
 {
-    const float s = std::max(0.000001f, smooth_cusps);
-    const f2 JM_cusp = {
-        JM_cusp_in[0],
-        JM_cusp_in[1] * (1.f + smooth_m * s)
-    };
+    const float direction_scaler = (intersectJ < focusJ) ? intersectJ : (limitJmax - intersectJ); // TODO < vs <=
+    return direction_scaler * (intersectJ - focusJ) / (focusJ * slope_gain);
+}
 
-    const float J_intersect_source = solve_J_intersect(JMh_s[0], JMh_s[1], J_focus, J_max, slope_gain);
-    const float J_intersect_cusp = solve_J_intersect(JM_cusp[0], JM_cusp[1], J_focus, J_max, slope_gain);
+inline float estimate_line_and_boundary_intersection_M(const float J_axis_intersect, const float slope, const float inv_gamma,
+                                                       const float J_max, const float M_max, const float J_intersection_reference)
+{
+    // Line defined by     J = slope * x + J_axis_intersect
+    // Boundary defined by J = J_max * (x / M_max) ^ (1/inv_gamma)
+    // Approximate as we do not want to iteratively solve intersection of a straight line and an exponential
 
-    float slope = 0.f;
-    if (J_intersect_source < J_focus)
-    {
-        slope = J_intersect_source * (J_intersect_source - J_focus) / (J_focus * slope_gain);
-    }
-    else
-    {
-        slope = (J_max - J_intersect_source) * (J_intersect_source - J_focus) / (J_focus * slope_gain);
-    }
+    // We calculate a shifted intersection from the original intersection using the inverse of the exponential
+    // and the provided reference
+    const float normalised_J         = J_axis_intersect / J_intersection_reference;
+    const float shifted_intersection = J_intersection_reference * powf(normalised_J, inv_gamma);
 
-    const float M_boundary_lower = J_intersect_cusp * powf(J_intersect_source / J_intersect_cusp, 1.f / gamma_bottom) / (JM_cusp[0] / JM_cusp[1] - slope);
-    const float M_boundary_upper = JM_cusp[1] * (J_max - J_intersect_cusp) * powf((J_max - J_intersect_source) / (J_max - J_intersect_cusp), 1.f / gamma_top) / (slope * JM_cusp[1] + J_max - JM_cusp[0]);
-    const float M_boundary = JM_cusp[1] * smin(M_boundary_lower / JM_cusp[1], M_boundary_upper / JM_cusp[1], s);
-    const float J_boundary = J_intersect_source + slope * M_boundary;
+    // Now we find the M intersection of two lines
+    // line from origin to J,M Max       l1(x) = J/M * x
+    // line from J Intersect' with slope l2(x) = slope * x + Intersect'
 
-    return {J_boundary, M_boundary, J_intersect_source};
+    //return shifted_intersection / ((J_max / M_max) - slope);
+    return shifted_intersection * M_max / (J_max - slope * M_max);
 }
 
-f3 get_reach_boundary(
-    float J,
-    float M,
-    float h,
-    const f2 &JMcusp,
-    float focusJ,
-    float limit_J_max,
-    float model_gamma,
-    float focus_dist,
-    const ACES2::Table1D & reach_m_table
-)
+float find_gamut_boundary_intersection(const f2 &JM_cusp, float J_max, float gamma_top_inv, float gamma_bottom_inv, const float J_intersect_source, const float slope, const float J_intersect_cusp)
 {
-    const float reachMaxM = reach_m_from_table(h, reach_m_table);
-
-    const float slope_gain = limit_J_max * focus_dist * get_focus_gain(J, JMcusp[0], limit_J_max);
-
-    const float intersectJ = solve_J_intersect(J, M, focusJ, limit_J_max, slope_gain);
+    const float M_boundary_lower = estimate_line_and_boundary_intersection_M(J_intersect_source, slope, gamma_bottom_inv, JM_cusp[0], JM_cusp[1], J_intersect_cusp);
+
+    // The upper hull is flipped and thus 'zeroed' at J_max
+    // Also note we negate the slope
+    const float f_J_intersect_cusp   = J_max - J_intersect_cusp;
+    const float f_J_intersect_source = J_max - J_intersect_source;
+    const float f_JM_cusp_J          = J_max - JM_cusp[0];
+    const float M_boundary_upper =
+      estimate_line_and_boundary_intersection_M(f_J_intersect_source, -slope, gamma_top_inv, f_JM_cusp_J, JM_cusp[1], f_J_intersect_cusp);
+
+    // Smooth minimum between the two calculated values for the M component
+    const float M_boundary = smin_scaled(M_boundary_lower, M_boundary_upper, JM_cusp[1]);
+    return M_boundary;
+}
 
-    float slope;
-    if (intersectJ < focusJ)
-    {
-        slope = intersectJ * (intersectJ - focusJ) / (focusJ * slope_gain);
-    }
-    else
+template <bool invert>
+inline float reinhard_remap(const float scale, const float nd)
+{
+    if (invert)
     {
-        slope = (limit_J_max - intersectJ) * (intersectJ - focusJ) / (focusJ * slope_gain);
+        if (nd >= 1.0f) // TODO: given remap_M already tests against proportion do we need this asymptote test
+            return scale;
+        else
+            return scale * -(nd / (nd - 1.0f));
     }
-
-    const float boundary = limit_J_max * powf(intersectJ / limit_J_max, model_gamma) * reachMaxM / (limit_J_max - slope * reachMaxM);
-    return {J, boundary, h};
+    return scale * nd / (1.0f + nd);
 }
 
-float compression_function(
-    float v,
-    float thr,
-    float lim,
-    bool invert)
+template <bool invert>
+inline float remap_M(const float M, const float gamut_boundary_M, const float reach_boundary_M)
 {
-    float s = (lim - thr) * (1.f - thr) / (lim - 1.f);
-    float nd = (v - thr) / s;
-
-    float vCompressed;
-
-    if (invert) {
-        if (v < thr || lim <= 1.0001f || v > thr + s) {
-            vCompressed = v;
-        } else {
-            vCompressed = thr + s * (-nd / (nd - 1.f));
-        }
-    } else {
-        if (v < thr || lim <= 1.0001f) {
-            vCompressed = v;
-        } else {
-            vCompressed = thr + s * nd / (1.f + nd);
-        }
-    }
-
-    return vCompressed;
+    const float boundary_ratio = gamut_boundary_M / reach_boundary_M;
+    const float proportion = std::max(boundary_ratio, compression_threshold);
+    const float threshold  = proportion * gamut_boundary_M;
+
+    if (M <= threshold || proportion >= 1.0f)
+        return M;
+
+    // Translate to place threshold at zero
+    const float m_offset     = M - threshold;
+    const float gamut_offset = gamut_boundary_M - threshold;
+    const float reach_offset = reach_boundary_M - threshold;
+
+    const float scale = reach_offset / ((reach_offset / gamut_offset) - 1.0f);
+    const float nd = m_offset / scale;
+ 
+    // shift back to absolute
+    return threshold + reinhard_remap<invert>(scale, nd);
 }
 
-f3 compressGamut(const f3 &JMh, float Jx, const ACES2::GamutCompressParams& p, bool invert)
+template <bool invert>
+f3 compressGamut(const f3 &JMh, float Jx, const ACES2::ResolvedSharedCompressionParameters& sr, const ACES2::GamutCompressParams& p, const HueDependantGamutParams hdp)
 {
     const float J = JMh[0];
     const float M = JMh[1];
     const float h = JMh[2];
+    
+    const float slope_gain = get_focus_gain(Jx, hdp.analytical_threshold, sr.limit_J_max, p.focus_dist);
+    const float J_intersect_source = solve_J_intersect(J, M, hdp.focusJ, sr.limit_J_max, slope_gain);
+    const float gamut_slope = compute_compression_vector_slope(J_intersect_source, hdp.focusJ, sr.limit_J_max, slope_gain);
+
+    const float J_intersect_cusp = solve_J_intersect(hdp.JMcusp[0], hdp.JMcusp[1], hdp.focusJ, sr.limit_J_max, slope_gain);
+    const float gamut_boundary_M = find_gamut_boundary_intersection(hdp.JMcusp, sr.limit_J_max, hdp.gamma_top_inv, hdp.gamma_bottom_inv, J_intersect_source, gamut_slope, J_intersect_cusp);
 
-    if (M < 0.0001f || J > p.limit_J_max)
+    if (gamut_boundary_M <= 0.0f) // TODO: when/why does this happen?
     {
         return {J, 0.f, h};
     }
-    else
-    {
-        const f2 project_from = {J, M};
-        const f2 JMcusp = cusp_from_table(h, p.gamut_cusp_table);
-        const float focusJ = lerpf(JMcusp[0], p.mid_J, std::min(1.f, cusp_mid_blend - (JMcusp[0] / p.limit_J_max)));
-        const float slope_gain = p.limit_J_max * p.focus_dist * get_focus_gain(Jx, JMcusp[0], p.limit_J_max);
-
-        const float gamma_top = hue_dependent_upper_hull_gamma(h, p.upper_hull_gamma_table);
-        const float gamma_bottom = p.lower_hull_gamma;
-
-        const f3 boundaryReturn = find_gamut_boundary_intersection({J, M, h}, JMcusp, focusJ, p.limit_J_max, slope_gain, gamma_top, gamma_bottom);
-        const f2 JMboundary = {boundaryReturn[0], boundaryReturn[1]};
-        const f2 project_to = {boundaryReturn[2], 0.f};
-
-        if (JMboundary[1] <= 0.0f)
-        {
-            return {J, 0.f, h};
-        }
-
-        const f3 reachBoundary = get_reach_boundary(JMboundary[0], JMboundary[1], h, JMcusp, focusJ, p.limit_J_max, p.model_gamma, p.focus_dist, p.reach_m_table);
 
-        const float difference = std::max(1.0001f, reachBoundary[1] / JMboundary[1]);
-        const float threshold = std::max(compression_threshold, 1.f / difference);
+    const float reachBoundaryM   = estimate_line_and_boundary_intersection_M(J_intersect_source, gamut_slope, sr.model_gamma_inv, sr.limit_J_max, sr.reachMaxM, sr.limit_J_max);
 
-        float v = project_from[1] / JMboundary[1];
-        v = compression_function(v, threshold, difference, invert);
+    const float remapped_M = remap_M<invert>(M, gamut_boundary_M, reachBoundaryM);
 
-        const f2 JMcompressed {
-            project_to[0] + v * (JMboundary[0] - project_to[0]),
-            project_to[1] + v * (JMboundary[1] - project_to[1])
-        };
+    const f3 JMcompressed {
+        J_intersect_source + remapped_M * gamut_slope,
+        remapped_M,
+        h
+    };
+    return JMcompressed;
+}
 
-        return {JMcompressed[0], JMcompressed[1], h};
-    }
+inline float compute_focusJ(float cusp_J, float mid_J, float limit_J_max)
+{
+    return lerpf(cusp_J, mid_J, std::min(1.f, cusp_mid_blend - (cusp_J / limit_J_max)));
 }
 
-f3 gamut_compress_fwd(const f3 &JMh, const GamutCompressParams &p)
+HueDependantGamutParams init_HueDependantGamutParams(const float hue, const ResolvedSharedCompressionParameters &sr, const GamutCompressParams &p)
 {
-    return compressGamut(JMh, JMh[0], p, false);
+    HueDependantGamutParams hdp;
+    hdp.gamma_bottom_inv = p.lower_hull_gamma_inv;
+
+    const unsigned int i_hi = lookup_hue_interval(hue, p.hue_table, p.hue_linearity_search_range);
+    const float t = interpolation_weight(hue, p.hue_table[i_hi-1], p.hue_table[i_hi]);
+    const f3 cusp = cusp_from_table(i_hi, t, p.gamut_cusp_table);
+
+    hdp.JMcusp = { cusp[0], cusp[1] };
+    hdp.gamma_top_inv = { cusp[2] };
+    hdp.focusJ = compute_focusJ(hdp.JMcusp[0], p.mid_J, sr.limit_J_max);
+    hdp.analytical_threshold = lerpf(hdp.JMcusp[0], sr.limit_J_max, focus_gain_blend);
+    return hdp;
 }
 
-f3 gamut_compress_inv(const f3 &JMh, const GamutCompressParams &p)
+f3 gamut_compress_fwd(const f3 &JMh, const ResolvedSharedCompressionParameters &sr, const GamutCompressParams &p)
 {
-    const f2 JMcusp = cusp_from_table(JMh[2], p.gamut_cusp_table);
-    float Jx = JMh[0];
+    const float J = JMh[0];
+    const float M = JMh[1];
+    const float h = JMh[2];
 
-    f3 unCompressedJMh;
+    if (J <= 0.0f) // Limit to +ve J values // TODO test this is needed
+    {
+        return {0.0f, 0.f, h};
+    }
+    if (M <= 0.0f || J > sr.limit_J_max) // We compress M only so avoid mapping zero
+                                         // Above the expected maximum we explicitly map to 0 M
+    {
+        return {J, 0.f, h};
+    }
+    const HueDependantGamutParams hdp = init_HueDependantGamutParams(h, sr, p);
+    
+    return compressGamut<false>(JMh, JMh[0], sr, p, hdp);
+}
 
-    // Analytic inverse below threshold
-    if (Jx <= lerpf(JMcusp[0], p.limit_J_max, focus_gain_blend))
+f3 gamut_compress_inv(const f3 &JMh, const ResolvedSharedCompressionParameters &sr, const GamutCompressParams &p)
+{
+    const float J = JMh[0];
+    const float M = JMh[1];
+    const float h = JMh[2];
+
+    if (J <= 0.0f) // Limit to +ve J values // TODO test this is needed
     {
-        unCompressedJMh = compressGamut(JMh, Jx, p, true);
+        return {0.0f, 0.f, h};
     }
-    // Approximation above threshold
-    else
+    if (M <= 0.0f || J > sr.limit_J_max) // We compress M only so avoid mapping zero
+                                         // Above the expected maximum we explicitly map to 0 M
     {
-        Jx = compressGamut(JMh, Jx, p, true)[0];
-        unCompressedJMh = compressGamut(JMh, Jx, p, true);
+        return {J, 0.f, h};
     }
+    const HueDependantGamutParams hdp = init_HueDependantGamutParams(h, sr, p);
 
-    return unCompressedJMh;
+    float Jx = J;
+    if (Jx > hdp.analytical_threshold)
+    {
+        // Approximation above threshold
+        Jx = compressGamut<true>(JMh, Jx, sr, p, hdp)[0];
+    }
+    return compressGamut<true>(JMh, Jx, sr, p, hdp);
+}
+
+static constexpr unsigned int gamma_test_count = 5;
+struct testData {
+    f3 testJMh;
+    float J_intersect_source;
+    float slope;
+    float J_intersect_cusp;
+};
+
+std::array<testData, gamma_test_count> generate_gamma_test_data(const f2 &JMcusp, float hue, float limit_J_max, float mid_J, float focus_dist)
+{
+    const std::array<float, gamma_test_count> testPositions = {0.01f, 0.1f, 0.5f, 0.8f, 0.99f};
+    std::array<testData, gamma_test_count> data;
+    const float analytical_threshold = lerpf(JMcusp[0], limit_J_max, focus_gain_blend);
+    const float focusJ = compute_focusJ(JMcusp[0], mid_J, limit_J_max);
+
+    unsigned int testIndex = 0;
+    std::generate(data.begin(), data.end(), [JMcusp, limit_J_max, focus_dist, testPositions, hue, analytical_threshold, focusJ, testIndex]() mutable {
+        const float testJ = lerpf(JMcusp[0], limit_J_max, testPositions[testIndex]);
+        const float slope_gain = get_focus_gain(testJ, analytical_threshold, limit_J_max, focus_dist);
+        const float J_intersect_source = solve_J_intersect(testJ, JMcusp[1], focusJ, limit_J_max, slope_gain);
+        testData result = {
+            { testJ, JMcusp[1], hue },
+            J_intersect_source,
+            compute_compression_vector_slope(J_intersect_source, focusJ, limit_J_max, slope_gain),
+            solve_J_intersect(JMcusp[0], JMcusp[1], focusJ, limit_J_max, slope_gain)
+        };
+        testIndex++;
+        return result;
+    });
+    return data;
 }
 
 bool evaluate_gamma_fit(
     const f2 &JMcusp,
-    const f3 testJMh[3],
-    float topGamma,
+    const std::array<testData, gamma_test_count> data,
+    float topGamma_inv,
     float peakLuminance,
     float limit_J_max,
-    float mid_J,
-    float focus_dist,
-    float lower_hull_gamma,
+    float lower_hull_gamma_inv,
     const JMhParams &limitJMhParams)
 {
-    const float focusJ = lerpf(JMcusp[0], mid_J, std::min(1.f, cusp_mid_blend - (JMcusp[0] / limit_J_max)));
-
-    for (size_t testIndex = 0; testIndex < 3; testIndex++)
+    const float luminance_limit = peakLuminance / reference_luminance;
+    for (auto test_data: data)
     {
-        const float slope_gain = limit_J_max * focus_dist * get_focus_gain(testJMh[testIndex][0], JMcusp[0], limit_J_max);
-        const f3 approxLimit = find_gamut_boundary_intersection(testJMh[testIndex], JMcusp, focusJ, limit_J_max, slope_gain, topGamma, lower_hull_gamma);
-        const f3 approximate_JMh = {approxLimit[0], approxLimit[1], testJMh[testIndex][2]};
+        const float approxLimit_M = find_gamut_boundary_intersection(JMcusp, limit_J_max, topGamma_inv, lower_hull_gamma_inv,
+                                                    test_data.J_intersect_source, test_data.slope, test_data.J_intersect_cusp);
+        const float approxLimit_J = test_data.J_intersect_source + test_data.slope * approxLimit_M;
+
+        const f3 approximate_JMh = {approxLimit_J, approxLimit_M, test_data.testJMh[2]};
         const f3 newLimitRGB = JMh_to_RGB(approximate_JMh, limitJMhParams);
-        const f3 newLimitRGBScaled = mult_f_f3(reference_luminance / peakLuminance, newLimitRGB);
 
-        if (!outside_hull(newLimitRGBScaled))
+        if (!outside_hull(newLimitRGB, luminance_limit))
         {
             return false;
         }
@@ -739,47 +1190,38 @@ bool evaluate_gamma_fit(
     return true;
 }
 
-Table1D make_upper_hull_gamma(
-    const Table3D &gamutCuspTable,
+void make_upper_hull_gamma(
+    const Table1D &hue_table,
+    Table3D &gamutCuspTable,
     float peakLuminance,
     float limit_J_max,
     float mid_J,
     float focus_dist,
-    float lower_hull_gamma,
+    float lower_hull_gamma_inv,
     const JMhParams &limitJMhParams)
 {
-    const int test_count = 3;
-    const float testPositions[test_count] = {0.01f, 0.5f, 0.99f};
-
-    Table1D gammaTable{};
-    Table1D gamutTopGamma{};
-
-    for (int i = 0; i < gammaTable.size; i++)
+    for (unsigned int i = gamutCuspTable.first_nominal_index; i != gamutCuspTable.upper_wrap_index; ++i)
     {
-        gammaTable.table[i] = -1.f;
+        const float hue = hue_table[i];
+        const f2 JMcusp = { gamutCuspTable[i][0], gamutCuspTable[i][1] };
 
-        const float hue = (float) i;
-        const f2 JMcusp = cusp_from_table(hue, gamutCuspTable);
-
-        f3 testJMh[test_count]{};
-        for (int testIndex = 0; testIndex < test_count; testIndex++)
-        {
-            const float testJ = JMcusp[0] + ((limit_J_max - JMcusp[0]) * testPositions[testIndex]);
-            testJMh[testIndex] = {
-                testJ,
-                JMcusp[1],
-                hue
-            };
-        }
+        std::array<testData, gamma_test_count> data = generate_gamma_test_data(JMcusp, hue, limit_J_max, mid_J, focus_dist);
 
+        // TODO: calculate best fitting inverse gamma directly rather than reciprocating it in the loop
+        // TODO: adjacent found gamma values typically fall close to each other could initialise the search range
+        //       with values near to speed up searching. Note that discrepancies do occur at/near corners of gamut
         const float search_range = gammaSearchStep;
         float low = gammaMinimum;
         float high = low + search_range;
         bool outside = false;
 
-        while (!(outside) && (high < 5.f))
+        auto gamma_fit_predicate = [JMcusp, data, peakLuminance, limit_J_max, lower_hull_gamma_inv, limitJMhParams](float gamma)
+        {
+            return evaluate_gamma_fit(JMcusp, data, 1.0f / gamma, peakLuminance, limit_J_max, lower_hull_gamma_inv, limitJMhParams);
+        };
+        while (!(outside) && (high < gammaMaximum))
         {
-            const bool gammaFound = evaluate_gamma_fit(JMcusp, testJMh, high, peakLuminance, limit_J_max, mid_J, focus_dist, lower_hull_gamma, limitJMhParams);
+            const bool gammaFound = gamma_fit_predicate(high);
             if (!gammaFound)
             {
                 low = high;
@@ -794,30 +1236,23 @@ Table1D make_upper_hull_gamma(
         float testGamma = -1.f;
         while ( (high-low) > gammaAccuracy)
         {
-            testGamma = (high + low) / 2.f;
-            const bool gammaFound = evaluate_gamma_fit(JMcusp, testJMh, testGamma, peakLuminance, limit_J_max, mid_J, focus_dist, lower_hull_gamma, limitJMhParams);
+            testGamma = midpoint(high, low);
+            const bool gammaFound = gamma_fit_predicate(testGamma);
             if (gammaFound)
             {
                 high = testGamma;
-                gammaTable.table[i] = high;
             }
             else
             {
                 low = testGamma;
             }
         }
-
-        // Duplicate gamma value to array, leaving empty entries at first and last position
-        gamutTopGamma.table[i+gamutTopGamma.base_index] = gammaTable.table[i];
+        gamutCuspTable[i][2] = 1.0f / high;
     }
 
-    // Copy last populated entry to first empty spot
-    gamutTopGamma.table[0] = gammaTable.table[gammaTable.size-1];
-
-    // Copy first populated entry to last empty spot
-    gamutTopGamma.table[gamutTopGamma.total_size-1] = gammaTable.table[0];
-
-    return gamutTopGamma;
+    // Copy last populated entries to empty spot 'wrapping' entries
+    gamutCuspTable[gamutCuspTable.lower_wrap_index][2] = gamutCuspTable[gamutCuspTable.last_nominal_index][2];
+    gamutCuspTable[gamutCuspTable.upper_wrap_index][2] = gamutCuspTable[gamutCuspTable.first_nominal_index][2];
 }
 
 // Tonescale pre-calculations
@@ -836,6 +1271,8 @@ ToneScaleParams init_ToneScaleParams(float peakLuminance)
     const float r_hit_max = 896.f;   // scene-referred value "hitting the roof"
 
     // Calculate output constants
+    // TODO: factor these calculations into well named functions
+    // TODO: ensure correct use of n_r vs n vs reference_luminance vs 100.f etc
     const float r_hit = r_hit_min + (r_hit_max - r_hit_min) * (log(n/n_r)/log(10000.f/100.f));
     const float m_0 = (n / n_r);
     const float m_1 = 0.5f * (m_0 + sqrt(m_0 * (m_0 + 4.f * t_1)));
@@ -846,9 +1283,12 @@ ToneScaleParams init_ToneScaleParams(float peakLuminance)
     const float g_ip = 0.5f * (c_t + sqrt(c_t * (c_t + 4.f * t_1)));
     const float g_ipp2 = -(m_1 * powf((g_ip/m),(1.f/g))) / (powf(g_ip/m , 1.f/g)-1.f);
     const float w_2 = c / g_ipp2;
-    const float s_2 = w_2 * m_1;
+    const float s_2 = w_2 * m_1 * reference_luminance;
     const float u_2 = powf((r_hit/m_1)/((r_hit/m_1) + w_2), g);
     const float m_2 = m_1 / u_2;
+    const float inverse_limit = n / (u_2 * n_r);
+    const float forward_limit = 8.0f * r_hit;
+    const float log_peak = log10( n / n_r);
 
     ToneScaleParams TonescaleParams = {
         n,
@@ -858,70 +1298,99 @@ ToneScaleParams init_ToneScaleParams(float peakLuminance)
         c_t,
         s_2,
         u_2,
-        m_2
+        m_2,
+        forward_limit,
+        inverse_limit,
+        log_peak
     };
 
     return TonescaleParams;
 }
 
-ChromaCompressParams init_ChromaCompressParams(float peakLuminance)
+SharedCompressionParameters init_SharedCompressionParams(float peakLuminance, const JMhParams &inputJMhParams, const JMhParams &reachParams)
 {
-    const ToneScaleParams tsParams = init_ToneScaleParams(peakLuminance);
-    const JMhParams inputJMhParams = init_JMhParams(ACES_AP0::primaries);
+    const float limit_J_max = Y_to_J(peakLuminance, inputJMhParams);
+    const float model_gamma_inv = 1.f / model_gamma();
+
+    SharedCompressionParameters params = {
+        limit_J_max,
+        model_gamma_inv,
+        make_reach_m_table(reachParams, limit_J_max)
+    };
+    return params;
+}
 
-    float limit_J_max = Y_to_J(peakLuminance, inputJMhParams);
+ResolvedSharedCompressionParameters resolve_CompressionParams(float hue, const SharedCompressionParameters &p)
+{
+    ResolvedSharedCompressionParameters params = {
+        p.limit_J_max,
+        p.model_gamma_inv,
+        reach_m_from_table(hue, p.reach_m_table)
+    };
+    return params;
+}
 
+ChromaCompressParams init_ChromaCompressParams(float peakLuminance, const ToneScaleParams &tsParams)
+{
     // Calculated chroma compress variables
-    const float log_peak = log10( tsParams.n / tsParams.n_r);
-    const float compr = chroma_compress + (chroma_compress * chroma_compress_fact) * log_peak;
-    const float sat = std::max(0.2f, chroma_expand - (chroma_expand * chroma_expand_fact) * log_peak);
+    // TODO: name these magic constants
+    const float compr = chroma_compress + (chroma_compress * chroma_compress_fact) * tsParams.log_peak;
+    const float sat = std::max(0.2f, chroma_expand - (chroma_expand * chroma_expand_fact) * tsParams.log_peak);
     const float sat_thr = chroma_expand_thr / tsParams.n;
-    const float model_gamma = 1.f / (surround[1] * (1.48f + sqrt(Y_b / L_A)));
-
-    ChromaCompressParams params{};
-    params.limit_J_max = limit_J_max;
-    params.model_gamma = model_gamma;
-    params.sat = sat;
-    params.sat_thr = sat_thr;
-    params.compr = compr;
-    params.chroma_compress_scale = powf(0.03379f * peakLuminance, 0.30596f) - 0.45135f;
-    params.reach_m_table = make_reach_m_table(ACES_AP1::primaries, peakLuminance);
+    const float chroma_compress_scale =  powf(0.03379f * peakLuminance, 0.30596f) - 0.45135f;
+
+    ChromaCompressParams params = {
+        sat,
+        sat_thr,
+        compr,
+        chroma_compress_scale
+    };
     return params;
 }
 
-GamutCompressParams init_GamutCompressParams(float peakLuminance, const Primaries &limitingPrimaries)
+std::array<int, 2> determine_hue_linearity_search_range(const Table3D &gamutCuspTable)
 {
-    const ToneScaleParams tsParams = init_ToneScaleParams(peakLuminance);
-    const JMhParams inputJMhParams = init_JMhParams(ACES_AP0::primaries);
+    // This function searches through the hues looking for the largest deviations from a linear distribution
+    // We can then use this to initialise the binary search range to something smaller than the full one to
+    // reduce the number of lookups per hue lookup from ~ceil(log2(table size)) to ~ceil(log2(range))
+    // during image rendering.
+
+    // TODO: Padding values are a quick hack to ensure the range encloses the needed range, left as an exersize
+    // for the reader to fully reason if these values could be smaller, probably best done the closer the hue
+    // distribution comes to linear as the overhead becomes a potentially greater issue
+    constexpr int lower_padding = 0;
+    constexpr int upper_padding = 1;
+    std::array<int, 2> hue_linearity_search_range = {lower_padding, upper_padding};
+    for (unsigned int i = gamutCuspTable.first_nominal_index; i != gamutCuspTable.upper_wrap_index; ++i)
+    {
+        const unsigned int pos = gamutCuspTable.nominal_hue_position_in_uniform_table(gamutCuspTable[i][2]); // TODO: compute from hue table less cache pressure?
+        const int delta = int(i) - int(pos);
+        hue_linearity_search_range[0] = std::min(hue_linearity_search_range[0], delta + lower_padding);
+        hue_linearity_search_range[1] = std::max(hue_linearity_search_range[1], delta + upper_padding);
 
-    float limit_J_max = Y_to_J(peakLuminance, inputJMhParams);
-    float mid_J = Y_to_J(tsParams.c_t * 100.f, inputJMhParams);
+        //std::cout << i << " " << pos << " " << delta << " " << gamutCuspTable[i][0] << " " << gamutCuspTable[i][1] << " " << gamutCuspTable[i][2] << "\n";
+    }
+    //std::cout << "search range " << hue_linearity_search_range[0] << " " << hue_linearity_search_range[1] << "\n";
+    return hue_linearity_search_range;
+}
 
-    // Calculated chroma compress variables
-    const float log_peak = log10( tsParams.n / tsParams.n_r);
-    const float model_gamma = 1.f / (surround[1] * (1.48f + sqrt(Y_b / L_A)));
-    const float focus_dist = focus_distance + focus_distance * focus_distance_scaling * log_peak;
-    const float lower_hull_gamma =  1.14f + 0.07f * log_peak;
+GamutCompressParams init_GamutCompressParams(float peakLuminance, const JMhParams &inputJMhParams, const JMhParams &limitJMhParams,
+                                             const ToneScaleParams &tsParams, const SharedCompressionParameters &shParams, const JMhParams &reachParams)
+{
+    float mid_J = Y_to_J(tsParams.c_t * reference_luminance, inputJMhParams);
 
-    const JMhParams limitJMhParams = init_JMhParams(limitingPrimaries);
+    // Calculated chroma compress variables
+    const float focus_dist = focus_distance + focus_distance * focus_distance_scaling * tsParams.log_peak;
+    const float lower_hull_gamma_inv =  1.0f / (1.14f + 0.07f * tsParams.log_peak); // TODO: name these magic constants
 
-    GamutCompressParams params{};
-    params.limit_J_max = limit_J_max;
+    GamutCompressParams params;
     params.mid_J = mid_J;
-    params.model_gamma = model_gamma;
     params.focus_dist = focus_dist;
-    params.lower_hull_gamma = lower_hull_gamma;
-    params.reach_m_table = make_reach_m_table(ACES_AP1::primaries, peakLuminance);
-    params.gamut_cusp_table = make_gamut_table(limitingPrimaries, peakLuminance);
-    params.upper_hull_gamma_table = make_upper_hull_gamma(
-        params.gamut_cusp_table,
-        peakLuminance,
-        limit_J_max,
-        mid_J,
-        focus_dist,
-        lower_hull_gamma,
-        limitJMhParams);
-
+    params.lower_hull_gamma_inv = lower_hull_gamma_inv;
+    params.gamut_cusp_table = make_uniform_hue_gamut_table(reachParams, limitJMhParams, peakLuminance, tsParams.forward_limit, shParams, params.hue_table);
+    params.hue_linearity_search_range = determine_hue_linearity_search_range(params.gamut_cusp_table);
+    make_upper_hull_gamma(params.hue_table, params.gamut_cusp_table, peakLuminance, shParams.limit_J_max, mid_J, focus_dist,
+                          lower_hull_gamma_inv, limitJMhParams); //TODO: mess of parameters
     return params;
 }
 
diff --git a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.h b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.h
index ff3f969e5a..11adc13fdb 100644
--- a/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.h
+++ b/src/OpenColorIO/ops/fixedfunction/ACES2/Transform.h
@@ -14,17 +14,31 @@ namespace ACES2
 
 JMhParams init_JMhParams(const Primaries &P);
 ToneScaleParams init_ToneScaleParams(float peakLuminance);
-ChromaCompressParams init_ChromaCompressParams(float peakLuminance);
-GamutCompressParams init_GamutCompressParams(float peakLuminance, const Primaries &P);
-
+SharedCompressionParameters init_SharedCompressionParams(float peakLuminance, const JMhParams &inputJMhParams, const JMhParams &reachParams);
+ResolvedSharedCompressionParameters resolve_CompressionParams(float hue, const SharedCompressionParameters &p);
+ChromaCompressParams init_ChromaCompressParams(float peakLuminance, const ToneScaleParams &tsParams);
+GamutCompressParams init_GamutCompressParams(float peakLuminance, const JMhParams &inputJMhParams, const JMhParams &limitJMhParams,
+                                             const ToneScaleParams &tsParams, const SharedCompressionParameters &shParams, const JMhParams &reachParams);
+
+f3 RGB_to_Aab(const f3 &RGB, const JMhParams &p);
+f3 Aab_to_JMh(const f3 &Aab, const JMhParams &p);
 f3 RGB_to_JMh(const f3 &RGB, const JMhParams &p);
+f3 JMh_to_Aab(const f3 &JMh, const JMhParams &p);
+f3 JMh_to_Aab(const f3 &JMh, const float &cos_hr, const float &sin_hr, const JMhParams &p);
+f3 Aab_to_RGB(const f3 &Aab, const JMhParams &p);
 f3 JMh_to_RGB(const f3 &JMh, const JMhParams &p);
 
-f3 tonescale_chroma_compress_fwd(const f3 &JMh, const JMhParams &p, const ToneScaleParams &pt, const ChromaCompressParams &pc);
-f3 tonescale_chroma_compress_inv(const f3 &JMh, const JMhParams &p, const ToneScaleParams &pt, const ChromaCompressParams &pc);
+float tonescale_fwd(const float J, const JMhParams &p, const ToneScaleParams &pt);
+float tonescale_inv(const float J, const JMhParams &p, const ToneScaleParams &pt);
+float tonescale_A_to_J_fwd(const float A, const JMhParams &p, const ToneScaleParams &pt);
+
+float chroma_compress_norm(float cos_hr, float sin_hr, float chroma_compress_scale);
+
+f3 chroma_compress_fwd(const f3 &JMh, const float J_ts, const float Mnorm, const ResolvedSharedCompressionParameters &ps, const ChromaCompressParams &pc);
+f3 chroma_compress_inv(const f3 &JMh, const float J, const float Mnorm, const ResolvedSharedCompressionParameters &ps, const ChromaCompressParams &pc);
 
-f3 gamut_compress_fwd(const f3 &JMh, const GamutCompressParams &p);
-f3 gamut_compress_inv(const f3 &JMh, const GamutCompressParams &p);
+f3 gamut_compress_fwd(const f3 &JMh, const ResolvedSharedCompressionParameters &ps, const GamutCompressParams &p);
+f3 gamut_compress_inv(const f3 &JMh, const ResolvedSharedCompressionParameters &ps, const GamutCompressParams &p);
 
 
 } // namespace ACES2
diff --git a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpCPU.cpp b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpCPU.cpp
index e9e9774338..feb387db5f 100644
--- a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpCPU.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpCPU.cpp
@@ -147,6 +147,7 @@ class Renderer_ACES_OutputTransform20 : public OpCPU
     ACES2::JMhParams m_pIn;
     ACES2::JMhParams m_pOut;
     ACES2::ToneScaleParams m_t;
+    ACES2::SharedCompressionParameters m_s;
     ACES2::ChromaCompressParams m_c;
     ACES2::GamutCompressParams m_g;
 };
@@ -184,6 +185,7 @@ class Renderer_ACES_TONESCALE_COMPRESS_20 : public OpCPU
     bool m_fwd;
     ACES2::JMhParams m_p;
     ACES2::ToneScaleParams m_t;
+    ACES2::SharedCompressionParameters m_s;
     ACES2::ChromaCompressParams m_c;
 };
 
@@ -201,6 +203,7 @@ class Renderer_ACES_GAMUT_COMPRESS_20 : public OpCPU
 
 protected:
     bool m_fwd;
+    ACES2::SharedCompressionParameters m_s;
     ACES2::GamutCompressParams m_g;
 };
 
@@ -1023,8 +1026,10 @@ Renderer_ACES_OutputTransform20::Renderer_ACES_OutputTransform20(ConstFixedFunct
     m_pIn = ACES2::init_JMhParams(ACES_AP0::primaries);
     m_pOut = ACES2::init_JMhParams(lim_primaries);
     m_t = ACES2::init_ToneScaleParams(peak_luminance);
-    m_c = ACES2::init_ChromaCompressParams(peak_luminance);
-    m_g = ACES2::init_GamutCompressParams(peak_luminance, lim_primaries);
+    const ACES2::JMhParams reachGamut = ACES2::init_JMhParams(ACES_AP1::primaries);
+    m_s = ACES2::init_SharedCompressionParams(peak_luminance, m_pIn, reachGamut);
+    m_c = ACES2::init_ChromaCompressParams(peak_luminance, m_t);
+    m_g = ACES2::init_GamutCompressParams(peak_luminance, m_pIn, m_pOut, m_t, m_s, reachGamut);
 }
 
 void Renderer_ACES_OutputTransform20::apply(const void * inImg, void * outImg, long numPixels) const
@@ -1047,10 +1052,21 @@ void Renderer_ACES_OutputTransform20::fwd(const void * inImg, void * outImg, lon
     for(long idx=0; idx<numPixels; ++idx)
     {
         const ACES2::f3 RGBIn {in[0], in[1], in[2]};
-        const ACES2::f3 JMh           = ACES2::RGB_to_JMh(RGBIn, m_pIn);
-        const ACES2::f3 tonemappedJMh = ACES2::tonescale_chroma_compress_fwd(JMh, m_pIn, m_t, m_c);
-        const ACES2::f3 compressedJMh = ACES2::gamut_compress_fwd(tonemappedJMh, m_g);
-        const ACES2::f3 RGBOut        = ACES2::JMh_to_RGB(compressedJMh, m_pOut);
+        const ACES2::f3 Aab           = ACES2::RGB_to_Aab(RGBIn, m_pIn);
+        const ACES2::f3 JMh           = ACES2::Aab_to_JMh(Aab, m_pIn);
+
+        const ACES2::ResolvedSharedCompressionParameters rp = resolve_CompressionParams(JMh[2], m_s);
+        const float h_rad = ACES2::to_radians(JMh[2]);
+        const float cos_hr1 = std::cos(h_rad);
+        const float sin_hr1 = std::sin(h_rad);
+        const float Mnorm = ACES2::chroma_compress_norm(cos_hr1, sin_hr1, m_c.chroma_compress_scale);
+
+        const float J_ts = ACES2::tonescale_A_to_J_fwd(Aab[0], m_pIn, m_t);
+        const ACES2::f3 tonemappedJMh = ACES2::chroma_compress_fwd(JMh, J_ts, Mnorm, rp, m_c);
+        const ACES2::f3 compressedJMh = ACES2::gamut_compress_fwd(tonemappedJMh, rp, m_g);
+
+        const ACES2::f3 Aabout        = ACES2::JMh_to_Aab(compressedJMh, cos_hr1, sin_hr1, m_pOut);
+        const ACES2::f3 RGBOut        = ACES2::Aab_to_RGB(Aabout, m_pOut);
 
         out[0] = RGBOut[0];
         out[1] = RGBOut[1];
@@ -1071,10 +1087,20 @@ void Renderer_ACES_OutputTransform20::inv(const void * inImg, void * outImg, lon
     {
         const ACES2::f3 RGBout {in[0], in[1], in[2]};
         const ACES2::f3 compressedJMh = ACES2::RGB_to_JMh(RGBout, m_pOut);
-        const ACES2::f3 tonemappedJMh = ACES2::gamut_compress_inv(compressedJMh, m_g);
-        const ACES2::f3 JMh           = ACES2::tonescale_chroma_compress_inv(tonemappedJMh, m_pIn, m_t, m_c);
-        const ACES2::f3 RGBin         = ACES2::JMh_to_RGB(JMh, m_pIn);
 
+        const ACES2::ResolvedSharedCompressionParameters rp = resolve_CompressionParams(compressedJMh[2], m_s);
+        const float h_rad = ACES2::to_radians(compressedJMh[2]);
+        const float cos_hr1 = std::cos(h_rad);
+        const float sin_hr1 = std::sin(h_rad);
+        const float Mnorm = ACES2::chroma_compress_norm(cos_hr1, sin_hr1, m_c.chroma_compress_scale);
+
+        const ACES2::f3 tonemappedJMh = ACES2::gamut_compress_inv(compressedJMh, rp, m_g);
+        const float J         = ACES2::tonescale_inv(tonemappedJMh[0], m_pIn, m_t);
+        const ACES2::f3 JMh   = ACES2::chroma_compress_inv(tonemappedJMh, J, Mnorm, rp, m_c);
+    
+        const ACES2::f3 Aab   = ACES2::JMh_to_Aab(JMh, cos_hr1, sin_hr1, m_pIn);
+        const ACES2::f3 RGBin = ACES2::Aab_to_RGB(Aab, m_pIn);
+    
         out[0] = RGBin[0];
         out[1] = RGBin[1];
         out[2] = RGBin[2];
@@ -1132,7 +1158,7 @@ void Renderer_ACES_RGB_TO_JMh_20::fwd(const void * inImg, void * outImg, long nu
 
         out[0] = JMh[0];
         out[1] = JMh[1];
-        out[2] = JMh[2];
+        out[2] = ACES2::to_degrees(JMh[2]);
         out[3] = in[3];
 
         in  += 4;
@@ -1147,7 +1173,8 @@ void Renderer_ACES_RGB_TO_JMh_20::inv(const void * inImg, void * outImg, long nu
 
     for(long idx=0; idx<numPixels; ++idx)
     {
-        const ACES2::f3 RGB = ACES2::JMh_to_RGB({in[0], in[1], in[2]}, m_p);
+        const float normalised_hue = ACES2::from_degrees(in[2]);
+        const ACES2::f3 RGB = ACES2::JMh_to_RGB({in[0], in[1], normalised_hue}, m_p);
 
         out[0] = RGB[0];
         out[1] = RGB[1];
@@ -1168,7 +1195,9 @@ Renderer_ACES_TONESCALE_COMPRESS_20::Renderer_ACES_TONESCALE_COMPRESS_20(ConstFi
 
     m_p = ACES2::init_JMhParams(ACES_AP0::primaries);
     m_t = ACES2::init_ToneScaleParams(peak_luminance);
-    m_c = ACES2::init_ChromaCompressParams(peak_luminance);
+    const ACES2::JMhParams reachGamut = ACES2::init_JMhParams(ACES_AP1::primaries);
+    m_s = ACES2::init_SharedCompressionParams(peak_luminance, m_p, reachGamut);
+    m_c = ACES2::init_ChromaCompressParams(peak_luminance, m_t);
 }
 
 void Renderer_ACES_TONESCALE_COMPRESS_20::apply(const void * inImg, void * outImg, long numPixels) const
@@ -1190,11 +1219,18 @@ void Renderer_ACES_TONESCALE_COMPRESS_20::fwd(const void * inImg, void * outImg,
 
     for(long idx=0; idx<numPixels; ++idx)
     {
-        const ACES2::f3 JMh = ACES2::tonescale_chroma_compress_fwd({in[0], in[1], in[2]}, m_p, m_t, m_c);
+        const float normalised_hue = ACES2::from_degrees(in[2]);
+        const float h_rad = ACES2::to_radians(normalised_hue);
+        const float cos_hr1 = cos(h_rad);
+        const float sin_hr1 = sin(h_rad);
+        const float Mnorm = ACES2::chroma_compress_norm(cos_hr1, sin_hr1, m_c.chroma_compress_scale);
+        const ACES2::ResolvedSharedCompressionParameters rp = resolve_CompressionParams(normalised_hue, m_s);
+        const float J_ts = ACES2::tonescale_fwd(in[0], m_p, m_t);
+        const ACES2::f3 JMh = ACES2::chroma_compress_fwd({in[0], in[1], normalised_hue}, J_ts, Mnorm, rp, m_c);
 
         out[0] = JMh[0];
         out[1] = JMh[1];
-        out[2] = JMh[2];
+        out[2] = ACES2::to_degrees(JMh[2]);
         out[3] = in[3];
 
         in  += 4;
@@ -1209,11 +1245,18 @@ void Renderer_ACES_TONESCALE_COMPRESS_20::inv(const void * inImg, void * outImg,
 
     for(long idx=0; idx<numPixels; ++idx)
     {
-        const ACES2::f3 JMh = ACES2::tonescale_chroma_compress_inv({in[0], in[1], in[2]}, m_p, m_t, m_c);
+        const float normalised_hue = ACES2::from_degrees(in[2]);
+        const float h_rad = ACES2::to_radians(normalised_hue);
+        const float cos_hr1 = cos(h_rad);
+        const float sin_hr1 = sin(h_rad);
+        const float Mnorm = ACES2::chroma_compress_norm(cos_hr1, sin_hr1, m_c.chroma_compress_scale);
+        const ACES2::ResolvedSharedCompressionParameters rp = resolve_CompressionParams(normalised_hue, m_s);
+        const float J = ACES2::tonescale_inv(in[0], m_p, m_t);
+        const ACES2::f3 JMh = ACES2::chroma_compress_inv({in[0], in[1],  normalised_hue}, J, Mnorm, rp, m_c);
 
         out[0] = JMh[0];
         out[1] = JMh[1];
-        out[2] = JMh[2];
+        out[2] = ACES2::to_degrees(JMh[2]);
         out[3] = in[3];
 
         in  += 4;
@@ -1244,7 +1287,12 @@ Renderer_ACES_GAMUT_COMPRESS_20::Renderer_ACES_GAMUT_COMPRESS_20(ConstFixedFunct
         {white_x, white_y}
     };
 
-    m_g = ACES2::init_GamutCompressParams(peakLuminance, limitingPrimaries);
+    const ACES2::JMhParams pIn = ACES2::init_JMhParams(ACES_AP0::primaries);
+    const ACES2::JMhParams pLim = ACES2::init_JMhParams(limitingPrimaries);
+    const ACES2::ToneScaleParams t = ACES2::init_ToneScaleParams(peakLuminance);
+    const ACES2::JMhParams reachGamut = ACES2::init_JMhParams(ACES_AP1::primaries);
+    m_s = ACES2::init_SharedCompressionParams(peakLuminance, pIn, reachGamut);
+    m_g = ACES2::init_GamutCompressParams(peakLuminance, pIn, pLim, t, m_s, reachGamut);
 }
 
 void Renderer_ACES_GAMUT_COMPRESS_20::apply(const void * inImg, void * outImg, long numPixels) const
@@ -1266,11 +1314,13 @@ void Renderer_ACES_GAMUT_COMPRESS_20::fwd(const void * inImg, void * outImg, lon
 
     for(long idx=0; idx<numPixels; ++idx)
     {
-        const ACES2::f3 JMh = ACES2::gamut_compress_fwd({in[0], in[1], in[2]}, m_g);
+        const float normalised_hue = ACES2::from_degrees(in[2]);
+        const ACES2::ResolvedSharedCompressionParameters rp = resolve_CompressionParams(normalised_hue, m_s);
+        const ACES2::f3 JMh = ACES2::gamut_compress_fwd({in[0], in[1], normalised_hue}, rp, m_g);
 
         out[0] = JMh[0];
         out[1] = JMh[1];
-        out[2] = JMh[2];
+        out[2] = ACES2::to_degrees(JMh[2]);
         out[3] = in[3];
 
         in  += 4;
@@ -1285,11 +1335,13 @@ void Renderer_ACES_GAMUT_COMPRESS_20::inv(const void * inImg, void * outImg, lon
 
     for(long idx=0; idx<numPixels; ++idx)
     {
-        const ACES2::f3 JMh = ACES2::gamut_compress_inv({in[0], in[1], in[2]}, m_g);
+        const float normalised_hue = ACES2::from_degrees(in[2]);
+        const ACES2::ResolvedSharedCompressionParameters rp = resolve_CompressionParams(normalised_hue, m_s);
+        const ACES2::f3 JMh = ACES2::gamut_compress_inv({in[0], in[1], normalised_hue}, rp, m_g);
 
         out[0] = JMh[0];
         out[1] = JMh[1];
-        out[2] = JMh[2];
+        out[2] = ACES2::to_degrees(JMh[2]);
         out[3] = in[3];
 
         in  += 4;
diff --git a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
index 1b4ba870dd..ae65eb3920 100644
--- a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
+++ b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpGPU.cpp
@@ -349,57 +349,155 @@ void Add_GamutComp_13_Inv_Shader(GpuShaderText & ss,
     );
 }
 
-void _Add_RGB_to_JMh_Shader(
+void _Add_WrapHueChannel_Shader(
+    GpuShaderCreatorRcPtr & shaderCreator,
+    GpuShaderText & ss)
+{
+    const std::string pxl(shaderCreator->getPixelName());
+    ss.newLine() << ss.floatDecl("hwrap") << " = " << pxl << ".b;";
+    ss.newLine() << "hwrap = hwrap - floor(hwrap / 360.0) * 360.0;";
+    ss.newLine() << "hwrap = (hwrap < 0.0) ? hwrap + 360.0 : hwrap;";
+    ss.newLine() << pxl << ".b = hwrap;";
+}
+
+void _Add_SinCos_Shader(
+    GpuShaderCreatorRcPtr & shaderCreator,
+    GpuShaderText & ss)
+{
+    const std::string pxl(shaderCreator->getPixelName());
+    ss.newLine() << ss.floatDecl("h_rad") << " = " << pxl << ".b * " << 3.14159265358979f / 180.0f << ";";
+    ss.newLine() << ss.floatDecl("cos_hr") << " = cos(h_rad);";
+    ss.newLine() << ss.floatDecl("sin_hr") << " = sin(h_rad);";
+}
+
+
+void _Add_RGB_to_Aab_Shader(
     GpuShaderCreatorRcPtr & shaderCreator,
     GpuShaderText & ss,
     const ACES2::JMhParams & p)
 {
     const std::string pxl(shaderCreator->getPixelName());
 
-    ss.newLine() << ss.float3Decl("lms") << " = " << ss.mat3fMul(&p.MATRIX_RGB_to_CAM16[0], pxl + ".rgb") << ";";
-    ss.newLine() << "lms = " << "lms * " << ss.float3Const(p.D_RGB[0], p.D_RGB[1], p.D_RGB[2]) << ";";
+    ss.newLine() << "{";
+    ss.indent();
+
+    ss.newLine() << ss.float3Decl("lms") << " = " << ss.mat3fMul(&p.MATRIX_RGB_to_CAM16_c[0], pxl + ".rgb") << ";";
+
+    ss.newLine() << ss.float3Decl("F_L_v") << " = pow(abs(lms), " << ss.float3Const(0.42f) << ");";
+    ss.newLine() << ss.float3Decl("rgb_a") << " = (sign(lms) * F_L_v) / ( " << ACES2::cam_nl_offset << " + F_L_v);";
+
+    ss.newLine() << "Aab = " << ss.mat3fMul(&p.MATRIX_cone_response_to_Aab[0], "rgb_a.rgb") << ";";
+
+    ss.dedent();
+    ss.newLine() << "}";
+}
+
+void _Add_Aab_to_JMh_Shader(
+    GpuShaderCreatorRcPtr & shaderCreator,
+    GpuShaderText & ss,
+    const ACES2::JMhParams & p)
+{
+    const std::string pxl(shaderCreator->getPixelName());
 
-    ss.newLine() << ss.float3Decl("F_L_v") << " = pow(" << p.F_L << " * abs(lms) / 100.0, " << ss.float3Const(0.42f) << ");";
-    ss.newLine() << ss.float3Decl("rgb_a") << " = (400.0 * sign(lms) * F_L_v) / (27.13 + F_L_v);";
+    ss.newLine() << "{";
+    ss.indent();
 
-    ss.newLine() << ss.floatDecl("A") << " = 2.0 * rgb_a.r + rgb_a.g + 0.05 * rgb_a.b;";
-    ss.newLine() << ss.floatDecl("a") << " = rgb_a.r - 12.0 * rgb_a.g / 11.0 + rgb_a.b / 11.0;";
-    ss.newLine() << ss.floatDecl("b") << " = (rgb_a.r + rgb_a.g - 2.0 * rgb_a.b) / 9.0;";
+    ss.newLine() << "if (Aab.r <= 0.0)";
+    ss.newLine() << "{";
+    ss.indent();
+    ss.newLine() << "JMh.rgb = " << ss.float3Const(0.0) << ";";
+    ss.dedent();
+    ss.newLine() << "}";
 
-    ss.newLine() << ss.floatDecl("J") << " = 100.0 * pow(A / " << p.A_w << ", " << ACES2::surround[1] << " * " << p.z << ");";
+    ss.newLine() << "else";
+    ss.newLine() << "{";
+    ss.indent();
+    ss.newLine() << ss.floatDecl("J") << " = " << ACES2::J_scale << " * pow(Aab.r, " << p.cz << ");";
 
-    ss.newLine() << ss.floatDecl("M") << " = (J == 0.0) ? 0.0 : 43.0 * " << ACES2::surround[2] << " * sqrt(a * a + b * b);";
+    ss.newLine() << ss.floatDecl("M") << " = (J == 0.0) ? 0.0 : sqrt(Aab.g * Aab.g + Aab.b * Aab.b);";
 
-    ss.newLine() << ss.floatDecl("h") << " = (a == 0.0) ? 0.0 : " << ss.atan2("b", "a") << " * 180.0 / 3.14159265358979;";
+    ss.newLine() << ss.floatDecl("h") << " = (Aab.g == 0.0) ? 0.0 : " << ss.atan2("Aab.b", "Aab.g") << " * " << 180.0 / 3.14159265358979 << ";";
     ss.newLine() << "h = h - floor(h / 360.0) * 360.0;";
     ss.newLine() << "h = (h < 0.0) ? h + 360.0 : h;";
 
-    ss.newLine() << pxl << ".rgb = " << ss.float3Const("J", "M", "h") << ";";
+    ss.newLine() << "JMh.rgb = " << ss.float3Const("J", "M", "h") << ";";
+    ss.dedent();
+    ss.newLine() << "}";
+
+    ss.dedent();
+    ss.newLine() << "}";
 }
 
-void _Add_JMh_to_RGB_Shader(
+void _Add_RGB_to_JMh_Shader(
+    GpuShaderCreatorRcPtr & shaderCreator,
+    GpuShaderText & ss,
+    const ACES2::JMhParams & p)
+{
+    const std::string pxl(shaderCreator->getPixelName());
+
+    ss.newLine() << ss.float3Decl("JMh") << ";";    // TODO: leaky abstraction should really be explicit functions
+    ss.newLine() << ss.float3Decl("Aab") << ";";    // TODO: leaky abstraction should really be explicit functions
+
+    ss.newLine() << "{";
+    ss.indent();
+
+    _Add_RGB_to_Aab_Shader(shaderCreator, ss, p);
+    _Add_Aab_to_JMh_Shader(shaderCreator, ss, p);
+    
+    ss.newLine() << pxl << ".rgb = JMh;";
+
+    ss.dedent();
+    ss.newLine() << "}";
+}
+
+
+void _Add_JMh_to_Aab_Shader(
     GpuShaderCreatorRcPtr & shaderCreator,
     GpuShaderText & ss,
     const ACES2::JMhParams & p)
 {
     const std::string pxl(shaderCreator->getPixelName());
 
-    ss.newLine() << ss.floatDecl("h") << " = " << pxl << ".b * 3.14159265358979 / 180.0;";
+    ss.newLine() << "{";
+    ss.indent();
+
+
+    ss.newLine() << "Aab.r = pow(JMh.r * " << 1.0f / ACES2::J_scale << ", " << p.inv_cz << ");";
+    ss.newLine() << "Aab.g = JMh.g * cos_hr;";
+    ss.newLine() << "Aab.b = JMh.g * sin_hr;";
+    
+    ss.dedent();
+    ss.newLine() << "}";
+}
+
+void _Add_Aab_to_RGB_Shader(
+    GpuShaderText & ss,
+    const ACES2::JMhParams & p)
+{
+    ss.newLine() << "{";
+    ss.indent();
+
+    ss.newLine() << ss.float3Decl("rgb_a") << " = " << ss.mat3fMul(&p.MATRIX_Aab_to_cone_response[0], "Aab.rgb") << ";";
+    ss.newLine() << ss.float3Decl("lms") << " = sign(rgb_a) * pow( " << ACES2::cam_nl_offset << " * abs(rgb_a) / (1.0f - abs(rgb_a)), " << ss.float3Const(1.f / 0.42f) << ");";
+    ss.newLine() << "JMh.rgb = " << ss.mat3fMul(&p.MATRIX_CAM16_c_to_RGB[0], "lms") << ";";
 
-    ss.newLine() << ss.floatDecl("scale") << " = " << pxl << ".g / (43.0 * " << ACES2::surround[2] << ");";
-    ss.newLine() << ss.floatDecl("A") << " = " << p.A_w << " * pow(" << pxl << ".r / 100.0, 1.0 / (" << ACES2::surround[1] << " * " << p.z << "));";
-    ss.newLine() << ss.floatDecl("a") << " = scale * cos(h);";
-    ss.newLine() << ss.floatDecl("b") << " = scale * sin(h);";
+    ss.dedent();
+    ss.newLine() << "}";
+}
 
-    ss.newLine() << ss.float3Decl("rgb_a") << ";";
-    ss.newLine() << "rgb_a.r = (460.0 * A + 451.0 * a + 288.0 *b) / 1403.0;";
-    ss.newLine() << "rgb_a.g = (460.0 * A - 891.0 * a - 261.0 *b) / 1403.0;";
-    ss.newLine() << "rgb_a.b = (460.0 * A - 220.0 * a - 6300.0 *b) / 1403.0;";
+void _Add_JMh_to_RGB_Shader(
+    GpuShaderCreatorRcPtr & shaderCreator,
+    GpuShaderText & ss,
+    const ACES2::JMhParams & p)
+{
+    const std::string pxl(shaderCreator->getPixelName());
 
-    ss.newLine() << ss.float3Decl("lms") << " = sign(rgb_a) * 100.0 / " << p.F_L << " * pow(27.13 * abs(rgb_a) / (400.0 - abs(rgb_a)), " << ss.float3Const(1.f / 0.42f) << ");";
-    ss.newLine() << "lms = " << "lms / " << ss.float3Const(p.D_RGB[0], p.D_RGB[1], p.D_RGB[2]) << ";";
+    ss.newLine() << ss.float3Decl("JMh") << " = " << pxl << ".rgb;";
+    ss.newLine() << ss.float3Decl("Aab") << ";";
+    _Add_JMh_to_Aab_Shader(shaderCreator, ss, p);
+    _Add_Aab_to_RGB_Shader(ss, p);
 
-    ss.newLine() << pxl << ".rgb = " << ss.mat3fMul(&p.MATRIX_CAM16_to_RGB[0], "lms") << ";";
+    ss.newLine() << pxl << ".rgb = JMh;";
 }
 
 std::string _Add_Reach_table(
@@ -430,12 +528,12 @@ std::string _Add_Reach_table(
     shaderCreator->addTexture(
         name.c_str(),
         GpuShaderText::getSamplerName(name).c_str(),
-        table.size,
+        table.total_size,
         1,
         GpuShaderCreator::TEXTURE_RED_CHANNEL,
         dimensions,
         INTERP_NEAREST,
-        &(table.table[0]));
+        &(table[0]));
 
 
     if (dimensions == GpuShaderDesc::TEXTURE_1D)
@@ -458,26 +556,22 @@ std::string _Add_Reach_table(
     ss.newLine() << "{";
     ss.indent();
 
-    ss.newLine() << ss.floatDecl("hwrap") << " = h;";
-    ss.newLine() << "hwrap = hwrap - floor(hwrap / 360.0) * 360.0;";
-    ss.newLine() << "hwrap = (hwrap < 0.0) ? hwrap + 360.0 : hwrap;";
-
-    ss.newLine() << ss.floatDecl("i_lo") << " = floor(hwrap);";
-    ss.newLine() << ss.floatDecl("i_hi") << " = (i_lo + 1);";
-    ss.newLine() << "i_hi = i_hi - floor(i_hi / 360.0) * 360.0;";
+    ss.newLine() << ss.floatDecl("i_base") << " = floor(h);";
+    ss.newLine() << ss.floatDecl("i_lo") << " = i_base + " << table.base_index << ";";
+    ss.newLine() << ss.floatDecl("i_hi") << " = i_lo + 1;";
 
     if (dimensions == GpuShaderDesc::TEXTURE_1D)
     {
-        ss.newLine() << ss.floatDecl("lo") << " = " << ss.sampleTex1D(name, "(i_lo + 0.5) / 360.0") << ".r;";
-        ss.newLine() << ss.floatDecl("hi") << " = " << ss.sampleTex1D(name, "(i_hi + 0.5) / 360.0") << ".r;";
+        ss.newLine() << ss.floatDecl("lo") << " = " << ss.sampleTex1D(name, "(i_lo + 0.5) / " + std::to_string(table.total_size)) << ".r;";
+        ss.newLine() << ss.floatDecl("hi") << " = " << ss.sampleTex1D(name, "(i_hi + 0.5) / " + std::to_string(table.total_size)) << ".r;";
     }
     else
     {
-        ss.newLine() << ss.floatDecl("lo") << " = " << ss.sampleTex2D(name, ss.float2Const("(i_lo + 0.5) / 360.0", "0.0")) << ".r;";
-        ss.newLine() << ss.floatDecl("hi") << " = " << ss.sampleTex2D(name, ss.float2Const("(i_hi + 0.5) / 360.0", "0.5")) << ".r;";
+        ss.newLine() << ss.floatDecl("lo") << " = " << ss.sampleTex2D(name, ss.float2Const("(i_lo + 0.5) / " + std::to_string(table.total_size), "0.0")) << ".r;";
+        ss.newLine() << ss.floatDecl("hi") << " = " << ss.sampleTex2D(name, ss.float2Const("(i_hi + 0.5) / " + std::to_string(table.total_size), "0.5")) << ".r;";
     }
 
-    ss.newLine() << ss.floatDecl("t") << " = (h - i_lo) / (i_hi - i_lo);";
+    ss.newLine() << ss.floatDecl("t") << " = h - i_base;"; // Hardcoded single degree spacing
     ss.newLine() << "return " << ss.lerp("lo", "hi", "t") << ";";
 
     ss.dedent();
@@ -532,14 +626,101 @@ std::string _Add_Toe_func(
     return name;
 }
 
+std::string _Add_Tonescale_func(
+    GpuShaderCreatorRcPtr & shaderCreator,
+    unsigned resourceIndex,
+    bool invert,
+    const ACES2::JMhParams & p,
+    const ACES2::ToneScaleParams & t)
+{
+    // Reserve name
+    std::ostringstream resName;
+    resName << shaderCreator->getResourcePrefix()
+            << std::string("_")
+            << std::string("tonescale")
+            << (invert ? std::string("_inv") : std::string("_fwd"))
+            << resourceIndex;
+
+    // Note: Remove potentially problematic double underscores from GLSL resource names.
+    std::string name(resName.str());
+    StringUtils::ReplaceInPlace(name, "__", "_");
+
+    GpuShaderText ss(shaderCreator->getLanguage());
+
+    ss.newLine() << ss.floatKeyword() << " " << name << "(float J)";
+    ss.newLine() << "{";
+    ss.indent();
+
+    // Tonescale applied in Y (convert to and from J)
+    // TODO: Investigate if we can receive negative J here at all. 
+    // If not, abs(J) here and the sign(J) at the return may not be needed at all.
+    ss.newLine() << ss.floatDecl("A") << " = " << p.A_w_J << " * pow(abs(J) * " << 1.0f / ACES2::J_scale << ", " << p.inv_cz << ");";
+    ss.newLine() << ss.floatDecl("Y") << " = pow(( " << ACES2::cam_nl_offset << " * A) / (1.0f - A), " << 1.0 / 0.42 << ");";
+
+    if (invert)
+    {
+        // Inverse Tonescale applied in Y (convert to and from J)
+        ss.newLine() << ss.floatDecl("Y_i") << " = Y / " << double(p.F_L_n) * ACES2::reference_luminance << ";";
+ 
+        ss.newLine() << ss.floatDecl("Z") << " = max(0.0, min(" << t.inverse_limit << ", Y_i));";
+        ss.newLine() << ss.floatDecl("ht") << " = 0.5 * (Z + sqrt(Z * (" << 4.0 * t.t_1 << " + Z)));";
+        ss.newLine() << ss.floatDecl("Yo") << " = " << double(p.F_L_n) * t.s_2 << " / (pow((" << t.m_2 << " / ht), (" << 1.0 / t.g << ")) - 1.0);";
+
+        ss.newLine() << ss.floatDecl("F_L_Y") << " = pow(abs(Yo), 0.42);";
+    }
+    else
+    {
+        // Tonescale applied in Y (convert to and from J)
+        ss.newLine() << ss.floatDecl("f") << " = " << t.m_2  << " * pow(Y / (Y + " << double(t.s_2) * p.F_L_n << "), " << t.g << ");";
+        ss.newLine() << ss.floatDecl("Y_ts") << " = max(0.0, f * f / (f + " << t.t_1 << "));";
+        ss.newLine() << ss.floatDecl("F_L_Y") << " = pow(" << double(p.F_L_n) * ACES2::reference_luminance << " * Y_ts, 0.42);";
+    }
+
+    ss.newLine() << ss.floatDecl("J_ts") << " = " << ACES2::J_scale << " * pow((F_L_Y / ( " << ACES2::cam_nl_offset << " + F_L_Y)) * " << p.inv_A_w_J << ", " << p.cz << ");";
+    ss.newLine() << "return sign(J) * J_ts;";
+
+    ss.dedent();
+    ss.newLine() << "}";
+
+    shaderCreator->addToHelperShaderCode(ss.string().c_str());
+
+    return name;
+}
+
+void _Add_ChromaCompressionNorm_Shader(
+    GpuShaderText & ss,
+    const ACES2::ChromaCompressParams & c)
+{
+    // Mnorm
+    ss.newLine() << ss.floatDecl("Mnorm") << ";";
+    ss.newLine() << "{";
+    ss.indent();
+
+    // TODO: optimization: can bake weights into terms and convert dotprods to addition. /coz
+    ss.newLine() << ss.floatDecl("cos_hr2") << " = 2.0 * cos_hr * cos_hr - 1.0;";
+    ss.newLine() << ss.floatDecl("sin_hr2") << " = 2.0 * cos_hr * sin_hr;";
+    ss.newLine() << ss.floatDecl("cos_hr3") << " = 4.0 * cos_hr * cos_hr * cos_hr - 3.0 * cos_hr;";
+    ss.newLine() << ss.floatDecl("sin_hr3") << " = 3.0 * sin_hr - 4.0 * sin_hr * sin_hr * sin_hr;";
+    ss.newLine() << ss.float3Decl("cosines") << " = " <<  ss.float3Const("cos_hr", "cos_hr2", "cos_hr3") <<";";
+    ss.newLine() << ss.float3Decl("cosine_weights") << " = " <<  ss.float3Const(11.34072 * c.chroma_compress_scale,
+                                                                                16.46899 * c.chroma_compress_scale,
+                                                                                 7.88380 * c.chroma_compress_scale) <<";";
+    ss.newLine() << ss.float3Decl("sines") << " = " <<  ss.float3Const("sin_hr", "sin_hr2", "sin_hr3") <<";";
+    ss.newLine() << ss.float3Decl("sine_weights") << " = " <<  ss.float3Const(14.66441 * c.chroma_compress_scale,
+                                                                              -6.37224 * c.chroma_compress_scale,
+                                                                               9.19364 * c.chroma_compress_scale) <<";";
+    ss.newLine() << "Mnorm = dot(cosines, cosine_weights) + dot(sines, sine_weights) + " << 77.12896 * c.chroma_compress_scale<< ";";
+
+    ss.dedent();
+    ss.newLine() << "}";
+}
+
 void _Add_Tonescale_Compress_Fwd_Shader(
     GpuShaderCreatorRcPtr & shaderCreator,
     GpuShaderText & ss,
     unsigned resourceIndex,
-    const ACES2::JMhParams & p,
-    const ACES2::ToneScaleParams & t,
-    const ACES2::ChromaCompressParams & c,
-    const std::string & reachName)
+    const ACES2::SharedCompressionParameters & s,
+    const ACES2::ChromaCompressParams & c)
 {
     std::string toeName = _Add_Toe_func(shaderCreator, resourceIndex, false);
 
@@ -549,15 +730,6 @@ void _Add_Tonescale_Compress_Fwd_Shader(
     ss.newLine() << ss.floatDecl("M") << " = " << pxl << ".g;";
     ss.newLine() << ss.floatDecl("h") << " = " << pxl << ".b;";
 
-    // Tonescale applied in Y (convert to and from J)
-    ss.newLine() << ss.floatDecl("A") << " = " << p.A_w_J << " * pow(abs(J) / 100.0, 1.0 / (" << ACES2::surround[1] << " * " << p.z << "));";
-    ss.newLine() << ss.floatDecl("Y") << " = sign(J) * 100.0 / " << p.F_L << " * pow((27.13 * A) / (400.0 - A), 1.0 / 0.42) / 100.0;";
-
-    ss.newLine() << ss.floatDecl("f") << " = " << t.m_2  << " * pow(max(0.0, Y) / (Y + " << t.s_2 << "), " << t.g << ");";
-    ss.newLine() << ss.floatDecl("Y_ts") << " = max(0.0, f * f / (f + " << t.t_1 << ")) * " << t.n_r << ";";
-
-    ss.newLine() << ss.floatDecl("F_L_Y") << " = pow(" << p.F_L << " * abs(Y_ts) / 100.0, 0.42);";
-    ss.newLine() << ss.floatDecl("J_ts") << " = sign(Y_ts) * 100.0 * pow(((400.0 * F_L_Y) / (27.13 + F_L_Y)) / " << p.A_w_J << ", " << ACES2::surround[1] << " * " << p.z << ");";
 
     // ChromaCompress
     ss.newLine() << ss.floatDecl("M_cp") << " = M;";
@@ -566,31 +738,13 @@ void _Add_Tonescale_Compress_Fwd_Shader(
     ss.newLine() << "{";
     ss.indent();
 
-    ss.newLine() << ss.floatDecl("nJ") << " = J_ts / " << c.limit_J_max << ";";
+    ss.newLine() << ss.floatDecl("nJ") << " = J_ts / " << s.limit_J_max << ";";
     ss.newLine() << ss.floatDecl("snJ") << " = max(0.0, 1.0 - nJ);";
 
-    // Mnorm
-    ss.newLine() << ss.floatDecl("Mnorm") << ";";
-    ss.newLine() << "{";
-    ss.indent();
-
-    ss.newLine() << ss.floatDecl("PI") << " = 3.14159265358979;";
-    ss.newLine() << ss.floatDecl("h_rad") << " = h / 180.0 * PI;";
-    ss.newLine() << ss.floatDecl("a") << " = cos(h_rad);";
-    ss.newLine() << ss.floatDecl("b") << " = sin(h_rad);";
-    ss.newLine() << ss.floatDecl("cos_hr2") << " = a * a - b * b;";
-    ss.newLine() << ss.floatDecl("sin_hr2") << " = 2.0 * a * b;";
-    ss.newLine() << ss.floatDecl("cos_hr3") << " = 4.0 * a * a * a - 3.0 * a;";
-    ss.newLine() << ss.floatDecl("sin_hr3") << " = 3.0 * b - 4.0 * b * b * b;";
-    ss.newLine() << ss.floatDecl("M") << " = 11.34072 * a + 16.46899 * cos_hr2 + 7.88380 * cos_hr3 + 14.66441 * b + -6.37224 * sin_hr2 + 9.19364 * sin_hr3 + 77.12896;";
-    ss.newLine() << "Mnorm = M * " << c.chroma_compress_scale << ";";
-
-    ss.dedent();
-    ss.newLine() << "}";
+    _Add_ChromaCompressionNorm_Shader(ss, c);
 
-    ss.newLine() << ss.floatDecl("reachM") << " = " << reachName << "_sample(h);";
-    ss.newLine() << ss.floatDecl("limit") << " = pow(nJ, " << c.model_gamma << ") * reachM / Mnorm;";
-    ss.newLine() << "M_cp = M * pow(J_ts / J, " << c.model_gamma << ");";
+    ss.newLine() << ss.floatDecl("limit") << " = pow(nJ, " << s.model_gamma_inv << ") * reachMaxM / Mnorm;";
+    ss.newLine() << "M_cp = M * pow(J_ts / J, " << s.model_gamma_inv << ");";
     ss.newLine() << "M_cp = M_cp / Mnorm;";
 
     ss.newLine() << "M_cp = limit - " << toeName << "(limit - M_cp, limit - 0.001, snJ * " << c.sat << ", sqrt(nJ * nJ + " << c.sat_thr << "));";
@@ -607,30 +761,17 @@ void _Add_Tonescale_Compress_Inv_Shader(
     GpuShaderCreatorRcPtr & shaderCreator,
     GpuShaderText & ss,
     unsigned resourceIndex,
-    const ACES2::JMhParams & p,
-    const ACES2::ToneScaleParams & t,
-    const ACES2::ChromaCompressParams & c,
-    const std::string & reachName)
+    const ACES2::SharedCompressionParameters & s,
+    const ACES2::ChromaCompressParams & c)
 {
     std::string toeName = _Add_Toe_func(shaderCreator, resourceIndex, true);
 
     const std::string pxl(shaderCreator->getPixelName());
-
+    
     ss.newLine() << ss.floatDecl("J_ts") << " = " << pxl << ".r;";
     ss.newLine() << ss.floatDecl("M_cp") << " = " << pxl << ".g;";
     ss.newLine() << ss.floatDecl("h") << " = " << pxl << ".b;";
 
-    // Inverse Tonescale applied in Y (convert to and from J)
-    ss.newLine() << ss.floatDecl("A") << " = " << p.A_w_J << " * pow(abs(J_ts) / 100.0, 1.0 / (" << ACES2::surround[1] << " * " << p.z << "));";
-    ss.newLine() << ss.floatDecl("Y_ts") << " = sign(J_ts) * 100.0 / " << p.F_L << " * pow((27.13 * A) / (400.0 - A), 1.0 / 0.42) / 100.0;";
-
-    ss.newLine() << ss.floatDecl("Z") << " = max(0.0, min(" << t.n << " / (" << t.u_2 * t.n_r << "), Y_ts));";
-    ss.newLine() << ss.floatDecl("ht") << " = (Z + sqrt(Z * (4.0 * " << t.t_1 << " + Z))) / 2.0;";
-    ss.newLine() << ss.floatDecl("Y") << " = " << t.s_2 << " / (pow((" << t.m_2 << " / ht), (1.0 / " << t.g << ")) - 1.0);";
-
-    ss.newLine() << ss.floatDecl("F_L_Y") << " = pow(" << p.F_L << " * abs(Y * 100.0) / 100.0, 0.42);";
-    ss.newLine() << ss.floatDecl("J") << " = sign(Y) * 100.0 * pow(((400.0 * F_L_Y) / (27.13 + F_L_Y)) / " << p.A_w_J << ", " << ACES2::surround[1] << " * " << p.z << ");";
-
     // ChromaCompress
     ss.newLine() << ss.floatDecl("M") << " = M_cp;";
 
@@ -638,36 +779,18 @@ void _Add_Tonescale_Compress_Inv_Shader(
     ss.newLine() << "{";
     ss.indent();
 
-    ss.newLine() << ss.floatDecl("nJ") << " = J_ts / " << c.limit_J_max << ";";
+    ss.newLine() << ss.floatDecl("nJ") << " = J_ts / " << s.limit_J_max << ";";
     ss.newLine() << ss.floatDecl("snJ") << " = max(0.0, 1.0 - nJ);";
 
-    // Mnorm
-    ss.newLine() << ss.floatDecl("Mnorm") << ";";
-    ss.newLine() << "{";
-    ss.indent();
-
-    ss.newLine() << ss.floatDecl("PI") << " = 3.14159265358979;";
-    ss.newLine() << ss.floatDecl("h_rad") << " = h / 180.0 * PI;";
-    ss.newLine() << ss.floatDecl("a") << " = cos(h_rad);";
-    ss.newLine() << ss.floatDecl("b") << " = sin(h_rad);";
-    ss.newLine() << ss.floatDecl("cos_hr2") << " = a * a - b * b;";
-    ss.newLine() << ss.floatDecl("sin_hr2") << " = 2.0 * a * b;";
-    ss.newLine() << ss.floatDecl("cos_hr3") << " = 4.0 * a * a * a - 3.0 * a;";
-    ss.newLine() << ss.floatDecl("sin_hr3") << " = 3.0 * b - 4.0 * b * b * b;";
-    ss.newLine() << ss.floatDecl("M") << " = 11.34072 * a + 16.46899 * cos_hr2 + 7.88380 * cos_hr3 + 14.66441 * b + -6.37224 * sin_hr2 + 9.19364 * sin_hr3 + 77.12896;";
-    ss.newLine() << "Mnorm = M * " << c.chroma_compress_scale << ";";
-
-    ss.dedent();
-    ss.newLine() << "}";
+    _Add_ChromaCompressionNorm_Shader(ss, c);
 
-    ss.newLine() << ss.floatDecl("reachM") << " = " << reachName << "_sample(h);";
-    ss.newLine() << ss.floatDecl("limit") << " = pow(nJ, " << c.model_gamma << ") * reachM / Mnorm;";
+    ss.newLine() << ss.floatDecl("limit") << " = pow(nJ, " << s.model_gamma_inv << ") * reachMaxM / Mnorm;";
 
     ss.newLine() << "M = M_cp / Mnorm;";
     ss.newLine() << "M = " << toeName << "(M, limit, nJ * " << c.compr << ", snJ);";
     ss.newLine() << "M = limit - " << toeName << "(limit - M, limit - 0.001, snJ * " << c.sat << ", sqrt(nJ * nJ + " << c.sat_thr << "));";
     ss.newLine() << "M = M * Mnorm;";
-    ss.newLine() << "M = M * pow(J_ts / J, " << -c.model_gamma << ");";
+    ss.newLine() << "M = M * pow(J_ts / J, " << -s.model_gamma_inv << ");";
 
     ss.dedent();
     ss.newLine() << "}";
@@ -708,7 +831,7 @@ std::string _Add_Cusp_table(
         GpuShaderCreator::TEXTURE_RGB_CHANNEL,
         dimensions,
         INTERP_NEAREST,
-        &(g.gamut_cusp_table.table[0][0]));
+        &(g.gamut_cusp_table[0][0]));
 
     if (dimensions == GpuShaderDesc::TEXTURE_1D)
     {
@@ -727,24 +850,20 @@ std::string _Add_Cusp_table(
     GpuShaderText ss(shaderCreator->getLanguage());
 
     const std::string hues_array_name = name + "_hues_array";
-    std::vector<float> hues_array(g.gamut_cusp_table.total_size);
-    for (int i = 0; i < g.gamut_cusp_table.total_size; ++i)
-    {
-        hues_array[i] = g.gamut_cusp_table.table[i][2];
-    }
-    ss.declareFloatArrayConst(hues_array_name, (int) hues_array.size(), hues_array.data());
+    ss.declareFloatArrayConst(hues_array_name, (int) g.hue_table.total_size, g.hue_table.data());
 
-    ss.newLine() << ss.float2Keyword() << " " << name << "_sample(float h)";
+    ss.newLine() << ss.float3Keyword() << " " << name << "_sample(float h)";
     ss.newLine() << "{";
     ss.indent();
 
-    ss.newLine() << ss.floatDecl("i_lo") << " = 0;";
-    ss.newLine() << ss.floatDecl("i_hi") << " = " << g.gamut_cusp_table.base_index + g.gamut_cusp_table.size << ";";
+    ss.newLine() << ss.intDecl("i") << " = " << ss.intKeyword() << "(h) + " << g.gamut_cusp_table.base_index << ";";
 
-    ss.newLine() << ss.floatDecl("hwrap") << " = h;";
-    ss.newLine() << "hwrap = hwrap - floor(hwrap / 360.0) * 360.0;";
-    ss.newLine() << "hwrap = (hwrap < 0.0) ? hwrap + 360.0 : hwrap;";
-    ss.newLine() << ss.intDecl("i") << " = " << ss.intKeyword() << "(hwrap + " << g.gamut_cusp_table.base_index << ");";
+    ss.newLine() << ss.intDecl("i_lo") << " = " << ss.intKeyword() << "(max("
+                 << ss.floatKeyword() << "(" << g.gamut_cusp_table.lower_wrap_index << "), "
+                 << ss.floatKeyword() << "(i + " << g.hue_linearity_search_range[0] << ")));";
+    ss.newLine() << ss.intDecl("i_hi") << " = " << ss.intKeyword() << "(min("
+                 << ss.floatKeyword() << "(" << g.gamut_cusp_table.upper_wrap_index << "), "
+                 << ss.floatKeyword() << "(i + " << g.hue_linearity_search_range[1] << ")));";
 
     ss.newLine() << "while (i_lo + 1 < i_hi)";
     ss.newLine() << "{";
@@ -764,7 +883,7 @@ std::string _Add_Cusp_table(
     ss.newLine() << "i_hi = i;";
     ss.dedent();
     ss.newLine() << "}";
-    ss.newLine() << "i = " << ss.intKeyword() << "((i_lo + i_hi) / 2.0);";
+    ss.newLine() << "i = (i_lo + i_hi) / 2;";
 
     ss.dedent();
     ss.newLine() << "}";
@@ -780,98 +899,8 @@ std::string _Add_Cusp_table(
         ss.newLine() << ss.float3Decl("hi") << " = " << ss.sampleTex2D(name, ss.float2Const(std::string("(i_hi + 0.5) / ") + std::to_string(g.gamut_cusp_table.total_size), "0.5")) << ".rgb;";
     }
 
-    ss.newLine() << ss.floatDecl("t") << " = (h - lo.b) / (hi.b - lo.b);";
-    ss.newLine() << ss.floatDecl("cuspJ") << " = " << ss.lerp("lo.r", "hi.r", "t") << ";";
-    ss.newLine() << ss.floatDecl("cuspM") << " = " << ss.lerp("lo.g", "hi.g", "t") << ";";
-
-    ss.newLine() << "return " << ss.float2Const("cuspJ", "cuspM") << ";";
-
-    ss.dedent();
-    ss.newLine() << "}";
-
-    shaderCreator->addToHelperShaderCode(ss.string().c_str());
-
-    return name;
-}
-
-std::string _Add_Gamma_table(
-    GpuShaderCreatorRcPtr & shaderCreator,
-    unsigned resourceIndex,
-    const ACES2::GamutCompressParams & g)
-{
-    // Reserve name
-    std::ostringstream resName;
-    resName << shaderCreator->getResourcePrefix()
-            << std::string("_")
-            << std::string("upper_hull_gamma_table_")
-            << resourceIndex;
-
-    // Note: Remove potentially problematic double underscores from GLSL resource names.
-    std::string name(resName.str());
-    StringUtils::ReplaceInPlace(name, "__", "_");
-
-    // Register texture
-    GpuShaderDesc::TextureDimensions dimensions = GpuShaderDesc::TEXTURE_1D;
-    if (shaderCreator->getLanguage() == GPU_LANGUAGE_GLSL_ES_1_0
-        || shaderCreator->getLanguage() == GPU_LANGUAGE_GLSL_ES_3_0
-        || !shaderCreator->getAllowTexture1D())
-    {
-        dimensions = GpuShaderDesc::TEXTURE_2D;
-    }
-
-    shaderCreator->addTexture(
-        name.c_str(),
-        GpuShaderText::getSamplerName(name).c_str(),
-        g.gamut_cusp_table.total_size,
-        1,
-        GpuShaderCreator::TEXTURE_RED_CHANNEL,
-        dimensions,
-        INTERP_NEAREST,
-        &(g.upper_hull_gamma_table.table[0]));
-
-
-    if (dimensions == GpuShaderDesc::TEXTURE_1D)
-    {
-        GpuShaderText ss(shaderCreator->getLanguage());
-        ss.declareTex1D(name);
-        shaderCreator->addToDeclareShaderCode(ss.string().c_str());
-    }
-    else
-    {
-        GpuShaderText ss(shaderCreator->getLanguage());
-        ss.declareTex2D(name);
-        shaderCreator->addToDeclareShaderCode(ss.string().c_str());
-    }
-
-    // Sampler function
-    GpuShaderText ss(shaderCreator->getLanguage());
-
-    ss.newLine() << ss.floatKeyword() << " " << name << "_sample(float h)";
-    ss.newLine() << "{";
-    ss.indent();
-
-    ss.newLine() << ss.floatDecl("hwrap") << " = h;";
-    ss.newLine() << "hwrap = hwrap - floor(hwrap / 360.0) * 360.0;";
-    ss.newLine() << "hwrap = (hwrap < 0.0) ? hwrap + 360.0 : hwrap;";
-
-    ss.newLine() << ss.floatDecl("i_lo") << " = floor(hwrap) + " << g.upper_hull_gamma_table.base_index << ";";
-    ss.newLine() << ss.floatDecl("i_hi") << " = (i_lo + 1);";
-    ss.newLine() << "i_hi = i_hi - floor(i_hi / 360.0) * 360.0;";
-
-    ss.newLine() << ss.floatDecl("base_hue") << " = i_lo - " << g.upper_hull_gamma_table.base_index << ";";
-
-    if (dimensions == GpuShaderDesc::TEXTURE_1D)
-    {
-        ss.newLine() << ss.floatDecl("lo") << " = " << ss.sampleTex1D(name, std::string("(i_lo + 0.5) / ") + std::to_string(g.upper_hull_gamma_table.total_size)) << ".r;";
-        ss.newLine() << ss.floatDecl("hi") << " = " << ss.sampleTex1D(name, std::string("(i_hi + 0.5) / ") + std::to_string(g.upper_hull_gamma_table.total_size)) << ".r;";
-    }
-    else
-    {
-        ss.newLine() << ss.floatDecl("lo") << " = " << ss.sampleTex2D(name, ss.float2Const(std::string("(i_lo + 0.5) / ") + std::to_string(g.upper_hull_gamma_table.total_size), "0.5")) << ".r;";
-        ss.newLine() << ss.floatDecl("hi") << " = " << ss.sampleTex2D(name, ss.float2Const(std::string("(i_hi + 0.5) / ") + std::to_string(g.upper_hull_gamma_table.total_size), "0.5")) << ".r;";
-    }
-
-    ss.newLine() << ss.floatDecl("t") << " = hwrap - base_hue;";
+    ss.newLine() << ss.floatDecl("t") << " = (h - " << hues_array_name << "[i_hi - 1]) / "
+                                                "(" << hues_array_name << "[i_hi]" << " - " << hues_array_name << "[i_hi - 1]);";
     ss.newLine() << "return " << ss.lerp("lo", "hi", "t") << ";";
 
     ss.dedent();
@@ -885,7 +914,7 @@ std::string _Add_Gamma_table(
 std::string _Add_Focus_Gain_func(
     GpuShaderCreatorRcPtr & shaderCreator,
     unsigned resourceIndex,
-    const ACES2::GamutCompressParams & g)
+    const ACES2::SharedCompressionParameters & s)
 {
     // Reserve name
     std::ostringstream resName;
@@ -904,13 +933,15 @@ std::string _Add_Focus_Gain_func(
     ss.newLine() << "{";
     ss.indent();
 
-    ss.newLine() << ss.floatDecl("thr") << " = " << ss.lerp("cuspJ", std::to_string(g.limit_J_max), std::to_string(ACES2::focus_gain_blend)) << ";";
+    ss.newLine() << ss.floatDecl("thr") << " = " << ss.lerp("cuspJ", std::to_string(s.limit_J_max), std::to_string(ACES2::focus_gain_blend)) << ";";
 
-    ss.newLine() << "if (J > thr)";
+    ss.newLine() << "if (J > thr)"; // TODO threshold
     ss.newLine() << "{";
     ss.indent();
-    ss.newLine() << ss.floatDecl("gain") << " = ( " << g.limit_J_max << " - thr) / max(0.0001, (" << g.limit_J_max << " - min(" << g.limit_J_max << ", J)));";
-    ss.newLine() << "return pow(log(gain)/log(10.0), 1.0 / " << ACES2::focus_adjust_gain << ") + 1.0;";
+    ss.newLine() << ss.floatDecl("gain") << " = ( " << s.limit_J_max << " - thr) / max(0.0001, " << s.limit_J_max << " - J);";
+    ss.newLine() << "gain = log(gain)/log(10.0);";  // TODO log10(gain) but not all shading languages have log10() would log2(gain)/log2(10) be better? perhaps delegate to GpuShaderText?
+    //ss.newLine() << "return pow(gain, " << ACES2::focus_adjust_gain_inv << ") + 1.0;"; // TODO; remove once agreed on change
+    ss.newLine() << "return gain * gain + 1.0;";
     ss.dedent();
     ss.newLine() << "}";
     ss.newLine() << "else";
@@ -931,7 +962,7 @@ std::string _Add_Focus_Gain_func(
 std::string _Add_Solve_J_Intersect_func(
     GpuShaderCreatorRcPtr & shaderCreator,
     unsigned resourceIndex,
-    const ACES2::GamutCompressParams & g)
+    const ACES2::SharedCompressionParameters & s)
 {
     // Reserve name
     std::ostringstream resName;
@@ -950,54 +981,30 @@ std::string _Add_Solve_J_Intersect_func(
     ss.newLine() << "{";
     ss.indent();
 
-    ss.newLine() << ss.floatDecl("a") << " = " << "M / (focusJ * slope_gain);";
-    ss.newLine() << ss.floatDecl("b") << " = 0.0;";
-    ss.newLine() << ss.floatDecl("c") << " = 0.0;";
-    ss.newLine() << ss.floatDecl("intersectJ") << " = 0.0;";
+    ss.newLine() << ss.floatDecl("M_scaled") << " = M / slope_gain;";
+    ss.newLine() << ss.floatDecl("a") << " = M_scaled / focusJ;";
 
     ss.newLine() << "if (J < focusJ)";
     ss.newLine() << "{";
     ss.indent();
-    ss.newLine() << "b = 1.0 - M / slope_gain;";
+    ss.newLine() << ss.floatDecl("b") << " = 1.0 - M_scaled;";
+    ss.newLine() << ss.floatDecl("c") << " = -J;";
+    ss.newLine() << ss.floatDecl("det") << " =  b * b - 4.f * a * c;";
+    ss.newLine() << ss.floatDecl("root") << " =  sqrt(det);";
+    ss.newLine() << "return -2.0 * c / (b + root);";
     ss.dedent();
     ss.newLine() << "}";
     ss.newLine() << "else";
     ss.newLine() << "{";
     ss.indent();
-    ss.newLine() << "b = - (1.0 + M / slope_gain + " << g.limit_J_max << " * M / (focusJ * slope_gain));";
+    ss.newLine() << ss.floatDecl("b") << " = - (1.0 + M_scaled + " << s.limit_J_max << " * a);";
+    ss.newLine() << ss.floatDecl("c") << " = " << s.limit_J_max << " * M_scaled + J;";
+    ss.newLine() << ss.floatDecl("det") << " =  b * b - 4.f * a * c;";
+    ss.newLine() << ss.floatDecl("root") << " =  sqrt(det);";
+    ss.newLine() << "return -2.0 * c / (b - root);";
     ss.dedent();
     ss.newLine() << "}";
 
-    ss.newLine() << "if (J < focusJ)";
-    ss.newLine() << "{";
-    ss.indent();
-    ss.newLine() << "c = -J;";
-    ss.dedent();
-    ss.newLine() << "}";
-    ss.newLine() << "else";
-    ss.newLine() << "{";
-    ss.indent();
-    ss.newLine() << "c = " << g.limit_J_max << " * M / slope_gain + J;";
-    ss.dedent();
-    ss.newLine() << "}";
-
-    ss.newLine() << ss.floatDecl("root") << " = sqrt(b * b - 4.0 * a * c);";
-
-    ss.newLine() << "if (J < focusJ)";
-    ss.newLine() << "{";
-    ss.indent();
-    ss.newLine() << "intersectJ = 2.0 * c / (-b - root);";
-    ss.dedent();
-    ss.newLine() << "}";
-    ss.newLine() << "else";
-    ss.newLine() << "{";
-    ss.indent();
-    ss.newLine() << "intersectJ = 2.0 * c / (-b + root);";
-    ss.dedent();
-    ss.newLine() << "}";
-
-    ss.newLine() << "return intersectJ;";
-
     ss.dedent();
     ss.newLine() << "}";
 
@@ -1009,8 +1016,7 @@ std::string _Add_Solve_J_Intersect_func(
 std::string _Add_Find_Gamut_Boundary_Intersection_func(
     GpuShaderCreatorRcPtr & shaderCreator,
     unsigned resourceIndex,
-    const ACES2::GamutCompressParams & g,
-    const std::string & solveJIntersectName)
+    const ACES2::SharedCompressionParameters & s)
 {
     // Reserve name
     std::ostringstream resName;
@@ -1025,49 +1031,28 @@ std::string _Add_Find_Gamut_Boundary_Intersection_func(
 
     GpuShaderText ss(shaderCreator->getLanguage());
 
-    ss.newLine() << ss.float3Keyword() << " " << name << "(" << ss.float3Keyword() << " JMh_s, " << ss.float2Keyword() << " JM_cusp_in, float J_focus, float slope_gain, float gamma_top, float gamma_bottom)";
+    ss.newLine() << ss.floatKeyword() << " " << name << "(" << ss.float2Keyword()
+                 << " JM_cusp, float gamma_top_inv, float gamma_bottom_inv, float J_intersect_source, float J_intersect_cusp, float slope)";
     ss.newLine() << "{";
     ss.indent();
 
-    ss.newLine() << ss.float2Decl("JM_cusp") << " = " << ss.float2Const("JM_cusp_in.r", std::string("JM_cusp_in.g * (1.0 + ") + std::to_string(ACES2::smooth_m) + " * " + std::to_string(ACES2::smooth_cusps) + ")") << ";";
-
-    ss.newLine() << ss.floatDecl("J_intersect_source") << " = " << solveJIntersectName << "(JMh_s.r, JMh_s.g, J_focus, slope_gain);";
-    ss.newLine() << ss.floatDecl("J_intersect_cusp") << " = " << solveJIntersectName << "(JM_cusp.r, JM_cusp.g, J_focus, slope_gain);";
-
-    ss.newLine() << ss.floatDecl("slope") << " = 0.0;";
-    ss.newLine() << "if (J_intersect_source < J_focus)";
-    ss.newLine() << "{";
-    ss.indent();
-    ss.newLine() << "slope = J_intersect_source * (J_intersect_source - J_focus) / (J_focus * slope_gain);";
-    ss.dedent();
-    ss.newLine() << "}";
-    ss.newLine() << "else";
-    ss.newLine() << "{";
-    ss.indent();
-    ss.newLine() << "slope = (" << g.limit_J_max << " - J_intersect_source) * (J_intersect_source - J_focus) / (J_focus * slope_gain);";
-    ss.dedent();
-    ss.newLine() << "}";
-
-    ss.newLine() << ss.floatDecl("M_boundary_lower ") << " = J_intersect_cusp * pow(J_intersect_source / J_intersect_cusp, 1.0 / gamma_bottom) / (JM_cusp.r / JM_cusp.g - slope);";
-    ss.newLine() << ss.floatDecl("M_boundary_upper") << " = JM_cusp.g * (" << g.limit_J_max << " - J_intersect_cusp) * pow((" << g.limit_J_max << " - J_intersect_source) / (" << g.limit_J_max << " - J_intersect_cusp), 1.0 / gamma_top) / (slope * JM_cusp.g + " << g.limit_J_max << " - JM_cusp.r);";
+    ss.newLine() << ss.floatDecl("M_boundary_lower") << " = J_intersect_cusp * pow(J_intersect_source / J_intersect_cusp, gamma_bottom_inv) / (JM_cusp.r / JM_cusp.g - slope);";
+    ss.newLine() << ss.floatDecl("M_boundary_upper") << " = JM_cusp.g * (" << s.limit_J_max << " - J_intersect_cusp) * pow((" << s.limit_J_max << " - J_intersect_source) / (" << s.limit_J_max << " - J_intersect_cusp), gamma_top_inv) / (slope * JM_cusp.g + " << s.limit_J_max << " - JM_cusp.r);";
 
     ss.newLine() << ss.floatDecl("smin") << " = 0.0;";
     ss.newLine() << "{";
     ss.indent();
-    ss.newLine() << ss.floatDecl("a") << " = M_boundary_lower / JM_cusp.g;";
-    ss.newLine() << ss.floatDecl("b") << " = M_boundary_upper / JM_cusp.g;";
-    ss.newLine() << ss.floatDecl("s") << " = " << ACES2::smooth_cusps << ";";
+    ss.newLine() << ss.floatDecl("a") << " = M_boundary_lower;";
+    ss.newLine() << ss.floatDecl("b") << " = M_boundary_upper;";
+    ss.newLine() << ss.floatDecl("s") << " = " << ACES2::smooth_cusps << " * JM_cusp.g;";
 
     ss.newLine() << ss.floatDecl("h") << " = max(s - abs(a - b), 0.0) / s;";
-    ss.newLine() << "smin = min(a, b) - h * h * h * s * (1.0 / 6.0);";
+    ss.newLine() << "smin = min(a, b) - h * h * h * s * " << (1.0 / 6.0) << ";";
 
     ss.dedent();
     ss.newLine() << "}";
 
-    ss.newLine() << ss.floatDecl("M_boundary") << " = JM_cusp.g * smin;";
-    ss.newLine() << ss.floatDecl("J_boundary") << "= J_intersect_source + slope * M_boundary;";
-
-    ss.newLine() << "return " << ss.float3Const("J_boundary", "M_boundary", "J_intersect_source") << ";";
+    ss.newLine() << "return smin;";
 
     ss.dedent();
     ss.newLine() << "}";
@@ -1077,19 +1062,17 @@ std::string _Add_Find_Gamut_Boundary_Intersection_func(
     return name;
 }
 
-std::string _Add_Reach_Boundary_func(
+std::string _Add_Compression_func(
     GpuShaderCreatorRcPtr & shaderCreator,
     unsigned resourceIndex,
-    const ACES2::GamutCompressParams & g,
-    const std::string & reachName,
-    const std::string & getFocusGainName,
-    const std::string & solveJIntersectName)
+    bool invert)
 {
     // Reserve name
     std::ostringstream resName;
     resName << shaderCreator->getResourcePrefix()
             << std::string("_")
-            << std::string("get_reach_boundary")
+            << std::string("remap_M")
+            << (invert ? std::string("_inv") : std::string("_fwd"))
             << resourceIndex;
 
     // Note: Remove potentially problematic double underscores from GLSL resource names.
@@ -1098,102 +1081,47 @@ std::string _Add_Reach_Boundary_func(
 
     GpuShaderText ss(shaderCreator->getLanguage());
 
-    ss.newLine() << ss.float3Keyword() << " " << name << "(float J, float M, float h, " << ss.float2Keyword() << " JMcusp, float focusJ)";
+    ss.newLine() << ss.floatKeyword() << " " << name << "(float M, float gamut_boundary_M, float reach_boundary_M)";
     ss.newLine() << "{";
     ss.indent();
 
-    ss.newLine() << ss.floatDecl("reachMaxM") << " = " << reachName << "_sample(h);";
-    ss.newLine() << ss.floatDecl("slope_gain") << " = " << g.limit_J_max << " * " << g.focus_dist << " * " << getFocusGainName << "(J, JMcusp.r);";
-    ss.newLine() << ss.floatDecl("intersectJ") << " = " << solveJIntersectName << "(J, M, focusJ, slope_gain);";
+    ss.newLine() << ss.floatDecl("boundary_ratio") << " = gamut_boundary_M / reach_boundary_M;";
+    ss.newLine() << ss.floatDecl("proportion") << " = max(boundary_ratio, " << ACES2::compression_threshold << ");";
+    ss.newLine() << ss.floatDecl("threshold") << " = proportion * gamut_boundary_M;";
 
-    ss.newLine() << ss.floatDecl("slope") << " = 0.0;";
-    ss.newLine() << "if (intersectJ < focusJ)";
+    ss.newLine() << "if (proportion >= 1.0f || M <= threshold)";
     ss.newLine() << "{";
     ss.indent();
-    ss.newLine() << "slope = intersectJ * (intersectJ - focusJ) / (focusJ * slope_gain);";
-    ss.dedent();
-    ss.newLine() << "}";
-    ss.newLine() << "else";
-    ss.newLine() << "{";
-    ss.indent();
-    ss.newLine() << "slope = (" << g.limit_J_max << " - intersectJ) * (intersectJ - focusJ) / (focusJ * slope_gain);";
-    ss.dedent();
-    ss.newLine() << "}";
-
-    ss.newLine() << ss.floatDecl("boundary") << " = " << g.limit_J_max << " * pow(intersectJ / " << g.limit_J_max << ", " << g.model_gamma << ") * reachMaxM / (" << g.limit_J_max << " - slope * reachMaxM);";
-
-    ss.newLine() << "return " << ss.float3Const("J", "boundary", "h") << ";";
-
+    ss.newLine() << "return M;";
     ss.dedent();
     ss.newLine() << "}";
+    ss.newLine() << ss.floatDecl("m_offset") << " = M - threshold;";
+    ss.newLine() << ss.floatDecl("gamut_offset") << " = gamut_boundary_M - threshold;";
+    ss.newLine() << ss.floatDecl("reach_offset") << " = reach_boundary_M - threshold;";
 
-    shaderCreator->addToHelperShaderCode(ss.string().c_str());
-
-    return name;
-}
-
-std::string _Add_Compression_func(
-    GpuShaderCreatorRcPtr & shaderCreator,
-    unsigned resourceIndex,
-    bool invert)
-{
-    // Reserve name
-    std::ostringstream resName;
-    resName << shaderCreator->getResourcePrefix()
-            << std::string("_")
-            << std::string("compression")
-            << (invert ? std::string("_inv") : std::string("_fwd"))
-            << resourceIndex;
-
-    // Note: Remove potentially problematic double underscores from GLSL resource names.
-    std::string name(resName.str());
-    StringUtils::ReplaceInPlace(name, "__", "_");
-
-    GpuShaderText ss(shaderCreator->getLanguage());
-
-    ss.newLine() << ss.floatKeyword() << " " << name << "(float v, float thr, float lim)";
-    ss.newLine() << "{";
-    ss.indent();
-
-    ss.newLine() << ss.floatDecl("s") << " = (lim - thr) * (1.0 - thr) / (lim - 1.0);";
-    ss.newLine() << ss.floatDecl("nd") << " = (v - thr) / s;";
-
-
-    ss.newLine() << ss.floatDecl("vCompressed") << " = 0.0;";
+    ss.newLine() << ss.floatDecl("scale") << " = reach_offset / ((reach_offset / gamut_offset) - 1.0f);";
+    ss.newLine() << ss.floatDecl("nd") << " = m_offset / scale;";
 
     if (invert)
     {
-        ss.newLine() << "if (v < thr || lim <= 1.0001 || v > thr + s)";
+        ss.newLine() << "if (nd >= 1.0f)"; // TODO: could be done branchless?
         ss.newLine() << "{";
         ss.indent();
-        ss.newLine() << "vCompressed = v;";
+        ss.newLine() << "return threshold + scale;";
         ss.dedent();
         ss.newLine() << "}";
         ss.newLine() << "else";
         ss.newLine() << "{";
         ss.indent();
-        ss.newLine() << "vCompressed = thr + s * (-nd / (nd - 1));";
+        ss.newLine() << "return threshold + scale * -(nd / (nd - 1.0f));";
         ss.dedent();
         ss.newLine() << "}";
     }
     else
     {
-        ss.newLine() << "if (v < thr || lim <= 1.0001)";
-        ss.newLine() << "{";
-        ss.indent();
-        ss.newLine() << "vCompressed = v;";
-        ss.dedent();
-        ss.newLine() << "}";
-        ss.newLine() << "else";
-        ss.newLine() << "{";
-        ss.indent();
-        ss.newLine() << "vCompressed = thr + s * nd / (1.0 + nd);";
-        ss.dedent();
-        ss.newLine() << "}";
+        ss.newLine() << "return threshold + scale * nd / (1.0f + nd);";
     }
 
-    ss.newLine() << "return vCompressed;";
-
     ss.dedent();
     ss.newLine() << "}";
 
@@ -1205,13 +1133,12 @@ std::string _Add_Compression_func(
 std::string _Add_Compress_Gamut_func(
     GpuShaderCreatorRcPtr & shaderCreator,
     unsigned resourceIndex,
+    const ACES2::SharedCompressionParameters & s,
     const ACES2::GamutCompressParams & g,
-    const std::string & cuspName,
     const std::string & getFocusGainName,
-    const std::string & gammaName,
     const std::string & findGamutBoundaryIntersectionName,
-    const std::string & getReachBoundaryName,
-    const std::string & compressionName)
+    const std::string & compressionName,
+    const std::string & solveJIntersectName)
 {
     // Reserve name
     std::ostringstream resName;
@@ -1226,7 +1153,7 @@ std::string _Add_Compress_Gamut_func(
 
     GpuShaderText ss(shaderCreator->getLanguage());
 
-    ss.newLine() << ss.float3Keyword() << " " << name << "(" << ss.float3Keyword() << " JMh, float Jx)";
+    ss.newLine() << ss.float3Keyword() << " " << name << "(" << ss.float3Keyword() << " JMh, float Jx, " << ss.float3Keyword() << " JMGcusp, float reachMaxM)";
     ss.newLine() << "{";
     ss.indent();
 
@@ -1234,7 +1161,7 @@ std::string _Add_Compress_Gamut_func(
     ss.newLine() << ss.floatDecl("M") << " = JMh.g;";
     ss.newLine() << ss.floatDecl("h") << " = JMh.b;";
 
-    ss.newLine() << "if (M < 0.0001 || J > " << g.limit_J_max << ")";
+    ss.newLine() << "if (M <= 0.0 || J > " << s.limit_J_max << ")";
     ss.newLine() << "{";
     ss.indent();
     ss.newLine() << "return " << ss.float3Const("J", "0.0", "h") << ";";
@@ -1244,40 +1171,36 @@ std::string _Add_Compress_Gamut_func(
     ss.newLine() << "{";
     ss.indent();
 
-    ss.newLine() << ss.float2Decl("project_from") << " = " << ss.float2Const("J", "M") << ";";
-    ss.newLine() << ss.float2Decl("JMcusp") << " = " << cuspName << "_sample(h);";
+    ss.newLine() << ss.float2Decl("JMcusp") << " = JMGcusp.rg;";
 
-    ss.newLine() << ss.floatDecl("focusJ") << " = " << ss.lerp("JMcusp.r", std::to_string(g.mid_J), std::string("min(1.0, ") + std::to_string(ACES2::cusp_mid_blend) + " - (JMcusp.r / " + std::to_string(g.limit_J_max)) << "));";
-    ss.newLine() << ss.floatDecl("slope_gain") << " = " << g.limit_J_max << " * " << g.focus_dist << " * " << getFocusGainName << "(Jx, JMcusp.r);";
+    ss.newLine() << ss.floatDecl("focusJ") << " = " << ss.lerp("JMcusp.r", std::to_string(g.mid_J), std::string("min(1.0, ") + std::to_string(ACES2::cusp_mid_blend) + " - (JMcusp.r / " + std::to_string(s.limit_J_max)) << "));";
+    ss.newLine() << ss.floatDecl("slope_gain") << " = " << s.limit_J_max * g.focus_dist << " * " << getFocusGainName << "(Jx, JMcusp.r);";
+    ss.newLine() << ss.floatDecl("J_intersect_source") << " = " << solveJIntersectName << "(JMh.r, JMh.g, focusJ, slope_gain);";
+    ss.newLine() << ss.floatDecl("gamut_slope") << " = (J_intersect_source < focusJ) ? J_intersect_source : (" << s.limit_J_max << " - J_intersect_source);";
+    ss.newLine() << "gamut_slope = gamut_slope * (J_intersect_source - focusJ) / (focusJ * slope_gain);";
 
-    ss.newLine() << ss.floatDecl("gamma_top") << " = " << gammaName << "_sample(h);";
-    ss.newLine() << ss.floatDecl("gamma_bottom") << " = " << g.lower_hull_gamma << ";";
+    ss.newLine() << ss.floatDecl("gamma_top_inv") << " = JMGcusp.b;";
+    ss.newLine() << ss.floatDecl("gamma_bottom_inv") << " = " << g.lower_hull_gamma_inv << ";"; // TODO move to where it is used
 
-    ss.newLine() << ss.float3Decl("boundaryReturn") << " = " << findGamutBoundaryIntersectionName << "(" << ss.float3Const("J", "M", "h") << ", JMcusp, focusJ, slope_gain, gamma_top, gamma_bottom);";
-    ss.newLine() << ss.float2Decl("JMboundary") << " = " << ss.float2Const("boundaryReturn.r", "boundaryReturn.g") << ";";
-    ss.newLine() << ss.float2Decl("project_to") << " = " << ss.float2Const("boundaryReturn.b", "0.0") << ";";
+    ss.newLine() << ss.floatDecl("J_intersect_cusp") << " = " << solveJIntersectName << "(JMcusp.r, JMcusp.g, focusJ, slope_gain);";
+    ss.newLine() << ss.floatDecl("gamutBoundaryM") << " = " << findGamutBoundaryIntersectionName
+                 << "(JMcusp, gamma_top_inv, gamma_bottom_inv, J_intersect_source, J_intersect_cusp, gamut_slope);";
 
-    ss.newLine() << "if (JMboundary.g <= 0.0)";
+    ss.newLine() << "if (gamutBoundaryM <= 0.0)";
     ss.newLine() << "{";
     ss.indent();
     ss.newLine() << "return " << ss.float3Const("J", "0.0", "h") << ";";
     ss.dedent();
     ss.newLine() << "}";
 
-    ss.newLine() << ss.float3Decl("reachBoundary") << " = " << getReachBoundaryName << "(JMboundary.r, JMboundary.g, h, JMcusp, focusJ);";
-
-    ss.newLine() << ss.floatDecl("difference") << " = max(1.0001, reachBoundary.g / JMboundary.g);";
-    ss.newLine() << ss.floatDecl("threshold") << " = max(" << ACES2::compression_threshold << ", 1.0 / difference);";
+    ss.newLine() << ss.floatDecl("reachBoundaryM") << " = " << s.limit_J_max << " * pow(J_intersect_source / " << s.limit_J_max << ",  " << s.model_gamma_inv << ");";
+    ss.newLine() << "reachBoundaryM = reachBoundaryM / ((" << s.limit_J_max << " / reachMaxM) - gamut_slope);";
 
-    ss.newLine() << ss.floatDecl("v") << " = project_from.g / JMboundary.g;";
-    ss.newLine() << "v = " << compressionName << "(v, threshold, difference);";
+    ss.newLine() << ss.floatDecl("remapped_M") << " = " << compressionName << "(M, gamutBoundaryM, reachBoundaryM);";
+    ss.newLine() << ss.floatDecl("remapped_J") << " = J_intersect_source + remapped_M * gamut_slope;";
 
-    ss.newLine() << ss.float2Decl("JMcompressed") << " = " << ss.float2Const(
-        "project_to.r + v * (JMboundary.r - project_to.r)",
-        "project_to.g + v * (JMboundary.g - project_to.g)"
-    ) << ";";
 
-    ss.newLine() << "return " << ss.float3Const("JMcompressed.r", "JMcompressed.g", "h") << ";";
+    ss.newLine() << "return " << ss.float3Const("remapped_J", "remapped_M", "h") << ";";
 
     ss.dedent();
     ss.newLine() << "}";
@@ -1294,58 +1217,56 @@ void _Add_Gamut_Compress_Fwd_Shader(
     GpuShaderCreatorRcPtr & shaderCreator,
     GpuShaderText & ss,
     unsigned int resourceIndex,
-    const ACES2::GamutCompressParams & g,
-    const std::string & reachName)
+    const ACES2::SharedCompressionParameters & s,
+    const ACES2::GamutCompressParams & g)
 {
     std::string cuspName = _Add_Cusp_table(shaderCreator, resourceIndex, g);
-    std::string gammaName = _Add_Gamma_table(shaderCreator, resourceIndex, g);
-    std::string getFocusGainName = _Add_Focus_Gain_func(shaderCreator, resourceIndex, g);
-    std::string solveJIntersectName = _Add_Solve_J_Intersect_func(shaderCreator, resourceIndex, g);
-    std::string findGamutBoundaryIntersectionName = _Add_Find_Gamut_Boundary_Intersection_func(shaderCreator, resourceIndex, g, solveJIntersectName);
-    std::string getReachBoundaryName = _Add_Reach_Boundary_func(shaderCreator, resourceIndex, g, reachName, getFocusGainName, solveJIntersectName);
+    std::string getFocusGainName = _Add_Focus_Gain_func(shaderCreator, resourceIndex, s);
+    std::string solveJIntersectName = _Add_Solve_J_Intersect_func(shaderCreator, resourceIndex, s);
+    std::string findGamutBoundaryIntersectionName = _Add_Find_Gamut_Boundary_Intersection_func(shaderCreator, resourceIndex, s);
     std::string compressionName = _Add_Compression_func(shaderCreator, resourceIndex, false);
-    std::string gamutCompressName = _Add_Compress_Gamut_func(shaderCreator, resourceIndex, g, cuspName, getFocusGainName, gammaName, findGamutBoundaryIntersectionName, getReachBoundaryName, compressionName);
+    std::string gamutCompressName = _Add_Compress_Gamut_func(shaderCreator, resourceIndex, s, g, getFocusGainName, findGamutBoundaryIntersectionName, compressionName, solveJIntersectName);
 
     const std::string pxl(shaderCreator->getPixelName());
 
-    ss.newLine() << pxl << ".rgb = " << gamutCompressName << "(" << pxl << ".rgb, " << pxl << ".r);";
+    ss.newLine() << ss.float3Decl("JMGcusp") << " = " << cuspName << "_sample(" << pxl << ".b);";
+    ss.newLine() << pxl << ".rgb = " << gamutCompressName << "(" << pxl << ".rgb, " << pxl << ".r, JMGcusp, reachMaxM);";
 }
 
 void _Add_Gamut_Compress_Inv_Shader(
     GpuShaderCreatorRcPtr & shaderCreator,
     GpuShaderText & ss,
     unsigned int resourceIndex,
-    const ACES2::GamutCompressParams & g,
-    const std::string & reachName)
+    const ACES2::SharedCompressionParameters & s,
+    const ACES2::GamutCompressParams & g)
 {
     std::string cuspName = _Add_Cusp_table(shaderCreator, resourceIndex, g);
-    std::string gammaName = _Add_Gamma_table(shaderCreator, resourceIndex, g);
-    std::string getFocusGainName = _Add_Focus_Gain_func(shaderCreator, resourceIndex, g);
-    std::string solveJIntersectName = _Add_Solve_J_Intersect_func(shaderCreator, resourceIndex, g);
-    std::string findGamutBoundaryIntersectionName = _Add_Find_Gamut_Boundary_Intersection_func(shaderCreator, resourceIndex, g, solveJIntersectName);
-    std::string getReachBoundaryName = _Add_Reach_Boundary_func(shaderCreator, resourceIndex, g, reachName, getFocusGainName, solveJIntersectName);
+    std::string getFocusGainName = _Add_Focus_Gain_func(shaderCreator, resourceIndex, s);
+    std::string solveJIntersectName = _Add_Solve_J_Intersect_func(shaderCreator, resourceIndex, s);
+    std::string findGamutBoundaryIntersectionName = _Add_Find_Gamut_Boundary_Intersection_func(shaderCreator, resourceIndex, s);
     std::string compressionName = _Add_Compression_func(shaderCreator, resourceIndex, true);
-    std::string gamutCompressName = _Add_Compress_Gamut_func(shaderCreator, resourceIndex, g, cuspName, getFocusGainName, gammaName, findGamutBoundaryIntersectionName, getReachBoundaryName, compressionName);
+    std::string gamutCompressName = _Add_Compress_Gamut_func(shaderCreator, resourceIndex, s, g, getFocusGainName, findGamutBoundaryIntersectionName, compressionName, solveJIntersectName);
 
     const std::string pxl(shaderCreator->getPixelName());
 
-    ss.newLine() << ss.float2Decl("JMcusp") << " = " << cuspName << "_sample(" << pxl << ".b);";
+    ss.newLine() << ss.float3Decl("JMGcusp") << " = " << cuspName << "_sample(" << pxl << ".b);";
+
     ss.newLine() << ss.floatDecl("Jx") << " = " << pxl << ".r;";
     ss.newLine() << ss.float3Decl("unCompressedJMh") << ";";
 
     // Analytic inverse below threshold
-    ss.newLine() << "if (Jx <= " << ss.lerp("JMcusp.r", std::to_string(g.limit_J_max), std::to_string(ACES2::focus_gain_blend)) << ")";
+    ss.newLine() << "if (Jx <= " << ss.lerp("JMGcusp.r", std::to_string(s.limit_J_max), std::to_string(ACES2::focus_gain_blend)) << ")";
     ss.newLine() << "{";
     ss.indent();
-    ss.newLine() << "unCompressedJMh = " << gamutCompressName << "(" << pxl << ".rgb, Jx);";
+    ss.newLine() << "unCompressedJMh = " << gamutCompressName << "(" << pxl << ".rgb, Jx, JMGcusp, reachMaxM);";
     ss.dedent();
     ss.newLine() << "}";
     // Approximation above threshold
     ss.newLine() << "else";
     ss.newLine() << "{";
     ss.indent();
-    ss.newLine() << "Jx = " << gamutCompressName << "(" << pxl << ".rgb, Jx).r;";
-    ss.newLine() << "unCompressedJMh = " << gamutCompressName << "(" << pxl << ".rgb, Jx);";
+    ss.newLine() << "Jx = " << gamutCompressName << "(" << pxl << ".rgb, Jx, JMGcusp, reachMaxM).r;";
+    ss.newLine() << "unCompressedJMh = " << gamutCompressName << "(" << pxl << ".rgb, Jx, JMGcusp, reachMaxM);";
     ss.dedent();
     ss.newLine() << "}";
 
@@ -1368,8 +1289,6 @@ void Add_ACES_OutputTransform_Fwd_Shader(
     const float white_x = (float) params[7];
     const float white_y = (float) params[8];
 
-    const Primaries in_primaries = ACES_AP0::primaries;
-
     const Primaries lim_primaries = {
         {red_x  , red_y  },
         {green_x, green_y},
@@ -1377,31 +1296,41 @@ void Add_ACES_OutputTransform_Fwd_Shader(
         {white_x, white_y}
     };
 
-    ACES2::JMhParams pIn = ACES2::init_JMhParams(in_primaries);
-    ACES2::JMhParams pLim = ACES2::init_JMhParams(lim_primaries);
-    ACES2::ToneScaleParams t = ACES2::init_ToneScaleParams(peak_luminance);
-    ACES2::ChromaCompressParams c = ACES2::init_ChromaCompressParams(peak_luminance);
-    const ACES2::GamutCompressParams g = ACES2::init_GamutCompressParams(peak_luminance, lim_primaries);
+    const ACES2::JMhParams pIn = ACES2::init_JMhParams(ACES_AP0::primaries);
+    const ACES2::JMhParams pLim = ACES2::init_JMhParams(lim_primaries);
+    const ACES2::ToneScaleParams t = ACES2::init_ToneScaleParams(peak_luminance);
+    const ACES2::JMhParams reachGamut = ACES2::init_JMhParams(ACES_AP1::primaries);
+    const ACES2::SharedCompressionParameters s = ACES2::init_SharedCompressionParams(peak_luminance, pIn, reachGamut);
+    const ACES2::ChromaCompressParams c = ACES2::init_ChromaCompressParams(peak_luminance, t);
+    const ACES2::GamutCompressParams g = ACES2::init_GamutCompressParams(peak_luminance, pIn, pLim, t, s, reachGamut);
 
     unsigned resourceIndex = shaderCreator->getNextResourceIndex();
 
-    std::string reachName = _Add_Reach_table(shaderCreator, resourceIndex, g.reach_m_table);
+    const std::string reachName = _Add_Reach_table(shaderCreator, resourceIndex, s.reach_m_table);
+    const std::string tonescaleName_Fwd = _Add_Tonescale_func(shaderCreator, resourceIndex, false, pIn, t);
+    const std::string pxl(shaderCreator->getPixelName());
 
     ss.newLine() << "";
     ss.newLine() << "// Add RGB to JMh";
     ss.newLine() << "";
-    ss.newLine() << "{";
-    ss.indent();
-        _Add_RGB_to_JMh_Shader(shaderCreator, ss, pIn);
-    ss.dedent();
-    ss.newLine() << "}";
+    _Add_RGB_to_JMh_Shader(shaderCreator, ss, pIn);
+    _Add_SinCos_Shader(shaderCreator, ss);
+
 
     ss.newLine() << "";
     ss.newLine() << "// Add ToneScale and ChromaCompress (fwd)";
     ss.newLine() << "";
+
+    ss.newLine() << ss.floatDecl("J_ts") << " = " << tonescaleName_Fwd << "(" << pxl << ".r);";
+
+    ss.newLine() << "// Sample tables (fwd)";
+    ss.newLine() << ss.floatDecl("reachMaxM") << " = " << reachName << "_sample(" << pxl << ".b);";
+
+    ss.newLine() << "";
+
     ss.newLine() << "{";
     ss.indent();
-        _Add_Tonescale_Compress_Fwd_Shader(shaderCreator, ss, resourceIndex, pIn, t, c, reachName);
+        _Add_Tonescale_Compress_Fwd_Shader(shaderCreator, ss, resourceIndex, s, c);
     ss.dedent();
     ss.newLine() << "}";
 
@@ -1410,7 +1339,7 @@ void Add_ACES_OutputTransform_Fwd_Shader(
     ss.newLine() << "";
     ss.newLine() << "{";
     ss.indent();
-        _Add_Gamut_Compress_Fwd_Shader(shaderCreator, ss, resourceIndex, g, reachName);
+        _Add_Gamut_Compress_Fwd_Shader(shaderCreator, ss, resourceIndex, s, g);
     ss.dedent();
     ss.newLine() << "}";
 
@@ -1441,8 +1370,6 @@ void Add_ACES_OutputTransform_Inv_Shader(
     const float white_x = (float) params[7];
     const float white_y = (float) params[8];
 
-    const Primaries in_primaries = ACES_AP0::primaries;
-
     const Primaries lim_primaries = {
         {red_x  , red_y  },
         {green_x, green_y},
@@ -1450,40 +1377,44 @@ void Add_ACES_OutputTransform_Inv_Shader(
         {white_x, white_y}
     };
 
-    ACES2::JMhParams pIn = ACES2::init_JMhParams(in_primaries);
-    ACES2::JMhParams pLim = ACES2::init_JMhParams(lim_primaries);
-    ACES2::ToneScaleParams t = ACES2::init_ToneScaleParams(peak_luminance);
-    ACES2::ChromaCompressParams c = ACES2::init_ChromaCompressParams(peak_luminance);
-    const ACES2::GamutCompressParams g = ACES2::init_GamutCompressParams(peak_luminance, lim_primaries);
+    const ACES2::JMhParams pIn = ACES2::init_JMhParams(ACES_AP0::primaries);
+    const ACES2::JMhParams pLim = ACES2::init_JMhParams(lim_primaries);
+    const ACES2::ToneScaleParams t = ACES2::init_ToneScaleParams(peak_luminance);
+    const ACES2::JMhParams reachGamut = ACES2::init_JMhParams(ACES_AP1::primaries);
+    const ACES2::SharedCompressionParameters s = ACES2::init_SharedCompressionParams(peak_luminance, pIn, reachGamut);
+    const ACES2::ChromaCompressParams c = ACES2::init_ChromaCompressParams(peak_luminance, t);
+    const ACES2::GamutCompressParams g = ACES2::init_GamutCompressParams(peak_luminance, pIn, pLim, t, s, reachGamut);
 
     unsigned resourceIndex = shaderCreator->getNextResourceIndex();
-
-    std::string reachName = _Add_Reach_table(shaderCreator, resourceIndex, c.reach_m_table);
+    const std::string pxl(shaderCreator->getPixelName());
+    
+    std::string reachName = _Add_Reach_table(shaderCreator, resourceIndex, s.reach_m_table);
+    std::string tonescaleName_Inv = _Add_Tonescale_func(shaderCreator, resourceIndex, true, pIn, t);
 
     ss.newLine() << "";
     ss.newLine() << "// Add RGB to JMh";
     ss.newLine() << "";
-    ss.newLine() << "{";
-    ss.indent();
-        _Add_RGB_to_JMh_Shader(shaderCreator, ss, pLim);
-    ss.dedent();
-    ss.newLine() << "}";
+    _Add_RGB_to_JMh_Shader(shaderCreator, ss, pLim);
+    _Add_SinCos_Shader(shaderCreator, ss);
 
+
+    ss.newLine() << ss.floatDecl("reachMaxM") << " = " << reachName << "_sample(" << pxl << ".b);";
     ss.newLine() << "";
     ss.newLine() << "// Add GamutCompress (inv)";
     ss.newLine() << "";
     ss.newLine() << "{";
     ss.indent();
-        _Add_Gamut_Compress_Inv_Shader(shaderCreator, ss, resourceIndex, g, reachName);
+        _Add_Gamut_Compress_Inv_Shader(shaderCreator, ss, resourceIndex, s, g);
     ss.dedent();
     ss.newLine() << "}";
 
     ss.newLine() << "";
     ss.newLine() << "// Add ToneScale and ChromaCompress (inv)";
     ss.newLine() << "";
+    ss.newLine() << ss.floatDecl("J") << " = " << tonescaleName_Inv << "(" << pxl << ".r);";
     ss.newLine() << "{";
     ss.indent();
-        _Add_Tonescale_Compress_Inv_Shader(shaderCreator, ss, resourceIndex, pIn, t, c, reachName);
+        _Add_Tonescale_Compress_Inv_Shader(shaderCreator, ss, resourceIndex, s, c);
     ss.dedent();
     ss.newLine() << "}";
 
@@ -1544,7 +1475,8 @@ void Add_JMh_to_RGB_Shader(
     };
 
     ACES2::JMhParams p = ACES2::init_JMhParams(primaries);
-
+    _Add_WrapHueChannel_Shader(shaderCreator, ss);
+    _Add_SinCos_Shader(shaderCreator, ss);
     _Add_JMh_to_RGB_Shader(shaderCreator, ss, p);
 }
 
@@ -1555,15 +1487,25 @@ void Add_Tonescale_Compress_Fwd_Shader(
 {
     const float peak_luminance = (float) params[0];
 
-    ACES2::JMhParams p = ACES2::init_JMhParams(ACES_AP0::primaries);
-    ACES2::ToneScaleParams t = ACES2::init_ToneScaleParams(peak_luminance);
-    ACES2::ChromaCompressParams c = ACES2::init_ChromaCompressParams(peak_luminance);
+    const ACES2::JMhParams p = ACES2::init_JMhParams(ACES_AP0::primaries);
+    const ACES2::ToneScaleParams t = ACES2::init_ToneScaleParams(peak_luminance);
+    const ACES2::JMhParams reachGamut = ACES2::init_JMhParams(ACES_AP1::primaries);
+    const ACES2::SharedCompressionParameters s = ACES2::init_SharedCompressionParams(peak_luminance, p, reachGamut);
+    const ACES2::ChromaCompressParams c = ACES2::init_ChromaCompressParams(peak_luminance, t);
 
     unsigned resourceIndex = shaderCreator->getNextResourceIndex();
+    const std::string pxl(shaderCreator->getPixelName());
 
-    std::string reachName = _Add_Reach_table(shaderCreator, resourceIndex, c.reach_m_table);
+    const std::string reachName = _Add_Reach_table(shaderCreator, resourceIndex, s.reach_m_table);
+    const std::string tonescaleName_Fwd = _Add_Tonescale_func(shaderCreator, resourceIndex, false, p, t);
 
-    _Add_Tonescale_Compress_Fwd_Shader(shaderCreator, ss, resourceIndex, p, t, c, reachName);
+    _Add_WrapHueChannel_Shader(shaderCreator, ss);
+    _Add_SinCos_Shader(shaderCreator, ss);
+
+    ss.newLine() << ss.floatDecl("reachMaxM") << " = " << reachName << "_sample(" << pxl << ".b);";
+    ss.newLine() << ss.floatDecl("J_ts") << " = " << tonescaleName_Fwd << "(" << pxl << ".r);";
+
+    _Add_Tonescale_Compress_Fwd_Shader(shaderCreator, ss, resourceIndex, s, c);
 }
 
 void Add_Tonescale_Compress_Inv_Shader(
@@ -1573,15 +1515,25 @@ void Add_Tonescale_Compress_Inv_Shader(
 {
     const float peak_luminance = (float) params[0];
 
-    ACES2::JMhParams p = ACES2::init_JMhParams(ACES_AP0::primaries);
-    ACES2::ToneScaleParams t = ACES2::init_ToneScaleParams(peak_luminance);
-    ACES2::ChromaCompressParams c = ACES2::init_ChromaCompressParams(peak_luminance);
+    const ACES2::JMhParams p = ACES2::init_JMhParams(ACES_AP0::primaries);
+    const ACES2::ToneScaleParams t = ACES2::init_ToneScaleParams(peak_luminance);
+    const ACES2::JMhParams reachGamut = ACES2::init_JMhParams(ACES_AP1::primaries);
+    const ACES2::SharedCompressionParameters s = ACES2::init_SharedCompressionParams(peak_luminance, p, reachGamut);
+    const ACES2::ChromaCompressParams c = ACES2::init_ChromaCompressParams(peak_luminance, t);
 
     unsigned resourceIndex = shaderCreator->getNextResourceIndex();
+    const std::string pxl(shaderCreator->getPixelName());
 
-    std::string reachName = _Add_Reach_table(shaderCreator, resourceIndex, c.reach_m_table);
+    const std::string reachName = _Add_Reach_table(shaderCreator, resourceIndex, s.reach_m_table);
+    std::string tonescaleName_Inv = _Add_Tonescale_func(shaderCreator, resourceIndex, true, p, t);
 
-    _Add_Tonescale_Compress_Inv_Shader(shaderCreator, ss, resourceIndex, p, t, c, reachName);
+    _Add_WrapHueChannel_Shader(shaderCreator, ss);
+    _Add_SinCos_Shader(shaderCreator, ss);
+
+    ss.newLine() << ss.floatDecl("reachMaxM") << " = " << reachName << "_sample(" << pxl << ".b);";
+    ss.newLine() << ss.floatDecl("J") << " = " << tonescaleName_Inv << "(" << pxl << ".r);";
+
+    _Add_Tonescale_Compress_Inv_Shader(shaderCreator, ss, resourceIndex, s, c);
 }
 
 void Add_Gamut_Compress_Fwd_Shader(
@@ -1607,13 +1559,24 @@ void Add_Gamut_Compress_Fwd_Shader(
         {white_x, white_y}
     };
 
-    const ACES2::GamutCompressParams g = ACES2::init_GamutCompressParams(peak_luminance, primaries);
+    const ACES2::JMhParams pIn = ACES2::init_JMhParams(ACES_AP0::primaries);
+    const ACES2::JMhParams pLim = ACES2::init_JMhParams(primaries);
+    const ACES2::ToneScaleParams t = ACES2::init_ToneScaleParams(peak_luminance);
+    const ACES2::JMhParams reachGamut = ACES2::init_JMhParams(ACES_AP1::primaries);
+    const ACES2::SharedCompressionParameters s = ACES2::init_SharedCompressionParams(peak_luminance, pIn, reachGamut);
+    const ACES2::GamutCompressParams g = ACES2::init_GamutCompressParams(peak_luminance, pIn, pLim, t, s, reachGamut); 
 
     unsigned resourceIndex = shaderCreator->getNextResourceIndex();
+    const std::string pxl(shaderCreator->getPixelName());
 
-    std::string reachName = _Add_Reach_table(shaderCreator, resourceIndex, g.reach_m_table);
+    const std::string reachName = _Add_Reach_table(shaderCreator, resourceIndex, s.reach_m_table);
 
-    _Add_Gamut_Compress_Fwd_Shader(shaderCreator, ss, resourceIndex, g, reachName);
+    _Add_WrapHueChannel_Shader(shaderCreator, ss);
+    _Add_SinCos_Shader(shaderCreator, ss);
+
+    ss.newLine() << ss.floatDecl("reachMaxM") << " = " << reachName << "_sample(" << pxl << ".b);";
+
+    _Add_Gamut_Compress_Fwd_Shader(shaderCreator, ss, resourceIndex, s, g);
 }
 
 void Add_Gamut_Compress_Inv_Shader(
@@ -1639,13 +1602,24 @@ void Add_Gamut_Compress_Inv_Shader(
         {white_x, white_y}
     };
 
-    const ACES2::GamutCompressParams g = ACES2::init_GamutCompressParams(peak_luminance, primaries);
+    const ACES2::JMhParams pIn = ACES2::init_JMhParams(ACES_AP0::primaries);
+    const ACES2::JMhParams pLim = ACES2::init_JMhParams(primaries);
+    const ACES2::ToneScaleParams t = ACES2::init_ToneScaleParams(peak_luminance);
+    const ACES2::JMhParams reachGamut = ACES2::init_JMhParams(ACES_AP1::primaries);
+    const ACES2::SharedCompressionParameters s = ACES2::init_SharedCompressionParams(peak_luminance, pIn, reachGamut);
+    const ACES2::GamutCompressParams g = ACES2::init_GamutCompressParams(peak_luminance, pIn, pLim, t, s, reachGamut);
 
     unsigned resourceIndex = shaderCreator->getNextResourceIndex();
+    const std::string pxl(shaderCreator->getPixelName());
+
+    const std::string reachName = _Add_Reach_table(shaderCreator, resourceIndex, s.reach_m_table);
+
+    _Add_WrapHueChannel_Shader(shaderCreator, ss);
+    _Add_SinCos_Shader(shaderCreator, ss);
 
-    std::string reachName = _Add_Reach_table(shaderCreator, resourceIndex, g.reach_m_table);
+    ss.newLine() << ss.floatDecl("reachMaxM") << " = " << reachName << "_sample(" << pxl << ".b);";
 
-    _Add_Gamut_Compress_Inv_Shader(shaderCreator, ss, resourceIndex, g, reachName);
+    _Add_Gamut_Compress_Inv_Shader(shaderCreator, ss, resourceIndex, s, g);
 }
 
 void Add_Surround_10_Fwd_Shader(GpuShaderCreatorRcPtr & shaderCreator, GpuShaderText & ss, float gamma)
diff --git a/src/apps/ocioconvert/main.cpp b/src/apps/ocioconvert/main.cpp
index 60305e06ec..e8071c76fa 100644
--- a/src/apps/ocioconvert/main.cpp
+++ b/src/apps/ocioconvert/main.cpp
@@ -50,6 +50,7 @@ int main(int argc, const char **argv)
     std::vector<std::string> stringAttrs;
 
     std::string outputDepth;
+    std::string inputconfig;
 
     bool usegpu                 = false;
     bool usegpuLegacy           = false;
@@ -85,6 +86,7 @@ int main(int argc, const char **argv)
                "--h",                   &help,                  "Display the help and exit",
                "--help",                &help,                  "Display the help and exit",
                "-v" ,                   &verbose,               "Display general information",
+              "--iconfig %s",           &inputconfig,           "Input .ocio configuration file (default: $OCIO)",
                "<SEPARATOR>", "\nOpenImageIO or OpenEXR options:",
                "--bitdepth %s",         &outputDepth,  "Output image bitdepth",
                "--float-attribute %L",  &floatAttrs,   "\"name=float\" pair defining OIIO float attribute "
@@ -266,34 +268,51 @@ int main(int argc, const char **argv)
         outputimage     = args[2].c_str();
     }
 
+    // Load the current config.
+    OCIO::ConstConfigRcPtr config;
+    
+    try
+    {
+        if (useLut)
+        {
+            config = OCIO::Config::CreateRaw();
+        }
+        else if (!inputconfig.empty() )
+        {
+            config = OCIO::Config::CreateFromFile(inputconfig.c_str());
+        }
+        else
+        {
+            const char * env = OCIO::GetEnvVariable("OCIO");
+            if (env && *env)
+                inputconfig = env;
+            config = OCIO::GetCurrentConfig();
+        }
+    }
+    catch (const OCIO::Exception & e)
+    {
+        std::cout << "ERROR loading config file: " << e.what() << std::endl;
+        exit(1);
+    }
+    catch (...)
+    {
+
+        std::cerr << "ERROR loading config file: '" << inputconfig << "'" << std::endl;
+        exit(1);
+    }
+
     if (verbose)
     {
         std::cout << std::endl;
         std::cout << OCIO::ImageIO::GetVersion() << std::endl;
         std::cout << "OCIO Version: " << OCIO::GetVersion() << std::endl;
-        const char * env = OCIO::GetEnvVariable("OCIO");
-        if (env && *env && !useLut)
+        if (!useLut)
         {
-            try
-            {
-                std::cout << std::endl;
-                std::cout << "OCIO Config. file:    '" << env << "'" << std::endl;
-                OCIO::ConstConfigRcPtr config = OCIO::GetCurrentConfig();
-                std::cout << "OCIO Config. version: " << config->getMajorVersion() << "." 
-                                                      << config->getMinorVersion() << std::endl;
-                std::cout << "OCIO search_path:     " << config->getSearchPath() << std::endl;
-            }
-            catch (const OCIO::Exception & e)
-            {
-                std::cout << "ERROR loading config file: " << e.what() << std::endl;
-                exit(1);
-            }
-            catch (...)
-            {
-
-                std::cerr << "ERROR loading config file: '" << env << "'" << std::endl;
-                exit(1);
-            }
+            std::cout << std::endl;
+            std::cout << "OCIO Config. file:    '" << inputconfig << "'" << std::endl;
+            std::cout << "OCIO Config. version: " << config->getMajorVersion() << "." 
+                                                  << config->getMinorVersion() << std::endl;
+            std::cout << "OCIO search_path:     " << config->getSearchPath() << std::endl;
         }
     }
 
@@ -388,10 +407,6 @@ int main(int argc, const char **argv)
     // Process the image.
     try
     {
-        // Load the current config.
-        OCIO::ConstConfigRcPtr config
-            = useLut ? OCIO::Config::CreateRaw() : OCIO::GetCurrentConfig();
-
         // Get the processor.
         OCIO::ConstProcessorRcPtr processor;
 
@@ -637,7 +652,6 @@ int main(int argc, const char **argv)
     {
         if (useDisplayView)
         {
-            OCIO::ConstConfigRcPtr config = OCIO::GetCurrentConfig();
             outputcolorspace = config->getDisplayViewColorSpaceName(display, view);
         }
 
diff --git a/src/apps/ocioperf/main.cpp b/src/apps/ocioperf/main.cpp
index a3af75a587..4c7837494f 100644
--- a/src/apps/ocioperf/main.cpp
+++ b/src/apps/ocioperf/main.cpp
@@ -24,7 +24,6 @@ class CustomMeasure
 
     explicit CustomMeasure(const char * explanation)
         :   m_explanations(explanation)
-        ,   m_iterations(1)
     {
         resume();
     }
@@ -183,6 +182,10 @@ int main(int argc, const char **argv)
     bool useDisplayview = false;
     bool useInvertview  = false;
 
+    std::string inputconfig;
+    OCIO::ConstConfigRcPtr srcConfig;
+
+
     ArgParse ap;
     ap.options("ocioperf -- apply and measure a color transformation processing\n\n"
                "usage: ocioperf [options] --transform /path/to/file.clf\n\n",
@@ -206,6 +209,8 @@ int main(int argc, const char **argv)
                                             "(Deprecated) Provide the input and (display, view) pair to apply on the image",
                "--invertview %s %s %s",     &display, &view, &outColorSpace,
                                             "Provide the (display, view) pair and output color space to apply on the image",
+               "--iconfig %s",              &inputconfig,
+                                            "Input .ocio configuration file (default: $OCIO)",
                "--iter %d",                 &iterations, "Provide the number of iterations on the processing. Default is 50",
                "--bitdepths %s %s",         &inBitDepthStr, &outBitDepthStr,
                                             "Provide input and output bit-depths (i.e. ui16, f32). Default is f32",
@@ -232,25 +237,6 @@ int main(int argc, const char **argv)
     {
         std::cout << std::endl;
         std::cout << "OCIO Version: " << OCIO::GetVersion() << std::endl;
-        const char * env = OCIO::GetEnvVariable("OCIO");
-        if(env && *env)
-        {
-            try
-            {
-                std::cout << std::endl;
-                std::cout << "OCIO Config. file:    '" << env << "'" << std::endl;
-                OCIO::ConstConfigRcPtr config = OCIO::GetCurrentConfig();
-                std::cout << "OCIO Config. version: " << config->getMajorVersion() << "." 
-                                                      << config->getMinorVersion() << std::endl;
-                std::cout << "OCIO search_path:     " << config->getSearchPath() << std::endl;
-            }
-            catch(...)
-            {
-
-                std::cerr << "ERROR: Error loading the config file: '" << env << "'";
-                return 1;
-            }
-        }
     }
 
     if (!transformFile.empty())
@@ -258,7 +244,7 @@ int main(int argc, const char **argv)
         std::cout << std::endl;
         std::cout << "Processing using '" << transformFile << "'" << std::endl << std::endl;
     }
-
+    
     std::cout << std::endl << std::endl;
     std::cout << "Processing statistics:" << std::endl << std::endl;
 
@@ -296,25 +282,38 @@ int main(int argc, const char **argv)
         // Checking for an input colorspace or input (display, view) pair.
         else if (!inColorSpace.empty() || (!display.empty() && !view.empty()))
         {
+            if(!inputconfig.empty())
+            {
+                std::cout << std::endl;
+                std::cout << "Loading " << inputconfig << std::endl;
+                srcConfig = OCIO::Config::CreateFromFile(inputconfig.c_str());
+            }
+            else if(OCIO::GetEnvVariable("OCIO"))
+            {
+                std::cout << std::endl;
+                std::cout << "Loading $OCIO " << OCIO::GetEnvVariable("OCIO") << std::endl;
+                srcConfig = OCIO::Config::CreateFromEnv();
+            }
+            else
+            {
+                throw OCIO::Exception("You must specify an input OCIO configuration (either with --iconfig or $OCIO).\n");
+            }
+
             if (verbose)
             {
-                const char * env = OCIO::GetEnvVariable("OCIO");
-                if (env && *env)
-                {
-                    std::cout << std::endl;
-                    const std::string inputStr = !inColorSpace.empty() ?  inColorSpace : "(" + display + ", " + view + ")";
-                    const std::string outputStr = !outColorSpace.empty() ?  outColorSpace : "(" + display + ", " + view + ")";
-                    std::cout << "Processing from '" 
-                              << inputStr << "' to '"
-                              << outputStr << "'" << std::endl;
-                }
-                else
-                {
-                    throw OCIO::Exception("Missing the ${OCIO} env. variable.");
-                }
+                std::cout << std::endl;
+                std::cout << "OCIO Config. version: " << srcConfig->getMajorVersion() << "." 
+                                                      << srcConfig->getMinorVersion() << std::endl;
+                std::cout << "OCIO search_path:     " << srcConfig->getSearchPath() << std::endl;
+                std::cout << std::endl;
+                const std::string inputStr = !inColorSpace.empty() ?  inColorSpace : "(" + display + ", " + view + ")";
+                const std::string outputStr = !outColorSpace.empty() ?  outColorSpace : "(" + display + ", " + view + ")";
+                std::cout << "Processing from '" 
+                          << inputStr << "' to '"
+                          << outputStr << "'" << std::endl;
             }
 
-            OCIO::ConfigRcPtr config  = OCIO::Config::CreateFromEnv()->createEditableCopy();
+            OCIO::ConfigRcPtr config  = srcConfig->createEditableCopy();
             config->setProcessorCacheFlags(nocache ? OCIO::PROCESSOR_CACHE_OFF 
                                                    : OCIO::PROCESSOR_CACHE_DEFAULT);
 
diff --git a/tests/cpu/UnitTestUtils.h b/tests/cpu/UnitTestUtils.h
index bbdac35549..37b584e88c 100644
--- a/tests/cpu/UnitTestUtils.h
+++ b/tests/cpu/UnitTestUtils.h
@@ -79,7 +79,8 @@ template<typename T>
 inline bool EqualWithSafeRelError(T value,
                                   T expected,
                                   T eps,
-                                  T minExpected)
+                                  T minExpected,
+                                  T* computedError = nullptr)
 {
     // If value and expected are infinity, return true.
     if (value == expected) return true;
@@ -88,9 +89,14 @@ inline bool EqualWithSafeRelError(T value,
         ((expected < minExpected) ? minExpected : expected) :
         ((-expected < minExpected) ? minExpected : -expected);
 
-    return (
-        ((value > expected) ? value - expected : expected - value)
-        / div) <= eps;
+    T err = ((value > expected) ? value - expected : expected - value) / div;
+
+    if (computedError) 
+    {
+        *computedError = err;
+    }
+    
+    return (err <= eps);
 }
 
 // C++20 introduces new strongly typed, UTF-8 based, char8_t and u8string types
diff --git a/tests/cpu/ops/fixedfunction/FixedFunctionOpCPU_tests.cpp b/tests/cpu/ops/fixedfunction/FixedFunctionOpCPU_tests.cpp
index 1aa7dbc463..008264582e 100644
--- a/tests/cpu/ops/fixedfunction/FixedFunctionOpCPU_tests.cpp
+++ b/tests/cpu/ops/fixedfunction/FixedFunctionOpCPU_tests.cpp
@@ -33,17 +33,22 @@ void ApplyFixedFunction(float * input_32f,
         // Using rel error with a large minExpected value of 1 will transition
         // from absolute error for expected values < 1 and
         // relative error for values > 1.
+        float computedError = 0.0f;
         const bool equalRel = OCIO::EqualWithSafeRelError(input_32f[idx],
                                                           expected_32f[idx],
                                                           errorThreshold,
-                                                          1.0f);
+                                                          1.0f,
+                                                          &computedError);
         if (!equalRel)
         {
             std::ostringstream errorMsg;
             errorMsg.precision(14);
             errorMsg << "Index: " << idx;
             errorMsg << " - Values: " << input_32f[idx] << " expected: " << expected_32f[idx];
-            errorMsg << " - Threshold: " << errorThreshold;
+            errorMsg << " - Error: " << computedError << " ("
+                     << std::setprecision(3) << computedError / errorThreshold;
+            errorMsg << "x of Threshold: "
+                     << std::setprecision(6) << errorThreshold << ")";
             OCIO_CHECK_ASSERT_MESSAGE_FROM(0, errorMsg.str(), lineNo);
         }
     }
@@ -462,45 +467,45 @@ OCIO_ADD_TEST(FixedFunctionOpCPU, aces_output_transform_20)
 
     const float expected_32f[num_samples*4] = {
         // ACEScg primaries and secondaries scaled by 4
-        4.965774059f, -0.032864563f, 0.041625995f, 1.0f,
-        3.969441891f, 3.825784922f, -0.056133576f, 1.0f,
-        -0.075329021f, 3.688980103f, 0.270296901f, 1.0f,
-        -0.095423937f, 3.650517225f, 3.459972620f, 1.0f,
-        -0.028930068f, 0.196428135f, 2.796343565f, 1.0f,
-        4.900805950f, -0.064376131f, 3.838256121f, 1.0f,
+        4.966013432f, -0.033002287f, 0.041583523f, 1.0f,
+        3.969460726f, 3.825797558f, -0.056160748f, 1.0f,
+        -0.075460039f, 3.689072609f, 0.270235062f, 1.0f,
+        -0.095436633f, 3.650521517f, 3.459975719f, 1.0f,
+        -0.028881177f, 0.196473420f, 2.796123743f, 1.0f,
+        4.900828362f, -0.064385533f, 3.838270903f, 1.0f,
         // OCIO test values
-        0.096890204f, -0.001135312f, 0.018971510f, 0.5f,
-        0.809614301f, 0.479856580f, 0.814239502f, 1.0f,
-        0.107420206f, 0.920529068f, 0.726378500f, 0.0f,
+        0.096890487f, -0.001135427f, 0.018971475f, 0.5f,
+        0.809613585f, 0.479857147f, 0.814239979f, 1.0f,
+        0.107417941f, 0.920530438f, 0.726379037f, 0.0f,
         // ColorChecker24 (SMPTE 2065-1 2021)
-        0.115475260f, 0.050812904f, 0.030212952f, 1.0f,
-        0.484879673f, 0.301042974f, 0.226768956f, 1.0f,
-        0.098463766f, 0.160814658f, 0.277010560f, 1.0f,
-        0.071130365f, 0.107334383f, 0.035097566f, 1.0f,
-        0.207111493f, 0.198474824f, 0.375326216f, 1.0f,
-        0.195447892f, 0.481111974f, 0.393299013f, 1.0f,
-        0.571913838f, 0.196872935f, 0.041634772f, 1.0f,
-        0.045791931f, 0.069875360f, 0.291233480f, 1.0f,
-        0.424848706f, 0.083199009f, 0.102153838f, 1.0f,
-        0.059589427f, 0.022219172f, 0.091246888f, 1.0f,
-        0.360365510f, 0.478741467f, 0.086726837f, 1.0f,
-        0.695662081f, 0.371994525f, 0.068298168f, 1.0f,
-        0.011806309f, 0.021665439f, 0.199594811f, 1.0f,
-        0.076526314f, 0.256237417f, 0.060564656f, 1.0f,
-        0.300064564f, 0.023416257f, 0.030360471f, 1.0f,
-        0.805484772f, 0.596903503f, 0.082996152f, 1.0f,
-        0.388385952f, 0.079899102f, 0.245819211f, 1.0f,
-        0.010952532f, 0.196105912f, 0.307181358f, 1.0f,
-        0.921019495f, 0.921707213f, 0.912856042f, 1.0f,
-        0.590192318f, 0.588423848f, 0.587825358f, 1.0f,
-        0.337743521f, 0.337685764f, 0.338155121f, 1.0f,
-        0.169265985f, 0.169178501f, 0.169557109f, 1.0f,
-        0.058346048f, 0.059387825f, 0.060296260f, 1.0f,
-        0.012581184f, 0.012947139f, 0.013654195f, 1.0f,
+        0.115475342f, 0.050812997f, 0.030212998f, 1.0f,
+        0.484880149f, 0.301042914f, 0.226769030f, 1.0f,
+        0.098463453f, 0.160814837f, 0.277010798f, 1.0f,
+        0.071130276f, 0.107334509f, 0.035097614f, 1.0f,
+        0.207111374f, 0.198474824f, 0.375326097f, 1.0f,
+        0.195447117f, 0.481112540f, 0.393299103f, 1.0f,
+        0.571913302f, 0.196873263f, 0.041634843f, 1.0f,
+        0.045791976f, 0.069875412f, 0.291233569f, 1.0f,
+        0.424848884f, 0.083199054f, 0.102153927f, 1.0f,
+        0.059589352f, 0.022219239f, 0.091246955f, 1.0f,
+        0.360364884f, 0.478741497f, 0.086726815f, 1.0f,
+        0.695661962f, 0.371994466f, 0.068298057f, 1.0f,
+        0.011806240f, 0.021665439f, 0.199594870f, 1.0f,
+        0.076526135f, 0.256237596f, 0.060564563f, 1.0f,
+        0.300064713f, 0.023416281f, 0.030360531f, 1.0f,
+        0.805483222f, 0.596904039f, 0.082996234f, 1.0f,
+        0.388385385f, 0.079899333f, 0.245818958f, 1.0f,
+        0.010951802f, 0.196106046f, 0.307181537f, 1.0f,
+        0.921020269f, 0.921707630f, 0.912857533f, 1.0f,
+        0.590191603f, 0.588424563f, 0.587825298f, 1.0f,
+        0.337743223f, 0.337686002f, 0.338155240f, 1.0f,
+        0.169266403f, 0.169178575f, 0.169557154f, 1.0f,
+        0.058346011f, 0.059387885f, 0.060296256f, 1.0f,
+        0.012581199f, 0.012947144f, 0.013654212f, 1.0f,
         // Spectrally non-selective 18 % reflecting diffuser
-        0.145115465f, 0.145115525f, 0.145115510f, 1.0f,
+        0.145115077f, 0.145115703f, 0.145115480f, 1.0f,
         // Perfect reflecting diffuser
-        1.041565657f, 1.041566014f, 1.041565657f, 1.0f,
+        1.041565537f, 1.041566610f, 1.041566253f, 1.0f,
     };
 
     OCIO::FixedFunctionOpData::Params params = {
@@ -518,6 +523,17 @@ OCIO_ADD_TEST(FixedFunctionOpCPU, aces_output_transform_20)
                        1e-5f,
                        __LINE__);
 
+#if DUMP_RESULT
+    std::cout << "aces_output_transform_20 results: \n" << std::setprecision(9) << std::fixed;
+    for (unsigned i = 0; i < num_samples; ++i) 
+    {
+        std::cout   << input2_32f[i * 4 + 0] << "f, "
+                    << input2_32f[i * 4 + 1] << "f, " 
+                    << input2_32f[i * 4 + 2] << "f, "
+                    << input2_32f[i * 4 + 3] << "f,\n";
+    }
+#endif
+
     OCIO::ConstFixedFunctionOpDataRcPtr funcData2
         = std::make_shared<OCIO::FixedFunctionOpData>(OCIO::FixedFunctionOpData::ACES_OUTPUT_TRANSFORM_20_INV,
                                                       params);
@@ -528,6 +544,13 @@ OCIO_ADD_TEST(FixedFunctionOpCPU, aces_output_transform_20)
                        __LINE__);
 }
 
+// NB: The ACES 2 FixedFunction takes linear ACES2065-1 values and produces linear RGB values
+// in the encoding gamut. The relatively large tolerance on the following round-trip tests doesn't
+// fully test accuracy of saturated values. See additional tests in BuiltinTransform_tests.cpp
+// that do a similar round-trip but using gamma-corrected code values and therefore does
+// a more thorough test of colors where one or more channels is near zero, which is an area
+// that is more challenging for the algorithm to invert.
+
 OCIO_ADD_TEST(FixedFunctionOpCPU, aces_ot_20_rec709_100n_rt)
 {
     const int lut_size = 8;
@@ -778,35 +801,35 @@ OCIO_ADD_TEST(FixedFunctionOpCPU, aces_tonescale_compress_20)
 
     const float expected_32f[num_samples*4] = {
         // ACEScg primaries and secondaries scaled by 4
-        110.702453613f, 211.251770020f, 25.025110245f, 1.0f,
+        110.702453613f, 211.251770020f, 25.025110245f,  1.0f,
         168.016815186f, 129.796249390f, 106.183448792f, 1.0f,
-        140.814849854f, 193.459213257f, 147.056488037f, 1.0f,
-        156.429519653f, 110.938514709f, 192.204727173f, 1.0f,
-        80.456542969f, 98.490524292f, 268.442108154f, 1.0f,
-        135.172195435f, 175.559280396f, 341.715240479f, 1.0f,
+        140.814849854f, 193.459197998f, 147.056488037f, 1.0f,
+        156.429504395f, 110.938423157f, 192.204727173f, 1.0f,
+        80.456558228f, 98.490531921f, 268.442108154f,   1.0f,
+        135.172225952f, 175.559326172f, 341.715240479f, 1.0f,
         // OCIO test values
-        18.187314987f, 33.819175720f, 4.173158169f, 0.5f,
-        80.413116455f, 21.309329987f, 332.159759521f, 1.0f,
-        83.447891235f, 37.852291107f, 182.925750732f, 0.0f,
+        18.187316895f, 33.819190979f, 4.173158169f,   0.5f,
+        80.413101196f, 21.309329987f, 332.159759521f, 1.0f,
+        83.447883606f, 37.852523804f, 182.925750732f, 0.0f,
         // ColorChecker24 (SMPTE 2065-1 2021)
-        27.411964417f, 13.382769585f, 38.146659851f, 1.0f,
-        59.987670898f, 14.391894341f, 39.841842651f, 1.0f,
+        27.411968231f, 13.382784843f, 38.146659851f,  1.0f,
+        59.987659454f, 14.391894341f, 39.841842651f,  1.0f,
         43.298923492f, 12.199877739f, 249.107116699f, 1.0f,
-        31.489658356f, 14.075142860f, 128.878036499f, 1.0f,
-        50.749198914f, 12.731814384f, 285.658966064f, 1.0f,
-        64.728637695f, 18.593795776f, 179.324264526f, 1.0f,
-        53.399448395f, 37.394428253f, 50.924011230f, 1.0f,
+        31.489654541f, 14.075141907f, 128.878036499f, 1.0f,
+        50.749198914f, 12.731806755f, 285.658966064f, 1.0f,
+        64.728637695f, 18.593791962f, 179.324264526f, 1.0f,
+        53.399444580f, 37.394416809f, 50.924011230f,  1.0f,
         34.719596863f, 21.616765976f, 271.008331299f, 1.0f,
-        43.910713196f, 36.788166046f, 13.975610733f, 1.0f,
-        23.196525574f, 15.118354797f, 317.544281006f, 1.0f,
-        63.348674774f, 33.283493042f, 119.145133972f, 1.0f,
-        64.908889771f, 35.371044159f, 70.842193604f, 1.0f,
-        24.876911163f, 23.143159866f, 273.228973389f, 1.0f,
+        43.910709381f, 36.788166046f, 13.975610733f,  1.0f,
+        23.196529388f, 15.118354797f, 317.544281006f, 1.0f,
+        63.348682404f, 33.283519745f, 119.145133972f, 1.0f,
+        64.908874512f, 35.371063232f, 70.842193604f,  1.0f,
+        24.876913071f, 23.143159866f, 273.228973389f, 1.0f,
         44.203376770f, 28.918329239f, 144.154159546f, 1.0f,
-        32.824356079f, 43.447875977f, 17.892261505f, 1.0f,
-        75.830871582f, 39.872474670f, 90.752044678f, 1.0f,
-        45.823116302f, 34.652069092f, 348.832092285f, 1.0f,
-        43.597240448f, 23.079078674f, 218.454376221f, 1.0f,
+        32.824359894f, 43.447853088f, 17.892261505f,  1.0f,
+        75.830871582f, 39.872489929f, 90.752044678f,  1.0f,
+        45.823120117f, 34.652057648f, 348.832092285f, 1.0f,
+        43.597236633f, 23.079071045f, 218.454376221f, 1.0f,
     };
 
     OCIO::FixedFunctionOpData::Params params = {1000.f};
@@ -871,35 +894,35 @@ OCIO_ADD_TEST(FixedFunctionOpCPU, aces_gamut_map_20)
 
     const float expected_32f[num_samples*4] = {
         // ACEScg primaries and secondaries scaled by 4
-        107.831291199f, 174.252944946f, 25.025119781f, 1.0f,
-        168.028198242f, 118.224960327f, 106.183464050f, 1.0f,
-        140.030105591f, 127.177192688f, 147.056488037f, 1.0f,
-        156.512435913f, 73.218856812f, 192.204727173f, 1.0f,
-        79.378631592f, 72.613555908f, 268.442108154f, 1.0f,
-        133.827835083f, 149.929809570f, 341.715240479f, 1.0f,
+        107.829742432f, 174.270156860f, 25.025110245f,  1.0f,
+        168.028274536f, 118.227561951f, 106.183448792f, 1.0f,
+        140.030166626f, 127.184478760f, 147.056488037f, 1.0f,
+        156.512435913f, 73.219184875f,  192.204727173f, 1.0f,
+        79.378555298f,  72.608604431f,  268.442108154f, 1.0f,
+        133.827941895f, 149.930618286f, 341.715240479f, 1.0f,
         // OCIO test values
-        18.194000244f, 33.312938690f, 4.173166752f, 0.5f,
-        80.413116455f, 21.309329987f, 332.159759521f, 1.0f,
-        83.467437744f, 37.305160522f, 182.925750732f, 0.0f,
+        18.193992615f,  33.313068390f,  4.173158169f,   0.5f,
+        80.413116455f,  21.309329987f,  332.159759521f, 1.0f,
+        83.467445374f,  37.305030823f,  182.925750732f, 0.0f,
         // ColorChecker24 (SMPTE 2065-1 2021)
-        27.411962509f, 13.382793427f, 38.146591187f, 1.0f,
-        59.987670898f, 14.391893387f, 39.841842651f, 1.0f,
-        43.298923492f, 12.199877739f, 249.107116699f, 1.0f,
-        31.489658356f, 14.075142860f, 128.878036499f, 1.0f,
-        50.749198914f, 12.731814384f, 285.658966064f, 1.0f,
-        64.728637695f, 18.593795776f, 179.324264526f, 1.0f,
-        53.399448395f, 37.394428253f, 50.924011230f, 1.0f,
-        34.719596863f, 21.616765976f, 271.008331299f, 1.0f,
-        43.910709381f, 36.788166046f, 13.975610733f, 1.0f,
-        23.196525574f, 15.118361473f, 317.544250488f, 1.0f,
-        63.348674774f, 33.283493042f, 119.145133972f, 1.0f,
-        64.908889771f, 35.371044159f, 70.842193604f, 1.0f,
-        24.876916885f, 23.143167496f, 273.229034424f, 1.0f,
-        44.203376770f, 28.918329239f, 144.154159546f, 1.0f,
-        32.824352264f, 43.447864532f, 17.892255783f, 1.0f,
-        75.830871582f, 39.872474670f, 90.752044678f, 1.0f,
-        45.823104858f, 34.652038574f, 348.832092285f, 1.0f,
-        43.635551453f, 21.629474640f, 218.454376221f, 1.0f,
+        27.411962509f,  13.382769585f,  38.146659851f,  1.0f,
+        59.987674713f,  14.391894341f,  39.841842651f,  1.0f,
+        43.298919678f,  12.199877739f,  249.107116699f, 1.0f,
+        31.489658356f,  14.075142860f,  128.878036499f, 1.0f,
+        50.749198914f,  12.731814384f,  285.658966064f, 1.0f,
+        64.728637695f,  18.593795776f,  179.324264526f, 1.0f,
+        53.399448395f,  37.394428253f,  50.924011230f,  1.0f,
+        34.719596863f,  21.616765976f,  271.008331299f, 1.0f,
+        43.910713196f,  36.788166046f,  13.975610733f,  1.0f,
+        23.196525574f,  15.118354797f,  317.544281006f, 1.0f,
+        63.348674774f,  33.283493042f,  119.145133972f, 1.0f,
+        64.908882141f,  35.371044159f,  70.842193604f,  1.0f,
+        24.876911163f,  23.143159866f,  273.228973389f, 1.0f,
+        44.203376770f,  28.918329239f,  144.154159546f, 1.0f,
+        32.824356079f,  43.447875977f,  17.892261505f,  1.0f,
+        75.830871582f,  39.872474670f,  90.752044678f,  1.0f,
+        45.823112488f,  34.652069092f,  348.832092285f, 1.0f,
+        43.635547638f,  21.629518509f,  218.454376221f, 1.0f,
     };
 
     OCIO::FixedFunctionOpData::Params params = {
diff --git a/tests/cpu/transforms/BuiltinTransform_tests.cpp b/tests/cpu/transforms/BuiltinTransform_tests.cpp
index e46bd74fe6..29d252cfb9 100644
--- a/tests/cpu/transforms/BuiltinTransform_tests.cpp
+++ b/tests/cpu/transforms/BuiltinTransform_tests.cpp
@@ -8,6 +8,7 @@
 
 #include "transforms/BuiltinTransform.cpp"
 
+#include "ops/lut3d/Lut3DOp.h"
 #include "ops/matrix/MatrixOp.h"
 #include "Platform.h"
 #include "transforms/builtins/ACES.h"
@@ -18,7 +19,6 @@
 
 namespace OCIO = OCIO_NAMESPACE;
 
-
 OCIO_ADD_TEST(BuiltinTransform, creation)
 {
     // Tests around the creation of a built-in transform instance.
@@ -124,7 +124,8 @@ void ValidateValues(const char * prefixMsg, T in, T out, T errorThreshold, int l
     // Using rel error with a large minExpected value of 1 will transition
     // from absolute error for expected values < 1 and
     // relative error for values > 1.
-    if (!OCIO::EqualWithSafeRelError(in, out, errorThreshold, T(1.)))
+    T computedError{};
+    if (!OCIO::EqualWithSafeRelError(in, out, errorThreshold, T(1.), &computedError))
     {
         std::ostringstream errorMsg;
         errorMsg.precision(std::numeric_limits<T>::max_digits10);
@@ -132,7 +133,11 @@ void ValidateValues(const char * prefixMsg, T in, T out, T errorThreshold, int l
         {
             errorMsg << prefixMsg << ": ";
         }
-        errorMsg << "value = " << in << " but expected = " << out;
+        errorMsg << " - Values: " << in << " expected: " << out;
+        errorMsg << " - Error: " << computedError << " ("
+                 << std::setprecision(3) << computedError / errorThreshold;
+        errorMsg << "x of Threshold: " << std::setprecision(6) << errorThreshold
+                 << ")";
         OCIO_CHECK_ASSERT_MESSAGE_FROM(0, errorMsg.str(), lineNo);
     }
 }
@@ -328,7 +333,7 @@ namespace
 using Values = std::vector<float>;
 using AllValues = std::map<std::string, std::tuple<float, Values, Values>>;
 
-void ValidateBuiltinTransform(const char * style, const Values & in, const Values & out, float errorThreshold, int lineNo)
+void ValidateBuiltinTransform(const char * style, const Values & in, const Values & out, float errorThreshold, int lineNo, Values &results)
 {
     OCIO::BuiltinTransformRcPtr builtin = OCIO::BuiltinTransform::Create();
     OCIO_CHECK_NO_THROW_FROM(builtin->setStyle(style), lineNo);
@@ -348,16 +353,16 @@ void ValidateBuiltinTransform(const char * style, const Values & in, const Value
 
     OCIO::PackedImageDesc inDesc((void *)&in[0], long(in.size() / 3), 1, 3);
 
-    Values vals(in.size(), -1.0f);
-    OCIO::PackedImageDesc outDesc((void *)&vals[0], long(vals.size() / 3), 1, 3);
+    results = Values(in.size(), -1.0f);
+    OCIO::PackedImageDesc outDesc((void *)&results[0], long(results.size() / 3), 1, 3);
 
     OCIO_CHECK_NO_THROW_FROM(cpu->apply(inDesc, outDesc), lineNo);
 
     for (size_t idx = 0; idx < out.size(); ++idx)
     {
         std::ostringstream oss;
-        oss << style << ": for index = " << idx << " with threshold = " << errorThreshold;
-        ValidateValues(oss.str().c_str(), vals[idx], out[idx], errorThreshold, lineNo);
+        oss << style << ": for index = " << idx;
+        ValidateValues(oss.str().c_str(), results[idx], out[idx], errorThreshold, lineNo);
     }
 }
 
@@ -458,100 +463,130 @@ AllValues UnitTestValues
         { 1.0e-6f,
         { 0.5f, 0.4f, 0.3f }, { 0.22214814f,     0.21179835f,     0.15639816f } } },
 
-    { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - SDR-100nit-REC709_2.0",
-        { 1.0e-6f,
-        { 0.5f, 0.4f, 0.3f }, { 0.26260212f, 0.25207470f, 0.20617338f } } },
-    { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - SDR-100nit-P3-D65_2.0",
-        { 1.0e-6f,
-        { 0.5f, 0.4f, 0.3f }, { 0.26260212f, 0.25207472f, 0.20617332f } } },
-    { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-108nit-P3-D65_2.0",
-        { 1.0e-6f,
-        { 0.5f, 0.4f, 0.3f }, { 0.16253406f, 0.15513624f, 0.12449740f } } },
-    { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-300nit-P3-D65_2.0",
-        { 1.0e-6f,
-        { 0.5f, 0.4f, 0.3f }, { 0.20592399f, 0.19440515f, 0.15028581f } } },
-    { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-500nit-P3-D65_2.0",
-        { 1.0e-6f,
-        { 0.5f, 0.4f, 0.3f }, { 0.41039306f, 0.38813826f, 0.30191866f } } },
-    { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-1000nit-P3-D65_2.0",
-        { 1.0e-6f,
-        { 0.5f, 0.4f, 0.3f }, { 0.46536570f, 0.43852836f, 0.33688113f } } },
-    { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-2000nit-P3-D65_2.0",
-        { 1.0e-6f,
-        { 0.5f, 0.4f, 0.3f }, { 0.51225936f, 0.48264506f, 0.37060050f } } },
-    { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-4000nit-P3-D65_2.0",
-        { 1.0e-6f,
-        { 0.5f, 0.4f, 0.3f }, { 0.55653524f, 0.51967940f, 0.38678724f } } },
-    { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-500nit-REC2020_2.0",
-        { 1.0e-6f,
-        { 0.5f, 0.4f, 0.3f }, { 0.41039258f, 0.38813800f, 0.30191845f } } },
-    { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-1000nit-REC2020_2.0",
-        { 1.0e-6f,
-        { 0.5f, 0.4f, 0.3f }, { 0.46536540f, 0.43852820f, 0.33688095f } } },
-    { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-2000nit-REC2020_2.0",
-        { 1.0e-6f,
-        { 0.5f, 0.4f, 0.3f }, { 0.51225930f, 0.48264477f, 0.37060022f } } },
-    { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-4000nit-REC2020_2.0",
-        { 1.0e-6f,
-        { 0.5f, 0.4f, 0.3f }, { 0.55653550f, 0.51967950f, 0.38678730f } } },
-
-    { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - SDR-100nit-REC709-D60-in-REC709-D65_2.0",
-        { 1.0e-6f,
-        { 0.5f, 0.4f, 0.3f }, { 0.25147703f, 0.24029444f, 0.18221131f } } },
-    { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - SDR-100nit-REC709-D60-in-P3-D65_2.0",
-        { 1.0e-6f,
-        { 0.5f, 0.4f, 0.3f }, { 0.25373828f, 0.24245512f, 0.18384966f } } },
-    { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - SDR-100nit-REC709-D60-in-REC2020-D65_2.0",
-        { 1.0e-6f,
-        { 0.5f, 0.4f, 0.3f }, { 0.25712878f, 0.24569483f, 0.18630630f } } },
-    { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - SDR-100nit-P3-D60-in-P3-D65_2.0",
-        { 1.0e-6f,
-        { 0.5f, 0.4f, 0.3f }, { 0.25373834f, 0.24245517f, 0.18384990f } } },
-    { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - SDR-100nit-P3-D60-in-XYZ-E_2.0",
-        { 1.0e-6f,
-        { 0.5f, 0.4f, 0.3f }, { 0.26332240f, 0.25161302f, 0.19079340f } } },
-    { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-108nit-P3-D60-in-P3-D65_2.0",
-        { 1.0e-6f,
-        { 0.5f, 0.4f, 0.3f }, { 0.15705064f, 0.14920068f, 0.11100890f } } },
-    { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-300nit-P3-D60-in-XYZ-E_2.0",
-        { 1.0e-6f,
-        { 0.5f, 0.4f, 0.3f }, { 0.20469205f, 0.19229384f, 0.13782679f } } },
-    { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-500nit-P3-D60-in-P3-D65_2.0",
-        { 1.0e-6f,
-        { 0.5f, 0.4f, 0.3f }, { 0.39655724f, 0.37322620f, 0.26917280f } } },
-    { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-1000nit-P3-D60-in-P3-D65_2.0",
-        { 1.0e-6f,
-        { 0.5f, 0.4f, 0.3f }, { 0.44968104f, 0.42165324f, 0.30032730f } } },
-    { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-2000nit-P3-D60-in-P3-D65_2.0",
-        { 1.0e-6f,
-        { 0.5f, 0.4f, 0.3f }, { 0.49499428f, 0.46407104f, 0.33038715f } } },
-    { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-4000nit-P3-D60-in-P3-D65_2.0",
-        { 1.0e-6f,
-        { 0.5f, 0.4f, 0.3f }, { 0.53778990f, 0.49960223f, 0.34477120f } } },
-    { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-500nit-P3-D60-in-REC2020-D65_2.0",
-        { 1.0e-6f,
-        { 0.5f, 0.4f, 0.3f }, { 0.40185606f, 0.37821326f, 0.27276948f } } },
-    { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-1000nit-P3-D60-in-REC2020-D65_2.0",
-        { 1.0e-6f,
-        { 0.5f, 0.4f, 0.3f }, { 0.45568960f, 0.42728746f, 0.30434040f } } },
-    { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-2000nit-P3-D60-in-REC2020-D65_2.0",
-        { 1.0e-6f,
-        { 0.5f, 0.4f, 0.3f }, { 0.50160843f, 0.47027197f, 0.33480182f } } },
-    { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-4000nit-P3-D60-in-REC2020-D65_2.0",
-        { 1.0e-6f,
-        { 0.5f, 0.4f, 0.3f }, { 0.54497580f, 0.50627790f, 0.34937808f } } },
-    { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-500nit-REC2020-D60-in-REC2020-D65_2.0",
-        { 1.0e-6f,
-        { 0.5f, 0.4f, 0.3f }, { 0.40185624f, 0.37821335f, 0.27276933f } } },
-    { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-1000nit-REC2020-D60-in-REC2020-D65_2.0",
-        { 1.0e-6f,
-        { 0.5f, 0.4f, 0.3f }, { 0.45568994f, 0.42728750f, 0.30434027f } } },
-    { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-2000nit-REC2020-D60-in-REC2020-D65_2.0",
-        { 1.0e-6f,
-        { 0.5f, 0.4f, 0.3f }, { 0.50160870f, 0.47027220f, 0.33480210f } } },
-    { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-4000nit-REC2020-D60-in-REC2020-D65_2.0",
-        { 1.0e-6f,
-        { 0.5f, 0.4f, 0.3f }, { 0.54497580f, 0.50627780f, 0.34937814f } } },
+     { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - SDR-100nit-REC709_2.0",
+        { 1.0e-4f,
+        { 0.5f, 0.4f, 0.3f },
+        { 0.26260215f, 0.25207460f, 0.20617345f } } },
+     { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - SDR-100nit-P3-D65_2.0",
+        { 1.0e-4f,
+        { 0.5f, 0.4f, 0.3f },
+        { 0.26260215f, 0.25207475f, 0.20617352f } } },
+     { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-108nit-P3-D65_2.0",
+        { 1.0e-4f,
+        { 0.5f, 0.4f, 0.3f },
+        { 0.16253395f, 0.15513620f, 0.12449738f } } },
+     { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-300nit-P3-D65_2.0",
+        { 1.0e-4f,
+        { 0.5f, 0.4f, 0.3f },
+        { 0.20592400f, 0.19440512f, 0.15028587f } } },
+     { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-500nit-P3-D65_2.0",
+        { 1.0e-4f,
+        { 0.5f, 0.4f, 0.3f },
+        { 0.41039270f, 0.38813815f, 0.30191854f } } },
+     { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-1000nit-P3-D65_2.0",
+        { 1.0e-4f,
+        { 0.5f, 0.4f, 0.3f },
+        { 0.46536559f, 0.43852845f, 0.33688101f } } },
+     { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-2000nit-P3-D65_2.0",
+        { 1.0e-4f,
+        { 0.5f, 0.4f, 0.3f },
+        { 0.51225948f, 0.48264498f, 0.37060043f } } },
+     { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-4000nit-P3-D65_2.0",
+        { 1.0e-4f,
+        { 0.5f, 0.4f, 0.3f },
+        { 0.55653530f, 0.51967967f, 0.38678783f } } },
+     { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-500nit-REC2020_2.0",
+        { 1.0e-4f,
+        { 0.5f, 0.4f, 0.3f },
+        { 0.41039288f, 0.38813818f, 0.30191860f } } },
+     { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-1000nit-REC2020_2.0",
+        { 1.0e-4f,
+        { 0.5f, 0.4f, 0.3f },
+        { 0.46536580f, 0.43852842f, 0.33688098f } } },
+     { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-2000nit-REC2020_2.0",
+        { 1.0e-4f,
+        { 0.5f, 0.4f, 0.3f },
+        { 0.51225960f, 0.48264492f, 0.37060046f } } },
+     { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-4000nit-REC2020_2.0",
+        { 1.0e-4f,
+        { 0.5f, 0.4f, 0.3f },
+        { 0.55653548f, 0.51967967f, 0.38678783f } } },
+     { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - SDR-100nit-REC709-D60-in-REC709-D65_2.0",
+        { 1.0e-4f,
+        { 0.5f, 0.4f, 0.3f },
+        { 0.25147712f, 0.24029461f, 0.18221153f } } },
+     { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - SDR-100nit-REC709-D60-in-P3-D65_2.0",
+        { 1.0e-4f,
+        { 0.5f, 0.4f, 0.3f },
+        { 0.25373834f, 0.24245527f, 0.18384993f } } },
+     { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - SDR-100nit-REC709-D60-in-REC2020-D65_2.0",
+        { 1.0e-4f,
+        { 0.5f, 0.4f, 0.3f },
+        { 0.25712875f, 0.24569492f, 0.18630651f } } },
+     { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - SDR-100nit-P3-D60-in-P3-D65_2.0",
+        { 1.0e-4f,
+        { 0.5f, 0.4f, 0.3f },
+        { 0.25373828f, 0.24245520f, 0.18384989f } } },
+     { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - SDR-100nit-P3-D60-in-XYZ-E_2.0",
+        { 1.0e-4f,
+        { 0.5f, 0.4f, 0.3f },
+        { 0.26332238f, 0.25161314f, 0.19079420f } } },
+     { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-108nit-P3-D60-in-P3-D65_2.0",
+        { 1.0e-4f,
+        { 0.5f, 0.4f, 0.3f },
+        { 0.15705051f, 0.14920059f, 0.11100878f } } },
+     { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-300nit-P3-D60-in-XYZ-E_2.0",
+        { 1.0e-4f,
+        { 0.5f, 0.4f, 0.3f },
+        { 0.20469207f, 0.19229385f, 0.13782671f } } },
+     { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-500nit-P3-D60-in-P3-D65_2.0",
+        { 1.0e-4f,
+        { 0.5f, 0.4f, 0.3f },
+        { 0.39655733f, 0.37322620f, 0.26917258f } } },
+     { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-1000nit-P3-D60-in-P3-D65_2.0",
+        { 1.0e-4f,
+        { 0.5f, 0.4f, 0.3f },
+        { 0.44968122f, 0.42165339f, 0.30032712f } } },
+     { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-2000nit-P3-D60-in-P3-D65_2.0",
+        { 1.0e-4f,
+        { 0.5f, 0.4f, 0.3f },
+        { 0.49499470f, 0.46407115f, 0.33038712f } } },
+     { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-4000nit-P3-D60-in-P3-D65_2.0",
+        { 1.0e-4f,
+        { 0.5f, 0.4f, 0.3f },
+        { 0.53778988f, 0.49960214f, 0.34477147f } } },
+     { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-500nit-P3-D60-in-REC2020-D65_2.0",
+        { 1.0e-4f,
+        { 0.5f, 0.4f, 0.3f },
+        { 0.40185603f, 0.37821317f, 0.27276924f } } },
+     { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-1000nit-P3-D60-in-REC2020-D65_2.0",
+        { 1.0e-4f,
+        { 0.5f, 0.4f, 0.3f },
+        { 0.45568976f, 0.42728746f, 0.30434006f } } },
+     { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-2000nit-P3-D60-in-REC2020-D65_2.0",
+        { 1.0e-4f,
+        { 0.5f, 0.4f, 0.3f },
+        { 0.50160873f, 0.47027206f, 0.33480173f } } },
+     { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-4000nit-P3-D60-in-REC2020-D65_2.0",
+        { 1.0e-4f,
+        { 0.5f, 0.4f, 0.3f },
+        { 0.54497570f, 0.50627774f, 0.34937829f } } },
+     { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-500nit-REC2020-D60-in-REC2020-D65_2.0",
+        { 1.0e-4f,
+        { 0.5f, 0.4f, 0.3f },
+        { 0.40185642f, 0.37821338f, 0.27276939f } } },
+     { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-1000nit-REC2020-D60-in-REC2020-D65_2.0",
+        { 1.0e-4f,
+        { 0.5f, 0.4f, 0.3f },
+        { 0.45569009f, 0.42728764f, 0.30434042f } } },
+     { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-2000nit-REC2020-D60-in-REC2020-D65_2.0",
+        { 1.0e-4f,
+        { 0.5f, 0.4f, 0.3f },
+        { 0.50160891f, 0.47027206f, 0.33480188f } } },
+     { "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-4000nit-REC2020-D60-in-REC2020-D65_2.0",
+        { 1.0e-4f,
+        { 0.5f, 0.4f, 0.3f },
+        { 0.54497600f, 0.50627792f, 0.34937853f } } },
 
     { "APPLE_LOG_to_ACES2065-1",
         { 1.0e-6f,
@@ -696,7 +731,8 @@ OCIO_ADD_TEST(Builtins, validate)
         }
         else
         {
-            ValidateBuiltinTransform(name, std::get<1>(values), std::get<2>(values), std::get<0>(values), __LINE__);
+            Values results;
+            ValidateBuiltinTransform(name, std::get<1>(values), std::get<2>(values), std::get<0>(values), __LINE__, results);
         }
     }
 
@@ -704,3 +740,153 @@ OCIO_ADD_TEST(Builtins, validate)
     // that don't have an associated built-in.
     OCIO_CHECK_EQUAL(UnitTestValues.size(), reg->getNumBuiltins());
 }
+
+namespace
+{
+
+void ValidateDisplayViewRoundTrip(const char * display_style, const char * view_style,
+                                  float scale, float errorThreshold, 
+                                  std::vector<int> difficultItems, float difficultThreshold,
+                                  int lineNo)
+{
+    // Built-in transform for the display.
+    OCIO::BuiltinTransformRcPtr display_builtin = OCIO::BuiltinTransform::Create();
+    OCIO_CHECK_NO_THROW_FROM(display_builtin->setStyle(display_style), lineNo);
+    OCIO_CHECK_NO_THROW_FROM(display_builtin->validate(), lineNo);
+    auto display_builtin_inv = display_builtin->createEditableCopy();
+    display_builtin_inv->setDirection(OCIO::TRANSFORM_DIR_INVERSE);
+
+    // Built-in transform for the view.
+    OCIO::BuiltinTransformRcPtr view_builtin = OCIO::BuiltinTransform::Create();
+    OCIO_CHECK_NO_THROW_FROM(view_builtin->setStyle(view_style), lineNo);
+    OCIO_CHECK_NO_THROW_FROM(view_builtin->validate(), lineNo);
+    auto view_builtin_inv = view_builtin->createEditableCopy();
+    view_builtin_inv->setDirection(OCIO::TRANSFORM_DIR_INVERSE);
+
+    // Assemble inverse and forward transform into a group transform that goes from
+    // display code values to ACES and back to code values.
+    OCIO::GroupTransformRcPtr group = OCIO::GroupTransform::Create();
+    group->appendTransform(display_builtin_inv);
+    group->appendTransform(view_builtin_inv);
+    group->appendTransform(view_builtin);
+    group->appendTransform(display_builtin);
+
+    // Create a Processor.
+    OCIO::ConstConfigRcPtr config = OCIO::Config::CreateRaw();
+    OCIO::ConstProcessorRcPtr proc;
+    OCIO_CHECK_NO_THROW_FROM(proc = config->getProcessor(group), lineNo);
+    OCIO_REQUIRE_ASSERT(proc);
+
+    // Create a CPUProcessor.
+    // Use optimization none to avoid replacing inv/fwd pairs and avoid fast pow for the display.
+    OCIO::ConstCPUProcessorRcPtr cpu;
+    OCIO_CHECK_NO_THROW_FROM(cpu = proc->getOptimizedCPUProcessor(OCIO::OPTIMIZATION_NONE), lineNo);
+    OCIO_REQUIRE_ASSERT(cpu);
+
+    // Create a 7 x 7 x 7 grid of RGBA values.
+    const unsigned lut_size = 7;
+    const unsigned num_channels = 4;
+    unsigned num_samples = lut_size * lut_size * lut_size;
+    std::vector<float> input_32f(num_samples * num_channels, 0.f);
+    std::vector<float> output_32f(num_samples * num_channels, 0.f);
+
+    GenerateIdentityLut3D(input_32f.data(), lut_size, num_channels, OCIO::LUT3DORDER_FAST_RED);
+
+    // Scale the grid of points, which is necessary when testing the ST-2084/PQ displays
+    // since the transforms are only designed to process up to a maximum luminance level.
+    for(unsigned idx=0; idx<(num_samples*4); ++idx)
+    {
+        input_32f[idx] *= scale;
+    }
+
+    // Process the values.
+    OCIO::PackedImageDesc inDesc((void *)&input_32f[0], num_samples, 1, 4);
+    OCIO::PackedImageDesc outDesc((void *)&output_32f[0], num_samples, 1, 4);
+    OCIO_CHECK_NO_THROW_FROM(cpu->apply(inDesc, outDesc), lineNo);
+
+    // Check if values are within tolerance.
+    for(unsigned idx=0; idx<(num_samples*4); idx+=4)
+    {
+        float computedErrorR, computedErrorG, computedErrorB = 0.0f;
+
+        const bool isDifficult = std::find(difficultItems.begin(), difficultItems.end(), idx)
+                                        != difficultItems.end();
+        const float tol = isDifficult ? difficultThreshold: errorThreshold;
+
+        const bool equalRelR = OCIO::EqualWithSafeRelError(output_32f[idx], input_32f[idx],
+                                                           tol, 1.0f,
+                                                           &computedErrorR);
+        const bool equalRelG = OCIO::EqualWithSafeRelError(output_32f[idx+1], input_32f[idx+1],
+                                                           tol, 1.0f,
+                                                           &computedErrorG);
+        const bool equalRelB = OCIO::EqualWithSafeRelError(output_32f[idx+2], input_32f[idx+2],
+                                                           tol, 1.0f,
+                                                           &computedErrorB);
+        if (!equalRelR || !equalRelG || !equalRelB)
+        {
+            std::ostringstream errorMsg;
+            errorMsg.precision(10);
+            errorMsg << "Index: " << idx << " - Tol.: " << tol << "\n - Expected: ";
+            errorMsg << std::fixed;
+            errorMsg << input_32f[idx] << ", " << input_32f[idx+1] << ", " << input_32f[idx+2];
+            errorMsg << "\n - Actual:   ";
+            errorMsg << output_32f[idx] << ", " << output_32f[idx+1] << ", " << output_32f[idx+2];
+            errorMsg << "\n - Error:    ";
+            errorMsg << computedErrorR << ", " << computedErrorG << ", " << computedErrorB;
+            OCIO_CHECK_ASSERT_MESSAGE_FROM(0, errorMsg.str(), lineNo);
+        }
+    }
+}
+
+} // anon.
+
+OCIO_ADD_TEST(Builtins, aces2_displayview_roundtrip)
+{
+    // Perform a round-trip test from display code-values to ACES and back to code values.
+    // This uses a 7 x 7 x 7 grid of RGB values.
+
+    ValidateDisplayViewRoundTrip("DISPLAY - CIE-XYZ-D65_to_REC.1886-REC.709",
+                                 "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - SDR-100nit-REC709_2.0",
+                                 1.0f,          // scale factor
+                                 0.004f,        // tolerance
+                                 {}, 0.f,
+                                 __LINE__);
+
+    ValidateDisplayViewRoundTrip("DISPLAY - CIE-XYZ-D65_to_DisplayP3",
+                                 "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - SDR-100nit-P3-D65_2.0",
+                                 1.0f,          // scale factor
+                                 0.001f,        // tolerance
+                                 {}, 0.f,
+                                 __LINE__);
+
+    ValidateDisplayViewRoundTrip("DISPLAY - CIE-XYZ-D65_to_ST2084-P3-D65",
+                                 "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-1000nit-P3-D65_2.0",
+                                 // Need to lower the max value from 1000 to 990 nits.
+                                 0.7507f,       // scale factor = 990 nits
+                                 0.005f,        // main tolerance
+                                 {168, 196, 364, 392, 1344},    // difficult values
+                                 0.03f,         // tolerance for difficult values
+                                 __LINE__);
+
+    ValidateDisplayViewRoundTrip("DISPLAY - CIE-XYZ-D65_to_ST2084-P3-D65",
+                                 "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-4000nit-P3-D65_2.0",
+                                 // Need to lower the max value from 4000 to 3860 nits.
+                                 0.8987f,       // scale factor = 3860 nits
+                                 0.007f,        // main tolerance
+                                 {168, 196, 392, 396, 588, 592, 952, 1148, 1196, 1200, 1260, 1288},
+                                 0.2f,         // tolerance for difficult values
+                                 __LINE__);
+
+    // TODO: The Rec.2100 transforms have too many values that don't invert to easily validate.
+//     ValidateDisplayViewRoundTrip("DISPLAY - CIE-XYZ-D65_to_REC.2100-PQ",
+//                                  "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-1000nit-REC2020_2.0",
+//                                  0.7507f,       // scale factor = 990 nits
+//                                  5e-3f,         // tolerance
+//                                  __LINE__);
+// 
+//     ValidateDisplayViewRoundTrip("DISPLAY - CIE-XYZ-D65_to_REC.2100-PQ",
+//                                  "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-4000nit-REC2020_2.0",
+//                                  0.8987f,       // scale factor = 3860 nits
+//                                  5e-3f,         // tolerance
+//                                  __LINE__);
+}
diff --git a/tests/gpu/FixedFunctionOp_test.cpp b/tests/gpu/FixedFunctionOp_test.cpp
index a7e77bc2d2..523ff66ef1 100644
--- a/tests/gpu/FixedFunctionOp_test.cpp
+++ b/tests/gpu/FixedFunctionOp_test.cpp
@@ -434,7 +434,11 @@ OCIO_ADD_GPU_TEST(FixedFunction, style_aces2_output_transform_inv)
 
 OCIO_ADD_GPU_TEST(FixedFunction, style_aces2_output_transform_invfwd)
 {
-     const double data_inv[9] = {
+    // Test that there are no shader resource (textures, functions, etc) conflicts between
+    // the 2 different inverse and forward transforms.  This is not a round-trip test,
+    // it tests that the forward and inverse may exist in the same shader with no issue.
+
+    const double data_inv[9] = {
         // Peak luminance
         100.f,
         // REC709 gamut
@@ -458,11 +462,161 @@ OCIO_ADD_GPU_TEST(FixedFunction, style_aces2_output_transform_invfwd)
     grp->appendTransform(func_inv);
     grp->appendTransform(func_fwd);
 
+    test.setTestNaN(false); // TODO: Partially running the output transform without the clamp so do not test Nan or Inf values
+    test.setTestInfinity(false);
+
     test.setProcessor(grp);
 
+    // This is not expected to be an identity, but it should be the same between CPU & GPU.
     test.setErrorThreshold(7e-4f);
 }
 
+namespace
+{
+
+OCIO::GroupTransformRcPtr BuildRoundTripTransform(const char * display_style, const char * view_style)
+{
+    // Built-in transform for the display.
+    OCIO::BuiltinTransformRcPtr display_builtin = OCIO::BuiltinTransform::Create();
+    display_builtin->setStyle(display_style);
+    display_builtin->validate();
+    auto display_builtin_inv = display_builtin->createEditableCopy();
+    display_builtin_inv->setDirection(OCIO::TRANSFORM_DIR_INVERSE);
+
+    // Built-in transform for the view.
+    OCIO::BuiltinTransformRcPtr view_builtin = OCIO::BuiltinTransform::Create();
+    view_builtin->setStyle(view_style);
+    view_builtin->validate();
+    auto view_builtin_inv = view_builtin->createEditableCopy();
+    view_builtin_inv->setDirection(OCIO::TRANSFORM_DIR_INVERSE);
+
+    // Assemble inverse and forward transform into a group transform that goes from
+    // display code values to ACES and back to code values.
+    OCIO::GroupTransformRcPtr group = OCIO::GroupTransform::Create();
+    group->appendTransform(display_builtin_inv);
+    group->appendTransform(view_builtin_inv);
+    group->appendTransform(view_builtin);
+    group->appendTransform(display_builtin);
+
+    return group;
+}
+
+void GenerateIdentityLut3D(OCIOGPUTest::CustomValues & values, int edgeLen, int numChannels, float scale)
+{
+    int num_samples = edgeLen * edgeLen * edgeLen;
+    std::vector<float> img(num_samples * numChannels, 0.f);
+
+    float c = 1.0f / ((float)edgeLen - 1.0f);
+    for (int i = 0; i < edgeLen*edgeLen*edgeLen; i++)
+    {
+        img[numChannels*i + 0] = scale * (float)(i%edgeLen) * c;
+        img[numChannels*i + 1] = scale * (float)((i / edgeLen) % edgeLen) * c;
+        img[numChannels*i + 2] = scale * (float)((i / edgeLen / edgeLen) % edgeLen) * c;
+    }
+    values.m_inputValues = img;
+}
+
+} // anon.
+
+// The following group of tests compares the display code value to ACES and back to code value
+// round-trip. The round-trip is not perfect (see BuiltinTransform_tests.cpp) but the tests 
+// here simply check if the CPU and GPU are giving the same result.
+
+OCIO_ADD_GPU_TEST(FixedFunction, style_aces2_rec709_rndtrip)
+{
+    const char * display_style = "DISPLAY - CIE-XYZ-D65_to_REC.1886-REC.709";
+    const char * view_style = "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - SDR-100nit-REC709_2.0";
+    auto group = BuildRoundTripTransform(display_style, view_style);
+
+    // The test harness gets a processor from the transform with the default optimization
+    // level. However, the forward/inverse does not optimize out due to the clamp to AP1
+    // in-between the FixedFunctions.
+    test.setProcessor(group);
+
+    // Set up a grid of RGBA custom values.
+    const int lut_size = 17;
+    const int num_channels = 4;
+    OCIOGPUTest::CustomValues values;
+    GenerateIdentityLut3D(values, lut_size, num_channels, 1.0f);
+
+    test.setCustomValues(values);
+
+    test.setErrorThreshold(0.004f);
+}
+
+OCIO_ADD_GPU_TEST(FixedFunction, style_aces2_displayp3_rndtrip)
+{
+    const char * display_style = "DISPLAY - CIE-XYZ-D65_to_DisplayP3";
+    const char * view_style = "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - SDR-100nit-P3-D65_2.0";
+    auto group = BuildRoundTripTransform(display_style, view_style);
+    test.setProcessor(group);
+
+    const int lut_size = 17;
+    const int num_channels = 4;
+    OCIOGPUTest::CustomValues values;
+    GenerateIdentityLut3D(values, lut_size, num_channels, 1.0f);
+
+    test.setCustomValues(values);
+
+    test.setErrorThreshold(0.001f);
+}
+
+OCIO_ADD_GPU_TEST(FixedFunction, style_aces2_1000nit_p3_rndtrip)
+{
+    const char * display_style = "DISPLAY - CIE-XYZ-D65_to_ST2084-P3-D65";
+    const char * view_style = "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-1000nit-P3-D65_2.0";
+    auto group = BuildRoundTripTransform(display_style, view_style);
+    test.setProcessor(group);
+
+    const int lut_size = 17;
+    const int num_channels = 4;
+    OCIOGPUTest::CustomValues values;
+    GenerateIdentityLut3D(values, lut_size, num_channels, 0.75183f);  // scale to 1000 nits
+
+    test.setCustomValues(values);
+
+    // TODO: Investigate why this is not closer.
+    // Setting the CPUProcessor to OPTIMIZATION_NONE helps slightly, but is not the main
+    // cause of the error.
+    test.setErrorThreshold(0.012f);
+}
+
+OCIO_ADD_GPU_TEST(FixedFunction, style_aces2_4000nit_p3_rndtrip)
+{
+    const char * display_style = "DISPLAY - CIE-XYZ-D65_to_ST2084-P3-D65";
+    const char * view_style = "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-4000nit-P3-D65_2.0";
+    auto group = BuildRoundTripTransform(display_style, view_style);
+    test.setProcessor(group);
+
+    const int lut_size = 17;
+    const int num_channels = 4;
+    OCIOGPUTest::CustomValues values;
+    GenerateIdentityLut3D(values, lut_size, num_channels, 0.90257f);  // scale to 4000 nits
+
+    test.setCustomValues(values);
+
+    // TODO: Investigate why this is not closer.
+    test.setErrorThreshold(0.018f);
+}
+
+OCIO_ADD_GPU_TEST(FixedFunction, style_aces2_4000nit_rec2020_rndtrip)
+{
+    const char * display_style = "DISPLAY - CIE-XYZ-D65_to_REC.2100-PQ";
+    const char * view_style = "ACES-OUTPUT - ACES2065-1_to_CIE-XYZ-D65 - HDR-4000nit-REC2020_2.0";
+    auto group = BuildRoundTripTransform(display_style, view_style);
+    test.setProcessor(group);
+
+    const int lut_size = 17;
+    const int num_channels = 4;
+    OCIOGPUTest::CustomValues values;
+    GenerateIdentityLut3D(values, lut_size, num_channels, 0.90257f);  // scale to 4000 nits
+
+    test.setCustomValues(values);
+
+    // TODO: Investigate why this is not closer.
+    test.setErrorThreshold(0.03f);
+}
+
 OCIO_ADD_GPU_TEST(FixedFunction, style_aces2_rgb_to_jmh_fwd)
 {
     // ACES AP0