From 34912e4b63eac862b193297b1540c873fc7e9593 Mon Sep 17 00:00:00 2001 From: Morteza Date: Fri, 7 Feb 2025 22:20:17 +0000 Subject: [PATCH 1/2] Issue #2116 : Improves Metal Backend Perf. moves the constant float/int declaration to constant space so it doesnt get initialized per thread. This improved color correction performance on M4 Max 3-4 times better. Signed-off-by: Morteza --- src/OpenColorIO/GpuShaderUtils.cpp | 12 ++++++++++-- tests/cpu/GpuShader_tests.cpp | 8 ++++---- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/src/OpenColorIO/GpuShaderUtils.cpp b/src/OpenColorIO/GpuShaderUtils.cpp index 03f90c579f..9b30c377d5 100644 --- a/src/OpenColorIO/GpuShaderUtils.cpp +++ b/src/OpenColorIO/GpuShaderUtils.cpp @@ -540,7 +540,11 @@ void GpuShaderText::declareFloatArrayConst(const std::string & name, int size, c case GPU_LANGUAGE_HLSL_SM_5_0: case GPU_LANGUAGE_MSL_2_0: { - nl << floatKeywordConst() << " " << name << "[" << size << "] = {"; + if(m_lang == GPU_LANGUAGE_MSL_2_0) + nl << "constant constexpr static float"; + else + nl << floatKeywordConst(); + nl << " " << name << "[" << size << "] = {"; for (int i = 0; i < size; ++i) { nl << getFloatString(v[i], m_lang); @@ -592,7 +596,11 @@ void GpuShaderText::declareIntArrayConst(const std::string & name, int size, con case GPU_LANGUAGE_HLSL_SM_5_0: case GPU_LANGUAGE_MSL_2_0: { - nl << intKeywordConst() << " " << name << "[" << size << "] = {"; + if(m_lang == GPU_LANGUAGE_MSL_2_0) + nl << "constant constexpr static int"; + else + nl << intKeywordConst(); + nl << " " << name << "[" << size << "] = {"; for (int i = 0; i < size; ++i) { nl << v[i]; diff --git a/tests/cpu/GpuShader_tests.cpp b/tests/cpu/GpuShader_tests.cpp index fa01fef1c2..961fe269bf 100644 --- a/tests/cpu/GpuShader_tests.cpp +++ b/tests/cpu/GpuShader_tests.cpp @@ -1032,10 +1032,10 @@ ocioOCIOMain( // Declaration of all helper methods -const int ocio_grading_rgbcurve_knotsOffsets_0[8] = {0, 5, -1, 0, -1, 0, -1, 0}; -const float ocio_grading_rgbcurve_knots_0[5] = {0., 0.333333343, 0.5, 0.666666508, 1.}; -const int ocio_grading_rgbcurve_coefsOffsets_0[8] = {0, 12, -1, 0, -1, 0, -1, 0}; -const float ocio_grading_rgbcurve_coefs_0[12] = {0.0982520878, 0.393008381, 0.347727984, 0.08693178, 0.934498608, 1., 1.13100278, 1.246912, 0., 0.322416425, 0.5, 0.698159397}; +constant constexpr static int ocio_grading_rgbcurve_knotsOffsets_0[8] = {0, 5, -1, 0, -1, 0, -1, 0}; +constant constexpr static float ocio_grading_rgbcurve_knots_0[5] = {0., 0.333333343, 0.5, 0.666666508, 1.}; +constant constexpr static int ocio_grading_rgbcurve_coefsOffsets_0[8] = {0, 12, -1, 0, -1, 0, -1, 0}; +constant constexpr static float ocio_grading_rgbcurve_coefs_0[12] = {0.0982520878, 0.393008381, 0.347727984, 0.08693178, 0.934498608, 1., 1.13100278, 1.246912, 0., 0.322416425, 0.5, 0.698159397}; float ocio_grading_rgbcurve_evalBSplineCurve_0(int curveIdx, float x) { From 92b1e7ad89f95060b889928d1d310c331f22de2f Mon Sep 17 00:00:00 2001 From: Morteza Date: Sun, 9 Feb 2025 14:03:12 +0000 Subject: [PATCH 2/2] Tiny refactoring to improve code maintainability Signed-off-by: Morteza --- src/OpenColorIO/GpuShaderUtils.cpp | 95 +++++++++++++++--------------- 1 file changed, 47 insertions(+), 48 deletions(-) diff --git a/src/OpenColorIO/GpuShaderUtils.cpp b/src/OpenColorIO/GpuShaderUtils.cpp index 9b30c377d5..c3558418f2 100644 --- a/src/OpenColorIO/GpuShaderUtils.cpp +++ b/src/OpenColorIO/GpuShaderUtils.cpp @@ -513,6 +513,18 @@ void GpuShaderText::declareFloatArrayConst(const std::string & name, int size, c } auto nl = newLine(); + + auto emitArrayValues = [&]() + { + for (int i = 0; i < size; ++i) + { + nl << getFloatString(v[i], m_lang); + if (i + 1 != size) + { + nl << ", "; + } + } + }; switch (m_lang) { @@ -524,38 +536,30 @@ void GpuShaderText::declareFloatArrayConst(const std::string & name, int size, c { nl << floatKeywordConst() << " " << name << "[" << size << "] = "; nl << floatKeyword() << "[" << size << "]("; - for (int i = 0; i < size; ++i) - { - nl << getFloatString(v[i], m_lang); - if (i + 1 != size) - { - nl << ", "; - } - } + emitArrayValues(); nl << ");"; break; } case LANGUAGE_OSL_1: case GPU_LANGUAGE_CG: case GPU_LANGUAGE_HLSL_SM_5_0: + { + nl << floatKeywordConst(); + nl << " " << name << "[" << size << "] = {"; + emitArrayValues(); + nl << "};"; + break; + } + case GPU_LANGUAGE_MSL_2_0: { - if(m_lang == GPU_LANGUAGE_MSL_2_0) - nl << "constant constexpr static float"; - else - nl << floatKeywordConst(); + nl << "constant constexpr static float"; nl << " " << name << "[" << size << "] = {"; - for (int i = 0; i < size; ++i) - { - nl << getFloatString(v[i], m_lang); - if (i + 1 != size) - { - nl << ", "; - } - } + emitArrayValues(); nl << "};"; break; } + } } @@ -571,6 +575,18 @@ void GpuShaderText::declareIntArrayConst(const std::string & name, int size, con } auto nl = newLine(); + + auto emitArrayValues = [&]() + { + for (int i = 0; i < size; ++i) + { + nl << v[i]; + if (i + 1 != size) + { + nl << ", "; + } + } + }; switch (m_lang) { @@ -582,33 +598,23 @@ void GpuShaderText::declareIntArrayConst(const std::string & name, int size, con { nl << intKeywordConst() << " " << name << "[" << size << "] = " << intKeyword() << "[" << size << "]("; - for (int i = 0; i < size; ++i) - { - nl << v[i]; - if (i + 1 != size) - { - nl << ", "; - } - } + emitArrayValues(); nl << ");"; break; } case GPU_LANGUAGE_HLSL_SM_5_0: + { + nl << intKeywordConst(); + nl << " " << name << "[" << size << "] = {"; + emitArrayValues(); + nl << "};"; + break; + } case GPU_LANGUAGE_MSL_2_0: { - if(m_lang == GPU_LANGUAGE_MSL_2_0) - nl << "constant constexpr static int"; - else - nl << intKeywordConst(); + nl << "constant constexpr static int"; nl << " " << name << "[" << size << "] = {"; - for (int i = 0; i < size; ++i) - { - nl << v[i]; - if (i + 1 != size) - { - nl << ", "; - } - } + emitArrayValues(); nl << "};"; break; } @@ -616,14 +622,7 @@ void GpuShaderText::declareIntArrayConst(const std::string & name, int size, con case GPU_LANGUAGE_CG: { nl << intKeyword() << " " << name << "[" << size << "] = {"; - for (int i = 0; i < size; ++i) - { - nl << v[i]; - if (i + 1 != size) - { - nl << ", "; - } - } + emitArrayValues(); nl << "};"; break; }