From 88b8fa9ed414c498feffc464b615565dc023372b Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Mon, 7 Mar 2022 22:39:11 +0100 Subject: [PATCH 01/15] [pptt] attempt fix for #343 in ggtt manual: replace scalar 'denominators' by vector 'denominators[1]' --- .../gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/CPPProcess.cc | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/epochX/cudacpp/gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/CPPProcess.cc b/epochX/cudacpp/gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/CPPProcess.cc index f70c4ca46d..92ead2c1c4 100644 --- a/epochX/cudacpp/gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/CPPProcess.cc +++ b/epochX/cudacpp/gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/CPPProcess.cc @@ -516,7 +516,9 @@ namespace mg5amcCpu mgDebugInitialise(); // Denominators: spins, colors and identical particles - const int denominators = 256; // FIXME: assume process.nprocesses == 1 for the moment (eventually denominators[nprocesses]?) + constexpr int nprocesses = 1; + static_assert( nprocesses == 1, "Assume nprocesses == 1" ); + constexpr int denominators[1] = {256}; // FIXME (#343): assume nprocesses == 1 // Set the parameters which change event by event // Need to discuss this with Stefan @@ -566,12 +568,12 @@ namespace mg5amcCpu // https://www.uzh.ch/cmsssl/physik/dam/jcr:2e24b7b1-f4d7-4160-817e-47b13dbf1d7c/Handout_4_2016-UZH.pdf] // FIXME: assume process.nprocesses == 1 for the moment (eventually: need a loop over processes here?) #ifdef __CUDACC__ - allMEs[ievt] /= denominators; + allMEs[ievt] /= denominators[0]; // FIXME (#343): assume nprocesses == 1 #else for( int ipagV = 0; ipagV < npagV; ++ipagV ) { for( int ieppV = 0; ieppV < neppV; ieppV++ ) - allMEs[ipagV * neppV + ieppV] /= denominators; + allMEs[ipagV * neppV + ieppV] /= denominators[0]; // FIXME (#343): assume nprocesses == 1 } #endif mgDebugFinalise(); From ea24f427ca2c5f8644fb6575537a555f5c7e32aa Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Mon, 7 Mar 2022 22:39:11 +0100 Subject: [PATCH 02/15] [pptt] fix #343 in codegen: disable mirror processes as in 3.1.1_lo_vectorization/madgraph/iolibs/export_cpp.py (check tkdiff 2.7.0_gpu/madgraph/iolibs/export_cpp.py 3.1.1_lo_vectorization/madgraph/iolibs) --- .../cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py index e038f2d85b..d10864fd45 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py @@ -1050,6 +1050,9 @@ def get_process_info_lines(self, matrix_element): def generate_process_files(self): """Generate mgOnGpuConfig.h, CPPProcess.cc, CPPProcess.h, check_sa.cc, gXXX.cu links""" misc.sprint('Entering PLUGIN_OneProcessExporter.generate_process_files') + if self.matrix_elements[0].get('has_mirror_process'): + self.matrix_elements[0].set('has_mirror_process', False) + self.nprocesses/=2 super(export_cpp.OneProcessExporterGPU, self).generate_process_files() self.edit_check_sa() self.edit_mgonGPU() From f8154edcee23e4aef740b8dff77bb2d601ddc60f Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Mon, 7 Mar 2022 22:39:56 +0100 Subject: [PATCH 03/15] [pptt] regenerate ggtt auto after changing mirror processes: no effect! must fix also the template... --- .../gg_tt.auto/CODEGEN_cudacpp_gg_tt_log.txt | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/epochX/cudacpp/gg_tt.auto/CODEGEN_cudacpp_gg_tt_log.txt b/epochX/cudacpp/gg_tt.auto/CODEGEN_cudacpp_gg_tt_log.txt index 7591ae9292..a30c8dace9 100644 --- a/epochX/cudacpp/gg_tt.auto/CODEGEN_cudacpp_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.auto/CODEGEN_cudacpp_gg_tt_log.txt @@ -50,7 +50,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.006856679916381836  +DEBUG: model prefixing takes 0.006847381591796875  INFO: Restrict model sm with file models/sm/restrict_default.dat . INFO: Change particles name to pass to MG5 convention Defined multiparticle p = g u c d s u~ c~ d~ s~ @@ -65,7 +65,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.011 s +1 processes with 3 diagrams generated in 0.010 s Total: 1 processes with 3 diagrams output standalone_cudacpp CODEGEN_cudacpp_gg_tt Plugin PLUGIN.CUDACPP_SA_OUTPUT has marked as NOT being validated with this version. @@ -85,25 +85,25 @@ INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory /data/avalassi/GPU2020/MG5aMC/3.1.1_lo_vectorization/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1052]  FileWriter for /data/avalassi/GPU2020/MG5aMC/3.1.1_lo_vectorization/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/./CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1118]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1121]  FileWriter for /data/avalassi/GPU2020/MG5aMC/3.1.1_lo_vectorization/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/./CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1140]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1426]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1427]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1426]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1427]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1143]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1429]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1430]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1429]  +DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1430]  DEBUG: only one Matrix-element supported?  INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2020/MG5aMC/3.1.1_lo_vectorization/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1070]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1087]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1094]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1106]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1073]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1090]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1097]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1109]  Generated helas calls for 1 subprocesses (3 diagrams) in 0.008 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 176]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.163 s +ALOHA: aloha creates 2 routines in 0.161 s VVV1 DEBUG: language =  [aloha_writers.py at line 2451]  FFV1 @@ -121,6 +121,6 @@ INFO: /data/avalassi/GPU2020/MG5aMC/3.1.1_lo_vectorization/CODEGEN_cudacpp_gg_tt DEBUG: Entering PLUGIN_ProcessExporter.finalize [output.py at line 185]  quit -real 0m1.080s -user 0m0.773s -sys 0m0.129s +real 0m0.925s +user 0m0.778s +sys 0m0.117s From 7f434e48cc4201e3021845daf0628b5cbab10568 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Tue, 8 Mar 2022 15:13:10 +0100 Subject: [PATCH 04/15] [pptt] fix #343: disable mirror processes *** NB HOWEVER STILL ASSUME NPROCESSES == 1 *** Fix codegen templates, regenerate ggtt auto, fix also ggtt manual --- .../gpu/process_function_definitions.inc | 4 +++- .../template_files/gpu/process_sigmaKin_function.inc | 4 ++-- .../cudacpp/gg_tt.auto/CODEGEN_cudacpp_gg_tt_log.txt | 10 +++++----- .../SubProcesses/P1_Sigma_sm_gg_ttx/CPPProcess.cc | 8 +++++--- .../SubProcesses/P1_Sigma_sm_gg_ttx/CPPProcess.cc | 4 ++-- 5 files changed, 17 insertions(+), 13 deletions(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_function_definitions.inc b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_function_definitions.inc index c2c5bca0be..b0e40ab1f5 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_function_definitions.inc +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_function_definitions.inc @@ -285,7 +285,9 @@ namespace mg5amcCpu mgDebugInitialise(); // Denominators: spins, colors and identical particles - const int denominators = %(den_factors)s; // FIXME: assume process.nprocesses == 1 for the moment (eventually denominators[nprocesses]?) + constexpr int nprocesses = %(nproc)i; + static_assert( nprocesses == 1, "Assume nprocesses == 1" ); // FIXME (#343): assume nprocesses == 1 + constexpr int denominators[%(nproc)i] = { %(den_factors)s }; // Set the parameters which change event by event // Need to discuss this with Stefan diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_sigmaKin_function.inc b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_sigmaKin_function.inc index d15e2b2980..da4042bde9 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_sigmaKin_function.inc +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_sigmaKin_function.inc @@ -32,12 +32,12 @@ // https://www.uzh.ch/cmsssl/physik/dam/jcr:2e24b7b1-f4d7-4160-817e-47b13dbf1d7c/Handout_4_2016-UZH.pdf] // FIXME: assume process.nprocesses == 1 for the moment (eventually: need a loop over processes here?) #ifdef __CUDACC__ - allMEs[ievt] /= denominators; + allMEs[ievt] /= denominators[0]; // FIXME (#343): assume nprocesses == 1 #else for( int ipagV = 0; ipagV < npagV; ++ipagV ) { for( int ieppV = 0; ieppV < neppV; ieppV++ ) - allMEs[ipagV * neppV + ieppV] /= denominators; + allMEs[ipagV * neppV + ieppV] /= denominators[0]; // FIXME (#343): assume nprocesses == 1 } #endif mgDebugFinalise(); diff --git a/epochX/cudacpp/gg_tt.auto/CODEGEN_cudacpp_gg_tt_log.txt b/epochX/cudacpp/gg_tt.auto/CODEGEN_cudacpp_gg_tt_log.txt index a30c8dace9..ff41a3df9c 100644 --- a/epochX/cudacpp/gg_tt.auto/CODEGEN_cudacpp_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.auto/CODEGEN_cudacpp_gg_tt_log.txt @@ -50,7 +50,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.006847381591796875  +DEBUG: model prefixing takes 0.0068683624267578125  INFO: Restrict model sm with file models/sm/restrict_default.dat . INFO: Change particles name to pass to MG5 convention Defined multiparticle p = g u c d s u~ c~ d~ s~ @@ -103,7 +103,7 @@ Generated helas calls for 1 subprocesses (3 diagrams) in 0.008 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.161 s +ALOHA: aloha creates 2 routines in 0.162 s VVV1 DEBUG: language =  [aloha_writers.py at line 2451]  FFV1 @@ -121,6 +121,6 @@ INFO: /data/avalassi/GPU2020/MG5aMC/3.1.1_lo_vectorization/CODEGEN_cudacpp_gg_tt DEBUG: Entering PLUGIN_ProcessExporter.finalize [output.py at line 185]  quit -real 0m0.925s -user 0m0.778s -sys 0m0.117s +real 0m0.918s +user 0m0.779s +sys 0m0.114s diff --git a/epochX/cudacpp/gg_tt.auto/SubProcesses/P1_Sigma_sm_gg_ttx/CPPProcess.cc b/epochX/cudacpp/gg_tt.auto/SubProcesses/P1_Sigma_sm_gg_ttx/CPPProcess.cc index f70c4ca46d..fc4d9a4488 100644 --- a/epochX/cudacpp/gg_tt.auto/SubProcesses/P1_Sigma_sm_gg_ttx/CPPProcess.cc +++ b/epochX/cudacpp/gg_tt.auto/SubProcesses/P1_Sigma_sm_gg_ttx/CPPProcess.cc @@ -516,7 +516,9 @@ namespace mg5amcCpu mgDebugInitialise(); // Denominators: spins, colors and identical particles - const int denominators = 256; // FIXME: assume process.nprocesses == 1 for the moment (eventually denominators[nprocesses]?) + constexpr int nprocesses = 1; + static_assert( nprocesses == 1, "Assume nprocesses == 1" ); // FIXME (#343): assume nprocesses == 1 + constexpr int denominators[1] = { 256 }; // Set the parameters which change event by event // Need to discuss this with Stefan @@ -566,12 +568,12 @@ namespace mg5amcCpu // https://www.uzh.ch/cmsssl/physik/dam/jcr:2e24b7b1-f4d7-4160-817e-47b13dbf1d7c/Handout_4_2016-UZH.pdf] // FIXME: assume process.nprocesses == 1 for the moment (eventually: need a loop over processes here?) #ifdef __CUDACC__ - allMEs[ievt] /= denominators; + allMEs[ievt] /= denominators[0]; // FIXME (#343): assume nprocesses == 1 #else for( int ipagV = 0; ipagV < npagV; ++ipagV ) { for( int ieppV = 0; ieppV < neppV; ieppV++ ) - allMEs[ipagV * neppV + ieppV] /= denominators; + allMEs[ipagV * neppV + ieppV] /= denominators[0]; // FIXME (#343): assume nprocesses == 1 } #endif mgDebugFinalise(); diff --git a/epochX/cudacpp/gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/CPPProcess.cc b/epochX/cudacpp/gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/CPPProcess.cc index 92ead2c1c4..fc4d9a4488 100644 --- a/epochX/cudacpp/gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/CPPProcess.cc +++ b/epochX/cudacpp/gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/CPPProcess.cc @@ -517,8 +517,8 @@ namespace mg5amcCpu // Denominators: spins, colors and identical particles constexpr int nprocesses = 1; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - constexpr int denominators[1] = {256}; // FIXME (#343): assume nprocesses == 1 + static_assert( nprocesses == 1, "Assume nprocesses == 1" ); // FIXME (#343): assume nprocesses == 1 + constexpr int denominators[1] = { 256 }; // Set the parameters which change event by event // Need to discuss this with Stefan From d90cfa1f6a30e906fba9a41158ff38e36a00a5df Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Tue, 8 Mar 2022 15:15:58 +0100 Subject: [PATCH 05/15] [pptt] regenerate ggttg+ and eemumu auto, resync manual --- .../CODEGEN_cudacpp_ee_mumu_log.txt | 22 ++++----- .../P1_Sigma_sm_epem_mupmum/CPPProcess.cc | 8 ++-- .../P1_Sigma_sm_epem_mupmum/CPPProcess.cc | 8 ++-- .../CODEGEN_cudacpp_gg_ttg_log.txt | 34 +++++++------- .../P1_Sigma_sm_gg_ttxg/CPPProcess.cc | 8 ++-- .../P1_Sigma_sm_gg_ttxg/CPPProcess.cc | 8 ++-- .../CODEGEN_cudacpp_gg_ttgg_log.txt | 38 +++++++-------- .../P1_Sigma_sm_gg_ttxgg/CPPProcess.cc | 8 ++-- .../P1_Sigma_sm_gg_ttxgg/CPPProcess.cc | 8 ++-- .../CODEGEN_cudacpp_gg_ttggg_log.txt | 46 +++++++++---------- .../P1_Sigma_sm_gg_ttxggg/CPPProcess.cc | 8 ++-- .../P1_Sigma_sm_gg_ttxggg/CPPProcess.cc | 8 ++-- 12 files changed, 110 insertions(+), 94 deletions(-) diff --git a/epochX/cudacpp/ee_mumu.auto/CODEGEN_cudacpp_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.auto/CODEGEN_cudacpp_ee_mumu_log.txt index 32c2eb9721..6f5b5aa3f0 100644 --- a/epochX/cudacpp/ee_mumu.auto/CODEGEN_cudacpp_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.auto/CODEGEN_cudacpp_ee_mumu_log.txt @@ -50,7 +50,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.006870269775390625  +DEBUG: model prefixing takes 0.0068318843841552734  INFO: Restrict model sm with file models/sm/restrict_default.dat . INFO: Change particles name to pass to MG5 convention Defined multiparticle p = g u c d s u~ c~ d~ s~ @@ -84,15 +84,15 @@ INFO: Processing color information for process: e+ e- > mu+ mu- @1 INFO: Creating files in directory /data/avalassi/GPU2020/MG5aMC/3.1.1_lo_vectorization/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1052]  FileWriter for /data/avalassi/GPU2020/MG5aMC/3.1.1_lo_vectorization/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1118]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1121]  FileWriter for /data/avalassi/GPU2020/MG5aMC/3.1.1_lo_vectorization/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1140]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1143]  DEBUG: only one Matrix-element supported?  INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2020/MG5aMC/3.1.1_lo_vectorization/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1070]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1087]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1094]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1106]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1073]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1090]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1097]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1109]  Generated helas calls for 1 subprocesses (2 diagrams) in 0.005 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 176]  ALOHA: aloha starts to compute helicity amplitudes @@ -100,7 +100,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 4 routines in 0.321 s +ALOHA: aloha creates 4 routines in 0.327 s FFV1 DEBUG: language =  [aloha_writers.py at line 2451]  FFV1 @@ -126,6 +126,6 @@ INFO: /data/avalassi/GPU2020/MG5aMC/3.1.1_lo_vectorization/CODEGEN_cudacpp_ee_mu DEBUG: Entering PLUGIN_ProcessExporter.finalize [output.py at line 185]  quit -real 0m1.059s -user 0m0.914s -sys 0m0.125s +real 0m1.071s +user 0m0.919s +sys 0m0.127s diff --git a/epochX/cudacpp/ee_mumu.auto/SubProcesses/P1_Sigma_sm_epem_mupmum/CPPProcess.cc b/epochX/cudacpp/ee_mumu.auto/SubProcesses/P1_Sigma_sm_epem_mupmum/CPPProcess.cc index 90e77434ac..86554d9439 100644 --- a/epochX/cudacpp/ee_mumu.auto/SubProcesses/P1_Sigma_sm_epem_mupmum/CPPProcess.cc +++ b/epochX/cudacpp/ee_mumu.auto/SubProcesses/P1_Sigma_sm_epem_mupmum/CPPProcess.cc @@ -511,7 +511,9 @@ namespace mg5amcCpu mgDebugInitialise(); // Denominators: spins, colors and identical particles - const int denominators = 4; // FIXME: assume process.nprocesses == 1 for the moment (eventually denominators[nprocesses]?) + constexpr int nprocesses = 1; + static_assert( nprocesses == 1, "Assume nprocesses == 1" ); // FIXME (#343): assume nprocesses == 1 + constexpr int denominators[1] = { 4 }; // Set the parameters which change event by event // Need to discuss this with Stefan @@ -561,12 +563,12 @@ namespace mg5amcCpu // https://www.uzh.ch/cmsssl/physik/dam/jcr:2e24b7b1-f4d7-4160-817e-47b13dbf1d7c/Handout_4_2016-UZH.pdf] // FIXME: assume process.nprocesses == 1 for the moment (eventually: need a loop over processes here?) #ifdef __CUDACC__ - allMEs[ievt] /= denominators; + allMEs[ievt] /= denominators[0]; // FIXME (#343): assume nprocesses == 1 #else for( int ipagV = 0; ipagV < npagV; ++ipagV ) { for( int ieppV = 0; ieppV < neppV; ieppV++ ) - allMEs[ipagV * neppV + ieppV] /= denominators; + allMEs[ipagV * neppV + ieppV] /= denominators[0]; // FIXME (#343): assume nprocesses == 1 } #endif mgDebugFinalise(); diff --git a/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/CPPProcess.cc b/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/CPPProcess.cc index 90e77434ac..86554d9439 100644 --- a/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/CPPProcess.cc +++ b/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/CPPProcess.cc @@ -511,7 +511,9 @@ namespace mg5amcCpu mgDebugInitialise(); // Denominators: spins, colors and identical particles - const int denominators = 4; // FIXME: assume process.nprocesses == 1 for the moment (eventually denominators[nprocesses]?) + constexpr int nprocesses = 1; + static_assert( nprocesses == 1, "Assume nprocesses == 1" ); // FIXME (#343): assume nprocesses == 1 + constexpr int denominators[1] = { 4 }; // Set the parameters which change event by event // Need to discuss this with Stefan @@ -561,12 +563,12 @@ namespace mg5amcCpu // https://www.uzh.ch/cmsssl/physik/dam/jcr:2e24b7b1-f4d7-4160-817e-47b13dbf1d7c/Handout_4_2016-UZH.pdf] // FIXME: assume process.nprocesses == 1 for the moment (eventually: need a loop over processes here?) #ifdef __CUDACC__ - allMEs[ievt] /= denominators; + allMEs[ievt] /= denominators[0]; // FIXME (#343): assume nprocesses == 1 #else for( int ipagV = 0; ipagV < npagV; ++ipagV ) { for( int ieppV = 0; ieppV < neppV; ieppV++ ) - allMEs[ipagV * neppV + ieppV] /= denominators; + allMEs[ipagV * neppV + ieppV] /= denominators[0]; // FIXME (#343): assume nprocesses == 1 } #endif mgDebugFinalise(); diff --git a/epochX/cudacpp/gg_ttg.auto/CODEGEN_cudacpp_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.auto/CODEGEN_cudacpp_gg_ttg_log.txt index f655f98184..19832693f8 100644 --- a/epochX/cudacpp/gg_ttg.auto/CODEGEN_cudacpp_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.auto/CODEGEN_cudacpp_gg_ttg_log.txt @@ -50,7 +50,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.006880521774291992  +DEBUG: model prefixing takes 0.007297515869140625  INFO: Restrict model sm with file models/sm/restrict_default.dat . INFO: Change particles name to pass to MG5 convention Defined multiparticle p = g u c d s u~ c~ d~ s~ @@ -65,7 +65,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @1 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.029 s +1 processes with 16 diagrams generated in 0.028 s Total: 1 processes with 16 diagrams output standalone_cudacpp CODEGEN_cudacpp_gg_ttg Plugin PLUGIN.CUDACPP_SA_OUTPUT has marked as NOT being validated with this version. @@ -85,21 +85,21 @@ INFO: Processing color information for process: g g > t t~ g @1 INFO: Creating files in directory /data/avalassi/GPU2020/MG5aMC/3.1.1_lo_vectorization/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1052]  FileWriter for /data/avalassi/GPU2020/MG5aMC/3.1.1_lo_vectorization/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1118]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1121]  FileWriter for /data/avalassi/GPU2020/MG5aMC/3.1.1_lo_vectorization/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1140]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1426]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1427]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1426]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1427]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1426]  -DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1427]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1143]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1429]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1430]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1429]  +DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1430]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1429]  +DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1430]  DEBUG: only one Matrix-element supported?  INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2020/MG5aMC/3.1.1_lo_vectorization/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1070]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1087]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1094]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1106]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1073]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1090]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1097]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1109]  Generated helas calls for 1 subprocesses (16 diagrams) in 0.050 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 176]  ALOHA: aloha starts to compute helicity amplitudes @@ -136,6 +136,6 @@ INFO: /data/avalassi/GPU2020/MG5aMC/3.1.1_lo_vectorization/CODEGEN_cudacpp_gg_tt DEBUG: Entering PLUGIN_ProcessExporter.finalize [output.py at line 185]  quit -real 0m1.239s -user 0m1.093s -sys 0m0.125s +real 0m1.252s +user 0m1.094s +sys 0m0.128s diff --git a/epochX/cudacpp/gg_ttg.auto/SubProcesses/P1_Sigma_sm_gg_ttxg/CPPProcess.cc b/epochX/cudacpp/gg_ttg.auto/SubProcesses/P1_Sigma_sm_gg_ttxg/CPPProcess.cc index dfa58b9e1d..ed8f4308db 100644 --- a/epochX/cudacpp/gg_ttg.auto/SubProcesses/P1_Sigma_sm_gg_ttxg/CPPProcess.cc +++ b/epochX/cudacpp/gg_ttg.auto/SubProcesses/P1_Sigma_sm_gg_ttxg/CPPProcess.cc @@ -689,7 +689,9 @@ namespace mg5amcCpu mgDebugInitialise(); // Denominators: spins, colors and identical particles - const int denominators = 256; // FIXME: assume process.nprocesses == 1 for the moment (eventually denominators[nprocesses]?) + constexpr int nprocesses = 1; + static_assert( nprocesses == 1, "Assume nprocesses == 1" ); // FIXME (#343): assume nprocesses == 1 + constexpr int denominators[1] = { 256 }; // Set the parameters which change event by event // Need to discuss this with Stefan @@ -739,12 +741,12 @@ namespace mg5amcCpu // https://www.uzh.ch/cmsssl/physik/dam/jcr:2e24b7b1-f4d7-4160-817e-47b13dbf1d7c/Handout_4_2016-UZH.pdf] // FIXME: assume process.nprocesses == 1 for the moment (eventually: need a loop over processes here?) #ifdef __CUDACC__ - allMEs[ievt] /= denominators; + allMEs[ievt] /= denominators[0]; // FIXME (#343): assume nprocesses == 1 #else for( int ipagV = 0; ipagV < npagV; ++ipagV ) { for( int ieppV = 0; ieppV < neppV; ieppV++ ) - allMEs[ipagV * neppV + ieppV] /= denominators; + allMEs[ipagV * neppV + ieppV] /= denominators[0]; // FIXME (#343): assume nprocesses == 1 } #endif mgDebugFinalise(); diff --git a/epochX/cudacpp/gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/CPPProcess.cc b/epochX/cudacpp/gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/CPPProcess.cc index dfa58b9e1d..ed8f4308db 100644 --- a/epochX/cudacpp/gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/CPPProcess.cc +++ b/epochX/cudacpp/gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/CPPProcess.cc @@ -689,7 +689,9 @@ namespace mg5amcCpu mgDebugInitialise(); // Denominators: spins, colors and identical particles - const int denominators = 256; // FIXME: assume process.nprocesses == 1 for the moment (eventually denominators[nprocesses]?) + constexpr int nprocesses = 1; + static_assert( nprocesses == 1, "Assume nprocesses == 1" ); // FIXME (#343): assume nprocesses == 1 + constexpr int denominators[1] = { 256 }; // Set the parameters which change event by event // Need to discuss this with Stefan @@ -739,12 +741,12 @@ namespace mg5amcCpu // https://www.uzh.ch/cmsssl/physik/dam/jcr:2e24b7b1-f4d7-4160-817e-47b13dbf1d7c/Handout_4_2016-UZH.pdf] // FIXME: assume process.nprocesses == 1 for the moment (eventually: need a loop over processes here?) #ifdef __CUDACC__ - allMEs[ievt] /= denominators; + allMEs[ievt] /= denominators[0]; // FIXME (#343): assume nprocesses == 1 #else for( int ipagV = 0; ipagV < npagV; ++ipagV ) { for( int ieppV = 0; ieppV < neppV; ieppV++ ) - allMEs[ipagV * neppV + ieppV] /= denominators; + allMEs[ipagV * neppV + ieppV] /= denominators[0]; // FIXME (#343): assume nprocesses == 1 } #endif mgDebugFinalise(); diff --git a/epochX/cudacpp/gg_ttgg.auto/CODEGEN_cudacpp_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.auto/CODEGEN_cudacpp_gg_ttgg_log.txt index 2da617d841..488810b838 100644 --- a/epochX/cudacpp/gg_ttgg.auto/CODEGEN_cudacpp_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.auto/CODEGEN_cudacpp_gg_ttgg_log.txt @@ -50,7 +50,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.006897449493408203  +DEBUG: model prefixing takes 0.00693058967590332  INFO: Restrict model sm with file models/sm/restrict_default.dat . INFO: Change particles name to pass to MG5 convention Defined multiparticle p = g u c d s u~ c~ d~ s~ @@ -85,23 +85,23 @@ INFO: Processing color information for process: g g > t t~ g g @1 INFO: Creating files in directory /data/avalassi/GPU2020/MG5aMC/3.1.1_lo_vectorization/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1052]  FileWriter for /data/avalassi/GPU2020/MG5aMC/3.1.1_lo_vectorization/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1118]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1121]  FileWriter for /data/avalassi/GPU2020/MG5aMC/3.1.1_lo_vectorization/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1140]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1426]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1427]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1426]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1427]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1426]  -DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1427]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1426]  -DEBUG: ('ZERO', 5, 1, 5, 5) [model_handling.py at line 1427]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1143]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1429]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1430]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1429]  +DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1430]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1429]  +DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1430]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1429]  +DEBUG: ('ZERO', 5, 1, 5, 5) [model_handling.py at line 1430]  DEBUG: only one Matrix-element supported?  INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2020/MG5aMC/3.1.1_lo_vectorization/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1070]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1087]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1094]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1106]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1073]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1090]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1097]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1109]  Generated helas calls for 1 subprocesses (123 diagrams) in 0.575 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 176]  ALOHA: aloha starts to compute helicity amplitudes @@ -110,7 +110,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.386 s +ALOHA: aloha creates 5 routines in 0.383 s VVV1 DEBUG: language =  [aloha_writers.py at line 2451]  VVV1 @@ -144,6 +144,6 @@ INFO: /data/avalassi/GPU2020/MG5aMC/3.1.1_lo_vectorization/CODEGEN_cudacpp_gg_tt DEBUG: Entering PLUGIN_ProcessExporter.finalize [output.py at line 185]  quit -real 0m2.131s -user 0m1.982s -sys 0m0.130s +real 0m2.139s +user 0m1.995s +sys 0m0.115s diff --git a/epochX/cudacpp/gg_ttgg.auto/SubProcesses/P1_Sigma_sm_gg_ttxgg/CPPProcess.cc b/epochX/cudacpp/gg_ttgg.auto/SubProcesses/P1_Sigma_sm_gg_ttxgg/CPPProcess.cc index 3e5c563da5..350c54635b 100644 --- a/epochX/cudacpp/gg_ttgg.auto/SubProcesses/P1_Sigma_sm_gg_ttxgg/CPPProcess.cc +++ b/epochX/cudacpp/gg_ttgg.auto/SubProcesses/P1_Sigma_sm_gg_ttxgg/CPPProcess.cc @@ -2275,7 +2275,9 @@ namespace mg5amcCpu mgDebugInitialise(); // Denominators: spins, colors and identical particles - const int denominators = 512; // FIXME: assume process.nprocesses == 1 for the moment (eventually denominators[nprocesses]?) + constexpr int nprocesses = 1; + static_assert( nprocesses == 1, "Assume nprocesses == 1" ); // FIXME (#343): assume nprocesses == 1 + constexpr int denominators[1] = { 512 }; // Set the parameters which change event by event // Need to discuss this with Stefan @@ -2325,12 +2327,12 @@ namespace mg5amcCpu // https://www.uzh.ch/cmsssl/physik/dam/jcr:2e24b7b1-f4d7-4160-817e-47b13dbf1d7c/Handout_4_2016-UZH.pdf] // FIXME: assume process.nprocesses == 1 for the moment (eventually: need a loop over processes here?) #ifdef __CUDACC__ - allMEs[ievt] /= denominators; + allMEs[ievt] /= denominators[0]; // FIXME (#343): assume nprocesses == 1 #else for( int ipagV = 0; ipagV < npagV; ++ipagV ) { for( int ieppV = 0; ieppV < neppV; ieppV++ ) - allMEs[ipagV * neppV + ieppV] /= denominators; + allMEs[ipagV * neppV + ieppV] /= denominators[0]; // FIXME (#343): assume nprocesses == 1 } #endif mgDebugFinalise(); diff --git a/epochX/cudacpp/gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/CPPProcess.cc b/epochX/cudacpp/gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/CPPProcess.cc index 3e5c563da5..350c54635b 100644 --- a/epochX/cudacpp/gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/CPPProcess.cc +++ b/epochX/cudacpp/gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/CPPProcess.cc @@ -2275,7 +2275,9 @@ namespace mg5amcCpu mgDebugInitialise(); // Denominators: spins, colors and identical particles - const int denominators = 512; // FIXME: assume process.nprocesses == 1 for the moment (eventually denominators[nprocesses]?) + constexpr int nprocesses = 1; + static_assert( nprocesses == 1, "Assume nprocesses == 1" ); // FIXME (#343): assume nprocesses == 1 + constexpr int denominators[1] = { 512 }; // Set the parameters which change event by event // Need to discuss this with Stefan @@ -2325,12 +2327,12 @@ namespace mg5amcCpu // https://www.uzh.ch/cmsssl/physik/dam/jcr:2e24b7b1-f4d7-4160-817e-47b13dbf1d7c/Handout_4_2016-UZH.pdf] // FIXME: assume process.nprocesses == 1 for the moment (eventually: need a loop over processes here?) #ifdef __CUDACC__ - allMEs[ievt] /= denominators; + allMEs[ievt] /= denominators[0]; // FIXME (#343): assume nprocesses == 1 #else for( int ipagV = 0; ipagV < npagV; ++ipagV ) { for( int ieppV = 0; ieppV < neppV; ieppV++ ) - allMEs[ipagV * neppV + ieppV] /= denominators; + allMEs[ipagV * neppV + ieppV] /= denominators[0]; // FIXME (#343): assume nprocesses == 1 } #endif mgDebugFinalise(); diff --git a/epochX/cudacpp/gg_ttggg.auto/CODEGEN_cudacpp_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.auto/CODEGEN_cudacpp_gg_ttggg_log.txt index 012b9d9551..501ed59600 100644 --- a/epochX/cudacpp/gg_ttggg.auto/CODEGEN_cudacpp_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.auto/CODEGEN_cudacpp_gg_ttggg_log.txt @@ -50,7 +50,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.006964683532714844  +DEBUG: model prefixing takes 0.006897449493408203  INFO: Restrict model sm with file models/sm/restrict_default.dat . INFO: Change particles name to pass to MG5 convention Defined multiparticle p = g u c d s u~ c~ d~ s~ @@ -65,7 +65,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 2.495 s +1 processes with 1240 diagrams generated in 2.488 s Total: 1 processes with 1240 diagrams output standalone_cudacpp CODEGEN_cudacpp_gg_ttggg Plugin PLUGIN.CUDACPP_SA_OUTPUT has marked as NOT being validated with this version. @@ -85,26 +85,26 @@ INFO: Processing color information for process: g g > t t~ g g g @1 INFO: Creating files in directory /data/avalassi/GPU2020/MG5aMC/3.1.1_lo_vectorization/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1052]  FileWriter for /data/avalassi/GPU2020/MG5aMC/3.1.1_lo_vectorization/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1118]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1121]  FileWriter for /data/avalassi/GPU2020/MG5aMC/3.1.1_lo_vectorization/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1140]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1426]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1427]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1426]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1427]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1426]  -DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1427]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1426]  -DEBUG: ('ZERO', 5, 1, 5, 5) [model_handling.py at line 1427]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1426]  -DEBUG: ('ZERO', 6, 1, 6, 6) [model_handling.py at line 1427]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1143]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1429]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1430]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1429]  +DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1430]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1429]  +DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1430]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1429]  +DEBUG: ('ZERO', 5, 1, 5, 5) [model_handling.py at line 1430]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1429]  +DEBUG: ('ZERO', 6, 1, 6, 6) [model_handling.py at line 1430]  DEBUG: only one Matrix-element supported?  INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2020/MG5aMC/3.1.1_lo_vectorization/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1070]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1087]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1094]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1106]  -Generated helas calls for 1 subprocesses (1240 diagrams) in 9.011 s +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1073]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1090]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1097]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1109]  +Generated helas calls for 1 subprocesses (1240 diagrams) in 8.934 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 176]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -112,7 +112,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.384 s +ALOHA: aloha creates 5 routines in 0.393 s VVV1 DEBUG: language =  [aloha_writers.py at line 2451]  VVV1 @@ -146,6 +146,6 @@ INFO: /data/avalassi/GPU2020/MG5aMC/3.1.1_lo_vectorization/CODEGEN_cudacpp_gg_tt DEBUG: Entering PLUGIN_ProcessExporter.finalize [output.py at line 185]  quit -real 0m17.506s -user 0m17.297s -sys 0m0.188s +real 0m17.425s +user 0m17.197s +sys 0m0.200s diff --git a/epochX/cudacpp/gg_ttggg.auto/SubProcesses/P1_Sigma_sm_gg_ttxggg/CPPProcess.cc b/epochX/cudacpp/gg_ttggg.auto/SubProcesses/P1_Sigma_sm_gg_ttxggg/CPPProcess.cc index 2e1ebb692e..ea2b3b341f 100644 --- a/epochX/cudacpp/gg_ttggg.auto/SubProcesses/P1_Sigma_sm_gg_ttxggg/CPPProcess.cc +++ b/epochX/cudacpp/gg_ttggg.auto/SubProcesses/P1_Sigma_sm_gg_ttxggg/CPPProcess.cc @@ -26582,7 +26582,9 @@ namespace mg5amcCpu mgDebugInitialise(); // Denominators: spins, colors and identical particles - const int denominators = 1536; // FIXME: assume process.nprocesses == 1 for the moment (eventually denominators[nprocesses]?) + constexpr int nprocesses = 1; + static_assert( nprocesses == 1, "Assume nprocesses == 1" ); // FIXME (#343): assume nprocesses == 1 + constexpr int denominators[1] = { 1536 }; // Set the parameters which change event by event // Need to discuss this with Stefan @@ -26632,12 +26634,12 @@ namespace mg5amcCpu // https://www.uzh.ch/cmsssl/physik/dam/jcr:2e24b7b1-f4d7-4160-817e-47b13dbf1d7c/Handout_4_2016-UZH.pdf] // FIXME: assume process.nprocesses == 1 for the moment (eventually: need a loop over processes here?) #ifdef __CUDACC__ - allMEs[ievt] /= denominators; + allMEs[ievt] /= denominators[0]; // FIXME (#343): assume nprocesses == 1 #else for( int ipagV = 0; ipagV < npagV; ++ipagV ) { for( int ieppV = 0; ieppV < neppV; ieppV++ ) - allMEs[ipagV * neppV + ieppV] /= denominators; + allMEs[ipagV * neppV + ieppV] /= denominators[0]; // FIXME (#343): assume nprocesses == 1 } #endif mgDebugFinalise(); diff --git a/epochX/cudacpp/gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/CPPProcess.cc b/epochX/cudacpp/gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/CPPProcess.cc index 2e1ebb692e..ea2b3b341f 100644 --- a/epochX/cudacpp/gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/CPPProcess.cc +++ b/epochX/cudacpp/gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/CPPProcess.cc @@ -26582,7 +26582,9 @@ namespace mg5amcCpu mgDebugInitialise(); // Denominators: spins, colors and identical particles - const int denominators = 1536; // FIXME: assume process.nprocesses == 1 for the moment (eventually denominators[nprocesses]?) + constexpr int nprocesses = 1; + static_assert( nprocesses == 1, "Assume nprocesses == 1" ); // FIXME (#343): assume nprocesses == 1 + constexpr int denominators[1] = { 1536 }; // Set the parameters which change event by event // Need to discuss this with Stefan @@ -26632,12 +26634,12 @@ namespace mg5amcCpu // https://www.uzh.ch/cmsssl/physik/dam/jcr:2e24b7b1-f4d7-4160-817e-47b13dbf1d7c/Handout_4_2016-UZH.pdf] // FIXME: assume process.nprocesses == 1 for the moment (eventually: need a loop over processes here?) #ifdef __CUDACC__ - allMEs[ievt] /= denominators; + allMEs[ievt] /= denominators[0]; // FIXME (#343): assume nprocesses == 1 #else for( int ipagV = 0; ipagV < npagV; ++ipagV ) { for( int ieppV = 0; ieppV < neppV; ieppV++ ) - allMEs[ipagV * neppV + ieppV] /= denominators; + allMEs[ipagV * neppV + ieppV] /= denominators[0]; // FIXME (#343): assume nprocesses == 1 } #endif mgDebugFinalise(); From 310b82c32c91dd81ad29a16b6db4c74a66917877 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Tue, 8 Mar 2022 15:56:14 +0100 Subject: [PATCH 06/15] [pptt] bug fix for fbridge.inc in ggtt makefile: use "\cp" instead of "cp -i" (do not prompt if the file already exists... not sure why I have not seen this before) --- epochX/cudacpp/gg_tt/SubProcesses/Makefile | 2 +- .../log_eemumu_manu_d_inl0_hrd0.txt | 196 +----------------- 2 files changed, 4 insertions(+), 194 deletions(-) diff --git a/epochX/cudacpp/gg_tt/SubProcesses/Makefile b/epochX/cudacpp/gg_tt/SubProcesses/Makefile index 31ec657ecb..f52e503781 100644 --- a/epochX/cudacpp/gg_tt/SubProcesses/Makefile +++ b/epochX/cudacpp/gg_tt/SubProcesses/Makefile @@ -405,7 +405,7 @@ endif # Target (and build rules): Fortran include files $(INCDIR)/%.inc : ../%.inc @if [ ! -d $(INCDIR) ]; then echo "mkdir -p $(INCDIR)"; mkdir -p $(INCDIR); fi - cp -i $< $@ + \cp $< $@ #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/tput/logs_eemumu_manu/log_eemumu_manu_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_manu/log_eemumu_manu_d_inl0_hrd0.txt index 03e0553ebf..41c8f107cf 100644 --- a/epochX/cudacpp/tput/logs_eemumu_manu/log_eemumu_manu_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_manu/log_eemumu_manu_d_inl0_hrd0.txt @@ -7,6 +7,7 @@ HELINL=0 HRDCOD=0 RNDGEN=hasCurand Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +cp -i ../fbridge.inc ../../include/fbridge.inc make USEBUILDDIR=1 AVX=none OMPFLAGS= @@ -17,7 +18,7 @@ HRDCOD=0 RNDGEN=hasCurand Building in BUILDDIR=build.none_d_inl0_hrd0 for tag=none_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum' -make[1]: Nothing to be done for `all.none_d_inl0_hrd0_hasCurand'. +cp -i ../fbridge.inc ../../include/fbridge.inc make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum' make USEBUILDDIR=1 AVX=sse4 @@ -29,195 +30,4 @@ HRDCOD=0 RNDGEN=hasCurand Building in BUILDDIR=build.sse4_d_inl0_hrd0 for tag=sse4_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum' -make[1]: Nothing to be done for `all.sse4_d_inl0_hrd0_hasCurand'. -make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum' - -make USEBUILDDIR=1 AVX=avx2 -OMPFLAGS= -AVX=avx2 -FPTYPE=d -HELINL=0 -HRDCOD=0 -RNDGEN=hasCurand -Building in BUILDDIR=build.avx2_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) -make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum' -make[1]: Nothing to be done for `all.avx2_d_inl0_hrd0_hasCurand'. -make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum' - -make USEBUILDDIR=1 AVX=512y -OMPFLAGS= -AVX=512y -FPTYPE=d -HELINL=0 -HRDCOD=0 -RNDGEN=hasCurand -Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) -make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum' -make[1]: Nothing to be done for `all.512y_d_inl0_hrd0_hasCurand'. -make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum' - -make USEBUILDDIR=1 AVX=512z -OMPFLAGS= -AVX=512z -FPTYPE=d -HELINL=0 -HRDCOD=0 -RNDGEN=hasCurand -Building in BUILDDIR=build.512z_d_inl0_hrd0 for tag=512z_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) -make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum' -make[1]: Nothing to be done for `all.512z_d_inl0_hrd0_hasCurand'. -make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum' - -DATE: 2022-03-02_20:42:21 - -On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -========================================================================= -runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 11.6.112 (gcc 10.2.0)] [inlineHel=0] [hardcodeCIPC=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.650391e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.543701e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.328033e+09 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.513662 sec - 413,968,051 cycles:u # 0.931 GHz - 751,994,111 instructions:u # 1.82 insn per cycle - 0.732801350 seconds time elapsed -==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 1.282804e-02 -Avg ME (F77/CUDA) = 1.2828039868165201E-002 -Relative difference = 1.0277080522138477e-08 -OK (relative difference <= 2E-4) -========================================================================= -runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/build.none_d_inl0_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 10.2.0] [inlineHel=0] [hardcodeCIPC=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.082769e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.634916e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.634916e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.270509 sec - 16,617,081,523 cycles:u # 2.648 GHz - 40,586,917,676 instructions:u # 2.44 insn per cycle - 6.318167103 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 280) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/build.none_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165201E-002 -Relative difference = 1.0277080522138477e-08 -OK (relative difference <= 2E-4) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 10.2.0] [inlineHel=0] [hardcodeCIPC=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.546202e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.098641e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.098641e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.517435 sec - 11,951,541,790 cycles:u # 2.641 GHz - 26,063,732,111 instructions:u # 2.18 insn per cycle - 4.637719681 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1267) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/build.sse4_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/build.sse4_d_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165201E-002 -Relative difference = 1.0277080522138477e-08 -OK (relative difference <= 2E-4) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 10.2.0] [inlineHel=0] [hardcodeCIPC=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.012668e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.420060e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.420060e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.578034 sec - 8,982,829,582 cycles:u # 2.505 GHz - 15,500,272,859 instructions:u # 1.73 insn per cycle - 3.717096111 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1044) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/build.avx2_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/build.avx2_d_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165201E-002 -Relative difference = 1.0277080522138477e-08 -OK (relative difference <= 2E-4) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/build.512y_d_inl0_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 10.2.0] [inlineHel=0] [hardcodeCIPC=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.073252e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.890733e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.890733e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.483272 sec - 8,768,552,952 cycles:u # 2.512 GHz - 15,380,735,835 instructions:u # 1.75 insn per cycle - 3.550889508 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1018) (512y: 1) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/build.512y_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/build.512y_d_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165201E-002 -Relative difference = 1.0277080522138477e-08 -OK (relative difference <= 2E-4) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/build.512z_d_inl0_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 10.2.0] [inlineHel=0] [hardcodeCIPC=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.918396e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.772420e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.772420e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.730114 sec - 8,413,652,592 cycles:u # 2.251 GHz - 12,463,041,181 instructions:u # 1.48 insn per cycle - 3.822048507 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 238) (512y: 2) (512z: 787) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/build.512z_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/build.512z_d_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165201E-002 -Relative difference = 1.0277080522138477e-08 -OK (relative difference <= 2E-4) -========================================================================= - -TEST COMPLETED +cp -i ../fbridge.inc ../../include/fbridge.inc From 2a8b26ff96ddef7d9b76b5bbb40fda8616166229 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Tue, 8 Mar 2022 15:59:04 +0100 Subject: [PATCH 07/15] [pptt] bug fix for "\cp fbridge.inc" in makefile: fix codegen, regenerate ggtt auto, copy manually elsewhere --- .../madgraph/iolibs/template_files/gpu/Makefile | 2 +- epochX/cudacpp/ee_mumu.auto/SubProcesses/Makefile | 2 +- epochX/cudacpp/ee_mumu/SubProcesses/Makefile | 2 +- .../cudacpp/gg_tt.auto/CODEGEN_cudacpp_gg_tt_log.txt | 10 +++++----- epochX/cudacpp/gg_tt.auto/SubProcesses/Makefile | 2 +- epochX/cudacpp/gg_ttg.auto/SubProcesses/Makefile | 2 +- epochX/cudacpp/gg_ttg/SubProcesses/Makefile | 2 +- epochX/cudacpp/gg_ttgg.auto/SubProcesses/Makefile | 2 +- epochX/cudacpp/gg_ttgg/SubProcesses/Makefile | 2 +- epochX/cudacpp/gg_ttggg.auto/SubProcesses/Makefile | 2 +- epochX/cudacpp/gg_ttggg/SubProcesses/Makefile | 2 +- 11 files changed, 15 insertions(+), 15 deletions(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/Makefile b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/Makefile index 916667d0ef..eb2c75841d 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/Makefile +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/Makefile @@ -405,7 +405,7 @@ endif # Target (and build rules): Fortran include files $(INCDIR)/%%.inc : ../%%.inc @if [ ! -d $(INCDIR) ]; then echo "mkdir -p $(INCDIR)"; mkdir -p $(INCDIR); fi - cp -i $< $@ + \cp $< $@ #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/ee_mumu.auto/SubProcesses/Makefile b/epochX/cudacpp/ee_mumu.auto/SubProcesses/Makefile index 31ec657ecb..f52e503781 100644 --- a/epochX/cudacpp/ee_mumu.auto/SubProcesses/Makefile +++ b/epochX/cudacpp/ee_mumu.auto/SubProcesses/Makefile @@ -405,7 +405,7 @@ endif # Target (and build rules): Fortran include files $(INCDIR)/%.inc : ../%.inc @if [ ! -d $(INCDIR) ]; then echo "mkdir -p $(INCDIR)"; mkdir -p $(INCDIR); fi - cp -i $< $@ + \cp $< $@ #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/ee_mumu/SubProcesses/Makefile b/epochX/cudacpp/ee_mumu/SubProcesses/Makefile index 31ec657ecb..f52e503781 100644 --- a/epochX/cudacpp/ee_mumu/SubProcesses/Makefile +++ b/epochX/cudacpp/ee_mumu/SubProcesses/Makefile @@ -405,7 +405,7 @@ endif # Target (and build rules): Fortran include files $(INCDIR)/%.inc : ../%.inc @if [ ! -d $(INCDIR) ]; then echo "mkdir -p $(INCDIR)"; mkdir -p $(INCDIR); fi - cp -i $< $@ + \cp $< $@ #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gg_tt.auto/CODEGEN_cudacpp_gg_tt_log.txt b/epochX/cudacpp/gg_tt.auto/CODEGEN_cudacpp_gg_tt_log.txt index ff41a3df9c..8914bb78d8 100644 --- a/epochX/cudacpp/gg_tt.auto/CODEGEN_cudacpp_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.auto/CODEGEN_cudacpp_gg_tt_log.txt @@ -50,7 +50,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0068683624267578125  +DEBUG: model prefixing takes 0.006888151168823242  INFO: Restrict model sm with file models/sm/restrict_default.dat . INFO: Change particles name to pass to MG5 convention Defined multiparticle p = g u c d s u~ c~ d~ s~ @@ -103,7 +103,7 @@ Generated helas calls for 1 subprocesses (3 diagrams) in 0.008 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.162 s +ALOHA: aloha creates 2 routines in 0.163 s VVV1 DEBUG: language =  [aloha_writers.py at line 2451]  FFV1 @@ -121,6 +121,6 @@ INFO: /data/avalassi/GPU2020/MG5aMC/3.1.1_lo_vectorization/CODEGEN_cudacpp_gg_tt DEBUG: Entering PLUGIN_ProcessExporter.finalize [output.py at line 185]  quit -real 0m0.918s -user 0m0.779s -sys 0m0.114s +real 0m1.019s +user 0m0.763s +sys 0m0.148s diff --git a/epochX/cudacpp/gg_tt.auto/SubProcesses/Makefile b/epochX/cudacpp/gg_tt.auto/SubProcesses/Makefile index 31ec657ecb..f52e503781 100644 --- a/epochX/cudacpp/gg_tt.auto/SubProcesses/Makefile +++ b/epochX/cudacpp/gg_tt.auto/SubProcesses/Makefile @@ -405,7 +405,7 @@ endif # Target (and build rules): Fortran include files $(INCDIR)/%.inc : ../%.inc @if [ ! -d $(INCDIR) ]; then echo "mkdir -p $(INCDIR)"; mkdir -p $(INCDIR); fi - cp -i $< $@ + \cp $< $@ #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gg_ttg.auto/SubProcesses/Makefile b/epochX/cudacpp/gg_ttg.auto/SubProcesses/Makefile index 31ec657ecb..f52e503781 100644 --- a/epochX/cudacpp/gg_ttg.auto/SubProcesses/Makefile +++ b/epochX/cudacpp/gg_ttg.auto/SubProcesses/Makefile @@ -405,7 +405,7 @@ endif # Target (and build rules): Fortran include files $(INCDIR)/%.inc : ../%.inc @if [ ! -d $(INCDIR) ]; then echo "mkdir -p $(INCDIR)"; mkdir -p $(INCDIR); fi - cp -i $< $@ + \cp $< $@ #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gg_ttg/SubProcesses/Makefile b/epochX/cudacpp/gg_ttg/SubProcesses/Makefile index 31ec657ecb..f52e503781 100644 --- a/epochX/cudacpp/gg_ttg/SubProcesses/Makefile +++ b/epochX/cudacpp/gg_ttg/SubProcesses/Makefile @@ -405,7 +405,7 @@ endif # Target (and build rules): Fortran include files $(INCDIR)/%.inc : ../%.inc @if [ ! -d $(INCDIR) ]; then echo "mkdir -p $(INCDIR)"; mkdir -p $(INCDIR); fi - cp -i $< $@ + \cp $< $@ #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gg_ttgg.auto/SubProcesses/Makefile b/epochX/cudacpp/gg_ttgg.auto/SubProcesses/Makefile index 31ec657ecb..f52e503781 100644 --- a/epochX/cudacpp/gg_ttgg.auto/SubProcesses/Makefile +++ b/epochX/cudacpp/gg_ttgg.auto/SubProcesses/Makefile @@ -405,7 +405,7 @@ endif # Target (and build rules): Fortran include files $(INCDIR)/%.inc : ../%.inc @if [ ! -d $(INCDIR) ]; then echo "mkdir -p $(INCDIR)"; mkdir -p $(INCDIR); fi - cp -i $< $@ + \cp $< $@ #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gg_ttgg/SubProcesses/Makefile b/epochX/cudacpp/gg_ttgg/SubProcesses/Makefile index 31ec657ecb..f52e503781 100644 --- a/epochX/cudacpp/gg_ttgg/SubProcesses/Makefile +++ b/epochX/cudacpp/gg_ttgg/SubProcesses/Makefile @@ -405,7 +405,7 @@ endif # Target (and build rules): Fortran include files $(INCDIR)/%.inc : ../%.inc @if [ ! -d $(INCDIR) ]; then echo "mkdir -p $(INCDIR)"; mkdir -p $(INCDIR); fi - cp -i $< $@ + \cp $< $@ #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gg_ttggg.auto/SubProcesses/Makefile b/epochX/cudacpp/gg_ttggg.auto/SubProcesses/Makefile index 31ec657ecb..f52e503781 100644 --- a/epochX/cudacpp/gg_ttggg.auto/SubProcesses/Makefile +++ b/epochX/cudacpp/gg_ttggg.auto/SubProcesses/Makefile @@ -405,7 +405,7 @@ endif # Target (and build rules): Fortran include files $(INCDIR)/%.inc : ../%.inc @if [ ! -d $(INCDIR) ]; then echo "mkdir -p $(INCDIR)"; mkdir -p $(INCDIR); fi - cp -i $< $@ + \cp $< $@ #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gg_ttggg/SubProcesses/Makefile b/epochX/cudacpp/gg_ttggg/SubProcesses/Makefile index 31ec657ecb..f52e503781 100644 --- a/epochX/cudacpp/gg_ttggg/SubProcesses/Makefile +++ b/epochX/cudacpp/gg_ttggg/SubProcesses/Makefile @@ -405,7 +405,7 @@ endif # Target (and build rules): Fortran include files $(INCDIR)/%.inc : ../%.inc @if [ ! -d $(INCDIR) ]; then echo "mkdir -p $(INCDIR)"; mkdir -p $(INCDIR); fi - cp -i $< $@ + \cp $< $@ #------------------------------------------------------------------------------- From 538c3f9cef1b056048cf60eab297c6f2531cf098 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Tue, 8 Mar 2022 16:05:07 +0100 Subject: [PATCH 08/15] [pptt] test that all five processes build and test ok --- .../log_eemumu_manu_d_inl0_hrd0.txt | 194 +++++++++++++++++- .../log_ggtt_manu_d_inl0_hrd0.txt | 88 ++++---- .../log_ggttg_manu_d_inl0_hrd0.txt | 102 +++++---- .../log_ggttgg_manu_d_inl0_hrd0.txt | 102 +++++---- .../log_ggttggg_manu_d_inl0_hrd0.txt | 102 +++++---- 5 files changed, 384 insertions(+), 204 deletions(-) diff --git a/epochX/cudacpp/tput/logs_eemumu_manu/log_eemumu_manu_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_manu/log_eemumu_manu_d_inl0_hrd0.txt index 41c8f107cf..4234341cba 100644 --- a/epochX/cudacpp/tput/logs_eemumu_manu/log_eemumu_manu_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_manu/log_eemumu_manu_d_inl0_hrd0.txt @@ -7,7 +7,6 @@ HELINL=0 HRDCOD=0 RNDGEN=hasCurand Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) -cp -i ../fbridge.inc ../../include/fbridge.inc make USEBUILDDIR=1 AVX=none OMPFLAGS= @@ -18,7 +17,7 @@ HRDCOD=0 RNDGEN=hasCurand Building in BUILDDIR=build.none_d_inl0_hrd0 for tag=none_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum' -cp -i ../fbridge.inc ../../include/fbridge.inc +make[1]: Nothing to be done for `all.none_d_inl0_hrd0_hasCurand'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum' make USEBUILDDIR=1 AVX=sse4 @@ -30,4 +29,193 @@ HRDCOD=0 RNDGEN=hasCurand Building in BUILDDIR=build.sse4_d_inl0_hrd0 for tag=sse4_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum' -cp -i ../fbridge.inc ../../include/fbridge.inc +make[1]: Nothing to be done for `all.sse4_d_inl0_hrd0_hasCurand'. +make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum' + +make USEBUILDDIR=1 AVX=avx2 +OMPFLAGS= +AVX=avx2 +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.avx2_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum' +make[1]: Nothing to be done for `all.avx2_d_inl0_hrd0_hasCurand'. +make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum' + +make USEBUILDDIR=1 AVX=512y +OMPFLAGS= +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum' +make[1]: Nothing to be done for `all.512y_d_inl0_hrd0_hasCurand'. +make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum' + +make USEBUILDDIR=1 AVX=512z +OMPFLAGS= +AVX=512z +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512z_d_inl0_hrd0 for tag=512z_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum' +make[1]: Nothing to be done for `all.512z_d_inl0_hrd0_hasCurand'. +make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum' + +DATE: 2022-03-08_16:00:03 + +On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 12 OMP= +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 11.6.112 (gcc 10.2.0)] [inlineHel=0] [hardcodeCIPC=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 6.605186e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.556574e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.338519e+09 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 0.792568 sec + 416,419,207 cycles:u # 0.422 GHz + 748,904,913 instructions:u # 1.80 insn per cycle + 1.113616127 seconds time elapsed +==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 1.282804e-02 +Avg ME (F77/CUDA) = 1.2828039868165201E-002 +Relative difference = 1.0277080522138477e-08 +OK (relative difference <= 2E-4) +========================================================================= +runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/build.none_d_inl0_hrd0/check.exe -p 2048 256 12 OMP= +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 10.2.0] [inlineHel=0] [hardcodeCIPC=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 1.097316e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.660352e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.660352e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 6.176695 sec + 16,419,354,433 cycles:u # 2.657 GHz + 40,586,920,269 instructions:u # 2.47 insn per cycle + 6.184426716 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 280) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/build.none_d_inl0_hrd0/runTest.exe +[ PASSED ] 6 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165201E-002 +Relative difference = 1.0277080522138477e-08 +OK (relative difference <= 2E-4) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 12 OMP= +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 10.2.0] [inlineHel=0] [hardcodeCIPC=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.551454e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.082720e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.082720e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.490753 sec + 11,910,679,858 cycles:u # 2.650 GHz + 26,063,734,355 instructions:u # 2.19 insn per cycle + 4.498909611 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1267) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/build.sse4_d_inl0_hrd0/runTest.exe +[ PASSED ] 6 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/build.sse4_d_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165201E-002 +Relative difference = 1.0277080522138477e-08 +OK (relative difference <= 2E-4) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 12 OMP= +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 10.2.0] [inlineHel=0] [hardcodeCIPC=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.028826e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.423420e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.423420e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.539552 sec + 8,909,570,703 cycles:u # 2.514 GHz + 15,500,274,472 instructions:u # 1.74 insn per cycle + 3.548019492 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1044) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/build.avx2_d_inl0_hrd0/runTest.exe +[ PASSED ] 6 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/build.avx2_d_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165201E-002 +Relative difference = 1.0277080522138477e-08 +OK (relative difference <= 2E-4) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/build.512y_d_inl0_hrd0/check.exe -p 2048 256 12 OMP= +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 10.2.0] [inlineHel=0] [hardcodeCIPC=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.092807e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.890204e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.890204e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.443010 sec + 8,691,751,181 cycles:u # 2.521 GHz + 15,380,738,092 instructions:u # 1.77 insn per cycle + 3.450832357 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1018) (512y: 1) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/build.512y_d_inl0_hrd0/runTest.exe +[ PASSED ] 6 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/build.512y_d_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165201E-002 +Relative difference = 1.0277080522138477e-08 +OK (relative difference <= 2E-4) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/build.512z_d_inl0_hrd0/check.exe -p 2048 256 12 OMP= +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 10.2.0] [inlineHel=0] [hardcodeCIPC=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.005591e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.305117e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.305117e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.575752 sec + 8,172,745,705 cycles:u # 2.282 GHz + 12,463,043,474 instructions:u # 1.52 insn per cycle + 3.584094401 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 238) (512y: 2) (512z: 787) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/build.512z_d_inl0_hrd0/runTest.exe +[ PASSED ] 6 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/build.512z_d_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165201E-002 +Relative difference = 1.0277080522138477e-08 +OK (relative difference <= 2E-4) +========================================================================= diff --git a/epochX/cudacpp/tput/logs_ggtt_manu/log_ggtt_manu_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_manu/log_ggtt_manu_d_inl0_hrd0.txt index eda19986b5..09099ff550 100644 --- a/epochX/cudacpp/tput/logs_ggtt_manu/log_ggtt_manu_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_manu/log_ggtt_manu_d_inl0_hrd0.txt @@ -68,7 +68,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all.512z_d_inl0_hrd0_hasCurand'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx' -DATE: 2022-03-02_20:44:01 +DATE: 2022-03-08_16:00:42 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -76,14 +76,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt/SubProcesses/P1 Process = SIGMA_SM_GG_TTX_CUDA [nvcc 11.6.112 (gcc 10.2.0)] [inlineHel=0] [hardcodeCIPC=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.783311e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.286976e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.423520e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.648207e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.276729e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.418412e+08 ) sec^-1 MeanMatrixElemValue = ( 2.085623e+00 +- 4.835084e-03 ) GeV^0 -TOTAL : 0.192238 sec - 121,032,856 cycles:u # 0.477 GHz - 148,693,174 instructions:u # 1.23 insn per cycle - 0.470459066 seconds time elapsed +TOTAL : 0.559610 sec + 123,069,441 cycles:u # 0.154 GHz + 149,218,102 instructions:u # 1.21 insn per cycle + 0.858517104 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 170 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- @@ -99,14 +99,14 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 10.2.0] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.880869e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.998415e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.998415e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.883856e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.000505e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.000505e+05 ) sec^-1 MeanMatrixElemValue = ( 2.085623e+00 +- 4.835084e-03 ) GeV^0 -TOTAL : 2.868103 sec - 7,569,490,302 cycles:u # 2.632 GHz - 22,043,027,778 instructions:u # 2.91 insn per cycle - 2.930334332 seconds time elapsed +TOTAL : 2.851161 sec + 7,550,831,640 cycles:u # 2.644 GHz + 22,043,029,064 instructions:u # 2.92 insn per cycle + 2.859199918 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 449) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/build.none_d_inl0_hrd0/runTest.exe @@ -124,14 +124,14 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 10.2.0] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.875237e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.171967e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.171967e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.899013e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.201531e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.201531e+05 ) sec^-1 MeanMatrixElemValue = ( 2.085623e+00 +- 4.835084e-03 ) GeV^0 -TOTAL : 1.904920 sec - 4,986,509,471 cycles:u # 2.606 GHz - 12,986,329,320 instructions:u # 2.60 insn per cycle - 1.967185501 seconds time elapsed +TOTAL : 1.877591 sec + 4,941,583,744 cycles:u # 2.627 GHz + 12,986,329,014 instructions:u # 2.63 insn per cycle + 1.885182752 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2360) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe @@ -149,14 +149,14 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 10.2.0] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.784499e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.627868e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.627868e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.783095e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.613009e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.613009e+05 ) sec^-1 MeanMatrixElemValue = ( 2.085623e+00 +- 4.835084e-03 ) GeV^0 -TOTAL : 1.176503 sec - 2,668,866,456 cycles:u # 2.253 GHz - 5,530,142,603 instructions:u # 2.07 insn per cycle - 1.233542634 seconds time elapsed +TOTAL : 1.164587 sec + 2,665,831,368 cycles:u # 2.279 GHz + 5,530,144,009 instructions:u # 2.07 insn per cycle + 1.172304702 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2177) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe @@ -174,14 +174,14 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 10.2.0] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.158898e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.147894e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.147894e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.099669e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.044804e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.044804e+05 ) sec^-1 MeanMatrixElemValue = ( 2.085623e+00 +- 4.835084e-03 ) GeV^0 -TOTAL : 1.096895 sec - 2,489,998,374 cycles:u # 2.253 GHz - 5,353,978,508 instructions:u # 2.15 insn per cycle - 1.210113960 seconds time elapsed +TOTAL : 1.096902 sec + 2,510,271,222 cycles:u # 2.279 GHz + 5,353,978,964 instructions:u # 2.13 insn per cycle + 1.104815066 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2013) (512y: 115) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/build.512y_d_inl0_hrd0/runTest.exe @@ -199,14 +199,14 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 10.2.0] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.420075e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.836944e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.836944e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.431631e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.838221e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.838221e+05 ) sec^-1 MeanMatrixElemValue = ( 2.085623e+00 +- 4.835084e-03 ) GeV^0 -TOTAL : 1.614425 sec - 2,714,080,080 cycles:u # 1.673 GHz - 3,546,569,379 instructions:u # 1.31 insn per cycle - 1.660243395 seconds time elapsed +TOTAL : 1.599298 sec + 2,696,767,373 cycles:u # 1.683 GHz + 3,546,569,526 instructions:u # 1.32 insn per cycle + 1.606903008 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1026) (512y: 83) (512z: 1568) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/build.512z_d_inl0_hrd0/runTest.exe @@ -219,5 +219,3 @@ Avg ME (F77/C++) = 2.0288066358987087 Relative difference = 1.7946571129689766e-07 OK (relative difference <= 2E-4) ========================================================================= - -TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttg_manu/log_ggttg_manu_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_manu/log_ggttg_manu_d_inl0_hrd0.txt index 3dc1947f2f..e9000a7f8b 100644 --- a/epochX/cudacpp/tput/logs_ggttg_manu/log_ggttg_manu_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_manu/log_ggttg_manu_d_inl0_hrd0.txt @@ -68,7 +68,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all.512z_d_inl0_hrd0_hasCurand'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg' -DATE: 2022-03-02_22:15:06 +DATE: 2022-03-08_16:01:07 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -76,14 +76,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg/SubProcesses/P Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 11.6.112 (gcc 10.2.0)] [inlineHel=0] [hardcodeCIPC=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.884329e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.122004e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.142852e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.030196e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.108563e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.126534e+07 ) sec^-1 MeanMatrixElemValue = ( 4.061783e+02 +- 3.760219e+02 ) GeV^-2 -TOTAL : 0.202382 sec - 94,563,161 cycles:u # 0.493 GHz - 89,904,458 instructions:u # 0.95 insn per cycle - 0.252128833 seconds time elapsed +TOTAL : 0.493765 sec + 93,095,079 cycles:u # 0.131 GHz + 89,190,010 instructions:u # 0.96 insn per cycle + 0.770026333 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... @@ -91,14 +91,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg/SubProcesses/P Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 11.6.112 (gcc 10.2.0)] [inlineHel=0] [hardcodeCIPC=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.150746e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.426221e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.442531e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.148487e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.426159e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.442753e+07 ) sec^-1 MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.276354 sec - 182,799,600 cycles:u # 0.528 GHz - 285,563,716 instructions:u # 1.56 insn per cycle - 0.352869456 seconds time elapsed +TOTAL : 0.641802 sec + 188,099,971 cycles:u # 0.212 GHz + 279,774,182 instructions:u # 1.49 insn per cycle + 0.947851869 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -112,14 +112,14 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 10.2.0] [inlineHel=0] [h Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.444930e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.474432e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.474432e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.447886e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.477059e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.477059e+04 ) sec^-1 MeanMatrixElemValue = ( 4.061783e+02 +- 3.760219e+02 ) GeV^-2 -TOTAL : 0.681342 sec - 1,804,067,404 cycles:u # 2.636 GHz - 5,728,310,505 instructions:u # 3.18 insn per cycle - 0.688457745 seconds time elapsed +TOTAL : 0.678856 sec + 1,804,013,324 cycles:u # 2.642 GHz + 5,728,320,127 instructions:u # 3.18 insn per cycle + 0.685812984 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 713) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/build.none_d_inl0_hrd0/runTest.exe @@ -137,14 +137,14 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 10.2.0] [inlineHel=0] [h Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.403870e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.498913e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.498913e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.401457e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.495632e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.495632e+04 ) sec^-1 MeanMatrixElemValue = ( 4.061783e+02 +- 3.760219e+02 ) GeV^-2 -TOTAL : 0.381299 sec - 1,008,612,859 cycles:u # 2.616 GHz - 2,995,457,153 instructions:u # 2.97 insn per cycle - 0.388252431 seconds time elapsed +TOTAL : 0.382275 sec + 1,008,619,342 cycles:u # 2.614 GHz + 2,995,467,082 instructions:u # 2.97 insn per cycle + 0.389251087 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 4237) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/build.sse4_d_inl0_hrd0/runTest.exe @@ -162,14 +162,14 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 10.2.0] [inlineHel=0] [h Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.477066e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.836452e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.836452e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.489284e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.847212e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.847212e+04 ) sec^-1 MeanMatrixElemValue = ( 4.061783e+02 +- 3.760219e+02 ) GeV^-2 -TOTAL : 0.202323 sec - 456,303,954 cycles:u # 2.209 GHz - 1,064,622,815 instructions:u # 2.33 insn per cycle - 0.209416914 seconds time elapsed +TOTAL : 0.202643 sec + 456,180,716 cycles:u # 2.202 GHz + 1,064,632,946 instructions:u # 2.33 insn per cycle + 0.209721703 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3582) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/build.avx2_d_inl0_hrd0/runTest.exe @@ -187,14 +187,14 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 10.2.0] [inlineHel=0] [h Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.382412e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.813856e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.813856e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.404847e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.837296e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.837296e+04 ) sec^-1 MeanMatrixElemValue = ( 4.061783e+02 +- 3.760219e+02 ) GeV^-2 -TOTAL : 0.184437 sec - 414,437,290 cycles:u # 2.192 GHz - 1,006,126,068 instructions:u # 2.43 insn per cycle - 0.191759500 seconds time elapsed +TOTAL : 0.183703 sec + 413,371,540 cycles:u # 2.197 GHz + 1,006,135,865 instructions:u # 2.43 insn per cycle + 0.190806857 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3421) (512y: 70) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/build.512y_d_inl0_hrd0/runTest.exe @@ -212,14 +212,14 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 10.2.0] [inlineHel=0] [h Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.968373e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.203025e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.203025e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.992766e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.229034e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.229034e+04 ) sec^-1 MeanMatrixElemValue = ( 4.061783e+02 +- 3.760219e+02 ) GeV^-2 -TOTAL : 0.245146 sec - 392,781,640 cycles:u # 1.571 GHz - 558,073,754 instructions:u # 1.42 insn per cycle - 0.252616903 seconds time elapsed +TOTAL : 0.244230 sec + 391,700,757 cycles:u # 1.575 GHz + 558,083,488 instructions:u # 1.42 insn per cycle + 0.251220703 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1240) (512y: 69) (512z: 2828) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/build.512z_d_inl0_hrd0/runTest.exe @@ -232,5 +232,3 @@ Avg ME (F77/C++) = 1.4131216787944412 Relative difference = 2.2730207216187906e-07 OK (relative difference <= 2E-4) ========================================================================= - -TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_manu/log_ggttgg_manu_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_manu/log_ggttgg_manu_d_inl0_hrd0.txt index 1d6d17ffec..c327c47b48 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_manu/log_ggttgg_manu_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_manu/log_ggttgg_manu_d_inl0_hrd0.txt @@ -68,7 +68,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all.512z_d_inl0_hrd0_hasCurand'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg' -DATE: 2022-03-02_20:45:04 +DATE: 2022-03-08_16:01:27 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -76,14 +76,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg/SubProcesses/ Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 11.6.112 (gcc 10.2.0)] [inlineHel=0] [hardcodeCIPC=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.448381e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.492028e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.495404e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.423835e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.471160e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.474282e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.203547 sec - 160,556,197 cycles:u # 0.643 GHz - 234,164,869 instructions:u # 1.46 insn per cycle - 0.510651371 seconds time elapsed +TOTAL : 0.570926 sec + 162,234,921 cycles:u # 0.202 GHz + 230,470,285 instructions:u # 1.42 insn per cycle + 0.866395948 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... @@ -91,14 +91,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg/SubProcesses/ Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 11.6.112 (gcc 10.2.0)] [inlineHel=0] [hardcodeCIPC=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.141580e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.192307e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.194436e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.138555e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.191833e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.194036e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 2.251792 sec - 2,018,066,532 cycles:u # 0.868 GHz - 4,060,131,840 instructions:u # 2.01 insn per cycle - 2.330274883 seconds time elapsed +TOTAL : 2.616002 sec + 2,073,114,069 cycles:u # 0.719 GHz + 4,148,123,947 instructions:u # 2.00 insn per cycle + 2.942212048 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -112,14 +112,14 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 10.2.0] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.795381e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.797351e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.797351e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.798730e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.800694e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.800694e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 9.146190 sec - 24,408,471,737 cycles:u # 2.667 GHz - 75,802,803,874 instructions:u # 3.11 insn per cycle - 9.189273129 seconds time elapsed +TOTAL : 9.127687 sec + 24,397,483,761 cycles:u # 2.672 GHz + 75,802,812,822 instructions:u # 3.11 insn per cycle + 9.135133538 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1234) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/build.none_d_inl0_hrd0/runTest.exe @@ -137,14 +137,14 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 10.2.0] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.322167e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.328838e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.328838e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.336404e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.343122e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.343122e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.946382 sec - 13,185,710,971 cycles:u # 2.663 GHz - 39,954,200,268 instructions:u # 3.03 insn per cycle - 5.047701273 seconds time elapsed +TOTAL : 4.925486 sec + 13,158,057,745 cycles:u # 2.669 GHz + 39,954,210,434 instructions:u # 3.04 insn per cycle + 4.932539858 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 7957) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest.exe @@ -162,14 +162,14 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 10.2.0] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.791437e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.819283e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.819283e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.790313e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.817863e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.817863e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.429143 sec - 5,511,865,684 cycles:u # 2.265 GHz - 13,780,488,133 instructions:u # 2.50 insn per cycle - 2.607078173 seconds time elapsed +TOTAL : 2.425886 sec + 5,511,814,075 cycles:u # 2.268 GHz + 13,780,497,990 instructions:u # 2.50 insn per cycle + 2.433185374 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 6819) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest.exe @@ -187,14 +187,14 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 10.2.0] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.459552e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.492443e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.492443e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.463406e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.496256e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.496256e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.208541 sec - 5,017,577,651 cycles:u # 2.267 GHz - 12,680,180,504 instructions:u # 2.53 insn per cycle - 2.274128563 seconds time elapsed +TOTAL : 2.208497 sec + 5,016,876,465 cycles:u # 2.267 GHz + 12,680,189,845 instructions:u # 2.53 insn per cycle + 2.216835549 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 6604) (512y: 57) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/build.512y_d_inl0_hrd0/runTest.exe @@ -212,14 +212,14 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 10.2.0] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.513080e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.538311e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.538311e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.522548e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.547410e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.547410e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.529753 sec - 3,990,412,420 cycles:u # 1.575 GHz - 6,424,103,333 instructions:u # 1.61 insn per cycle - 2.629224079 seconds time elapsed +TOTAL : 2.524598 sec + 3,987,354,486 cycles:u # 1.577 GHz + 6,424,112,639 instructions:u # 1.61 insn per cycle + 2.532114949 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1749) (512y: 73) (512z: 5663) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/build.512z_d_inl0_hrd0/runTest.exe @@ -232,5 +232,3 @@ Avg ME (F77/C++) = 6.6266750603003670E-004 Relative difference = 9.099641544231038e-09 OK (relative difference <= 2E-4) ========================================================================= - -TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_manu/log_ggttggg_manu_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_manu/log_ggttggg_manu_d_inl0_hrd0.txt index 471c326da9..28aeeb0205 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_manu/log_ggttggg_manu_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_manu/log_ggttggg_manu_d_inl0_hrd0.txt @@ -68,7 +68,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all.512z_d_inl0_hrd0_hasCurand'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg' -DATE: 2022-03-03_00:40:13 +DATE: 2022-03-08_16:02:10 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -76,14 +76,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg/SubProcesses Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 11.6.112 (gcc 10.2.0)] [inlineHel=0] [hardcodeCIPC=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.850344e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.850939e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.851233e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.849251e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.849989e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.850226e+02 ) sec^-1 MeanMatrixElemValue = ( 2.064592e-05 +- 1.952360e-05 ) GeV^-6 -TOTAL : 1.519137 sec - 1,240,540,962 cycles:u # 0.875 GHz - 2,538,404,158 instructions:u # 2.05 insn per cycle - 1.773924836 seconds time elapsed +TOTAL : 1.660097 sec + 1,204,054,201 cycles:u # 0.607 GHz + 2,446,966,242 instructions:u # 2.03 insn per cycle + 2.040667007 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... @@ -91,14 +91,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg/SubProcesses Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 11.6.112 (gcc 10.2.0)] [inlineHel=0] [hardcodeCIPC=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.218943e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.219420e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.219458e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.222097e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.222498e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.222552e+04 ) sec^-1 MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 2.923202 sec - 2,623,210,949 cycles:u # 0.879 GHz - 5,669,746,807 instructions:u # 2.16 insn per cycle - 2.987413652 seconds time elapsed +TOTAL : 3.219514 sec + 2,714,964,049 cycles:u # 0.766 GHz + 5,572,835,733 instructions:u # 2.05 insn per cycle + 3.602345143 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -112,14 +112,14 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 10.2.0] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 7.016970e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.017442e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.017442e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.021705e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.022163e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.022163e+01 ) sec^-1 MeanMatrixElemValue = ( 2.064592e-05 +- 1.952360e-05 ) GeV^-6 -TOTAL : 3.890593 sec - 10,337,069,750 cycles:u # 2.658 GHz - 28,670,989,343 instructions:u # 2.77 insn per cycle - 3.962849544 seconds time elapsed +TOTAL : 3.881759 sec + 10,323,373,923 cycles:u # 2.657 GHz + 28,670,999,047 instructions:u # 2.78 insn per cycle + 3.888739191 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 7356) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/build.none_d_inl0_hrd0/runTest.exe @@ -137,14 +137,14 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 10.2.0] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.303340e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.303514e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.303514e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.283901e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.284056e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.284056e+02 ) sec^-1 MeanMatrixElemValue = ( 2.064592e-05 +- 1.952360e-05 ) GeV^-6 -TOTAL : 2.092670 sec - 5,564,868,671 cycles:u # 2.654 GHz - 15,096,999,560 instructions:u # 2.71 insn per cycle - 2.163061036 seconds time elapsed +TOTAL : 2.125019 sec + 5,651,230,587 cycles:u # 2.655 GHz + 15,097,009,220 instructions:u # 2.67 insn per cycle + 2.131714356 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:66501) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest.exe @@ -162,14 +162,14 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 10.2.0] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.582720e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.583393e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.583393e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.589549e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.590172e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.590172e+02 ) sec^-1 MeanMatrixElemValue = ( 2.064592e-05 +- 1.952360e-05 ) GeV^-6 -TOTAL : 1.059318 sec - 2,395,389,422 cycles:u # 2.251 GHz - 5,272,417,521 instructions:u # 2.20 insn per cycle - 1.167034390 seconds time elapsed +TOTAL : 1.056391 sec + 2,390,867,768 cycles:u # 2.254 GHz + 5,272,427,235 instructions:u # 2.21 insn per cycle + 1.063379036 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:57213) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest.exe @@ -187,14 +187,14 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 10.2.0] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.845911e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.846735e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.846735e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.835857e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.836624e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.836624e+02 ) sec^-1 MeanMatrixElemValue = ( 2.064592e-05 +- 1.952360e-05 ) GeV^-6 -TOTAL : 0.961917 sec - 2,174,781,832 cycles:u # 2.250 GHz - 4,832,261,415 instructions:u # 2.22 insn per cycle - 1.140115499 seconds time elapsed +TOTAL : 0.965457 sec + 2,183,296,317 cycles:u # 2.251 GHz + 4,832,271,130 instructions:u # 2.21 insn per cycle + 0.973008203 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:57716) (512y: 51) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/build.512y_d_inl0_hrd0/runTest.exe @@ -212,14 +212,14 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 10.2.0] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.944918e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.945733e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.945733e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.939543e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.940343e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.940343e+02 ) sec^-1 MeanMatrixElemValue = ( 2.064592e-05 +- 1.952360e-05 ) GeV^-6 -TOTAL : 0.930126 sec - 1,457,417,419 cycles:u # 1.559 GHz - 2,415,819,335 instructions:u # 1.66 insn per cycle - 1.003260116 seconds time elapsed +TOTAL : 0.932234 sec + 1,459,150,294 cycles:u # 1.558 GHz + 2,415,829,308 instructions:u # 1.66 insn per cycle + 0.939053758 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 6172) (512y: 49) (512z:52234) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/build.512z_d_inl0_hrd0/runTest.exe @@ -232,5 +232,3 @@ Avg ME (F77/C++) = 9.8722631420503186E-003 Relative difference = 1.4388830547142576e-08 OK (relative difference <= 2E-4) ========================================================================= - -TEST COMPLETED From 69873d4865c13ea802e48405bc7f5962b1cff1f4 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Tue, 8 Mar 2022 16:06:24 +0100 Subject: [PATCH 09/15] [pptt] go back to upstream/master (or 09e482e) logs --- .../log_eemumu_manu_d_inl0_hrd0.txt | 88 +++++++-------- .../log_ggtt_manu_d_inl0_hrd0.txt | 88 +++++++-------- .../log_ggttg_manu_d_inl0_hrd0.txt | 102 +++++++++--------- .../log_ggttgg_manu_d_inl0_hrd0.txt | 102 +++++++++--------- .../log_ggttggg_manu_d_inl0_hrd0.txt | 102 +++++++++--------- 5 files changed, 246 insertions(+), 236 deletions(-) diff --git a/epochX/cudacpp/tput/logs_eemumu_manu/log_eemumu_manu_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_manu/log_eemumu_manu_d_inl0_hrd0.txt index 4234341cba..03e0553ebf 100644 --- a/epochX/cudacpp/tput/logs_eemumu_manu/log_eemumu_manu_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_manu/log_eemumu_manu_d_inl0_hrd0.txt @@ -68,7 +68,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all.512z_d_inl0_hrd0_hasCurand'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum' -DATE: 2022-03-08_16:00:03 +DATE: 2022-03-02_20:42:21 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -76,14 +76,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/ Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 11.6.112 (gcc 10.2.0)] [inlineHel=0] [hardcodeCIPC=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.605186e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.556574e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.338519e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.650391e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.543701e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.328033e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.792568 sec - 416,419,207 cycles:u # 0.422 GHz - 748,904,913 instructions:u # 1.80 insn per cycle - 1.113616127 seconds time elapsed +TOTAL : 0.513662 sec + 413,968,051 cycles:u # 0.931 GHz + 751,994,111 instructions:u # 1.82 insn per cycle + 0.732801350 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- @@ -99,14 +99,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 10.2.0] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.097316e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.660352e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.660352e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.082769e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.634916e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.634916e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.176695 sec - 16,419,354,433 cycles:u # 2.657 GHz - 40,586,920,269 instructions:u # 2.47 insn per cycle - 6.184426716 seconds time elapsed +TOTAL : 6.270509 sec + 16,617,081,523 cycles:u # 2.648 GHz + 40,586,917,676 instructions:u # 2.44 insn per cycle + 6.318167103 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 280) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/build.none_d_inl0_hrd0/runTest.exe @@ -124,14 +124,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 10.2.0] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.551454e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.082720e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.082720e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.546202e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.098641e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.098641e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.490753 sec - 11,910,679,858 cycles:u # 2.650 GHz - 26,063,734,355 instructions:u # 2.19 insn per cycle - 4.498909611 seconds time elapsed +TOTAL : 4.517435 sec + 11,951,541,790 cycles:u # 2.641 GHz + 26,063,732,111 instructions:u # 2.18 insn per cycle + 4.637719681 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1267) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/build.sse4_d_inl0_hrd0/runTest.exe @@ -149,14 +149,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 10.2.0] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.028826e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.423420e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.423420e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.012668e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.420060e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.420060e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.539552 sec - 8,909,570,703 cycles:u # 2.514 GHz - 15,500,274,472 instructions:u # 1.74 insn per cycle - 3.548019492 seconds time elapsed +TOTAL : 3.578034 sec + 8,982,829,582 cycles:u # 2.505 GHz + 15,500,272,859 instructions:u # 1.73 insn per cycle + 3.717096111 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1044) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/build.avx2_d_inl0_hrd0/runTest.exe @@ -174,14 +174,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 10.2.0] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.092807e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.890204e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.890204e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.073252e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.890733e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.890733e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.443010 sec - 8,691,751,181 cycles:u # 2.521 GHz - 15,380,738,092 instructions:u # 1.77 insn per cycle - 3.450832357 seconds time elapsed +TOTAL : 3.483272 sec + 8,768,552,952 cycles:u # 2.512 GHz + 15,380,735,835 instructions:u # 1.75 insn per cycle + 3.550889508 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1018) (512y: 1) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/build.512y_d_inl0_hrd0/runTest.exe @@ -199,14 +199,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 10.2.0] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.005591e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.305117e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.305117e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.918396e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.772420e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.772420e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.575752 sec - 8,172,745,705 cycles:u # 2.282 GHz - 12,463,043,474 instructions:u # 1.52 insn per cycle - 3.584094401 seconds time elapsed +TOTAL : 3.730114 sec + 8,413,652,592 cycles:u # 2.251 GHz + 12,463,041,181 instructions:u # 1.48 insn per cycle + 3.822048507 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 238) (512y: 2) (512z: 787) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/build.512z_d_inl0_hrd0/runTest.exe @@ -219,3 +219,5 @@ Avg ME (F77/C++) = 1.2828039868165201E-002 Relative difference = 1.0277080522138477e-08 OK (relative difference <= 2E-4) ========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_manu/log_ggtt_manu_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_manu/log_ggtt_manu_d_inl0_hrd0.txt index 09099ff550..eda19986b5 100644 --- a/epochX/cudacpp/tput/logs_ggtt_manu/log_ggtt_manu_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_manu/log_ggtt_manu_d_inl0_hrd0.txt @@ -68,7 +68,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all.512z_d_inl0_hrd0_hasCurand'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx' -DATE: 2022-03-08_16:00:42 +DATE: 2022-03-02_20:44:01 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -76,14 +76,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt/SubProcesses/P1 Process = SIGMA_SM_GG_TTX_CUDA [nvcc 11.6.112 (gcc 10.2.0)] [inlineHel=0] [hardcodeCIPC=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.648207e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.276729e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.418412e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.783311e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.286976e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.423520e+08 ) sec^-1 MeanMatrixElemValue = ( 2.085623e+00 +- 4.835084e-03 ) GeV^0 -TOTAL : 0.559610 sec - 123,069,441 cycles:u # 0.154 GHz - 149,218,102 instructions:u # 1.21 insn per cycle - 0.858517104 seconds time elapsed +TOTAL : 0.192238 sec + 121,032,856 cycles:u # 0.477 GHz + 148,693,174 instructions:u # 1.23 insn per cycle + 0.470459066 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 170 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- @@ -99,14 +99,14 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 10.2.0] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.883856e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.000505e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.000505e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.880869e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.998415e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.998415e+05 ) sec^-1 MeanMatrixElemValue = ( 2.085623e+00 +- 4.835084e-03 ) GeV^0 -TOTAL : 2.851161 sec - 7,550,831,640 cycles:u # 2.644 GHz - 22,043,029,064 instructions:u # 2.92 insn per cycle - 2.859199918 seconds time elapsed +TOTAL : 2.868103 sec + 7,569,490,302 cycles:u # 2.632 GHz + 22,043,027,778 instructions:u # 2.91 insn per cycle + 2.930334332 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 449) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/build.none_d_inl0_hrd0/runTest.exe @@ -124,14 +124,14 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 10.2.0] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.899013e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.201531e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.201531e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.875237e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.171967e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.171967e+05 ) sec^-1 MeanMatrixElemValue = ( 2.085623e+00 +- 4.835084e-03 ) GeV^0 -TOTAL : 1.877591 sec - 4,941,583,744 cycles:u # 2.627 GHz - 12,986,329,014 instructions:u # 2.63 insn per cycle - 1.885182752 seconds time elapsed +TOTAL : 1.904920 sec + 4,986,509,471 cycles:u # 2.606 GHz + 12,986,329,320 instructions:u # 2.60 insn per cycle + 1.967185501 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2360) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe @@ -149,14 +149,14 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 10.2.0] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.783095e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.613009e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.613009e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.784499e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.627868e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.627868e+05 ) sec^-1 MeanMatrixElemValue = ( 2.085623e+00 +- 4.835084e-03 ) GeV^0 -TOTAL : 1.164587 sec - 2,665,831,368 cycles:u # 2.279 GHz - 5,530,144,009 instructions:u # 2.07 insn per cycle - 1.172304702 seconds time elapsed +TOTAL : 1.176503 sec + 2,668,866,456 cycles:u # 2.253 GHz + 5,530,142,603 instructions:u # 2.07 insn per cycle + 1.233542634 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2177) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe @@ -174,14 +174,14 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 10.2.0] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.099669e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.044804e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.044804e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.158898e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.147894e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.147894e+05 ) sec^-1 MeanMatrixElemValue = ( 2.085623e+00 +- 4.835084e-03 ) GeV^0 -TOTAL : 1.096902 sec - 2,510,271,222 cycles:u # 2.279 GHz - 5,353,978,964 instructions:u # 2.13 insn per cycle - 1.104815066 seconds time elapsed +TOTAL : 1.096895 sec + 2,489,998,374 cycles:u # 2.253 GHz + 5,353,978,508 instructions:u # 2.15 insn per cycle + 1.210113960 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2013) (512y: 115) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/build.512y_d_inl0_hrd0/runTest.exe @@ -199,14 +199,14 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 10.2.0] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.431631e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.838221e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.838221e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.420075e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.836944e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.836944e+05 ) sec^-1 MeanMatrixElemValue = ( 2.085623e+00 +- 4.835084e-03 ) GeV^0 -TOTAL : 1.599298 sec - 2,696,767,373 cycles:u # 1.683 GHz - 3,546,569,526 instructions:u # 1.32 insn per cycle - 1.606903008 seconds time elapsed +TOTAL : 1.614425 sec + 2,714,080,080 cycles:u # 1.673 GHz + 3,546,569,379 instructions:u # 1.31 insn per cycle + 1.660243395 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1026) (512y: 83) (512z: 1568) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/build.512z_d_inl0_hrd0/runTest.exe @@ -219,3 +219,5 @@ Avg ME (F77/C++) = 2.0288066358987087 Relative difference = 1.7946571129689766e-07 OK (relative difference <= 2E-4) ========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttg_manu/log_ggttg_manu_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_manu/log_ggttg_manu_d_inl0_hrd0.txt index e9000a7f8b..3dc1947f2f 100644 --- a/epochX/cudacpp/tput/logs_ggttg_manu/log_ggttg_manu_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_manu/log_ggttg_manu_d_inl0_hrd0.txt @@ -68,7 +68,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all.512z_d_inl0_hrd0_hasCurand'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg' -DATE: 2022-03-08_16:01:07 +DATE: 2022-03-02_22:15:06 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -76,14 +76,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg/SubProcesses/P Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 11.6.112 (gcc 10.2.0)] [inlineHel=0] [hardcodeCIPC=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.030196e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.108563e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.126534e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.884329e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.122004e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.142852e+07 ) sec^-1 MeanMatrixElemValue = ( 4.061783e+02 +- 3.760219e+02 ) GeV^-2 -TOTAL : 0.493765 sec - 93,095,079 cycles:u # 0.131 GHz - 89,190,010 instructions:u # 0.96 insn per cycle - 0.770026333 seconds time elapsed +TOTAL : 0.202382 sec + 94,563,161 cycles:u # 0.493 GHz + 89,904,458 instructions:u # 0.95 insn per cycle + 0.252128833 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... @@ -91,14 +91,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg/SubProcesses/P Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 11.6.112 (gcc 10.2.0)] [inlineHel=0] [hardcodeCIPC=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.148487e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.426159e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.442753e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.150746e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.426221e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.442531e+07 ) sec^-1 MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.641802 sec - 188,099,971 cycles:u # 0.212 GHz - 279,774,182 instructions:u # 1.49 insn per cycle - 0.947851869 seconds time elapsed +TOTAL : 0.276354 sec + 182,799,600 cycles:u # 0.528 GHz + 285,563,716 instructions:u # 1.56 insn per cycle + 0.352869456 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -112,14 +112,14 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 10.2.0] [inlineHel=0] [h Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.447886e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.477059e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.477059e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.444930e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.474432e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.474432e+04 ) sec^-1 MeanMatrixElemValue = ( 4.061783e+02 +- 3.760219e+02 ) GeV^-2 -TOTAL : 0.678856 sec - 1,804,013,324 cycles:u # 2.642 GHz - 5,728,320,127 instructions:u # 3.18 insn per cycle - 0.685812984 seconds time elapsed +TOTAL : 0.681342 sec + 1,804,067,404 cycles:u # 2.636 GHz + 5,728,310,505 instructions:u # 3.18 insn per cycle + 0.688457745 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 713) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/build.none_d_inl0_hrd0/runTest.exe @@ -137,14 +137,14 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 10.2.0] [inlineHel=0] [h Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.401457e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.495632e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.495632e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.403870e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.498913e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.498913e+04 ) sec^-1 MeanMatrixElemValue = ( 4.061783e+02 +- 3.760219e+02 ) GeV^-2 -TOTAL : 0.382275 sec - 1,008,619,342 cycles:u # 2.614 GHz - 2,995,467,082 instructions:u # 2.97 insn per cycle - 0.389251087 seconds time elapsed +TOTAL : 0.381299 sec + 1,008,612,859 cycles:u # 2.616 GHz + 2,995,457,153 instructions:u # 2.97 insn per cycle + 0.388252431 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 4237) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/build.sse4_d_inl0_hrd0/runTest.exe @@ -162,14 +162,14 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 10.2.0] [inlineHel=0] [h Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.489284e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.847212e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.847212e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.477066e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.836452e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.836452e+04 ) sec^-1 MeanMatrixElemValue = ( 4.061783e+02 +- 3.760219e+02 ) GeV^-2 -TOTAL : 0.202643 sec - 456,180,716 cycles:u # 2.202 GHz - 1,064,632,946 instructions:u # 2.33 insn per cycle - 0.209721703 seconds time elapsed +TOTAL : 0.202323 sec + 456,303,954 cycles:u # 2.209 GHz + 1,064,622,815 instructions:u # 2.33 insn per cycle + 0.209416914 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3582) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/build.avx2_d_inl0_hrd0/runTest.exe @@ -187,14 +187,14 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 10.2.0] [inlineHel=0] [h Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.404847e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.837296e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.837296e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.382412e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.813856e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.813856e+04 ) sec^-1 MeanMatrixElemValue = ( 4.061783e+02 +- 3.760219e+02 ) GeV^-2 -TOTAL : 0.183703 sec - 413,371,540 cycles:u # 2.197 GHz - 1,006,135,865 instructions:u # 2.43 insn per cycle - 0.190806857 seconds time elapsed +TOTAL : 0.184437 sec + 414,437,290 cycles:u # 2.192 GHz + 1,006,126,068 instructions:u # 2.43 insn per cycle + 0.191759500 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3421) (512y: 70) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/build.512y_d_inl0_hrd0/runTest.exe @@ -212,14 +212,14 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 10.2.0] [inlineHel=0] [h Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.992766e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.229034e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.229034e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.968373e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.203025e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.203025e+04 ) sec^-1 MeanMatrixElemValue = ( 4.061783e+02 +- 3.760219e+02 ) GeV^-2 -TOTAL : 0.244230 sec - 391,700,757 cycles:u # 1.575 GHz - 558,083,488 instructions:u # 1.42 insn per cycle - 0.251220703 seconds time elapsed +TOTAL : 0.245146 sec + 392,781,640 cycles:u # 1.571 GHz + 558,073,754 instructions:u # 1.42 insn per cycle + 0.252616903 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1240) (512y: 69) (512z: 2828) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/build.512z_d_inl0_hrd0/runTest.exe @@ -232,3 +232,5 @@ Avg ME (F77/C++) = 1.4131216787944412 Relative difference = 2.2730207216187906e-07 OK (relative difference <= 2E-4) ========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_manu/log_ggttgg_manu_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_manu/log_ggttgg_manu_d_inl0_hrd0.txt index c327c47b48..1d6d17ffec 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_manu/log_ggttgg_manu_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_manu/log_ggttgg_manu_d_inl0_hrd0.txt @@ -68,7 +68,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all.512z_d_inl0_hrd0_hasCurand'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg' -DATE: 2022-03-08_16:01:27 +DATE: 2022-03-02_20:45:04 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -76,14 +76,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg/SubProcesses/ Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 11.6.112 (gcc 10.2.0)] [inlineHel=0] [hardcodeCIPC=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.423835e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.471160e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.474282e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.448381e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.492028e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.495404e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.570926 sec - 162,234,921 cycles:u # 0.202 GHz - 230,470,285 instructions:u # 1.42 insn per cycle - 0.866395948 seconds time elapsed +TOTAL : 0.203547 sec + 160,556,197 cycles:u # 0.643 GHz + 234,164,869 instructions:u # 1.46 insn per cycle + 0.510651371 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... @@ -91,14 +91,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg/SubProcesses/ Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 11.6.112 (gcc 10.2.0)] [inlineHel=0] [hardcodeCIPC=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.138555e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.191833e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.194036e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.141580e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.192307e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.194436e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 2.616002 sec - 2,073,114,069 cycles:u # 0.719 GHz - 4,148,123,947 instructions:u # 2.00 insn per cycle - 2.942212048 seconds time elapsed +TOTAL : 2.251792 sec + 2,018,066,532 cycles:u # 0.868 GHz + 4,060,131,840 instructions:u # 2.01 insn per cycle + 2.330274883 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -112,14 +112,14 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 10.2.0] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.798730e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.800694e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.800694e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.795381e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.797351e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.797351e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 9.127687 sec - 24,397,483,761 cycles:u # 2.672 GHz - 75,802,812,822 instructions:u # 3.11 insn per cycle - 9.135133538 seconds time elapsed +TOTAL : 9.146190 sec + 24,408,471,737 cycles:u # 2.667 GHz + 75,802,803,874 instructions:u # 3.11 insn per cycle + 9.189273129 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1234) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/build.none_d_inl0_hrd0/runTest.exe @@ -137,14 +137,14 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 10.2.0] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.336404e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.343122e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.343122e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.322167e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.328838e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.328838e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.925486 sec - 13,158,057,745 cycles:u # 2.669 GHz - 39,954,210,434 instructions:u # 3.04 insn per cycle - 4.932539858 seconds time elapsed +TOTAL : 4.946382 sec + 13,185,710,971 cycles:u # 2.663 GHz + 39,954,200,268 instructions:u # 3.03 insn per cycle + 5.047701273 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 7957) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest.exe @@ -162,14 +162,14 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 10.2.0] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.790313e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.817863e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.817863e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.791437e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.819283e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.819283e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.425886 sec - 5,511,814,075 cycles:u # 2.268 GHz - 13,780,497,990 instructions:u # 2.50 insn per cycle - 2.433185374 seconds time elapsed +TOTAL : 2.429143 sec + 5,511,865,684 cycles:u # 2.265 GHz + 13,780,488,133 instructions:u # 2.50 insn per cycle + 2.607078173 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 6819) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest.exe @@ -187,14 +187,14 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 10.2.0] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.463406e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.496256e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.496256e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.459552e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.492443e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.492443e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.208497 sec - 5,016,876,465 cycles:u # 2.267 GHz - 12,680,189,845 instructions:u # 2.53 insn per cycle - 2.216835549 seconds time elapsed +TOTAL : 2.208541 sec + 5,017,577,651 cycles:u # 2.267 GHz + 12,680,180,504 instructions:u # 2.53 insn per cycle + 2.274128563 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 6604) (512y: 57) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/build.512y_d_inl0_hrd0/runTest.exe @@ -212,14 +212,14 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 10.2.0] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.522548e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.547410e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.547410e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.513080e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.538311e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.538311e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.524598 sec - 3,987,354,486 cycles:u # 1.577 GHz - 6,424,112,639 instructions:u # 1.61 insn per cycle - 2.532114949 seconds time elapsed +TOTAL : 2.529753 sec + 3,990,412,420 cycles:u # 1.575 GHz + 6,424,103,333 instructions:u # 1.61 insn per cycle + 2.629224079 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1749) (512y: 73) (512z: 5663) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/build.512z_d_inl0_hrd0/runTest.exe @@ -232,3 +232,5 @@ Avg ME (F77/C++) = 6.6266750603003670E-004 Relative difference = 9.099641544231038e-09 OK (relative difference <= 2E-4) ========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_manu/log_ggttggg_manu_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_manu/log_ggttggg_manu_d_inl0_hrd0.txt index 28aeeb0205..471c326da9 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_manu/log_ggttggg_manu_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_manu/log_ggttggg_manu_d_inl0_hrd0.txt @@ -68,7 +68,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all.512z_d_inl0_hrd0_hasCurand'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg' -DATE: 2022-03-08_16:02:10 +DATE: 2022-03-03_00:40:13 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -76,14 +76,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg/SubProcesses Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 11.6.112 (gcc 10.2.0)] [inlineHel=0] [hardcodeCIPC=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.849251e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.849989e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.850226e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.850344e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.850939e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.851233e+02 ) sec^-1 MeanMatrixElemValue = ( 2.064592e-05 +- 1.952360e-05 ) GeV^-6 -TOTAL : 1.660097 sec - 1,204,054,201 cycles:u # 0.607 GHz - 2,446,966,242 instructions:u # 2.03 insn per cycle - 2.040667007 seconds time elapsed +TOTAL : 1.519137 sec + 1,240,540,962 cycles:u # 0.875 GHz + 2,538,404,158 instructions:u # 2.05 insn per cycle + 1.773924836 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... @@ -91,14 +91,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg/SubProcesses Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 11.6.112 (gcc 10.2.0)] [inlineHel=0] [hardcodeCIPC=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.222097e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.222498e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.222552e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.218943e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.219420e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.219458e+04 ) sec^-1 MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.219514 sec - 2,714,964,049 cycles:u # 0.766 GHz - 5,572,835,733 instructions:u # 2.05 insn per cycle - 3.602345143 seconds time elapsed +TOTAL : 2.923202 sec + 2,623,210,949 cycles:u # 0.879 GHz + 5,669,746,807 instructions:u # 2.16 insn per cycle + 2.987413652 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -112,14 +112,14 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 10.2.0] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 7.021705e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.022163e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.022163e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.016970e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.017442e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.017442e+01 ) sec^-1 MeanMatrixElemValue = ( 2.064592e-05 +- 1.952360e-05 ) GeV^-6 -TOTAL : 3.881759 sec - 10,323,373,923 cycles:u # 2.657 GHz - 28,670,999,047 instructions:u # 2.78 insn per cycle - 3.888739191 seconds time elapsed +TOTAL : 3.890593 sec + 10,337,069,750 cycles:u # 2.658 GHz + 28,670,989,343 instructions:u # 2.77 insn per cycle + 3.962849544 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 7356) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/build.none_d_inl0_hrd0/runTest.exe @@ -137,14 +137,14 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 10.2.0] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.283901e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.284056e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.284056e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.303340e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.303514e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.303514e+02 ) sec^-1 MeanMatrixElemValue = ( 2.064592e-05 +- 1.952360e-05 ) GeV^-6 -TOTAL : 2.125019 sec - 5,651,230,587 cycles:u # 2.655 GHz - 15,097,009,220 instructions:u # 2.67 insn per cycle - 2.131714356 seconds time elapsed +TOTAL : 2.092670 sec + 5,564,868,671 cycles:u # 2.654 GHz + 15,096,999,560 instructions:u # 2.71 insn per cycle + 2.163061036 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:66501) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest.exe @@ -162,14 +162,14 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 10.2.0] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.589549e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.590172e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.590172e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.582720e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.583393e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.583393e+02 ) sec^-1 MeanMatrixElemValue = ( 2.064592e-05 +- 1.952360e-05 ) GeV^-6 -TOTAL : 1.056391 sec - 2,390,867,768 cycles:u # 2.254 GHz - 5,272,427,235 instructions:u # 2.21 insn per cycle - 1.063379036 seconds time elapsed +TOTAL : 1.059318 sec + 2,395,389,422 cycles:u # 2.251 GHz + 5,272,417,521 instructions:u # 2.20 insn per cycle + 1.167034390 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:57213) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest.exe @@ -187,14 +187,14 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 10.2.0] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.835857e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.836624e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.836624e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.845911e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.846735e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.846735e+02 ) sec^-1 MeanMatrixElemValue = ( 2.064592e-05 +- 1.952360e-05 ) GeV^-6 -TOTAL : 0.965457 sec - 2,183,296,317 cycles:u # 2.251 GHz - 4,832,271,130 instructions:u # 2.21 insn per cycle - 0.973008203 seconds time elapsed +TOTAL : 0.961917 sec + 2,174,781,832 cycles:u # 2.250 GHz + 4,832,261,415 instructions:u # 2.22 insn per cycle + 1.140115499 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:57716) (512y: 51) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/build.512y_d_inl0_hrd0/runTest.exe @@ -212,14 +212,14 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 10.2.0] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.939543e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.940343e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.940343e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.944918e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.945733e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.945733e+02 ) sec^-1 MeanMatrixElemValue = ( 2.064592e-05 +- 1.952360e-05 ) GeV^-6 -TOTAL : 0.932234 sec - 1,459,150,294 cycles:u # 1.558 GHz - 2,415,829,308 instructions:u # 1.66 insn per cycle - 0.939053758 seconds time elapsed +TOTAL : 0.930126 sec + 1,457,417,419 cycles:u # 1.559 GHz + 2,415,819,335 instructions:u # 1.66 insn per cycle + 1.003260116 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 6172) (512y: 49) (512z:52234) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/build.512z_d_inl0_hrd0/runTest.exe @@ -232,3 +232,5 @@ Avg ME (F77/C++) = 9.8722631420503186E-003 Relative difference = 1.4388830547142576e-08 OK (relative difference <= 2E-4) ========================================================================= + +TEST COMPLETED From 4beacf5d09d8f996d5a323ca238fc81e3e888ac4 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Tue, 8 Mar 2022 17:50:19 +0100 Subject: [PATCH 10/15] [pptt] rename libraries as suggested by @roiser to avoid clashes #397 --- epochX/cudacpp/gg_tt/SubProcesses/Makefile | 6 +++--- epochX/cudacpp/gg_tt/src/Makefile | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/epochX/cudacpp/gg_tt/SubProcesses/Makefile b/epochX/cudacpp/gg_tt/SubProcesses/Makefile index f52e503781..53b13367b5 100644 --- a/epochX/cudacpp/gg_tt/SubProcesses/Makefile +++ b/epochX/cudacpp/gg_tt/SubProcesses/Makefile @@ -20,7 +20,7 @@ INCFLAGS = -I. OPTFLAGS = -O3 # this ends up in CUFLAGS too (should it?), cannot add -Ofast or -ffast-math here # Dependency on src directory -MG5AMC_COMMONLIB = mg5amc_common +MG5AMC_COMMONLIB = mg5amc_gg_ttx_common LIBFLAGS = -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) INCFLAGS += -I../../src @@ -379,11 +379,11 @@ $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so: ../../src/*.h ../../src/*.cc #------------------------------------------------------------------------------- -MG5AMC_CXXLIB = mg5amc_cxx +MG5AMC_CXXLIB = mg5amc_gg_ttx_cxx cxx_objects=$(BUILDDIR)/CPPProcess.o $(BUILDDIR)/RandomNumberKernels.o $(BUILDDIR)/RamboSamplingKernels.o $(BUILDDIR)/MatrixElementKernels.o $(BUILDDIR)/BridgeKernels.o $(BUILDDIR)/CrossSectionKernels.o ifneq ($(NVCC),) -MG5AMC_CULIB = mg5amc_cu +MG5AMC_CULIB = mg5amc_gg_ttx_cu cu_objects=$(BUILDDIR)/gCPPProcess.o $(BUILDDIR)/gRandomNumberKernels.o $(BUILDDIR)/gRamboSamplingKernels.o $(BUILDDIR)/gMatrixElementKernels.o $(BUILDDIR)/gBridgeKernels.o $(BUILDDIR)/gCrossSectionKernels.o endif diff --git a/epochX/cudacpp/gg_tt/src/Makefile b/epochX/cudacpp/gg_tt/src/Makefile index 847adc49f6..48e66eaeb2 100644 --- a/epochX/cudacpp/gg_tt/src/Makefile +++ b/epochX/cudacpp/gg_tt/src/Makefile @@ -161,7 +161,7 @@ endif # NB1: there are no CUDA targets in src as we avoid RDC! # NB2: CUDA includes for curand.h are no longer needed in the C++ code anywhere in src! -MG5AMC_COMMONLIB = mg5amc_common +MG5AMC_COMMONLIB = mg5amc_gg_ttx_common # First target (default goal) all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so From 8eaeccac8ef7c0415e9d688965439b55028cab67 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Tue, 8 Mar 2022 18:03:26 +0100 Subject: [PATCH 11/15] [pptt] prepare Makefile templates with %(processid_short)s to be replaced... However this demands overriding ProcessExporterCPP.copy_template in export_cpp upstream. Maybe another option is possible in the Makefile... --- .../madgraph/iolibs/template_files/gpu/Makefile | 6 +++--- .../madgraph/iolibs/template_files/gpu/Makefile_src | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/Makefile b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/Makefile index eb2c75841d..e24f450326 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/Makefile +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/Makefile @@ -20,7 +20,7 @@ INCFLAGS = -I. OPTFLAGS = -O3 # this ends up in CUFLAGS too (should it?), cannot add -Ofast or -ffast-math here # Dependency on src directory -MG5AMC_COMMONLIB = mg5amc_common +MG5AMC_COMMONLIB = mg5amc_%(processid_short)s_common LIBFLAGS = -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) INCFLAGS += -I../../src @@ -379,11 +379,11 @@ $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so: ../../src/*.h ../../src/*.cc #------------------------------------------------------------------------------- -MG5AMC_CXXLIB = mg5amc_cxx +MG5AMC_CXXLIB = mg5amc_%(processid_short)s_cxx cxx_objects=$(BUILDDIR)/CPPProcess.o $(BUILDDIR)/RandomNumberKernels.o $(BUILDDIR)/RamboSamplingKernels.o $(BUILDDIR)/MatrixElementKernels.o $(BUILDDIR)/BridgeKernels.o $(BUILDDIR)/CrossSectionKernels.o ifneq ($(NVCC),) -MG5AMC_CULIB = mg5amc_cu +MG5AMC_CULIB = mg5amc_%(processid_short)s_cu cu_objects=$(BUILDDIR)/gCPPProcess.o $(BUILDDIR)/gRandomNumberKernels.o $(BUILDDIR)/gRamboSamplingKernels.o $(BUILDDIR)/gMatrixElementKernels.o $(BUILDDIR)/gBridgeKernels.o $(BUILDDIR)/gCrossSectionKernels.o endif diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/Makefile_src b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/Makefile_src index eb94525c69..e27a9be803 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/Makefile_src +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/Makefile_src @@ -161,7 +161,7 @@ endif # NB1: there are no CUDA targets in src as we avoid RDC! # NB2: CUDA includes for curand.h are no longer needed in the C++ code anywhere in src! -MG5AMC_COMMONLIB = mg5amc_common +MG5AMC_COMMONLIB = mg5amc_%(processid_short)s_common # First target (default goal) all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so From c0511e5be3e635ce94a18f9fe72040f8b4bf322a Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Tue, 8 Mar 2022 18:05:44 +0100 Subject: [PATCH 12/15] [pptt] revert the last two changes - will try a different option in makefiles Revert "[pptt] prepare Makefile templates with %(processid_short)s to be replaced..." This reverts commit 8eaeccac8ef7c0415e9d688965439b55028cab67. Revert "[pptt] rename libraries as suggested by @roiser to avoid clashes #397" This reverts commit 4beacf5d09d8f996d5a323ca238fc81e3e888ac4. --- .../madgraph/iolibs/template_files/gpu/Makefile | 6 +++--- .../madgraph/iolibs/template_files/gpu/Makefile_src | 2 +- epochX/cudacpp/gg_tt/SubProcesses/Makefile | 6 +++--- epochX/cudacpp/gg_tt/src/Makefile | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/Makefile b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/Makefile index e24f450326..eb2c75841d 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/Makefile +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/Makefile @@ -20,7 +20,7 @@ INCFLAGS = -I. OPTFLAGS = -O3 # this ends up in CUFLAGS too (should it?), cannot add -Ofast or -ffast-math here # Dependency on src directory -MG5AMC_COMMONLIB = mg5amc_%(processid_short)s_common +MG5AMC_COMMONLIB = mg5amc_common LIBFLAGS = -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) INCFLAGS += -I../../src @@ -379,11 +379,11 @@ $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so: ../../src/*.h ../../src/*.cc #------------------------------------------------------------------------------- -MG5AMC_CXXLIB = mg5amc_%(processid_short)s_cxx +MG5AMC_CXXLIB = mg5amc_cxx cxx_objects=$(BUILDDIR)/CPPProcess.o $(BUILDDIR)/RandomNumberKernels.o $(BUILDDIR)/RamboSamplingKernels.o $(BUILDDIR)/MatrixElementKernels.o $(BUILDDIR)/BridgeKernels.o $(BUILDDIR)/CrossSectionKernels.o ifneq ($(NVCC),) -MG5AMC_CULIB = mg5amc_%(processid_short)s_cu +MG5AMC_CULIB = mg5amc_cu cu_objects=$(BUILDDIR)/gCPPProcess.o $(BUILDDIR)/gRandomNumberKernels.o $(BUILDDIR)/gRamboSamplingKernels.o $(BUILDDIR)/gMatrixElementKernels.o $(BUILDDIR)/gBridgeKernels.o $(BUILDDIR)/gCrossSectionKernels.o endif diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/Makefile_src b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/Makefile_src index e27a9be803..eb94525c69 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/Makefile_src +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/Makefile_src @@ -161,7 +161,7 @@ endif # NB1: there are no CUDA targets in src as we avoid RDC! # NB2: CUDA includes for curand.h are no longer needed in the C++ code anywhere in src! -MG5AMC_COMMONLIB = mg5amc_%(processid_short)s_common +MG5AMC_COMMONLIB = mg5amc_common # First target (default goal) all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so diff --git a/epochX/cudacpp/gg_tt/SubProcesses/Makefile b/epochX/cudacpp/gg_tt/SubProcesses/Makefile index 53b13367b5..f52e503781 100644 --- a/epochX/cudacpp/gg_tt/SubProcesses/Makefile +++ b/epochX/cudacpp/gg_tt/SubProcesses/Makefile @@ -20,7 +20,7 @@ INCFLAGS = -I. OPTFLAGS = -O3 # this ends up in CUFLAGS too (should it?), cannot add -Ofast or -ffast-math here # Dependency on src directory -MG5AMC_COMMONLIB = mg5amc_gg_ttx_common +MG5AMC_COMMONLIB = mg5amc_common LIBFLAGS = -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) INCFLAGS += -I../../src @@ -379,11 +379,11 @@ $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so: ../../src/*.h ../../src/*.cc #------------------------------------------------------------------------------- -MG5AMC_CXXLIB = mg5amc_gg_ttx_cxx +MG5AMC_CXXLIB = mg5amc_cxx cxx_objects=$(BUILDDIR)/CPPProcess.o $(BUILDDIR)/RandomNumberKernels.o $(BUILDDIR)/RamboSamplingKernels.o $(BUILDDIR)/MatrixElementKernels.o $(BUILDDIR)/BridgeKernels.o $(BUILDDIR)/CrossSectionKernels.o ifneq ($(NVCC),) -MG5AMC_CULIB = mg5amc_gg_ttx_cu +MG5AMC_CULIB = mg5amc_cu cu_objects=$(BUILDDIR)/gCPPProcess.o $(BUILDDIR)/gRandomNumberKernels.o $(BUILDDIR)/gRamboSamplingKernels.o $(BUILDDIR)/gMatrixElementKernels.o $(BUILDDIR)/gBridgeKernels.o $(BUILDDIR)/gCrossSectionKernels.o endif diff --git a/epochX/cudacpp/gg_tt/src/Makefile b/epochX/cudacpp/gg_tt/src/Makefile index 48e66eaeb2..847adc49f6 100644 --- a/epochX/cudacpp/gg_tt/src/Makefile +++ b/epochX/cudacpp/gg_tt/src/Makefile @@ -161,7 +161,7 @@ endif # NB1: there are no CUDA targets in src as we avoid RDC! # NB2: CUDA includes for curand.h are no longer needed in the C++ code anywhere in src! -MG5AMC_COMMONLIB = mg5amc_gg_ttx_common +MG5AMC_COMMONLIB = mg5amc_common # First target (default goal) all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so From 6276da89b9f745963023f2366b104b7cf9a42b96 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Tue, 8 Mar 2022 18:24:57 +0100 Subject: [PATCH 13/15] [pptt] alternative implementation: rename libraries as suggested by @roiser to avoid clashes #397 NB: only the cxx and cu libraries need to be renamed, while the common library includes only common src files for now... --- epochX/cudacpp/gg_tt/SubProcesses/Makefile | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/epochX/cudacpp/gg_tt/SubProcesses/Makefile b/epochX/cudacpp/gg_tt/SubProcesses/Makefile index f52e503781..361206bf29 100644 --- a/epochX/cudacpp/gg_tt/SubProcesses/Makefile +++ b/epochX/cudacpp/gg_tt/SubProcesses/Makefile @@ -379,11 +379,14 @@ $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so: ../../src/*.h ../../src/*.cc #------------------------------------------------------------------------------- -MG5AMC_CXXLIB = mg5amc_cxx +processid_short=$(shell basename $(CURDIR) | awk -F_ '{print $$(NF-1)"_"$$NF}') +###$(info processid_short=$(processid_short)) + +MG5AMC_CXXLIB = mg5amc_$(processid_short)_cxx cxx_objects=$(BUILDDIR)/CPPProcess.o $(BUILDDIR)/RandomNumberKernels.o $(BUILDDIR)/RamboSamplingKernels.o $(BUILDDIR)/MatrixElementKernels.o $(BUILDDIR)/BridgeKernels.o $(BUILDDIR)/CrossSectionKernels.o ifneq ($(NVCC),) -MG5AMC_CULIB = mg5amc_cu +MG5AMC_CULIB = mg5amc_$(processid_short)_cu cu_objects=$(BUILDDIR)/gCPPProcess.o $(BUILDDIR)/gRandomNumberKernels.o $(BUILDDIR)/gRamboSamplingKernels.o $(BUILDDIR)/gMatrixElementKernels.o $(BUILDDIR)/gBridgeKernels.o $(BUILDDIR)/gCrossSectionKernels.o endif From a4649ba9a6d9d1048e421a272fb41198e5873dee Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Tue, 8 Mar 2022 18:27:09 +0100 Subject: [PATCH 14/15] [pptt] backport to codegen and regenerate ggtt auto --- .../madgraph/iolibs/template_files/gpu/Makefile | 7 +++++-- .../cudacpp/gg_tt.auto/CODEGEN_cudacpp_gg_tt_log.txt | 12 ++++++------ epochX/cudacpp/gg_tt.auto/SubProcesses/Makefile | 7 +++++-- 3 files changed, 16 insertions(+), 10 deletions(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/Makefile b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/Makefile index eb2c75841d..9ce0cb9d7a 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/Makefile +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/Makefile @@ -379,11 +379,14 @@ $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so: ../../src/*.h ../../src/*.cc #------------------------------------------------------------------------------- -MG5AMC_CXXLIB = mg5amc_cxx +processid_short=$(shell basename $(CURDIR) | awk -F_ '{print $$(NF-1)"_"$$NF}') +###$(info processid_short=$(processid_short)) + +MG5AMC_CXXLIB = mg5amc_$(processid_short)_cxx cxx_objects=$(BUILDDIR)/CPPProcess.o $(BUILDDIR)/RandomNumberKernels.o $(BUILDDIR)/RamboSamplingKernels.o $(BUILDDIR)/MatrixElementKernels.o $(BUILDDIR)/BridgeKernels.o $(BUILDDIR)/CrossSectionKernels.o ifneq ($(NVCC),) -MG5AMC_CULIB = mg5amc_cu +MG5AMC_CULIB = mg5amc_$(processid_short)_cu cu_objects=$(BUILDDIR)/gCPPProcess.o $(BUILDDIR)/gRandomNumberKernels.o $(BUILDDIR)/gRamboSamplingKernels.o $(BUILDDIR)/gMatrixElementKernels.o $(BUILDDIR)/gBridgeKernels.o $(BUILDDIR)/gCrossSectionKernels.o endif diff --git a/epochX/cudacpp/gg_tt.auto/CODEGEN_cudacpp_gg_tt_log.txt b/epochX/cudacpp/gg_tt.auto/CODEGEN_cudacpp_gg_tt_log.txt index 8914bb78d8..1b721b869f 100644 --- a/epochX/cudacpp/gg_tt.auto/CODEGEN_cudacpp_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.auto/CODEGEN_cudacpp_gg_tt_log.txt @@ -50,7 +50,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.006888151168823242  +DEBUG: model prefixing takes 0.006908893585205078  INFO: Restrict model sm with file models/sm/restrict_default.dat . INFO: Change particles name to pass to MG5 convention Defined multiparticle p = g u c d s u~ c~ d~ s~ @@ -65,7 +65,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.010 s +1 processes with 3 diagrams generated in 0.011 s Total: 1 processes with 3 diagrams output standalone_cudacpp CODEGEN_cudacpp_gg_tt Plugin PLUGIN.CUDACPP_SA_OUTPUT has marked as NOT being validated with this version. @@ -103,7 +103,7 @@ Generated helas calls for 1 subprocesses (3 diagrams) in 0.008 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.163 s +ALOHA: aloha creates 2 routines in 0.162 s VVV1 DEBUG: language =  [aloha_writers.py at line 2451]  FFV1 @@ -121,6 +121,6 @@ INFO: /data/avalassi/GPU2020/MG5aMC/3.1.1_lo_vectorization/CODEGEN_cudacpp_gg_tt DEBUG: Entering PLUGIN_ProcessExporter.finalize [output.py at line 185]  quit -real 0m1.019s -user 0m0.763s -sys 0m0.148s +real 0m1.152s +user 0m0.777s +sys 0m0.141s diff --git a/epochX/cudacpp/gg_tt.auto/SubProcesses/Makefile b/epochX/cudacpp/gg_tt.auto/SubProcesses/Makefile index f52e503781..361206bf29 100644 --- a/epochX/cudacpp/gg_tt.auto/SubProcesses/Makefile +++ b/epochX/cudacpp/gg_tt.auto/SubProcesses/Makefile @@ -379,11 +379,14 @@ $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so: ../../src/*.h ../../src/*.cc #------------------------------------------------------------------------------- -MG5AMC_CXXLIB = mg5amc_cxx +processid_short=$(shell basename $(CURDIR) | awk -F_ '{print $$(NF-1)"_"$$NF}') +###$(info processid_short=$(processid_short)) + +MG5AMC_CXXLIB = mg5amc_$(processid_short)_cxx cxx_objects=$(BUILDDIR)/CPPProcess.o $(BUILDDIR)/RandomNumberKernels.o $(BUILDDIR)/RamboSamplingKernels.o $(BUILDDIR)/MatrixElementKernels.o $(BUILDDIR)/BridgeKernels.o $(BUILDDIR)/CrossSectionKernels.o ifneq ($(NVCC),) -MG5AMC_CULIB = mg5amc_cu +MG5AMC_CULIB = mg5amc_$(processid_short)_cu cu_objects=$(BUILDDIR)/gCPPProcess.o $(BUILDDIR)/gRandomNumberKernels.o $(BUILDDIR)/gRamboSamplingKernels.o $(BUILDDIR)/gMatrixElementKernels.o $(BUILDDIR)/gBridgeKernels.o $(BUILDDIR)/gCrossSectionKernels.o endif From dabb202039fb89e73848e186990726aa102c38f9 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Tue, 8 Mar 2022 18:29:02 +0100 Subject: [PATCH 15/15] [pptt] *** COMPLETE PPTT *** regenerate all auto and resync manual --- .../ee_mumu.auto/CODEGEN_cudacpp_ee_mumu_log.txt | 10 +++++----- epochX/cudacpp/ee_mumu.auto/SubProcesses/Makefile | 7 +++++-- epochX/cudacpp/ee_mumu/SubProcesses/Makefile | 7 +++++-- .../gg_ttg.auto/CODEGEN_cudacpp_gg_ttg_log.txt | 10 +++++----- epochX/cudacpp/gg_ttg.auto/SubProcesses/Makefile | 7 +++++-- epochX/cudacpp/gg_ttg/SubProcesses/Makefile | 7 +++++-- .../gg_ttgg.auto/CODEGEN_cudacpp_gg_ttgg_log.txt | 14 +++++++------- epochX/cudacpp/gg_ttgg.auto/SubProcesses/Makefile | 7 +++++-- epochX/cudacpp/gg_ttgg/SubProcesses/Makefile | 7 +++++-- .../gg_ttggg.auto/CODEGEN_cudacpp_gg_ttggg_log.txt | 14 +++++++------- epochX/cudacpp/gg_ttggg.auto/SubProcesses/Makefile | 7 +++++-- epochX/cudacpp/gg_ttggg/SubProcesses/Makefile | 7 +++++-- 12 files changed, 64 insertions(+), 40 deletions(-) diff --git a/epochX/cudacpp/ee_mumu.auto/CODEGEN_cudacpp_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.auto/CODEGEN_cudacpp_ee_mumu_log.txt index 6f5b5aa3f0..3883738c37 100644 --- a/epochX/cudacpp/ee_mumu.auto/CODEGEN_cudacpp_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.auto/CODEGEN_cudacpp_ee_mumu_log.txt @@ -50,7 +50,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0068318843841552734  +DEBUG: model prefixing takes 0.006913900375366211  INFO: Restrict model sm with file models/sm/restrict_default.dat . INFO: Change particles name to pass to MG5 convention Defined multiparticle p = g u c d s u~ c~ d~ s~ @@ -100,7 +100,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 4 routines in 0.327 s +ALOHA: aloha creates 4 routines in 0.321 s FFV1 DEBUG: language =  [aloha_writers.py at line 2451]  FFV1 @@ -126,6 +126,6 @@ INFO: /data/avalassi/GPU2020/MG5aMC/3.1.1_lo_vectorization/CODEGEN_cudacpp_ee_mu DEBUG: Entering PLUGIN_ProcessExporter.finalize [output.py at line 185]  quit -real 0m1.071s -user 0m0.919s -sys 0m0.127s +real 0m1.076s +user 0m0.925s +sys 0m0.123s diff --git a/epochX/cudacpp/ee_mumu.auto/SubProcesses/Makefile b/epochX/cudacpp/ee_mumu.auto/SubProcesses/Makefile index f52e503781..361206bf29 100644 --- a/epochX/cudacpp/ee_mumu.auto/SubProcesses/Makefile +++ b/epochX/cudacpp/ee_mumu.auto/SubProcesses/Makefile @@ -379,11 +379,14 @@ $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so: ../../src/*.h ../../src/*.cc #------------------------------------------------------------------------------- -MG5AMC_CXXLIB = mg5amc_cxx +processid_short=$(shell basename $(CURDIR) | awk -F_ '{print $$(NF-1)"_"$$NF}') +###$(info processid_short=$(processid_short)) + +MG5AMC_CXXLIB = mg5amc_$(processid_short)_cxx cxx_objects=$(BUILDDIR)/CPPProcess.o $(BUILDDIR)/RandomNumberKernels.o $(BUILDDIR)/RamboSamplingKernels.o $(BUILDDIR)/MatrixElementKernels.o $(BUILDDIR)/BridgeKernels.o $(BUILDDIR)/CrossSectionKernels.o ifneq ($(NVCC),) -MG5AMC_CULIB = mg5amc_cu +MG5AMC_CULIB = mg5amc_$(processid_short)_cu cu_objects=$(BUILDDIR)/gCPPProcess.o $(BUILDDIR)/gRandomNumberKernels.o $(BUILDDIR)/gRamboSamplingKernels.o $(BUILDDIR)/gMatrixElementKernels.o $(BUILDDIR)/gBridgeKernels.o $(BUILDDIR)/gCrossSectionKernels.o endif diff --git a/epochX/cudacpp/ee_mumu/SubProcesses/Makefile b/epochX/cudacpp/ee_mumu/SubProcesses/Makefile index f52e503781..361206bf29 100644 --- a/epochX/cudacpp/ee_mumu/SubProcesses/Makefile +++ b/epochX/cudacpp/ee_mumu/SubProcesses/Makefile @@ -379,11 +379,14 @@ $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so: ../../src/*.h ../../src/*.cc #------------------------------------------------------------------------------- -MG5AMC_CXXLIB = mg5amc_cxx +processid_short=$(shell basename $(CURDIR) | awk -F_ '{print $$(NF-1)"_"$$NF}') +###$(info processid_short=$(processid_short)) + +MG5AMC_CXXLIB = mg5amc_$(processid_short)_cxx cxx_objects=$(BUILDDIR)/CPPProcess.o $(BUILDDIR)/RandomNumberKernels.o $(BUILDDIR)/RamboSamplingKernels.o $(BUILDDIR)/MatrixElementKernels.o $(BUILDDIR)/BridgeKernels.o $(BUILDDIR)/CrossSectionKernels.o ifneq ($(NVCC),) -MG5AMC_CULIB = mg5amc_cu +MG5AMC_CULIB = mg5amc_$(processid_short)_cu cu_objects=$(BUILDDIR)/gCPPProcess.o $(BUILDDIR)/gRandomNumberKernels.o $(BUILDDIR)/gRamboSamplingKernels.o $(BUILDDIR)/gMatrixElementKernels.o $(BUILDDIR)/gBridgeKernels.o $(BUILDDIR)/gCrossSectionKernels.o endif diff --git a/epochX/cudacpp/gg_ttg.auto/CODEGEN_cudacpp_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.auto/CODEGEN_cudacpp_gg_ttg_log.txt index 19832693f8..fd04b6f083 100644 --- a/epochX/cudacpp/gg_ttg.auto/CODEGEN_cudacpp_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.auto/CODEGEN_cudacpp_gg_ttg_log.txt @@ -50,7 +50,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.007297515869140625  +DEBUG: model prefixing takes 0.0068988800048828125  INFO: Restrict model sm with file models/sm/restrict_default.dat . INFO: Change particles name to pass to MG5 convention Defined multiparticle p = g u c d s u~ c~ d~ s~ @@ -108,7 +108,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.389 s +ALOHA: aloha creates 5 routines in 0.391 s VVV1 DEBUG: language =  [aloha_writers.py at line 2451]  VVV1 @@ -136,6 +136,6 @@ INFO: /data/avalassi/GPU2020/MG5aMC/3.1.1_lo_vectorization/CODEGEN_cudacpp_gg_tt DEBUG: Entering PLUGIN_ProcessExporter.finalize [output.py at line 185]  quit -real 0m1.252s -user 0m1.094s -sys 0m0.128s +real 0m1.253s +user 0m1.083s +sys 0m0.145s diff --git a/epochX/cudacpp/gg_ttg.auto/SubProcesses/Makefile b/epochX/cudacpp/gg_ttg.auto/SubProcesses/Makefile index f52e503781..361206bf29 100644 --- a/epochX/cudacpp/gg_ttg.auto/SubProcesses/Makefile +++ b/epochX/cudacpp/gg_ttg.auto/SubProcesses/Makefile @@ -379,11 +379,14 @@ $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so: ../../src/*.h ../../src/*.cc #------------------------------------------------------------------------------- -MG5AMC_CXXLIB = mg5amc_cxx +processid_short=$(shell basename $(CURDIR) | awk -F_ '{print $$(NF-1)"_"$$NF}') +###$(info processid_short=$(processid_short)) + +MG5AMC_CXXLIB = mg5amc_$(processid_short)_cxx cxx_objects=$(BUILDDIR)/CPPProcess.o $(BUILDDIR)/RandomNumberKernels.o $(BUILDDIR)/RamboSamplingKernels.o $(BUILDDIR)/MatrixElementKernels.o $(BUILDDIR)/BridgeKernels.o $(BUILDDIR)/CrossSectionKernels.o ifneq ($(NVCC),) -MG5AMC_CULIB = mg5amc_cu +MG5AMC_CULIB = mg5amc_$(processid_short)_cu cu_objects=$(BUILDDIR)/gCPPProcess.o $(BUILDDIR)/gRandomNumberKernels.o $(BUILDDIR)/gRamboSamplingKernels.o $(BUILDDIR)/gMatrixElementKernels.o $(BUILDDIR)/gBridgeKernels.o $(BUILDDIR)/gCrossSectionKernels.o endif diff --git a/epochX/cudacpp/gg_ttg/SubProcesses/Makefile b/epochX/cudacpp/gg_ttg/SubProcesses/Makefile index f52e503781..361206bf29 100644 --- a/epochX/cudacpp/gg_ttg/SubProcesses/Makefile +++ b/epochX/cudacpp/gg_ttg/SubProcesses/Makefile @@ -379,11 +379,14 @@ $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so: ../../src/*.h ../../src/*.cc #------------------------------------------------------------------------------- -MG5AMC_CXXLIB = mg5amc_cxx +processid_short=$(shell basename $(CURDIR) | awk -F_ '{print $$(NF-1)"_"$$NF}') +###$(info processid_short=$(processid_short)) + +MG5AMC_CXXLIB = mg5amc_$(processid_short)_cxx cxx_objects=$(BUILDDIR)/CPPProcess.o $(BUILDDIR)/RandomNumberKernels.o $(BUILDDIR)/RamboSamplingKernels.o $(BUILDDIR)/MatrixElementKernels.o $(BUILDDIR)/BridgeKernels.o $(BUILDDIR)/CrossSectionKernels.o ifneq ($(NVCC),) -MG5AMC_CULIB = mg5amc_cu +MG5AMC_CULIB = mg5amc_$(processid_short)_cu cu_objects=$(BUILDDIR)/gCPPProcess.o $(BUILDDIR)/gRandomNumberKernels.o $(BUILDDIR)/gRamboSamplingKernels.o $(BUILDDIR)/gMatrixElementKernels.o $(BUILDDIR)/gBridgeKernels.o $(BUILDDIR)/gCrossSectionKernels.o endif diff --git a/epochX/cudacpp/gg_ttgg.auto/CODEGEN_cudacpp_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.auto/CODEGEN_cudacpp_gg_ttgg_log.txt index 488810b838..e33ca04cc6 100644 --- a/epochX/cudacpp/gg_ttgg.auto/CODEGEN_cudacpp_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.auto/CODEGEN_cudacpp_gg_ttgg_log.txt @@ -50,7 +50,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.00693058967590332  +DEBUG: model prefixing takes 0.006940603256225586  INFO: Restrict model sm with file models/sm/restrict_default.dat . INFO: Change particles name to pass to MG5 convention Defined multiparticle p = g u c d s u~ c~ d~ s~ @@ -65,7 +65,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.209 s +1 processes with 123 diagrams generated in 0.210 s Total: 1 processes with 123 diagrams output standalone_cudacpp CODEGEN_cudacpp_gg_ttgg Plugin PLUGIN.CUDACPP_SA_OUTPUT has marked as NOT being validated with this version. @@ -102,7 +102,7 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/G DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1090]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1097]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1109]  -Generated helas calls for 1 subprocesses (123 diagrams) in 0.575 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.571 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 176]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -110,7 +110,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.383 s +ALOHA: aloha creates 5 routines in 0.384 s VVV1 DEBUG: language =  [aloha_writers.py at line 2451]  VVV1 @@ -144,6 +144,6 @@ INFO: /data/avalassi/GPU2020/MG5aMC/3.1.1_lo_vectorization/CODEGEN_cudacpp_gg_tt DEBUG: Entering PLUGIN_ProcessExporter.finalize [output.py at line 185]  quit -real 0m2.139s -user 0m1.995s -sys 0m0.115s +real 0m2.154s +user 0m1.981s +sys 0m0.137s diff --git a/epochX/cudacpp/gg_ttgg.auto/SubProcesses/Makefile b/epochX/cudacpp/gg_ttgg.auto/SubProcesses/Makefile index f52e503781..361206bf29 100644 --- a/epochX/cudacpp/gg_ttgg.auto/SubProcesses/Makefile +++ b/epochX/cudacpp/gg_ttgg.auto/SubProcesses/Makefile @@ -379,11 +379,14 @@ $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so: ../../src/*.h ../../src/*.cc #------------------------------------------------------------------------------- -MG5AMC_CXXLIB = mg5amc_cxx +processid_short=$(shell basename $(CURDIR) | awk -F_ '{print $$(NF-1)"_"$$NF}') +###$(info processid_short=$(processid_short)) + +MG5AMC_CXXLIB = mg5amc_$(processid_short)_cxx cxx_objects=$(BUILDDIR)/CPPProcess.o $(BUILDDIR)/RandomNumberKernels.o $(BUILDDIR)/RamboSamplingKernels.o $(BUILDDIR)/MatrixElementKernels.o $(BUILDDIR)/BridgeKernels.o $(BUILDDIR)/CrossSectionKernels.o ifneq ($(NVCC),) -MG5AMC_CULIB = mg5amc_cu +MG5AMC_CULIB = mg5amc_$(processid_short)_cu cu_objects=$(BUILDDIR)/gCPPProcess.o $(BUILDDIR)/gRandomNumberKernels.o $(BUILDDIR)/gRamboSamplingKernels.o $(BUILDDIR)/gMatrixElementKernels.o $(BUILDDIR)/gBridgeKernels.o $(BUILDDIR)/gCrossSectionKernels.o endif diff --git a/epochX/cudacpp/gg_ttgg/SubProcesses/Makefile b/epochX/cudacpp/gg_ttgg/SubProcesses/Makefile index f52e503781..361206bf29 100644 --- a/epochX/cudacpp/gg_ttgg/SubProcesses/Makefile +++ b/epochX/cudacpp/gg_ttgg/SubProcesses/Makefile @@ -379,11 +379,14 @@ $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so: ../../src/*.h ../../src/*.cc #------------------------------------------------------------------------------- -MG5AMC_CXXLIB = mg5amc_cxx +processid_short=$(shell basename $(CURDIR) | awk -F_ '{print $$(NF-1)"_"$$NF}') +###$(info processid_short=$(processid_short)) + +MG5AMC_CXXLIB = mg5amc_$(processid_short)_cxx cxx_objects=$(BUILDDIR)/CPPProcess.o $(BUILDDIR)/RandomNumberKernels.o $(BUILDDIR)/RamboSamplingKernels.o $(BUILDDIR)/MatrixElementKernels.o $(BUILDDIR)/BridgeKernels.o $(BUILDDIR)/CrossSectionKernels.o ifneq ($(NVCC),) -MG5AMC_CULIB = mg5amc_cu +MG5AMC_CULIB = mg5amc_$(processid_short)_cu cu_objects=$(BUILDDIR)/gCPPProcess.o $(BUILDDIR)/gRandomNumberKernels.o $(BUILDDIR)/gRamboSamplingKernels.o $(BUILDDIR)/gMatrixElementKernels.o $(BUILDDIR)/gBridgeKernels.o $(BUILDDIR)/gCrossSectionKernels.o endif diff --git a/epochX/cudacpp/gg_ttggg.auto/CODEGEN_cudacpp_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.auto/CODEGEN_cudacpp_gg_ttggg_log.txt index 501ed59600..4b6bacb98d 100644 --- a/epochX/cudacpp/gg_ttggg.auto/CODEGEN_cudacpp_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.auto/CODEGEN_cudacpp_gg_ttggg_log.txt @@ -50,7 +50,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.006897449493408203  +DEBUG: model prefixing takes 0.006931781768798828  INFO: Restrict model sm with file models/sm/restrict_default.dat . INFO: Change particles name to pass to MG5 convention Defined multiparticle p = g u c d s u~ c~ d~ s~ @@ -65,7 +65,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 2.488 s +1 processes with 1240 diagrams generated in 2.493 s Total: 1 processes with 1240 diagrams output standalone_cudacpp CODEGEN_cudacpp_gg_ttggg Plugin PLUGIN.CUDACPP_SA_OUTPUT has marked as NOT being validated with this version. @@ -104,7 +104,7 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/G DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1090]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1097]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1109]  -Generated helas calls for 1 subprocesses (1240 diagrams) in 8.934 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 8.963 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 176]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -112,7 +112,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.393 s +ALOHA: aloha creates 5 routines in 0.385 s VVV1 DEBUG: language =  [aloha_writers.py at line 2451]  VVV1 @@ -146,6 +146,6 @@ INFO: /data/avalassi/GPU2020/MG5aMC/3.1.1_lo_vectorization/CODEGEN_cudacpp_gg_tt DEBUG: Entering PLUGIN_ProcessExporter.finalize [output.py at line 185]  quit -real 0m17.425s -user 0m17.197s -sys 0m0.200s +real 0m17.460s +user 0m17.208s +sys 0m0.225s diff --git a/epochX/cudacpp/gg_ttggg.auto/SubProcesses/Makefile b/epochX/cudacpp/gg_ttggg.auto/SubProcesses/Makefile index f52e503781..361206bf29 100644 --- a/epochX/cudacpp/gg_ttggg.auto/SubProcesses/Makefile +++ b/epochX/cudacpp/gg_ttggg.auto/SubProcesses/Makefile @@ -379,11 +379,14 @@ $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so: ../../src/*.h ../../src/*.cc #------------------------------------------------------------------------------- -MG5AMC_CXXLIB = mg5amc_cxx +processid_short=$(shell basename $(CURDIR) | awk -F_ '{print $$(NF-1)"_"$$NF}') +###$(info processid_short=$(processid_short)) + +MG5AMC_CXXLIB = mg5amc_$(processid_short)_cxx cxx_objects=$(BUILDDIR)/CPPProcess.o $(BUILDDIR)/RandomNumberKernels.o $(BUILDDIR)/RamboSamplingKernels.o $(BUILDDIR)/MatrixElementKernels.o $(BUILDDIR)/BridgeKernels.o $(BUILDDIR)/CrossSectionKernels.o ifneq ($(NVCC),) -MG5AMC_CULIB = mg5amc_cu +MG5AMC_CULIB = mg5amc_$(processid_short)_cu cu_objects=$(BUILDDIR)/gCPPProcess.o $(BUILDDIR)/gRandomNumberKernels.o $(BUILDDIR)/gRamboSamplingKernels.o $(BUILDDIR)/gMatrixElementKernels.o $(BUILDDIR)/gBridgeKernels.o $(BUILDDIR)/gCrossSectionKernels.o endif diff --git a/epochX/cudacpp/gg_ttggg/SubProcesses/Makefile b/epochX/cudacpp/gg_ttggg/SubProcesses/Makefile index f52e503781..361206bf29 100644 --- a/epochX/cudacpp/gg_ttggg/SubProcesses/Makefile +++ b/epochX/cudacpp/gg_ttggg/SubProcesses/Makefile @@ -379,11 +379,14 @@ $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so: ../../src/*.h ../../src/*.cc #------------------------------------------------------------------------------- -MG5AMC_CXXLIB = mg5amc_cxx +processid_short=$(shell basename $(CURDIR) | awk -F_ '{print $$(NF-1)"_"$$NF}') +###$(info processid_short=$(processid_short)) + +MG5AMC_CXXLIB = mg5amc_$(processid_short)_cxx cxx_objects=$(BUILDDIR)/CPPProcess.o $(BUILDDIR)/RandomNumberKernels.o $(BUILDDIR)/RamboSamplingKernels.o $(BUILDDIR)/MatrixElementKernels.o $(BUILDDIR)/BridgeKernels.o $(BUILDDIR)/CrossSectionKernels.o ifneq ($(NVCC),) -MG5AMC_CULIB = mg5amc_cu +MG5AMC_CULIB = mg5amc_$(processid_short)_cu cu_objects=$(BUILDDIR)/gCPPProcess.o $(BUILDDIR)/gRandomNumberKernels.o $(BUILDDIR)/gRamboSamplingKernels.o $(BUILDDIR)/gMatrixElementKernels.o $(BUILDDIR)/gBridgeKernels.o $(BUILDDIR)/gCrossSectionKernels.o endif