diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/.clang-format b/epochX/cudacpp/smeft_gg_tttt.sa/.clang-format new file mode 100644 index 0000000000..12afd69b12 --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/.clang-format @@ -0,0 +1,226 @@ +# AV's draft .clang-format +# --- +# February 2022: latest draft for clang 13.0.0 (BasedOnStyle: Google) +# See https://releases.llvm.org/13.0.0/tools/clang/docs/ClangFormatStyleOptions.html +--- +Language: Cpp +BasedOnStyle: Google + +AccessModifierOffset: -2 # AV was -1 +AlignAfterOpenBracket: Align # AV ok +AlignArrayOfStructures: None # AV ok (alternative: Right, but code-generating it would be too complex) +AlignConsecutiveAssignments: None # AV ok +AlignConsecutiveBitFields: None # AV ok +AlignConsecutiveDeclarations: None # AV ok +AlignConsecutiveMacros: None # AV ok +AlignEscapedNewlines: DontAlign # AV was Left +AlignOperands: DontAlign # AV was Align +AlignTrailingComments: true # AV ok +AllowAllArgumentsOnNextLine: true # AV ok(?) +AllowAllConstructorInitializersOnNextLine: true # AV ok (NB: relevant only if ConstructorInitializerAllOnOneLineOrOnePerLine=true) +AllowAllParametersOfDeclarationOnNextLine: true # AV ok(?) +AllowShortBlocksOnASingleLine: Always # AV was Never +AllowShortEnumsOnASingleLine: true # AV ok +AllowShortCaseLabelsOnASingleLine: true # AV was false +AllowShortFunctionsOnASingleLine: All # AV ok +AllowShortLambdasOnASingleLine: All # AV ok +AllowShortIfStatementsOnASingleLine: WithoutElse # AV ok +AllowShortLoopsOnASingleLine: true # AV ok +###AlwaysBreakAfterDefinitionReturnType: None # AV keep defaults (deprecated) +#AlwaysBreakAfterReturnType: All # AV use this initially, then switch to TopLevelDefinitions! +AlwaysBreakAfterReturnType: TopLevelDefinitions # AV was None (altearnative: All?) +AlwaysBreakBeforeMultilineStrings: false # AV was true +AlwaysBreakTemplateDeclarations: Yes # AV ok +###AttributeMacros: # AV keep defaults (NB this is not about '__host__' attributes, see llvm/llvm-project/issues/45968) +### - __capability +BinPackArguments: false # AV was true +BinPackParameters: false # AV was true +BitFieldColonSpacing: Both # AV ok +BraceWrapping: # (NB: this is only relevant for "BreakBeforeBraces: Custom") + AfterCaseLabel: true # AV was false + AfterClass: true # AV was false + AfterControlStatement: Always # AV was Never + AfterEnum: true # AV was false + AfterFunction: true # AV was false + AfterNamespace: true # AV was false + AfterObjCDeclaration: true # AV was false + AfterStruct: true # AV was false + AfterUnion: true # AV was false + AfterExternBlock: true # AV was false (NB: does not work unless IndentExternBlock is AfterExternBlock?!) + BeforeCatch: true # AV was false + BeforeElse: true # AV was false + BeforeLambdaBody: true # AV was false + BeforeWhile: true # AV was false + IndentBraces: false # AV ok + SplitEmptyFunction: true # AV ok + SplitEmptyRecord: true # AV ok + SplitEmptyNamespace: true # AV ok +BreakAfterJavaFieldAnnotations: false +BreakBeforeBinaryOperators: None # AV ok +BreakBeforeBraces: Custom # AV was Attach (alternative: Allman) +BreakBeforeConceptDeclarations: true # AV ok +###BreakBeforeInheritanceComma: false # (obsolete???) +BreakBeforeTernaryOperators: true # AV ok +###BreakConstructorInitializersBeforeComma: true # AV was false (obsolete???) +BreakConstructorInitializers: BeforeComma # AV was BeforeColon +BreakInheritanceList: BeforeColon # AV ok (alternative: BeforeComma?) 
+BreakStringLiterals: false # AV was true +ColumnLimit: 0 # AV was 80 +###CommentPragmas: '^[^ ]*' # AV use SpacesInLineCommentPrefix Min=0 Max=1 to allow both "//comment" and "// comment" +CompactNamespaces: false # AV ok +ConstructorInitializerAllOnOneLineOrOnePerLine: true +ConstructorInitializerIndentWidth: 2 # AV was 4 +ContinuationIndentWidth: 2 # AV was 4 +Cpp11BracedListStyle: true # AV ok +DeriveLineEnding: false # AV was true +DerivePointerAlignment: false # AV was true +DisableFormat: false # AV ok +EmptyLineAfterAccessModifier: Leave # AV was Never +EmptyLineBeforeAccessModifier: Leave # AV was LogicalBlock +ExperimentalAutoDetectBinPacking: false # AV ok ("use at your own risk") +FixNamespaceComments: false # AV was true +###ForEachMacros: # AV keep defaults +### - foreach +### - Q_FOREACH +### - BOOST_FOREACH +###IfMacros: # AV keep defaults +### - KJ_IF_MAYBE +IncludeBlocks: Regroup # AV ok +IncludeCategories: + - Regex: '^' + Priority: 4 # AV was 2 + SortPriority: 0 + CaseSensitive: false + - Regex: '^<.*\.h>' + Priority: 5 # AV was 1 + SortPriority: 0 + CaseSensitive: false + - Regex: '^<.*' + Priority: 6 # AV was 2 + SortPriority: 0 + CaseSensitive: false + - Regex: 'mgOnGpuConfig.h' + Priority: 1 # AV new + SortPriority: 0 + CaseSensitive: false + - Regex: 'mgOnGpu*.*' + Priority: 2 # AV new + SortPriority: 0 + CaseSensitive: false + - Regex: '.*' + Priority: 3 # AV was 3 + SortPriority: 0 + CaseSensitive: false +###IncludeIsMainRegex: '([-_](test|unittest))?$' # AV keep defaults +###IncludeIsMainSourceRegex: '' # AV keep defaults +IndentAccessModifiers: false # AV ok +IndentCaseLabels: true # AV ok +IndentCaseBlocks: false # AV ok +IndentGotoLabels: false # AV was true +IndentPPDirectives: None # AV ok (NB: AfterHash and BeforeHash do not seem to work as intended) +###IndentExternBlock: Indent # AV was AfterExternBlock +IndentExternBlock: AfterExternBlock # AV ok (only with Custom BraceWrapping.AfterExternBlock = true) +IndentRequires: false # AV ok(?) +IndentWidth: 2 # AV ok +IndentWrappedFunctionNames: false # AV ok +###InsertTrailingCommas: None # AV keep defaults (Java only?) 
+###JavaScriptQuotes: Leave # AV irrelevant +###JavaScriptWrapImports: true # AV irrelevant +KeepEmptyLinesAtTheStartOfBlocks: false # AV ok +LambdaBodyIndentation: Signature # AV ok +###MacroBlockBegin: '' # AV keep defaults +###MacroBlockEnd: '' # AV keep defaults +MaxEmptyLinesToKeep: 1 # AV ok +NamespaceIndentation: All # AV was None +###ObjCBinPackProtocolList: Never # AV irrelevant +###ObjCBlockIndentWidth: 2 # AV irrelevant +###ObjCBreakBeforeNestedBlockParam: true # AV irrelevant +###ObjCSpaceAfterProperty: false # AV irrelevant +###ObjCSpaceBeforeProtocolList: true # AV irrelevant +###PenaltyBreakAssignment: 2 # AV keep defaults +###PenaltyBreakBeforeFirstCallParameter: 1 # AV keep defaults +###PenaltyBreakComment: 300 # AV keep defaults +###PenaltyBreakFirstLessLess: 120 # AV keep defaults +###PenaltyBreakString: 1000 # AV keep defaults +###PenaltyBreakTemplateDeclaration: 10 # AV keep defaults +###PenaltyExcessCharacter: 1000000 # AV keep defaults +###PenaltyReturnTypeOnItsOwnLine: 200 # AV keep defaults +###PenaltyIndentedWhitespace: 0 # AV keep defaults +PointerAlignment: Left # AV ok +PPIndentWidth: 0 # AV was -1 +###RawStringFormats: # AV keep defaults +### - Language: Cpp +### Delimiters: +### - cc +### - CC +### - cpp +### - Cpp +### - CPP +### - 'c++' +### - 'C++' +### CanonicalDelimiter: '' +### BasedOnStyle: google +### - Language: TextProto +### Delimiters: +### - pb +### - PB +### - proto +### - PROTO +### EnclosingFunctions: +### - EqualsProto +### - EquivToProto +### - PARSE_PARTIAL_TEXT_PROTO +### - PARSE_TEST_PROTO +### - PARSE_TEXT_PROTO +### - ParseTextOrDie +### - ParseTextProtoOrDie +### - ParseTestProto +### - ParsePartialTestProto +### CanonicalDelimiter: pb +### BasedOnStyle: google +ReferenceAlignment: Pointer # AV ok +ReflowComments: false # AV was true +ShortNamespaceLines: 1 # AV ok +SortIncludes: CaseSensitive # AV ok +###SortJavaStaticImport: Before # irrelevant +SortUsingDeclarations: false # AV was true +SpaceAfterCStyleCast: false # AV ok +SpaceAfterLogicalNot: false # AV ok +SpaceAfterTemplateKeyword: false # AV was true +SpaceAroundPointerQualifiers: Default # AV ok (alternative: Before?) +SpaceBeforeAssignmentOperators: true # AV ok +SpaceBeforeCaseColon: false # AV ok +SpaceBeforeCpp11BracedList: false # AV ok +SpaceBeforeCtorInitializerColon: true # AV ok +SpaceBeforeInheritanceColon: true # AV ok +SpaceBeforeParens: Never # AV was ControlStatements +SpaceBeforeRangeBasedForLoopColon: false # AV was true +SpaceBeforeSquareBrackets: false # AV ok +SpaceInEmptyBlock: false # AV ok +SpaceInEmptyParentheses: false # AV ok +SpacesBeforeTrailingComments: 1 # AV was 2 +SpacesInAngles: Never # AV ok +SpacesInConditionalStatement: false # AV ok (does this work?) +SpacesInContainerLiterals: false # AV was true +SpacesInCStyleCastParentheses: false # AV ok +SpacesInLineCommentPrefix: + Minimum: 0 # AV was 1 + Maximum: 1 # AV was -1 +SpacesInParentheses: true # AV was false +SpacesInSquareBrackets: false # AV ok +Standard: c++17 # AV was Auto +###StatementAttributeLikeMacros: # AV keep defaults +### - Q_EMIT +###StatementMacros: # AV keep defaults +### - Q_UNUSED +### - QT_REQUIRE_VERSION +###TabWidth: 8 # AV irrelevant if UseTab=Never? +UseCRLF: false # AV ok (but set DeriveLineEnding=false) +UseTab: Never # AV ok +###WhitespaceSensitiveMacros: # AV keep defaults +### - STRINGIZE +### - PP_STRINGIZE +### - BOOST_PP_STRINGIZE +### - NS_SWIFT_NAME +### - CF_SWIFT_NAME +... 
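For orientation (this snippet is illustrative only, not part of the generated output, and the ExampleCounter class is made up for the purpose), a minimal sketch of the layout this draft style aims for: Allman-style braces via the Custom BraceWrapping block, 2-space indents including namespace bodies, no column limit, spaces inside parentheses but none before them, and short functions allowed on a single line. The same conventions are visible in the generated C++ sources further down in this patch (e.g. Bridge.h).

namespace mg5amcCpu
{
  // a trivial class laid out according to the options above
  class ExampleCounter final
  {
  public:
    ExampleCounter( const int start )
      : m_count( start ) {}
    int count() const { return m_count; } // short function kept on a single line
    void add( const int n )
    {
      if( n > 0 ) m_count += n; // no space before '(', spaces inside '( )'
    }
  private:
    int m_count;
  };
}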
diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/CMake/Compilers.txt b/epochX/cudacpp/smeft_gg_tttt.sa/CMake/Compilers.txt new file mode 100644 index 0000000000..eec4baed28 --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/CMake/Compilers.txt @@ -0,0 +1,2 @@ +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED True) diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/CMake/Macros.txt b/epochX/cudacpp/smeft_gg_tttt.sa/CMake/Macros.txt new file mode 100644 index 0000000000..9a0e141b81 --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/CMake/Macros.txt @@ -0,0 +1,10 @@ +MACRO(SUBDIRLIST result) + FILE(GLOB children RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/*) + SET(dirlist "") + FOREACH(child ${children}) + IF(IS_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/${child}) + LIST(APPEND dirlist ${child}) + ENDIF() + ENDFOREACH() + SET(${result} ${dirlist}) +ENDMACRO() diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/CMake/Platforms.txt b/epochX/cudacpp/smeft_gg_tttt.sa/CMake/Platforms.txt new file mode 100644 index 0000000000..ab73e53db8 --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/CMake/Platforms.txt @@ -0,0 +1,3 @@ +if (CMAKE_HOST_APPLE) + add_definitions(-DMGONGPU_HAS_NO_CURAND) +endif(CMAKE_HOST_APPLE) diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/CMakeLists.txt b/epochX/cudacpp/smeft_gg_tttt.sa/CMakeLists.txt new file mode 100644 index 0000000000..d3010411fc --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/CMakeLists.txt @@ -0,0 +1,14 @@ +# Minimal CMake configuration to build a functional CPU version + +cmake_minimum_required(VERSION 3.22) + +project(Madgraph4GPU) + +include(${PROJECT_SOURCE_DIR}/CMake/Platforms.txt) +include(${PROJECT_SOURCE_DIR}/CMake/Compilers.txt) +include(${PROJECT_SOURCE_DIR}/CMake/Macros.txt) + +set(PROJECT_GITROOT_DIR ${PROJECT_SOURCE_DIR}/../../..) + +add_subdirectory(src) +add_subdirectory(SubProcesses) diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt b/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt new file mode 100644 index 0000000000..1ef8242c54 --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt @@ -0,0 +1,186 @@ +Note that this is a development version. +This version is intended for development/beta testing and NOT for production. +This version has not been fully tested (if at all) and might have limited user support (if at all) +Running MG5 in debug mode +************************************************************ +* * +* W E L C O M E to * +* M A D G R A P H 5 _ a M C @ N L O * +* * +* * +* * * * +* * * * * * +* * * * * 5 * * * * * +* * * * * * +* * * * +* * +* VERSION 3.5.0_lo_vect 2023-01-26 * +* * +* WARNING: UNKNOWN DEVELOPMENT VERSION. * +* WARNING: DO NOT USE FOR PRODUCTION * +* * +* * +* The MadGraph5_aMC@NLO Development Team - Find us at * +* https://server06.fynu.ucl.ac.be/projects/madgraph * +* and * +* http://amcatnlo.web.cern.ch/amcatnlo/ * +* * +* Type 'help' for in-line help. * +* Type 'tutorial' to learn how MG5 works * +* Type 'tutorial aMCatNLO' to learn how aMC@NLO works * +* Type 'tutorial MadLoop' to learn how MadLoop works * +* * +************************************************************ +load MG5 configuration from input/mg5_configuration.txt +fastjet-config does not seem to correspond to a valid fastjet-config executable (v3+). We will use fjcore instead. + Please set the 'fastjet'variable to the full (absolute) /PATH/TO/fastjet-config (including fastjet-config). 
+ MG5_aMC> set fastjet /PATH/TO/fastjet-config + +eMELA-config does not seem to correspond to a valid eMELA-config executable. + Please set the 'fastjet'variable to the full (absolute) /PATH/TO/eMELA-config (including eMELA-config). + MG5_aMC> set eMELA /PATH/TO/eMELA-config + +lhapdf-config does not seem to correspond to a valid lhapdf-config executable. +Please set the 'lhapdf' variable to the (absolute) /PATH/TO/lhapdf-config (including lhapdf-config). +Note that you can still compile and run aMC@NLO with the built-in PDFs + MG5_aMC> set lhapdf /PATH/TO/lhapdf-config + +None does not seem to correspond to a valid lhapdf-config executable. +Please set the 'lhapdf' variable to the (absolute) /PATH/TO/lhapdf-config (including lhapdf-config). +Note that you can still compile and run aMC@NLO with the built-in PDFs + MG5_aMC> set lhapdf /PATH/TO/lhapdf-config + +Using default text editor "vi". Set another one in ./input/mg5_configuration.txt +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt +import /data/avalassi/GPU2023/MG5aMC/ghav-mg5amcnlo/CODEGEN_cudacpp_smeft_gg_tttt.mg +The import format was not given, so we guess it as command +set stdout_level DEBUG +set output information to level: 10 +set zerowidth_tchannel F +set auto_convert_model T; import model SMEFTsim_topU3l_MwScheme_UFO -massless_4t; generate g g > t t~ t t~ +save options auto_convert_model +save configuration file to /data/avalassi/GPU2023/MG5aMC/ghav-mg5amcnlo/input/mg5_configuration.txt +INFO: load particles +INFO: load vertices +CRITICAL: Model with non QCD emission of gluon (found 14 of those). + This type of model is not fully supported within MG5aMC. + Restriction on LO dynamical scale and MLM matching/merging can occur for some processes. + Use such features with care.  
+DEBUG: MG5 converter defines FFFF110 to Gamma(-1,2,-2)*Gamma(-1,4,-3)*ProjM(-2,3)*ProjP(-3,1) + Gamma(-1,2,-3)*Gamma(-1,4,-2)*ProjM(-2,1)*ProjP(-3,3)  +DEBUG: MG5 converter defines FFFF111 to Gamma(-1,2,-3)*Gamma(-1,4,-2)*ProjM(-2,3)*ProjP(-3,1) + Gamma(-1,2,-2)*Gamma(-1,4,-3)*ProjM(-2,1)*ProjP(-3,3)  +DEBUG: MG5 converter defines FFFF22 to ProjM(2,1)*ProjP(4,3) + ProjM(4,3)*ProjP(2,1)  +DEBUG: MG5 converter defines FFFF23 to Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjM(-5,1)*ProjM(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjM(-5,3)*ProjM(-3,1)  +DEBUG: MG5 converter defines FFFF24 to Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjP(-5,1)*ProjP(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjP(-5,3)*ProjP(-3,1)  +DEBUG: MG5 converter defines FFFF25 to Gamma(-2,-6,-5)*Gamma(-2,-4,-3)*Gamma(-1,2,-4)*Gamma(-1,4,-6)*ProjP(-5,3)*ProjP(-3,1) + Gamma(-2,-6,-5)*Gamma(-2,-4,-3)*Gamma(-1,2,-4)*Gamma(-1,4,-6)*ProjM(-5,3)*ProjM(-3,1)  +DEBUG: MG5 converter defines FFFF26 to Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjP(-5,1)*ProjP(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjP(-5,3)*ProjP(-3,1) + Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjM(-5,1)*ProjM(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjM(-5,3)*ProjM(-3,1)  +DEBUG: MG5 converter defines FFFF27 to ProjP(2,1)*ProjP(4,3) + ProjM(2,1)*ProjM(4,3)  +DEBUG: MG5 converter defines FFFF112 to ProjM(2,3)*ProjM(4,1) + ProjP(2,3)*ProjP(4,1)  +DEBUG: model prefixing takes 0.15017271041870117  +INFO: Change particles name to pass to MG5 convention +Defined multiparticle p = g u c d s u~ c~ d~ s~ +Defined multiparticle j = g u c d s u~ c~ d~ s~ +Defined multiparticle l+ = e+ mu+ +Defined multiparticle l- = e- mu- +Defined multiparticle vl = ve vm vt +Defined multiparticle vl~ = ve~ vm~ vt~ +Defined multiparticle all = g a ve vm vt ve~ vm~ vt~ u c t d s b t1 u~ c~ t~ d~ s~ b~ t1~ z w+ z1 w1+ h h1 w- w1- e- mu- ta- e+ mu+ ta+ +INFO: Checking for minimal orders which gives processes. +INFO: Please specify coupling orders to bypass this step. 
+INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED+99*SMHLOOP+99*NP+99*NPshifts+99*NPprop+99*NPcpv+NPcbb+NPcbB+NPcbBB+NPcbd1+NPcbd8+NPcbe+NPcbG+NPcbH+NPcbj1+NPcbj8+NPcbl+NPcbu1+NPcbu8+NPcbW+NPcdB+NPcdd1+NPcdd8+NPcdG+NPcdH+NPcdW+NPceB+NPced+NPcee+NPceH+NPceu+NPceW+NPcG+NPcGtil+NPcH+NPcHB+NPcHbox+NPcHbq+NPcHBtil+NPcHd+NPcHDD+NPcHe+NPcHG+NPcHGtil+NPcHj1+NPcHj3+NPcHl1+NPcHl3+NPcHQ1+NPcHQ3+NPcHt+NPcHtb+NPcHu+NPcHud+NPcHW+NPcHWB+NPcHWBtil+NPcHWtil+NPcjd1+NPcjd8+NPcje+NPcjj11+NPcjj18+NPcjj31+NPcjj38+NPcjQbd1+NPcjQbd8+NPcjQtu1+NPcjQtu8+NPcjtQd1+NPcjtQd8+NPcju1+NPcju8+NPcjujd1+NPcjujd11+NPcjujd8+NPcjujd81+NPcjuQb1+NPcjuQb8+NPcld+NPcle+NPclebQ+NPcledj+NPcleju1+NPcleju3+NPcleQt1+NPcleQt3+NPclj1+NPclj3+NPcll+NPcll1+NPclu+NPcQb1+NPcQb8+NPcQd1+NPcQd8+NPcQe+NPcQj11+NPcQj18+NPcQj31+NPcQj38+NPcQl1+NPcQl3+NPcQQ1+NPcQQ8+NPcQt1+NPcQt8+NPcQtjd1+NPcQtjd8+NPcQtQb1+NPcQtQb8+NPcQu1+NPcQu8+NPcQujb1+NPcQujb8+NPctB+NPctb1+NPctb8+NPctd1+NPctd8+NPcte+NPctG+NPctH+NPctj1+NPctj8+NPctl+NPctt+NPctu1+NPctu8+NPctW+NPcuB+NPcud1+NPcud8+NPcuG+NPcuH+NPcutbd1+NPcutbd8+NPcuu1+NPcuu8+NPcuW+NPcW+NPcWtil+NPQjujb8 +INFO: Trying process: g g > t t~ t t~ WEIGHTED<=4 @1 +INFO: Process has 72 diagrams +1 processes with 72 diagrams generated in 4.222 s +Total: 1 processes with 72 diagrams +output standalone_cudacpp CODEGEN_cudacpp_smeft_gg_tttt +Load PLUGIN.CUDACPP_SA_OUTPUT +Output will be done with PLUGIN: CUDACPP_SA_OUTPUT +DEBUG: cformat =  plugin [export_cpp.py at line 3071]  +DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 143]  +DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 148]  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/MG5aMC/ghav-mg5amcnlo/CODEGEN_cudacpp_smeft_gg_tttt +INFO: Organizing processes into subprocess groups +INFO: Generating Helas calls for process: g g > t t~ t t~ WEIGHTED<=4 @1 +INFO: Processing color information for process: g g > t t~ t t~ @1 +DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 173]  +DEBUG: type(subproc_group)= [output.py at line 174]  +DEBUG: type(fortran_model)= [output.py at line 175]  +DEBUG: type(me)= me=0 [output.py at line 176]  +DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1027]  +DEBUG: proc_id =  0 [model_handling.py at line 1033]  +INFO: Creating files in directory /data/avalassi/GPU2023/MG5aMC/ghav-mg5amcnlo/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1268]  +DEBUG: self.include_multi_channel is not yet defined: this is standalone_cudacpp mode [model_handling.py at line 1272]  +FileWriter for /data/avalassi/GPU2023/MG5aMC/ghav-mg5amcnlo/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/./CPPProcess.h +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1411]  +FileWriter for /data/avalassi/GPU2023/MG5aMC/ghav-mg5amcnlo/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/./CPPProcess.cc +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1433]  +DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1122]  +DEBUG: self.include_multi_channel =  False [model_handling.py at line 1123]  +DEBUG: self.support_multichannel =  True 
[model_handling.py at line 1124]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1225]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True False [model_handling.py at line 1226]  +DEBUG: multi_channel_map =  None [model_handling.py at line 1612]  +DEBUG: diag_to_config =  {} [model_handling.py at line 1667]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1779]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1780]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1779]  +DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1780]  +INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/MG5aMC/ghav-mg5amcnlo/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/. +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1301]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1310]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1327]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1347]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1377]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1388]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1399]  +Generated helas calls for 1 subprocesses (72 diagrams) in 0.212 s +DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 181]  +ALOHA: aloha starts to compute helicity amplitudes +ALOHA: aloha creates VVV5 routines +ALOHA: aloha creates FFV1 routines +ALOHA: aloha creates VVVV1 routines +ALOHA: aloha creates VVVV9 routines +ALOHA: aloha creates VVVV10 routines +ALOHA: aloha creates 5 routines in 0.340 s + VVV5 + VVV5 + FFV1 + FFV1 + FFV1 + FFV1 + VVVV1 + VVVV9 + VVVV10 +FileWriter for /data/avalassi/GPU2023/MG5aMC/ghav-mg5amcnlo/CODEGEN_cudacpp_smeft_gg_tttt/src/./HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h +INFO: Created file HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h in directory /data/avalassi/GPU2023/MG5aMC/ghav-mg5amcnlo/CODEGEN_cudacpp_smeft_gg_tttt/src/. 
+super_write_set_parameters_onlyfixMajorana (hardcoded=False) +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 344 , keys size = 344 [model_handling.py at line 722]  +DEBUG: 'parset_pars size =', len(parset_pars) =  parset_pars size = 344 [model_handling.py at line 738]  +DEBUG: 'parset_lines size =', len(parset_lines), ', keys size =', len(parset_lines.keys()) =  parset_lines size = 344 , keys size = 344 [model_handling.py at line 739]  +super_write_set_parameters_onlyfixMajorana (hardcoded=True) +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 10 , keys size = 10 [model_handling.py at line 722]  +DEBUG: 'parset_pars size =', len(parset_pars) =  parset_pars size = 10 [model_handling.py at line 738]  +DEBUG: 'parset_lines size =', len(parset_lines), ', keys size =', len(parset_lines.keys()) =  parset_lines size = 10 , keys size = 10 [model_handling.py at line 739]  +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 722]  +DEBUG: 'parset_pars size =', len(parset_pars) =  parset_pars size = 3 [model_handling.py at line 738]  +DEBUG: 'parset_lines size =', len(parset_lines), ', keys size =', len(parset_lines.keys()) =  parset_lines size = 3 , keys size = 3 [model_handling.py at line 739]  +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 10 , keys size = 10 [model_handling.py at line 722]  +DEBUG: 'parset_pars size =', len(parset_pars) =  parset_pars size = 10 [model_handling.py at line 738]  +DEBUG: 'parset_lines size =', len(parset_lines), ', keys size =', len(parset_lines.keys()) =  parset_lines size = 10 , keys size = 10 [model_handling.py at line 739]  +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 722]  +DEBUG: 'parset_pars size =', len(parset_pars) =  parset_pars size = 3 [model_handling.py at line 738]  +DEBUG: 'parset_lines size =', len(parset_lines), ', keys size =', len(parset_lines.keys()) =  parset_lines size = 3 , keys size = 3 [model_handling.py at line 739]  +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 722]  +DEBUG: 'parset_pars size =', len(parset_pars) =  parset_pars size = 3 [model_handling.py at line 738]  +DEBUG: 'parset_lines size =', len(parset_lines), ', keys size =', len(parset_lines.keys()) =  parset_lines size = 3 , keys size = 3 [model_handling.py at line 739]  +FileWriter for /data/avalassi/GPU2023/MG5aMC/ghav-mg5amcnlo/CODEGEN_cudacpp_smeft_gg_tttt/src/./Parameters_SMEFTsim_topU3l_MwScheme_UFO.h +FileWriter for /data/avalassi/GPU2023/MG5aMC/ghav-mg5amcnlo/CODEGEN_cudacpp_smeft_gg_tttt/src/./Parameters_SMEFTsim_topU3l_MwScheme_UFO.cc +INFO: Created files Parameters_SMEFTsim_topU3l_MwScheme_UFO.h and Parameters_SMEFTsim_topU3l_MwScheme_UFO.cc in directory +INFO: /data/avalassi/GPU2023/MG5aMC/ghav-mg5amcnlo/CODEGEN_cudacpp_smeft_gg_tttt/src/. and /data/avalassi/GPU2023/MG5aMC/ghav-mg5amcnlo/CODEGEN_cudacpp_smeft_gg_tttt/src/. 
+DEBUG: Entering PLUGIN_ProcessExporter.finalize [output.py at line 190]  +quit + +real 0m5.957s +user 0m5.786s +sys 0m0.085s diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/Cards/param_card.dat b/epochX/cudacpp/smeft_gg_tttt.sa/Cards/param_card.dat new file mode 100644 index 0000000000..4a29fbe719 --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/Cards/param_card.dat @@ -0,0 +1,320 @@ +###################################################################### +## PARAM_CARD AUTOMATICALY GENERATED BY MG5 FOLLOWING UFO MODEL #### +###################################################################### +## ## +## Width set on Auto will be computed following the information ## +## present in the decay.py files of the model. ## +## See arXiv:1402.1178 for more details. ## +## ## +###################################################################### + +################################### +## INFORMATION FOR MASS +################################### +Block mass + 1 4.670000e-03 # MD + 2 2.160000e-03 # MU + 3 9.300000e-02 # MS + 4 1.270000e+00 # MC + 5 4.180000e+00 # MB + 6 1.727600e+02 # MT + 11 5.110000e-04 # Me + 13 1.056600e-01 # MMU + 15 1.777000e+00 # MTA + 23 9.118760e+01 # MZ + 25 1.250900e+02 # MH +## Dependent parameters, given by model restrictions. +## Those values should be edited following the +## analytical expression. MG5 ignores those values +## but they are important for interfacing the output of MG5 +## to external program such as Pythia. + 12 0.000000e+00 # ve : 0.0 + 14 0.000000e+00 # vm : 0.0 + 16 0.000000e+00 # vt : 0.0 + 21 0.000000e+00 # g : 0.0 + 22 0.000000e+00 # a : 0.0 + 9000005 9.118760e+01 # z1 : MZ + 9000006 8.038700e+01 # w1+ : MWsm + 9000007 1.727600e+02 # t1 : MT + 9000008 1.250900e+02 # h1 : MH + 24 8.038700e+01 # w+ : MW + +################################### +## INFORMATION FOR SMEFT +################################### +Block smeft + 1 0.000000e+00 # cG + 2 0.000000e+00 # cW + 3 0.000000e+00 # cH + 4 0.000000e+00 # cHbox + 5 0.000000e+00 # cHDD + 6 0.000000e+00 # cHG + 7 0.000000e+00 # cHW + 8 0.000000e+00 # cHB + 9 0.000000e+00 # cHWB + 10 0.000000e+00 # cuHRe + 11 0.000000e+00 # ctHRe + 12 0.000000e+00 # cdHRe + 13 0.000000e+00 # cbHRe + 14 0.000000e+00 # cuGRe + 15 0.000000e+00 # ctGRe + 16 0.000000e+00 # cuWRe + 17 0.000000e+00 # ctWRe + 18 0.000000e+00 # cuBRe + 19 0.000000e+00 # ctBRe + 20 0.000000e+00 # cdGRe + 21 0.000000e+00 # cbGRe + 22 0.000000e+00 # cdWRe + 23 0.000000e+00 # cbWRe + 24 0.000000e+00 # cdBRe + 25 0.000000e+00 # cbBRe + 26 0.000000e+00 # cHj1 + 27 0.000000e+00 # cHQ1 + 28 0.000000e+00 # cHj3 + 29 0.000000e+00 # cHQ3 + 30 0.000000e+00 # cHu + 31 0.000000e+00 # cHt + 32 0.000000e+00 # cHd + 33 0.000000e+00 # cHbq + 34 0.000000e+00 # cHudRe + 35 0.000000e+00 # cHtbRe + 36 0.000000e+00 # cjj11 + 37 0.000000e+00 # cjj18 + 38 0.000000e+00 # cjj31 + 39 0.000000e+00 # cjj38 + 40 0.000000e+00 # cQj11 + 41 0.000000e+00 # cQj18 + 42 0.000000e+00 # cQj31 + 43 0.000000e+00 # cQj38 + 44 0.000000e+00 # cQQ1 + 45 0.000000e+00 # cQQ8 + 46 0.000000e+00 # cuu1 + 47 0.000000e+00 # cuu8 + 48 0.000000e+00 # ctt + 49 0.000000e+00 # ctu1 + 50 0.000000e+00 # ctu8 + 51 0.000000e+00 # cdd1 + 52 0.000000e+00 # cdd8 + 53 0.000000e+00 # cbb + 54 0.000000e+00 # cbd1 + 55 0.000000e+00 # cbd8 + 56 0.000000e+00 # cud1 + 57 0.000000e+00 # ctb1 + 58 0.000000e+00 # ctd1 + 59 0.000000e+00 # cbu1 + 60 0.000000e+00 # cud8 + 61 0.000000e+00 # ctb8 + 62 0.000000e+00 # ctd8 + 63 0.000000e+00 # cbu8 + 64 0.000000e+00 # cutbd1Re + 65 0.000000e+00 # cutbd8Re + 66 0.000000e+00 # cju1 
+ 67 0.000000e+00 # cQu1 + 68 0.000000e+00 # cju8 + 69 0.000000e+00 # cQu8 + 70 0.000000e+00 # ctj1 + 71 0.000000e+00 # ctj8 + 72 0.000000e+00 # cQt1 + 73 0.000000e+00 # cQt8 + 74 0.000000e+00 # cjd1 + 75 0.000000e+00 # cjd8 + 76 0.000000e+00 # cQd1 + 77 0.000000e+00 # cQd8 + 78 0.000000e+00 # cbj1 + 79 0.000000e+00 # cbj8 + 80 0.000000e+00 # cQb1 + 81 0.000000e+00 # cQb8 + 82 0.000000e+00 # cjQtu1Re + 83 0.000000e+00 # cjQtu8Re + 84 0.000000e+00 # cjQbd1Re + 85 0.000000e+00 # cjQbd8Re + 86 0.000000e+00 # cjujd1Re + 87 0.000000e+00 # cjujd8Re + 88 0.000000e+00 # cjujd11Re + 89 0.000000e+00 # cjujd81Re + 90 0.000000e+00 # cQtjd1Re + 91 0.000000e+00 # cQtjd8Re + 92 0.000000e+00 # cjuQb1Re + 93 0.000000e+00 # cjuQb8Re + 94 0.000000e+00 # cQujb1Re + 95 0.000000e+00 # cQujb8Re + 96 0.000000e+00 # cjtQd1Re + 97 0.000000e+00 # cjtQd8Re + 98 0.000000e+00 # cQtQb1Re + 99 0.000000e+00 # cQtQb8Re + 100 0.000000e+00 # ceHRe + 101 0.000000e+00 # ceWRe + 102 0.000000e+00 # ceBRe + 103 0.000000e+00 # cHl1 + 104 0.000000e+00 # cHl3 + 105 0.000000e+00 # cHe + 106 0.000000e+00 # cll + 107 0.000000e+00 # cll1 + 108 0.000000e+00 # clj1 + 109 0.000000e+00 # clj3 + 110 0.000000e+00 # cQl1 + 111 0.000000e+00 # cQl3 + 112 0.000000e+00 # cee + 113 0.000000e+00 # ceu + 114 0.000000e+00 # cte + 115 0.000000e+00 # ced + 116 0.000000e+00 # cbe + 117 0.000000e+00 # cje + 118 0.000000e+00 # cQe + 119 0.000000e+00 # clu + 120 0.000000e+00 # ctl + 121 0.000000e+00 # cld + 122 0.000000e+00 # cbl + 123 0.000000e+00 # cle + 124 0.000000e+00 # cledjRe + 125 0.000000e+00 # clebQRe + 126 0.000000e+00 # cleju1Re + 127 0.000000e+00 # cleQt1Re + 128 0.000000e+00 # cleju3Re + 129 0.000000e+00 # cleQt3Re + +################################### +## INFORMATION FOR SMEFTCPV +################################### +Block smeftcpv + 1 0.000000e+00 # cGtil + 2 0.000000e+00 # cWtil + 3 0.000000e+00 # cHGtil + 4 0.000000e+00 # cHWtil + 5 0.000000e+00 # cHBtil + 6 0.000000e+00 # cHWBtil + 7 0.000000e+00 # cuGIm + 8 0.000000e+00 # ctGIm + 9 0.000000e+00 # cuWIm + 10 0.000000e+00 # ctWIm + 11 0.000000e+00 # cuBIm + 12 0.000000e+00 # ctBIm + 13 0.000000e+00 # cdGIm + 14 0.000000e+00 # cbGIm + 15 0.000000e+00 # cdWIm + 16 0.000000e+00 # cbWIm + 17 0.000000e+00 # cdBIm + 18 0.000000e+00 # cbBIm + 19 0.000000e+00 # cuHIm + 20 0.000000e+00 # ctHIm + 21 0.000000e+00 # cdHIm + 22 0.000000e+00 # cbHIm + 23 0.000000e+00 # cHudIm + 24 0.000000e+00 # cHtbIm + 25 0.000000e+00 # cutbd1Im + 26 0.000000e+00 # cutbd8Im + 27 0.000000e+00 # cjQtu1Im + 28 0.000000e+00 # cjQtu8Im + 29 0.000000e+00 # cjQbd1Im + 30 0.000000e+00 # cjQbd8Im + 31 0.000000e+00 # cjujd1Im + 32 0.000000e+00 # cjujd8Im + 33 0.000000e+00 # cjujd11Im + 34 0.000000e+00 # cjujd81Im + 35 0.000000e+00 # cQtjd1Im + 36 0.000000e+00 # cQtjd8Im + 37 0.000000e+00 # cjuQb1Im + 38 0.000000e+00 # cjuQb8Im + 39 0.000000e+00 # cQujb1Im + 40 0.000000e+00 # cQujb8Im + 41 0.000000e+00 # cjtQd1Im + 42 0.000000e+00 # cjtQd8Im + 43 0.000000e+00 # cQtQb1Im + 44 0.000000e+00 # cQtQb8Im + 45 0.000000e+00 # ceHIm + 46 0.000000e+00 # ceWIm + 47 0.000000e+00 # ceBIm + 48 0.000000e+00 # cledjIm + 49 0.000000e+00 # clebQIm + 50 0.000000e+00 # cleju1Im + 51 0.000000e+00 # cleju3Im + 52 0.000000e+00 # cleQt1Im + 53 0.000000e+00 # cleQt3Im + +################################### +## INFORMATION FOR SMEFTCUTOFF +################################### +Block smeftcutoff + 1 1.000000e+03 # LambdaSMEFT + +################################### +## INFORMATION FOR SMINPUTS +################################### +Block sminputs + 1 
8.038700e+01 # MW + 2 1.166379e-05 # Gf + 3 1.179000e-01 # aS (Note that Parameter not used if you use a PDF set) + +################################### +## INFORMATION FOR SWITCHES +################################### +Block switches + 1 0.000000e+00 # linearPropCorrections + +################################### +## INFORMATION FOR YUKAWA +################################### +Block yukawa + 1 4.670000e-03 # ymdo + 2 2.160000e-03 # ymup + 3 9.300000e-02 # yms + 4 1.270000e+00 # ymc + 5 4.180000e+00 # ymb + 6 1.727600e+02 # ymt + 11 5.110000e-04 # yme + 13 1.056600e-01 # ymm + 15 1.777000e+00 # ymtau + +################################### +## INFORMATION FOR DECAY +################################### +DECAY 6 1.330000e+00 # WT +DECAY 23 2.495200e+00 # WZ +DECAY 24 2.085000e+00 # WW +DECAY 25 4.070000e-03 # WH +## Dependent parameters, given by model restrictions. +## Those values should be edited following the +## analytical expression. MG5 ignores those values +## but they are important for interfacing the output of MG5 +## to external program such as Pythia. +DECAY 1 0.000000e+00 # d : 0.0 +DECAY 2 0.000000e+00 # u : 0.0 +DECAY 3 0.000000e+00 # s : 0.0 +DECAY 4 0.000000e+00 # c : 0.0 +DECAY 5 0.000000e+00 # b : 0.0 +DECAY 11 0.000000e+00 # e- : 0.0 +DECAY 12 0.000000e+00 # ve : 0.0 +DECAY 13 0.000000e+00 # mu- : 0.0 +DECAY 14 0.000000e+00 # vm : 0.0 +DECAY 15 0.000000e+00 # ta- : 0.0 +DECAY 16 0.000000e+00 # vt : 0.0 +DECAY 21 0.000000e+00 # g : 0.0 +DECAY 22 0.000000e+00 # a : 0.0 +DECAY 9000005 2.495200e+00 # z1 : WZ +DECAY 9000006 2.085000e+00 # w1+ : WW +DECAY 9000007 1.330000e+00 # t1 : WT +DECAY 9000008 4.070000e-03 # h1 : WH +#=========================================================== +# QUANTUM NUMBERS OF NEW STATE(S) (NON SM PDG CODE) +#=========================================================== + +Block QNUMBERS 9000005 # z1 + 1 0 # 3 times electric charge + 2 3 # number of spin states (2S+1) + 3 1 # colour rep (1: singlet, 3: triplet, 8: octet) + 4 0 # Particle/Antiparticle distinction (0=own anti) +Block QNUMBERS 9000006 # w1+ + 1 3 # 3 times electric charge + 2 3 # number of spin states (2S+1) + 3 1 # colour rep (1: singlet, 3: triplet, 8: octet) + 4 1 # Particle/Antiparticle distinction (0=own anti) +Block QNUMBERS 9000007 # t1 + 1 2 # 3 times electric charge + 2 2 # number of spin states (2S+1) + 3 3 # colour rep (1: singlet, 3: triplet, 8: octet) + 4 1 # Particle/Antiparticle distinction (0=own anti) +Block QNUMBERS 9000008 # h1 + 1 0 # 3 times electric charge + 2 1 # number of spin states (2S+1) + 3 1 # colour rep (1: singlet, 3: triplet, 8: octet) + 4 0 # Particle/Antiparticle distinction (0=own anti) diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/Bridge.h b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/Bridge.h new file mode 100644 index 0000000000..faa8f95d1d --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/Bridge.h @@ -0,0 +1,519 @@ +#ifndef BRIDGE_H +#define BRIDGE_H 1 + +// Includes from Cuda/C++ matrix element calculations +#include "mgOnGpuConfig.h" // for mgOnGpu::npar, mgOnGpu::np4 + +#include "CPPProcess.h" // for CPPProcess +#include "CrossSectionKernels.h" // for flagAbnormalMEs +#include "MatrixElementKernels.h" // for MatrixElementKernelHost, MatrixElementKernelDevice +#include "MemoryAccessMomenta.h" // for MemoryAccessMomenta::neppM +#include "MemoryBuffers.h" // for HostBufferMomenta, DeviceBufferMomenta etc + +#include +#include +#include +#include +#include +#include +#include + +#ifdef __CUDACC__ +namespace mg5amcGpu +#else 
+namespace mg5amcCpu +#endif +{ + //-------------------------------------------------------------------------- + /** + * A base class for a class whose pointer is passed between Fortran and C++. + * This is not really necessary, but it allows minimal type checks on all such pointers. + */ + struct CppObjectInFortran + { + CppObjectInFortran() {} + virtual ~CppObjectInFortran() {} + }; + + //-------------------------------------------------------------------------- + /** + * A templated class for calling the CUDA/C++ matrix element calculations of the event generation workflow. + * The FORTRANFPTYPE template parameter indicates the precision of the Fortran momenta from MadEvent (float or double). + * The precision of the matrix element calculation is hardcoded in the fptype typedef in CUDA/C++. + * + * The Fortran momenta passed in are in the form of + * DOUBLE PRECISION P_MULTI(0:3, NEXTERNAL, VECSIZE_USED) + * where the dimensions are , , . + * In memory, this is stored in a way that C reads as an array P_MULTI[nevtF][nparF][np4F]. + * The CUDA/C++ momenta are stored as an array[npagM][npar][np4][neppM] with nevt=npagM*neppM. + * The Bridge is configured to store nevt==nevtF events in CUDA/C++. + * It also checks that Fortran and C++ parameters match, nparF==npar and np4F==np4. + * + * The cpu/gpu sequences take FORTRANFPTYPE* (not fptype*) momenta/MEs. + * This allows mixing double in MadEvent Fortran with float in CUDA/C++ sigmaKin. + * In the fcheck_sa.f test, Fortran uses double while CUDA/C++ may use double or float. + * In the check_sa "--bridge" test, everything is implemented in fptype (double or float). + */ + template + class Bridge final : public CppObjectInFortran + { + public: + /** + * Constructor + * + * @param nevtF (VECSIZE_USED, vector.inc) number of events in Fortran array loops (VECSIZE_USED <= VECSIZE_MEMMAX) + * @param nparF (NEXTERNAL, nexternal.inc) number of external particles in Fortran arrays (KEPT FOR SANITY CHECKS ONLY) + * @param np4F number of momenta components, usually 4, in Fortran arrays (KEPT FOR SANITY CHECKS ONLY) + */ + Bridge( unsigned int nevtF, unsigned int nparF, unsigned int np4F ); + + /** + * Destructor + */ + virtual ~Bridge() {} + + // Delete copy/move constructors and assignment operators + Bridge( const Bridge& ) = delete; + Bridge( Bridge&& ) = delete; + Bridge& operator=( const Bridge& ) = delete; + Bridge& operator=( Bridge&& ) = delete; + +#ifdef __CUDACC__ + /** + * Set the gpublocks and gputhreads for the gpusequence - throws if evnt != gpublocks*gputhreads + * (this is needed for BridgeKernel tests rather than for actual production use in Fortran) + * + * @param gpublocks number of gpublocks + * @param gputhreads number of gputhreads + */ + void set_gpugrid( const int gpublocks, const int gputhreads ); + + /** + * Sequence to be executed for the Cuda matrix element calculation + * + * @param momenta the pointer to the input 4-momenta + * @param gs the pointer to the input Gs (running QCD coupling constant alphas) + * @param rndhel the pointer to the input random numbers for helicity selection + * @param rndcol the pointer to the input random numbers for color selection + * @param channelId the Feynman diagram to enhance in multi-channel mode if 1 to n (disable multi-channel if 0) + * @param mes the pointer to the output matrix elements + * @param goodHelOnly quit after computing good helicities? 
+ * @param selhel the pointer to the output selected helicities + * @param selcol the pointer to the output selected colors + */ + void gpu_sequence( const FORTRANFPTYPE* momenta, + const FORTRANFPTYPE* gs, + const FORTRANFPTYPE* rndhel, + const FORTRANFPTYPE* rndcol, + const unsigned int channelId, + FORTRANFPTYPE* mes, + int* selhel, + int* selcol, + const bool goodHelOnly = false ); +#else + /** + * Sequence to be executed for the vectorized CPU matrix element calculation + * + * @param momenta the pointer to the input 4-momenta + * @param gs the pointer to the input Gs (running QCD coupling constant alphas) + * @param rndhel the pointer to the input random numbers for helicity selection + * @param rndcol the pointer to the input random numbers for color selection + * @param channelId the Feynman diagram to enhance in multi-channel mode if 1 to n (disable multi-channel if 0) + * @param mes the pointer to the output matrix elements + * @param selhel the pointer to the output selected helicities + * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? + */ + void cpu_sequence( const FORTRANFPTYPE* momenta, + const FORTRANFPTYPE* gs, + const FORTRANFPTYPE* rndhel, + const FORTRANFPTYPE* rndcol, + const unsigned int channelId, + FORTRANFPTYPE* mes, + int* selhel, + int* selcol, + const bool goodHelOnly = false ); +#endif + + // Return the number of good helicities (-1 initially when they have not yet been calculated) + int nGoodHel() const { return m_nGoodHel; } + + // Return the total number of helicities (expose cudacpp ncomb in the Bridge interface to Fortran) + constexpr int nTotHel() const { return mgOnGpu::ncomb; } + + private: + unsigned int m_nevt; // number of events + int m_nGoodHel; // the number of good helicities (-1 initially when they have not yet been calculated) + +#ifdef __CUDACC__ + int m_gputhreads; // number of gpu threads (default set from number of events, can be modified) + int m_gpublocks; // number of gpu blocks (default set from number of events, can be modified) + mg5amcGpu::DeviceBuffer m_devMomentaF; + mg5amcGpu::DeviceBufferMomenta m_devMomentaC; + mg5amcGpu::DeviceBufferGs m_devGs; + mg5amcGpu::DeviceBufferRndNumHelicity m_devRndHel; + mg5amcGpu::DeviceBufferRndNumColor m_devRndCol; + mg5amcGpu::DeviceBufferMatrixElements m_devMEs; + mg5amcGpu::DeviceBufferSelectedHelicity m_devSelHel; + mg5amcGpu::DeviceBufferSelectedColor m_devSelCol; + mg5amcGpu::PinnedHostBufferGs m_hstGs; + mg5amcGpu::PinnedHostBufferRndNumHelicity m_hstRndHel; + mg5amcGpu::PinnedHostBufferRndNumColor m_hstRndCol; + mg5amcGpu::PinnedHostBufferMatrixElements m_hstMEs; + mg5amcGpu::PinnedHostBufferSelectedHelicity m_hstSelHel; + mg5amcGpu::PinnedHostBufferSelectedColor m_hstSelCol; + std::unique_ptr m_pmek; + //static constexpr int s_gputhreadsmin = 16; // minimum number of gpu threads (TEST VALUE FOR MADEVENT) + static constexpr int s_gputhreadsmin = 32; // minimum number of gpu threads (DEFAULT) +#else + mg5amcCpu::HostBufferMomenta m_hstMomentaC; + mg5amcCpu::HostBufferGs m_hstGs; + mg5amcCpu::HostBufferRndNumHelicity m_hstRndHel; + mg5amcCpu::HostBufferRndNumColor m_hstRndCol; + mg5amcCpu::HostBufferMatrixElements m_hstMEs; + mg5amcCpu::HostBufferSelectedHelicity m_hstSelHel; + mg5amcCpu::HostBufferSelectedColor m_hstSelCol; + std::unique_ptr m_pmek; +#endif + }; + + //-------------------------------------------------------------------------- + // + // Forward declare transposition methods + // + +#ifdef __CUDACC__ + + 
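  // Illustration (not part of the generated file): the transposition methods declared
  // below convert between the Fortran AOS momenta layout and the cudacpp AOSOA layout.
  // For event ievt (with ievt = ipagM * neppM + ieppM), particle ipar and component ip4:
  //   Fortran/AOS index: fpos = ievt * npar * np4 + ipar * np4 + ip4
  //   C++/AOSOA index:   cpos = ipagM * npar * np4 * neppM + ipar * np4 * neppM + ip4 * neppM + ieppM
  // Worked example with assumed values npar=6, np4=4, neppM=4 (neppM depends on the build):
  // ievt=5, ipar=2, ip4=1 gives ipagM=1, ieppM=1, hence fpos=129 and cpos=133.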
template + __global__ void dev_transposeMomentaF2C( const Tin* in, Tout* out, const unsigned int nevt ); + +#endif // __CUDACC__ + + template + void hst_transposeMomentaF2C( const Tin* in, Tout* out, const unsigned int nevt ); + + template + void hst_transposeMomentaC2F( const Tin* in, Tout* out, const unsigned int nevt ); + + //-------------------------------------------------------------------------- + // + // Implementations of member functions of class Bridge + // + + template + Bridge::Bridge( unsigned int nevtF, unsigned int nparF, unsigned int np4F ) + : m_nevt( nevtF ) + , m_nGoodHel( -1 ) +#ifdef __CUDACC__ + , m_gputhreads( 256 ) // default number of gpu threads + , m_gpublocks( m_nevt / m_gputhreads ) // this ensures m_nevt <= m_gpublocks*m_gputhreads + , m_devMomentaF( m_nevt ) + , m_devMomentaC( m_nevt ) + , m_devGs( m_nevt ) + , m_devRndHel( m_nevt ) + , m_devRndCol( m_nevt ) + , m_devMEs( m_nevt ) + , m_devSelHel( m_nevt ) + , m_devSelCol( m_nevt ) +#else + , m_hstMomentaC( m_nevt ) +#endif + , m_hstGs( m_nevt ) + , m_hstRndHel( m_nevt ) + , m_hstRndCol( m_nevt ) + , m_hstMEs( m_nevt ) + , m_hstSelHel( m_nevt ) + , m_hstSelCol( m_nevt ) + , m_pmek( nullptr ) + { + if( nparF != mgOnGpu::npar ) throw std::runtime_error( "Bridge constructor: npar mismatch" ); + if( np4F != mgOnGpu::np4 ) throw std::runtime_error( "Bridge constructor: np4 mismatch" ); +#ifdef __CUDACC__ + if( ( m_nevt < s_gputhreadsmin ) || ( m_nevt % s_gputhreadsmin != 0 ) ) + throw std::runtime_error( "Bridge constructor: nevt should be a multiple of " + std::to_string( s_gputhreadsmin ) ); + while( m_nevt != m_gpublocks * m_gputhreads ) + { + m_gputhreads /= 2; + if( m_gputhreads < s_gputhreadsmin ) + throw std::logic_error( "Bridge constructor: FIXME! cannot choose gputhreads" ); // this should never happen! + m_gpublocks = m_nevt / m_gputhreads; + } + std::cout << "WARNING! Instantiate device Bridge (nevt=" << m_nevt << ", gpublocks=" << m_gpublocks << ", gputhreads=" << m_gputhreads + << ", gpublocks*gputhreads=" << m_gpublocks * m_gputhreads << ")" << std::endl; + mg5amcGpu::CPPProcess process( /*verbose=*/false ); + m_pmek.reset( new mg5amcGpu::MatrixElementKernelDevice( m_devMomentaC, m_devGs, m_devRndHel, m_devRndCol, m_devMEs, m_devSelHel, m_devSelCol, m_gpublocks, m_gputhreads ) ); +#else + std::cout << "WARNING! Instantiate host Bridge (nevt=" << m_nevt << ")" << std::endl; + mg5amcCpu::CPPProcess process( /*verbose=*/false ); + m_pmek.reset( new mg5amcCpu::MatrixElementKernelHost( m_hstMomentaC, m_hstGs, m_hstRndHel, m_hstRndCol, m_hstMEs, m_hstSelHel, m_hstSelCol, m_nevt ) ); +#endif // __CUDACC__ + process.initProc( "../../Cards/param_card.dat" ); + } + +#ifdef __CUDACC__ + template + void Bridge::set_gpugrid( const int gpublocks, const int gputhreads ) + { + if( m_nevt != gpublocks * gputhreads ) + throw std::runtime_error( "Bridge: gpublocks*gputhreads must equal m_nevt in set_gpugrid" ); + m_gpublocks = gpublocks; + m_gputhreads = gputhreads; + std::cout << "WARNING! 
Set grid in Bridge (nevt=" << m_nevt << ", gpublocks=" << m_gpublocks << ", gputhreads=" << m_gputhreads + << ", gpublocks*gputhreads=" << m_gpublocks * m_gputhreads << ")" << std::endl; + m_pmek->setGrid( m_gpublocks, m_gputhreads ); + } +#endif + +#ifdef __CUDACC__ + template + void Bridge::gpu_sequence( const FORTRANFPTYPE* momenta, + const FORTRANFPTYPE* gs, + const FORTRANFPTYPE* rndhel, + const FORTRANFPTYPE* rndcol, + const unsigned int channelId, + FORTRANFPTYPE* mes, + int* selhel, + int* selcol, + const bool goodHelOnly ) + { + constexpr int neppM = MemoryAccessMomenta::neppM; + if constexpr( neppM == 1 && std::is_same_v ) + { + checkCuda( cudaMemcpy( m_devMomentaC.data(), momenta, m_devMomentaC.bytes(), cudaMemcpyHostToDevice ) ); + } + else + { + checkCuda( cudaMemcpy( m_devMomentaF.data(), momenta, m_devMomentaF.bytes(), cudaMemcpyHostToDevice ) ); + const int thrPerEvt = mgOnGpu::npar * mgOnGpu::np4; // AV: transpose alg does 1 element per thread (NOT 1 event per thread) + //const int thrPerEvt = 1; // AV: try new alg with 1 event per thread... this seems slower + dev_transposeMomentaF2C<<>>( m_devMomentaF.data(), m_devMomentaC.data(), m_nevt ); + } + if constexpr( std::is_same_v ) + { + memcpy( m_hstGs.data(), gs, m_nevt * sizeof( FORTRANFPTYPE ) ); + memcpy( m_hstRndHel.data(), rndhel, m_nevt * sizeof( FORTRANFPTYPE ) ); + memcpy( m_hstRndCol.data(), rndcol, m_nevt * sizeof( FORTRANFPTYPE ) ); + } + else + { + std::copy( gs, gs + m_nevt, m_hstGs.data() ); + std::copy( rndhel, rndhel + m_nevt, m_hstRndHel.data() ); + std::copy( rndcol, rndcol + m_nevt, m_hstRndCol.data() ); + } + copyDeviceFromHost( m_devGs, m_hstGs ); + copyDeviceFromHost( m_devRndHel, m_hstRndHel ); + copyDeviceFromHost( m_devRndCol, m_hstRndCol ); + if( m_nGoodHel < 0 ) + { + m_nGoodHel = m_pmek->computeGoodHelicities(); + if( m_nGoodHel < 0 ) throw std::runtime_error( "Bridge gpu_sequence: computeGoodHelicities returned nGoodHel<0" ); + } + if( goodHelOnly ) return; + m_pmek->computeMatrixElements( channelId ); + copyHostFromDevice( m_hstMEs, m_devMEs ); + flagAbnormalMEs( m_hstMEs.data(), m_nevt ); + copyHostFromDevice( m_hstSelHel, m_devSelHel ); + copyHostFromDevice( m_hstSelCol, m_devSelCol ); + if constexpr( std::is_same_v ) + { + memcpy( mes, m_hstMEs.data(), m_hstMEs.bytes() ); + memcpy( selhel, m_hstSelHel.data(), m_hstSelHel.bytes() ); + memcpy( selcol, m_hstSelCol.data(), m_hstSelCol.bytes() ); + } + else + { + std::copy( m_hstMEs.data(), m_hstMEs.data() + m_nevt, mes ); + std::copy( m_hstSelHel.data(), m_hstSelHel.data() + m_nevt, selhel ); + std::copy( m_hstSelCol.data(), m_hstSelCol.data() + m_nevt, selcol ); + } + } +#endif + +#ifndef __CUDACC__ + template + void Bridge::cpu_sequence( const FORTRANFPTYPE* momenta, + const FORTRANFPTYPE* gs, + const FORTRANFPTYPE* rndhel, + const FORTRANFPTYPE* rndcol, + const unsigned int channelId, + FORTRANFPTYPE* mes, + int* selhel, + int* selcol, + const bool goodHelOnly ) + { + hst_transposeMomentaF2C( momenta, m_hstMomentaC.data(), m_nevt ); + if constexpr( std::is_same_v ) + { + memcpy( m_hstGs.data(), gs, m_nevt * sizeof( FORTRANFPTYPE ) ); + memcpy( m_hstRndHel.data(), rndhel, m_nevt * sizeof( FORTRANFPTYPE ) ); + memcpy( m_hstRndCol.data(), rndcol, m_nevt * sizeof( FORTRANFPTYPE ) ); + } + else + { + std::copy( gs, gs + m_nevt, m_hstGs.data() ); + std::copy( rndhel, rndhel + m_nevt, m_hstRndHel.data() ); + std::copy( rndcol, rndcol + m_nevt, m_hstRndCol.data() ); + } + if( m_nGoodHel < 0 ) + { + m_nGoodHel = m_pmek->computeGoodHelicities(); + if( 
m_nGoodHel < 0 ) throw std::runtime_error( "Bridge cpu_sequence: computeGoodHelicities returned nGoodHel<0" ); + } + if( goodHelOnly ) return; + m_pmek->computeMatrixElements( channelId ); + flagAbnormalMEs( m_hstMEs.data(), m_nevt ); + if constexpr( std::is_same_v ) + { + memcpy( mes, m_hstMEs.data(), m_hstMEs.bytes() ); + memcpy( selhel, m_hstSelHel.data(), m_hstSelHel.bytes() ); + memcpy( selcol, m_hstSelCol.data(), m_hstSelCol.bytes() ); + } + else + { + std::copy( m_hstMEs.data(), m_hstMEs.data() + m_nevt, mes ); + std::copy( m_hstSelHel.data(), m_hstSelHel.data() + m_nevt, selhel ); + std::copy( m_hstSelCol.data(), m_hstSelCol.data() + m_nevt, selcol ); + } + } +#endif + + //-------------------------------------------------------------------------- + // + // Implementations of transposition methods + // - FORTRAN arrays: P_MULTI(0:3, NEXTERNAL, VECSIZE_USED) ==> p_multi[nevtF][nparF][np4F] in C++ (AOS) + // - C++ array: momenta[npagM][npar][np4][neppM] with nevt=npagM*neppM (AOSOA) + // + +#ifdef __CUDACC__ + template + __global__ void dev_transposeMomentaF2C( const Tin* in, Tout* out, const unsigned int nevt ) + { + constexpr bool oldImplementation = true; // default: use old implementation + if constexpr( oldImplementation ) + { + // SR initial implementation + constexpr int part = mgOnGpu::npar; + constexpr int mome = mgOnGpu::np4; + constexpr int strd = MemoryAccessMomenta::neppM; + int pos = blockDim.x * blockIdx.x + threadIdx.x; + int arrlen = nevt * part * mome; + if( pos < arrlen ) + { + int page_i = pos / ( strd * mome * part ); + int rest_1 = pos % ( strd * mome * part ); + int part_i = rest_1 / ( strd * mome ); + int rest_2 = rest_1 % ( strd * mome ); + int mome_i = rest_2 / strd; + int strd_i = rest_2 % strd; + int inpos = + ( page_i * strd + strd_i ) // event number + * ( part * mome ) // event size (pos of event) + + part_i * mome // particle inside event + + mome_i; // momentum inside particle + out[pos] = in[inpos]; // F2C (Fortran to C) + } + } + else + { + // AV attempt another implementation with 1 event per thread: this seems slower... + // F-style: AOS[nevtF][nparF][np4F] + // C-style: AOSOA[npagM][npar][np4][neppM] with nevt=npagM*neppM + constexpr int npar = mgOnGpu::npar; + constexpr int np4 = mgOnGpu::np4; + constexpr int neppM = MemoryAccessMomenta::neppM; + assert( nevt % neppM == 0 ); // number of events is not a multiple of neppM??? 
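      // In this alternative implementation each GPU thread handles one full event:
      // it derives its event index ievt from the block and thread indices, splits it
      // into the AOSOA page index ipagM and the intra-page index ieppM, and then loops
      // over all np4 components of all npar particles, copying each value from the
      // Fortran AOS position fpos to the corresponding AOSOA position cpos.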
+ int ievt = blockDim.x * blockIdx.x + threadIdx.x; + int ipagM = ievt / neppM; + int ieppM = ievt % neppM; + for( int ip4 = 0; ip4 < np4; ip4++ ) + for( int ipar = 0; ipar < npar; ipar++ ) + { + int cpos = ipagM * npar * np4 * neppM + ipar * np4 * neppM + ip4 * neppM + ieppM; + int fpos = ievt * npar * np4 + ipar * np4 + ip4; + out[cpos] = in[fpos]; // F2C (Fortran to C) + } + } + } +#endif + + template + void hst_transposeMomenta( const Tin* in, Tout* out, const unsigned int nevt ) + { + constexpr bool oldImplementation = false; // default: use new implementation + if constexpr( oldImplementation ) + { + // SR initial implementation + constexpr unsigned int part = mgOnGpu::npar; + constexpr unsigned int mome = mgOnGpu::np4; + constexpr unsigned int strd = MemoryAccessMomenta::neppM; + unsigned int arrlen = nevt * part * mome; + for( unsigned int pos = 0; pos < arrlen; ++pos ) + { + unsigned int page_i = pos / ( strd * mome * part ); + unsigned int rest_1 = pos % ( strd * mome * part ); + unsigned int part_i = rest_1 / ( strd * mome ); + unsigned int rest_2 = rest_1 % ( strd * mome ); + unsigned int mome_i = rest_2 / strd; + unsigned int strd_i = rest_2 % strd; + unsigned int inpos = + ( page_i * strd + strd_i ) // event number + * ( part * mome ) // event size (pos of event) + + part_i * mome // particle inside event + + mome_i; // momentum inside particle + if constexpr( F2C ) // needs c++17 and cuda >=11.2 (#333) + out[pos] = in[inpos]; // F2C (Fortran to C) + else + out[inpos] = in[pos]; // C2F (C to Fortran) + } + } + else + { + // AV attempt another implementation: this is slightly faster (better c++ pipelining?) + // [NB! this is not a transposition, it is an AOS to AOSOA conversion: if neppM=1, a memcpy is enough] + // F-style: AOS[nevtF][nparF][np4F] + // C-style: AOSOA[npagM][npar][np4][neppM] with nevt=npagM*neppM + constexpr unsigned int npar = mgOnGpu::npar; + constexpr unsigned int np4 = mgOnGpu::np4; + constexpr unsigned int neppM = MemoryAccessMomenta::neppM; + if constexpr( neppM == 1 && std::is_same_v ) + { + memcpy( out, in, nevt * npar * np4 * sizeof( Tin ) ); + } + else + { + const unsigned int npagM = nevt / neppM; + assert( nevt % neppM == 0 ); // number of events is not a multiple of neppM??? 
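        // The quadruple loop below performs the AOS to AOSOA conversion page by page:
        // for each page ipagM, component ip4 and particle ipar, the innermost loop over
        // ieppM touches neppM consecutive AOSOA elements, so the AOSOA side of the copy
        // (writes for F2C, reads for C2F) is contiguous in memory.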
+ for( unsigned int ipagM = 0; ipagM < npagM; ipagM++ ) + for( unsigned int ip4 = 0; ip4 < np4; ip4++ ) + for( unsigned int ipar = 0; ipar < npar; ipar++ ) + for( unsigned int ieppM = 0; ieppM < neppM; ieppM++ ) + { + unsigned int ievt = ipagM * neppM + ieppM; + unsigned int cpos = ipagM * npar * np4 * neppM + ipar * np4 * neppM + ip4 * neppM + ieppM; + unsigned int fpos = ievt * npar * np4 + ipar * np4 + ip4; + if constexpr( F2C ) + out[cpos] = in[fpos]; // F2C (Fortran to C) + else + out[fpos] = in[cpos]; // C2F (C to Fortran) + } + } + } + } + + template + void hst_transposeMomentaF2C( const Tin* in, Tout* out, const unsigned int nevt ) + { + constexpr bool F2C = true; + hst_transposeMomenta( in, out, nevt ); + } + + template + void hst_transposeMomentaC2F( const Tin* in, Tout* out, const unsigned int nevt ) + { + constexpr bool F2C = false; + hst_transposeMomenta( in, out, nevt ); + } + + //-------------------------------------------------------------------------- +} +#endif // BRIDGE_H diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/BridgeKernels.cc b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/BridgeKernels.cc new file mode 100644 index 0000000000..c2c16ff038 --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/BridgeKernels.cc @@ -0,0 +1,149 @@ +#include "BridgeKernels.h" + +#include "MemoryAccessMomenta.h" + +#include + +using mgOnGpu::npar; // the number of particles (external = initial + final) +using mgOnGpu::np4; // the number of dimensions of 4-momenta (E,px,py,pz) + +//============================================================================ + +#ifdef __CUDACC__ +namespace mg5amcGpu +#else +namespace mg5amcCpu +#endif +{ + //-------------------------------------------------------------------------- + + BridgeKernelBase::BridgeKernelBase( const BufferMomenta& momenta, // input: momenta + const BufferGs& gs, // input: gs for alphaS + const BufferRndNumHelicity& rndhel, // input: random numbers for helicity selection + const BufferRndNumColor& rndcol, // input: random numbers for color selection + BufferMatrixElements& matrixElements, // output: matrix elements + BufferSelectedHelicity& selhel, // output: helicity selection + BufferSelectedColor& selcol, // output: color selection + const size_t nevt ) + : MatrixElementKernelBase( momenta, gs, rndhel, rndcol, matrixElements, selhel, selcol ) + , NumberOfEvents( nevt ) + , m_bridge( nevt, npar, np4 ) + { + if( m_momenta.isOnDevice() ) throw std::runtime_error( "BridgeKernelBase: momenta must be a host array" ); + if( m_matrixElements.isOnDevice() ) throw std::runtime_error( "BridgeKernelBase: matrixElements must be a host array" ); + if( this->nevt() != m_momenta.nevt() ) throw std::runtime_error( "BridgeKernelBase: nevt mismatch with momenta" ); + if( this->nevt() != m_matrixElements.nevt() ) throw std::runtime_error( "BridgeKernelBase: nevt mismatch with matrixElements" ); + } + + //-------------------------------------------------------------------------- +} + +//============================================================================ + +#ifndef __CUDACC__ +namespace mg5amcCpu +{ + + //-------------------------------------------------------------------------- + + BridgeKernelHost::BridgeKernelHost( const BufferMomenta& momenta, // input: momenta + const BufferGs& gs, // input: Gs for alphaS + const BufferRndNumHelicity& rndhel, // input: random numbers for helicity selection + const BufferRndNumColor& rndcol, // input: random numbers for color selection + BufferMatrixElements& matrixElements, // 
output: matrix elements + BufferSelectedHelicity& selhel, // output: helicity selection + BufferSelectedColor& selcol, // output: color selection + const size_t nevt ) + : BridgeKernelBase( momenta, gs, rndhel, rndcol, matrixElements, selhel, selcol, nevt ) + , m_fortranMomenta( nevt ) + { + } + + //-------------------------------------------------------------------------- + + void BridgeKernelHost::transposeInputMomentaC2F() + { + hst_transposeMomentaC2F( m_momenta.data(), m_fortranMomenta.data(), nevt() ); + } + + //-------------------------------------------------------------------------- + + int BridgeKernelHost::computeGoodHelicities() + { + constexpr bool goodHelOnly = true; + constexpr unsigned int channelId = 0; // disable multi-channel for helicity filtering + m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), channelId, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + return m_bridge.nGoodHel(); + } + + //-------------------------------------------------------------------------- + + void BridgeKernelHost::computeMatrixElements( const unsigned int channelId ) + { + constexpr bool goodHelOnly = false; + m_bridge.cpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), channelId, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + } + + //-------------------------------------------------------------------------- + +} +#endif + +//============================================================================ + +#ifdef __CUDACC__ +namespace mg5amcGpu +{ + + //-------------------------------------------------------------------------- + + BridgeKernelDevice::BridgeKernelDevice( const BufferMomenta& momenta, // input: momenta + const BufferGs& gs, // input: Gs for alphaS + const BufferRndNumHelicity& rndhel, // input: random numbers for helicity selection + const BufferRndNumColor& rndcol, // input: random numbers for color selection + BufferMatrixElements& matrixElements, // output: matrix elements + BufferSelectedHelicity& selhel, // output: helicity selection + BufferSelectedColor& selcol, // output: color selection + const size_t gpublocks, + const size_t gputhreads ) + : BridgeKernelBase( momenta, gs, rndhel, rndcol, matrixElements, selhel, selcol, gpublocks * gputhreads ) + , m_fortranMomenta( nevt() ) + , m_gpublocks( gpublocks ) + , m_gputhreads( gputhreads ) + { + if( m_gpublocks == 0 ) throw std::runtime_error( "BridgeKernelDevice: gpublocks must be > 0" ); + if( m_gputhreads == 0 ) throw std::runtime_error( "BridgeKernelDevice: gputhreads must be > 0" ); + m_bridge.set_gpugrid( gpublocks, gputhreads ); + } + + //-------------------------------------------------------------------------- + + void BridgeKernelDevice::transposeInputMomentaC2F() + { + hst_transposeMomentaC2F( m_momenta.data(), m_fortranMomenta.data(), nevt() ); + } + + //-------------------------------------------------------------------------- + + int BridgeKernelDevice::computeGoodHelicities() + { + constexpr bool goodHelOnly = true; + constexpr unsigned int channelId = 0; // disable multi-channel for helicity filtering + m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), channelId, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + return m_bridge.nGoodHel(); + } + + //-------------------------------------------------------------------------- + + void BridgeKernelDevice::computeMatrixElements( const unsigned int channelId ) 
+ { + constexpr bool goodHelOnly = false; + m_bridge.gpu_sequence( m_fortranMomenta.data(), m_gs.data(), m_rndhel.data(), m_rndcol.data(), channelId, m_matrixElements.data(), m_selhel.data(), m_selcol.data(), goodHelOnly ); + } + + //-------------------------------------------------------------------------- + +} +#endif + +//============================================================================ diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/BridgeKernels.h b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/BridgeKernels.h new file mode 100644 index 0000000000..10e664a4c4 --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/BridgeKernels.h @@ -0,0 +1,134 @@ +#ifndef BRIDGEKERNELS_H +#define BRIDGEKERNELS_H 1 + +#include "mgOnGpuConfig.h" + +#include "Bridge.h" +#include "MatrixElementKernels.h" +#include "MemoryBuffers.h" + +#ifdef __CUDACC__ +namespace mg5amcGpu +#else +namespace mg5amcCpu +#endif +{ + //-------------------------------------------------------------------------- + + // A Bridge wrapper base class encapsulating matrix element calculations on a CPU host + class BridgeKernelBase : public MatrixElementKernelBase, public NumberOfEvents + { + public: + + // Constructor from existing input and output buffers + BridgeKernelBase( const BufferMomenta& momenta, // input: momenta + const BufferGs& gs, // input: gs for alphaS + const BufferRndNumHelicity& rndhel, // input: random numbers for helicity selection + const BufferRndNumColor& rndcol, // input: random numbers for color selection + BufferMatrixElements& matrixElements, // output: matrix elements + BufferSelectedHelicity& selhel, // output: helicity selection + BufferSelectedColor& selcol, // output: color selection + const size_t nevt ); + + // Destructor + virtual ~BridgeKernelBase() {} + + // Transpose input momenta from C to Fortran before the matrix element calculation in the Bridge + virtual void transposeInputMomentaC2F() = 0; + + protected: + + // The wrapped bridge + Bridge m_bridge; + }; + + //-------------------------------------------------------------------------- + +#ifndef __CUDACC__ + // A Bridge wrapper class encapsulating matrix element calculations on a CPU host + class BridgeKernelHost final : public BridgeKernelBase + { + public: + + // Constructor from existing input and output buffers + BridgeKernelHost( const BufferMomenta& momenta, // input: momenta + const BufferGs& gs, // input: gs for alphaS + const BufferRndNumHelicity& rndhel, // input: random numbers for helicity selection + const BufferRndNumColor& rndcol, // input: random numbers for color selection + BufferMatrixElements& matrixElements, // output: matrix elements + BufferSelectedHelicity& selhel, // output: helicity selection + BufferSelectedColor& selcol, // output: color selection + const size_t nevt ); + + // Destructor + virtual ~BridgeKernelHost() {} + + // Transpose input momenta from C to Fortran before the matrix element calculation in the Bridge + void transposeInputMomentaC2F() override final; + + // Compute good helicities (returns nGoodHel, the number of good helicity combinations out of ncomb) + int computeGoodHelicities() override final; + + // Compute matrix elements + void computeMatrixElements( const unsigned int channelId ) override final; + + // Is this a host or device kernel? 
+ bool isOnDevice() const override final { return false; } + + private: + + // The buffer for the input momenta, transposed to Fortran array indexing + HostBufferMomenta m_fortranMomenta; + }; +#endif + + //-------------------------------------------------------------------------- + +#ifdef __CUDACC__ + // A Bridge wrapper class encapsulating matrix element calculations on a GPU device + class BridgeKernelDevice : public BridgeKernelBase + { + public: + + // Constructor from existing input and output buffers + BridgeKernelDevice( const BufferMomenta& momenta, // input: momenta + const BufferGs& gs, // input: gs for alphaS + const BufferRndNumHelicity& rndhel, // input: random numbers for helicity selection + const BufferRndNumColor& rndcol, // input: random numbers for color selection + BufferMatrixElements& matrixElements, // output: matrix elements + BufferSelectedHelicity& selhel, // output: helicity selection + BufferSelectedColor& selcol, // output: color selection + const size_t gpublocks, + const size_t gputhreads ); + + // Destructor + virtual ~BridgeKernelDevice() {} + + // Transpose input momenta from C to Fortran before the matrix element calculation in the Bridge + void transposeInputMomentaC2F() override final; + + // Compute good helicities (returns nGoodHel, the number of good helicity combinations out of ncomb) + int computeGoodHelicities() override final; + + // Compute matrix elements + void computeMatrixElements( const unsigned int channelId ) override final; + + // Is this a host or device kernel? + bool isOnDevice() const override final { return true; } + + private: + + // The buffer for the input momenta, transposed to Fortran array indexing + PinnedHostBufferMomenta m_fortranMomenta; + + // The number of blocks in the GPU grid + size_t m_gpublocks; + + // The number of threads in the GPU grid + size_t m_gputhreads; + }; +#endif + + //-------------------------------------------------------------------------- +} +#endif // BRIDGEKERNELS_H diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/CMakeLists.txt b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/CMakeLists.txt new file mode 100644 index 0000000000..1e15f3e9ed --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/CMakeLists.txt @@ -0,0 +1,4 @@ +SUBDIRLIST(SUBDIRS) +FOREACH(subdir ${SUBDIRS}) + ADD_SUBDIRECTORY(${subdir}) +ENDFOREACH() diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/CrossSectionKernels.cc b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/CrossSectionKernels.cc new file mode 100644 index 0000000000..398f8a87bd --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/CrossSectionKernels.cc @@ -0,0 +1,231 @@ +#include "CrossSectionKernels.h" + +#include "MemoryAccessMatrixElements.h" +#include "MemoryAccessWeights.h" +#include "MemoryBuffers.h" + +#include + +// ****************************************************************************************** +// *** NB: Disabling fast math is essential here, otherwise results are undefined *** +// *** NB: This file CrossSectionKernels.cc IS BUILT WITH -fno-fast-math in the Makefile! 
*** +// *** NB: Attempts with __attribute__((optimize("-fno-fast-math"))) were unsatisfactory *** +// ****************************************************************************************** + +inline bool +fp_is_nan( const fptype& fp ) +{ + //#pragma clang diagnostic push + //#pragma clang diagnostic ignored "-Wtautological-compare" // for icpx2021/clang13 (https://stackoverflow.com/a/15864661) + return std::isnan( fp ); // always false for clang in fast math mode (tautological compare)? + //#pragma clang diagnostic pop +} + +inline bool +fp_is_abnormal( const fptype& fp ) +{ + if( fp_is_nan( fp ) ) return true; + if( fp != fp ) return true; + return false; +} + +inline bool +fp_is_zero( const fptype& fp ) +{ + if( fp == 0 ) return true; + return false; +} + +// See https://en.cppreference.com/w/cpp/numeric/math/FP_categories +inline const char* +fp_show_class( const fptype& fp ) +{ + switch( std::fpclassify( fp ) ) + { + case FP_INFINITE: return "Inf"; + case FP_NAN: return "NaN"; + case FP_NORMAL: return "normal"; + case FP_SUBNORMAL: return "subnormal"; + case FP_ZERO: return "zero"; + default: return "unknown"; + } +} + +inline void +debug_me_is_abnormal( const fptype& me, size_t ievtALL ) +{ + std::cout << "DEBUG[" << ievtALL << "]" + << " ME=" << me + << " fpisabnormal=" << fp_is_abnormal( me ) + << " fpclass=" << fp_show_class( me ) + << " (me==me)=" << ( me == me ) + << " (me==me+1)=" << ( me == me + 1 ) + << " isnan=" << fp_is_nan( me ) + << " isfinite=" << std::isfinite( me ) + << " isnormal=" << std::isnormal( me ) + << " is0=" << ( me == 0 ) + << " is1=" << ( me == 1 ) + << " abs(ME)=" << std::abs( me ) + << " isnan=" << fp_is_nan( std::abs( me ) ) + << std::endl; +} + +//============================================================================ + +#ifdef __CUDACC__ +namespace mg5amcGpu +#else +namespace mg5amcCpu +#endif +{ + //-------------------------------------------------------------------------- + + void flagAbnormalMEs( fptype* hstMEs, unsigned int nevt ) + { + for( unsigned int ievt = 0; ievt < nevt; ievt++ ) + { + if( fp_is_abnormal( hstMEs[ievt] ) ) + { + std::cout << "WARNING! flagging abnormal ME for ievt=" << ievt << std::endl; + hstMEs[ievt] = std::sqrt( -1. 
); + } + } + } + + //-------------------------------------------------------------------------- + + CrossSectionKernelHost::CrossSectionKernelHost( const BufferWeights& samplingWeights, // input: sampling weights + const BufferMatrixElements& matrixElements, // input: matrix elements + EventStatistics& stats, // output: event statistics + const size_t nevt ) + : CrossSectionKernelBase( samplingWeights, matrixElements, stats ) + , NumberOfEvents( nevt ) + { + if( m_samplingWeights.isOnDevice() ) throw std::runtime_error( "CrossSectionKernelHost: samplingWeights must be a host array" ); + if( m_matrixElements.isOnDevice() ) throw std::runtime_error( "CrossSectionKernelHost: matrixElements must be a host array" ); + if( this->nevt() != m_samplingWeights.nevt() ) throw std::runtime_error( "CrossSectionKernelHost: nevt mismatch with samplingWeights" ); + if( this->nevt() != m_matrixElements.nevt() ) throw std::runtime_error( "CrossSectionKernelHost: nevt mismatch with matrixElements" ); + } + + //-------------------------------------------------------------------------- + + void CrossSectionKernelHost::updateEventStatistics( const bool debug ) + { + EventStatistics stats; // new statistics for the new nevt events + // FIRST PASS: COUNT ALL/ABN/ZERO EVENTS, COMPUTE MIN/MAX, COMPUTE REFS AS MEANS OF SIMPLE SUMS + for( size_t ievt = 0; ievt < nevt(); ++ievt ) // Loop over all events in this iteration + { + const fptype& me = MemoryAccessMatrixElements::ieventAccessConst( m_matrixElements.data(), ievt ); + const fptype& wg = MemoryAccessWeights::ieventAccessConst( m_samplingWeights.data(), ievt ); + const size_t ievtALL = m_iter * nevt() + ievt; + // The following events are abnormal in a run with "-p 2048 256 12 -d" + // - check.exe/commonrand: ME[310744,451171,3007871,3163868,4471038,5473927] with fast math + // - check.exe/curand: ME[578162,1725762,2163579,5407629,5435532,6014690] with fast math + // - gcheck.exe/curand: ME[596016,1446938] with fast math + // Debug NaN/abnormal issues + //if ( ievtALL == 310744 ) // this ME is abnormal both with and without fast math + // debug_me_is_abnormal( me, ievtALL ); + //if ( ievtALL == 5473927 ) // this ME is abnormal only with fast math + // debug_me_is_abnormal( me, ievtALL ); + stats.nevtALL++; + if( fp_is_abnormal( me ) ) + { + if( debug ) // only printed out with "-p -d" (matrixelementALL is not filled without -p) + std::cout << "WARNING! 
ME[" << ievtALL << "] is NaN/abnormal" << std::endl; + stats.nevtABN++; + continue; + } + if( fp_is_zero( me ) ) stats.nevtZERO++; + stats.minME = std::min( stats.minME, (double)me ); + stats.maxME = std::max( stats.maxME, (double)me ); + stats.minWG = std::min( stats.minWG, (double)wg ); + stats.maxWG = std::max( stats.maxWG, (double)wg ); + stats.sumMEdiff += me; // NB stats.refME is 0 here + stats.sumWGdiff += wg; // NB stats.refWG is 0 here + } + stats.refME = stats.meanME(); // draft ref + stats.refWG = stats.meanWG(); // draft ref + stats.sumMEdiff = 0; + stats.sumWGdiff = 0; + // SECOND PASS: IMPROVE MEANS FROM SUMS OF DIFFS TO PREVIOUS REF, UPDATE REF + for( size_t ievt = 0; ievt < nevt(); ++ievt ) // Loop over all events in this iteration + { + const fptype& me = MemoryAccessMatrixElements::ieventAccessConst( m_matrixElements.data(), ievt ); + const fptype& wg = MemoryAccessWeights::ieventAccessConst( m_samplingWeights.data(), ievt ); + if( fp_is_abnormal( me ) ) continue; + stats.sumMEdiff += ( me - stats.refME ); + stats.sumWGdiff += ( wg - stats.refWG ); + } + stats.refME = stats.meanME(); // final ref + stats.refWG = stats.meanWG(); // final ref + stats.sumMEdiff = 0; + stats.sumWGdiff = 0; + // THIRD PASS: COMPUTE STDDEV FROM SQUARED SUMS OF DIFFS TO REF + for( size_t ievt = 0; ievt < nevt(); ++ievt ) // Loop over all events in this iteration + { + const fptype& me = MemoryAccessMatrixElements::ieventAccessConst( m_matrixElements.data(), ievt ); + const fptype& wg = MemoryAccessWeights::ieventAccessConst( m_samplingWeights.data(), ievt ); + if( fp_is_abnormal( me ) ) continue; + stats.sqsMEdiff += std::pow( me - stats.refME, 2 ); + stats.sqsWGdiff += std::pow( wg - stats.refWG, 2 ); + } + // FOURTH PASS: UPDATE THE OVERALL STATS BY ADDING THE NEW STATS + m_stats += stats; + // Increment the iterations counter + m_iter++; + } + + //-------------------------------------------------------------------------- +} + +//============================================================================ + +#ifdef __CUDACC__ +namespace mg5amcGpu +{ + + /* + //-------------------------------------------------------------------------- + + CrossSectionKernelDevice::CrossSectionKernelDevice( const BufferWeights& samplingWeights, // input: sampling weights + const BufferMatrixElements& matrixElements, // input: matrix elements + EventStatistics& stats, // output: event statistics + const size_t gpublocks, + const size_t gputhreads ) + : CrossSectionKernelBase( samplingWeights, matrixElements, stats ) + , NumberOfEvents( gpublocks*gputhreads ) + , m_gpublocks( gpublocks ) + , m_gputhreads( gputhreads ) + { + if ( ! m_samplingWeights.isOnDevice() ) throw std::runtime_error( "CrossSectionKernelDevice: samplingWeights must be a device array" ); + if ( ! 
m_matrixElements.isOnDevice() ) throw std::runtime_error( "CrossSectionKernelDevice: matrixElements must be a device array" ); + if ( m_gpublocks == 0 ) throw std::runtime_error( "CrossSectionKernelDevice: gpublocks must be > 0" ); + if ( m_gputhreads == 0 ) throw std::runtime_error( "CrossSectionKernelDevice: gputhreads must be > 0" ); + if ( this->nevt() != m_samplingWeights.nevt() ) throw std::runtime_error( "CrossSectionKernelDevice: nevt mismatch with samplingWeights" ); + if ( this->nevt() != m_matrixElements.nevt() ) throw std::runtime_error( "CrossSectionKernelDevice: nevt mismatch with matrixElements" ); + } + + //-------------------------------------------------------------------------- + + void CrossSectionKernelDevice::setGrid( const size_t gpublocks, const size_t gputhreads ) + { + if ( m_gpublocks == 0 ) throw std::runtime_error( "CrossSectionKernelDevice: gpublocks must be > 0 in setGrid" ); + if ( m_gputhreads == 0 ) throw std::runtime_error( "CrossSectionKernelDevice: gputhreads must be > 0 in setGrid" ); + if ( this->nevt() != m_gpublocks * m_gputhreads ) throw std::runtime_error( "CrossSectionKernelDevice: nevt mismatch in setGrid" ); + } + + //-------------------------------------------------------------------------- + + void CrossSectionKernelDevice::updateEventStatistics( const bool debug ) + { + // Increment the iterations counter + m_iter++; + } + + //-------------------------------------------------------------------------- + */ + +} +#endif + +//============================================================================ diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/CrossSectionKernels.h b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/CrossSectionKernels.h new file mode 100644 index 0000000000..6098157b4e --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/CrossSectionKernels.h @@ -0,0 +1,133 @@ +#ifndef CROSSSECTIONKERNELS_H +#define CROSSSECTIONKERNELS_H 1 + +#include "mgOnGpuConfig.h" + +#include "EventStatistics.h" +#include "MemoryBuffers.h" + +//============================================================================ + +#ifdef __CUDACC__ +namespace mg5amcGpu +#else +namespace mg5amcCpu +#endif +{ + //-------------------------------------------------------------------------- + + // Helper function for Bridge.h: must be compiled without fast math + // Iterate through all output MEs and replace any NaN/abnormal ones by sqrt(-1) + void flagAbnormalMEs( fptype* hstMEs, unsigned int nevt ); + + //-------------------------------------------------------------------------- + + // A base class encapsulating the calculation of event statistics on a CPU host or on a GPU device + class CrossSectionKernelBase //: virtual public ICrossSectionKernel + { + protected: + + // Constructor from existing input and output buffers + CrossSectionKernelBase( const BufferWeights& samplingWeights, // input: sampling weights + const BufferMatrixElements& matrixElements, // input: matrix elements + EventStatistics& stats ) // output: event statistics + : m_samplingWeights( samplingWeights ) + , m_matrixElements( matrixElements ) + , m_stats( stats ) + , m_iter( 0 ) + { + // NB: do not initialise EventStatistics (you may be asked to update an existing result) + } + + public: + + // Destructor + virtual ~CrossSectionKernelBase() {} + + // Update event statistics + virtual void updateEventStatistics( const bool debug = false ) = 0; + + // Is this a host or device kernel? 
+ virtual bool isOnDevice() const = 0; + + protected: + + // The buffer for the sampling weights + const BufferWeights& m_samplingWeights; + + // The buffer for the output matrix elements + const BufferMatrixElements& m_matrixElements; + + // The event statistics + EventStatistics& m_stats; + + // The number of iterations processed so far + size_t m_iter; + }; + + //-------------------------------------------------------------------------- + + // A class encapsulating the calculation of event statistics on a CPU host + class CrossSectionKernelHost final : public CrossSectionKernelBase, public NumberOfEvents + { + public: + + // Constructor from existing input and output buffers + CrossSectionKernelHost( const BufferWeights& samplingWeights, // input: sampling weights + const BufferMatrixElements& matrixElements, // input: matrix elements + EventStatistics& stats, // output: event statistics + const size_t nevt ); + + // Destructor + virtual ~CrossSectionKernelHost() {} + + // Update event statistics + void updateEventStatistics( const bool debug = false ) override final; + + // Is this a host or device kernel? + bool isOnDevice() const override final { return false; } + }; + + //-------------------------------------------------------------------------- + + /* +#ifdef __CUDACC__ + // A class encapsulating the calculation of event statistics on a GPU device + class CrossSectionKernelDevice : public CrossSectionKernelBase, public NumberOfEvents + { + public: + + // Constructor from existing input and output buffers + CrossSectionKernelDevice( const BufferWeights& samplingWeights, // input: sampling weights + const BufferMatrixElements& matrixElements, // input: matrix elements + EventStatistics& stats, // output: event statistics + const size_t gpublocks, + const size_t gputhreads ); + + // Destructor + virtual ~CrossSectionKernelDevice(){} + + // Reset gpublocks and gputhreads + void setGrid( const size_t gpublocks, const size_t gputhreads ); + + // Update event statistics + void updateEventStatistics( const bool debug=false ) override final; + + // Is this a host or device kernel? + bool isOnDevice() const override final { return true; } + + private: + + // The number of blocks in the GPU grid + size_t m_gpublocks; + + // The number of threads in the GPU grid + size_t m_gputhreads; + + }; +#endif + */ + + //-------------------------------------------------------------------------- +} +#endif // CROSSSECTIONKERNELS_H diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/CudaRuntime.h b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/CudaRuntime.h new file mode 100644 index 0000000000..e16ed2c703 --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/CudaRuntime.h @@ -0,0 +1,80 @@ +#ifndef MG5AMC_CUDARUNTIME_H +#define MG5AMC_CUDARUNTIME_H 1 + +// MG5AMC on GPU uses the CUDA runtime API, not the lower level CUDA driver API +// See https://docs.nvidia.com/cuda/cuda-runtime-api/driver-vs-runtime-api.html#driver-vs-runtime-api + +#include +#include + +//-------------------------------------------------------------------------- + +// See https://stackoverflow.com/a/14038590 +#ifdef __CUDACC__ /* clang-format off */ +#define checkCuda( code ) { assertCuda( code, __FILE__, __LINE__ ); } +inline void assertCuda( cudaError_t code, const char* file, int line, bool abort = true ) +{ + if( code != cudaSuccess ) + { + printf( "ERROR! 
assertCuda: '%s' (%d) in %s:%d\n", cudaGetErrorString( code ), code, file, line ); + if( abort ) assert( code == cudaSuccess ); + } +} +#endif /* clang-format on */ + +//-------------------------------------------------------------------------- + +#ifdef __CUDACC__ +namespace mg5amcGpu +{ + // Instantiate a CudaRuntime at the beginning of the application's main to + // invoke cudaSetDevice(0) in the constructor and book a cudaDeviceReset() call in the destructor + // *** FIXME! This will all need to be designed differently when going to multi-GPU nodes! *** + struct CudaRuntime final + { + CudaRuntime( const bool debug = true ) + : m_debug( debug ) { setUp( m_debug ); } + ~CudaRuntime() { tearDown( m_debug ); } + CudaRuntime( const CudaRuntime& ) = delete; + CudaRuntime( CudaRuntime&& ) = delete; + CudaRuntime& operator=( const CudaRuntime& ) = delete; + CudaRuntime& operator=( CudaRuntime&& ) = delete; + bool m_debug; + + // Set up CUDA application + // ** NB: strictly speaking this is not needed when using the CUDA runtime API ** + // Calling cudaSetDevice on startup is useful to properly book-keep the time spent in CUDA initialization + static void setUp( const bool debug = true ) + { + // ** NB: it is useful to call cudaSetDevice, or cudaFree, to properly book-keep the time spent in CUDA initialization + // ** NB: otherwise, the first CUDA operation (eg a cudaMemcpyToSymbol in CPPProcess ctor) appears to take much longer! + /* + // [We initially added cudaFree(0) to "ease profile analysis" only because it shows up as a big recognizable block!] + // No explicit initialization is needed: https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#initialization + // It is not clear what cudaFree(0) does at all: https://stackoverflow.com/questions/69967813/ + if ( debug ) std::cout << "__CudaRuntime: calling cudaFree(0)" << std::endl; + checkCuda( cudaFree( 0 ) ); // SLOW! + */ + // Replace cudaFree(0) by cudaSetDevice(0), even if it is not really needed either + // (but see https://developer.nvidia.com/blog/cuda-pro-tip-always-set-current-device-avoid-multithreading-bugs) + if( debug ) std::cout << "__CudaRuntime: calling cudaSetDevice(0)" << std::endl; + checkCuda( cudaSetDevice( 0 ) ); // SLOW!
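      // A sketch of the intended usage pattern (hypothetical driver code, not part of this header):
      // constructing a single CudaRuntime at the top of main() makes this slow cudaSetDevice(0) call
      // appear as a distinct initialization step in profiles, and books the cudaDeviceReset() for shutdown:
      //   int main( int argc, char** argv )
      //   {
      //     mg5amcGpu::CudaRuntime cudaRuntime( /*debug=*/false ); // ctor calls setUp(): cudaSetDevice(0)
      //     // ... allocate device buffers, compute matrix elements, copy results back ...
      //     return 0; // dtor calls tearDown(): cudaDeviceReset()
      //   }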
+ } + + // Tear down CUDA application (call cudaDeviceReset) + // ** NB: strictly speaking this is not needed when using the CUDA runtime API ** + // Calling cudaDeviceReset on shutdown is only needed for checking memory leaks in cuda-memcheck + // See https://docs.nvidia.com/cuda/cuda-memcheck/index.html#leak-checking + static void tearDown( const bool debug = true ) + { + if( debug ) std::cout << "__CudaRuntime: calling cudaDeviceReset()" << std::endl; + checkCuda( cudaDeviceReset() ); + } + }; + +} +#endif + +//-------------------------------------------------------------------------- + +#endif // MG5AMC_CUDARUNTIME_H diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/EventStatistics.h b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/EventStatistics.h new file mode 100644 index 0000000000..19c5199bcc --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/EventStatistics.h @@ -0,0 +1,160 @@ +#ifndef EventStatistics_H +#define EventStatistics_H 1 + +#include "mgOnGpuConfig.h" // for npar (meGeVexponent) + +#include +#include +#include +#include +#include + +#ifdef __CUDACC__ +namespace mg5amcGpu +#else +namespace mg5amcCpu +#endif +{ + //-------------------------------------------------------------------------- + + // The EventStatistics struct is used to accumulate running aggregates of event statistics. + // This will eventually include the process cross section and the process maximum weight: + // one important case of EventStatistics will then be the "gridpack" result set, which is + // the output of the "integration" step and the input to "unweighted event generation" step. + // The current implementation only includes statistics for matrix elements (ME) and sampling weights (WG); + // in first approximation, the process cross section and maximum weight are just the mean ME and maximum ME, + // but eventually the sampling weights WG (e.g. from Rambo) must also be taken into account in the calculation. + // The implementation uses differences to reference values to improve numerical precision. + struct EventStatistics + { + public: + size_t nevtALL; // total number of events used + size_t nevtABN; // number of events used, where ME is abnormal (nevtABN <= nevtALL) + size_t nevtZERO; // number of not-abnormal events used, where ME is zero (nevtZERO <= nevtOK) + double minME; // minimum matrix element + double maxME; // maximum matrix element + double minWG; // minimum sampling weight + double maxWG; // maximum sampling weight + double refME; // "reference" matrix element (normally the current mean) + double refWG; // "reference" sampling weight (normally the current mean) + double sumMEdiff; // sum of diff to ref for matrix element + double sumWGdiff; // sum of diff to ref for sampling weight + double sqsMEdiff; // squared sum of diff to ref for matrix element + double sqsWGdiff; // squared sum of diff to ref for sampling weight + std::string tag; // a text tag for printouts + // Number of events used, where ME is not abnormal + size_t nevtOK() const { return nevtALL - nevtABN; } + // Mean matrix element + // [x = ref+d => mean(x) = sum(x)/n = ref+sum(d)/n] + double meanME() const + { + return refME + ( nevtOK() > 0 ? sumMEdiff / nevtOK() : 0 ); + } + // Mean sampling weight + // [x = ref+d => mean(x) = sum(x)/n = ref+sum(d)/n] + double meanWG() const + { + return refWG + ( nevtOK() > 0 ? 
sumWGdiff / nevtOK() : 0 ); + } + // Variance matrix element + // [x = ref+d => n*var(x) = sum((x-mean(x))^2) = sum((ref+d-ref-sum(d)/n)^2) = sum((d-sum(d)/n)^2)/n = sum(d^2)-(sum(d))^2/n] + double varME() const { return ( sqsMEdiff - std::pow( sumMEdiff, 2 ) / nevtOK() ) / nevtOK(); } + // Variance sampling weight + // [x = ref+d => n*var(x) = sum((x-mean(x))^2) = sum((ref+d-ref-sum(d)/n)^2) = sum((d-sum(d)/n)^2)/n = sum(d^2)-(sum(d))^2/n] + double varWG() const { return ( sqsWGdiff - std::pow( sumWGdiff, 2 ) / nevtOK() ) / nevtOK(); } + // Standard deviation matrix element + double stdME() const { return std::sqrt( varME() ); } + // Standard deviation sampling weight + double stdWG() const { return std::sqrt( varWG() ); } + // Update reference matrix element + void updateRefME( const double newRef ) + { + const double deltaRef = refME - newRef; + sqsMEdiff += deltaRef * ( 2 * sumMEdiff + nevtOK() * deltaRef ); + sumMEdiff += deltaRef * nevtOK(); + refME = newRef; + } + // Update reference sampling weight + void updateRefWG( const double newRef ) + { + const double deltaRef = refWG - newRef; + sqsWGdiff += deltaRef * ( 2 * sumWGdiff + nevtOK() * deltaRef ); + sumWGdiff += deltaRef * nevtOK(); + refWG = newRef; + } + // Constructor + EventStatistics() + : nevtALL( 0 ) + , nevtABN( 0 ) + , nevtZERO( 0 ) + , minME( std::numeric_limits::max() ) + , maxME( std::numeric_limits::lowest() ) + , minWG( std::numeric_limits::max() ) + , maxWG( std::numeric_limits::lowest() ) + , refME( 0 ) + , refWG( 0 ) + , sumMEdiff( 0 ) + , sumWGdiff( 0 ) + , sqsMEdiff( 0 ) + , sqsWGdiff( 0 ) + , tag( "" ) {} + // Combine two EventStatistics + EventStatistics& operator+=( const EventStatistics& stats ) + { + EventStatistics s1 = *this; // temporary copy + EventStatistics s2 = stats; // temporary copy + EventStatistics& sum = *this; + sum.nevtALL = s1.nevtALL + s2.nevtALL; + sum.nevtABN = s1.nevtABN + s2.nevtABN; + sum.nevtZERO = s1.nevtZERO + s2.nevtZERO; + sum.minME = std::min( s1.minME, s2.minME ); + sum.maxME = std::max( s1.maxME, s2.maxME ); + sum.minWG = std::min( s1.minWG, s2.minWG ); + sum.maxWG = std::max( s1.maxWG, s2.maxWG ); + sum.refME = ( s1.meanME() * s1.nevtOK() + s2.meanME() * s2.nevtOK() ) / sum.nevtOK(); // new mean ME + s1.updateRefME( sum.refME ); + s2.updateRefME( sum.refME ); + sum.sumMEdiff = s1.sumMEdiff + s2.sumMEdiff; + sum.sqsMEdiff = s1.sqsMEdiff + s2.sqsMEdiff; + sum.refWG = ( s1.meanWG() * s1.nevtOK() + s2.meanWG() * s2.nevtOK() ) / sum.nevtOK(); // new mean WG + s1.updateRefWG( sum.refWG ); + s2.updateRefWG( sum.refWG ); + sum.sumWGdiff = s1.sumWGdiff + s2.sumWGdiff; + sum.sqsWGdiff = s1.sqsWGdiff + s2.sqsWGdiff; + return sum; + } + // Printout + void printout( std::ostream& out ) const + { + const EventStatistics& s = *this; + constexpr int meGeVexponent = -( 2 * mgOnGpu::npar - 8 ); + out << s.tag << "NumMatrixElems(notAbnormal) = " << s.nevtOK() << std::endl + << std::scientific // fixed format: affects all floats (default precision: 6) + << s.tag << "MeanMatrixElemValue = ( " << s.meanME() + << " +- " << s.stdME() / std::sqrt( s.nevtOK() ) << " ) GeV^" << meGeVexponent << std::endl // standard error + << s.tag << "[Min,Max]MatrixElemValue = [ " << s.minME + << " , " << s.maxME << " ] GeV^" << meGeVexponent << std::endl + << s.tag << "StdDevMatrixElemValue = ( " << s.stdME() + << std::string( 16, ' ' ) << " ) GeV^" << meGeVexponent << std::endl + << s.tag << "MeanWeight = ( " << s.meanWG() + << " +- " << s.stdWG() / std::sqrt( s.nevtOK() ) << std::endl // standard error + << 
s.tag << "[Min,Max]Weight = [ " << s.minWG + << " , " << s.maxWG << " ]" << std::endl + << s.tag << "StdDevWeight = ( " << s.stdWG() + << std::string( 16, ' ' ) << " )" << std::endl + << std::defaultfloat; // default format: affects all floats + } + }; + + //-------------------------------------------------------------------------- + + inline std::ostream& operator<<( std::ostream& out, const EventStatistics& s ) + { + s.printout( out ); + return out; + } + + //-------------------------------------------------------------------------- +} + +#endif // EventStatistics_H diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/MadgraphTest.h b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/MadgraphTest.h new file mode 100644 index 0000000000..2a0be47978 --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/MadgraphTest.h @@ -0,0 +1,300 @@ +// Stephan Hageboeck, CERN, 12/2020 +#ifndef MADGRAPHTEST_H_ +#define MADGRAPHTEST_H_ 1 + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace +{ + + struct ReferenceData + { + std::vector>> momenta; + std::vector MEs; + }; + + /// Read batches of reference data from a file and store them in a map. + std::map readReferenceData( const std::string& refFileName ) + { + std::ifstream referenceFile( refFileName.c_str() ); + EXPECT_TRUE( referenceFile.is_open() ) << refFileName; + std::map referenceData; + unsigned int evtNo; + unsigned int batchNo; + for( std::string line; std::getline( referenceFile, line ); ) + { + std::stringstream lineStr( line ); + if( line.empty() || line[0] == '#' ) + { + continue; + } + else if( line.find( "Event" ) != std::string::npos ) + { + std::string dummy; + lineStr >> dummy >> evtNo >> dummy >> batchNo; + } + else if( line.find( "ME" ) != std::string::npos ) + { + if( evtNo <= referenceData[batchNo].MEs.size() ) + referenceData[batchNo].MEs.resize( evtNo + 1 ); + + std::string dummy; + lineStr >> dummy >> referenceData[batchNo].MEs[evtNo]; + } + else + { + unsigned int particleIndex; + lineStr >> particleIndex; + if( evtNo <= referenceData[batchNo].momenta.size() ) + referenceData[batchNo].momenta.resize( evtNo + 1 ); + if( particleIndex <= referenceData[batchNo].momenta[evtNo].size() ) + referenceData[batchNo].momenta[evtNo].resize( particleIndex + 1 ); + auto& fourVec = referenceData[batchNo].momenta[evtNo][particleIndex]; + for( unsigned int i = 0; i < fourVec.size(); ++i ) + { + EXPECT_TRUE( lineStr.good() ); + lineStr >> fourVec[i]; + } + EXPECT_TRUE( lineStr.eof() ); + } + } + return referenceData; + } + +} + +/** + * Test driver providing a common interface for testing different implementations. + * Users need to implement: + * - Functions to retrieve matrix element and 4-momenta. These are used in the tests. + * - Driver functions that run the madgraph workflow. + * + * Usage: + * ``` + * class TestImplementation : public TestDriverBase { + * + * } + * + * class TestImplementation2 : public TestDriverBase { + * + * } + * + * INSTANTIATE_TEST_SUITE_P( TestName, + * MadgraphTest, + * testing::Values( new TestImplementation, new TestImplementation2, ... 
) ); + *``` + * + * For adapting the test workflow, see the .cc and adapt + * TEST_P(MadgraphTest, CompareMomentaAndME) + * + * To add a test that should be runnable with all test implementations that derive from TestDriverBase, add a new + * TEST_P(MadgraphTest, ) { + * + * } + */ +class TestDriverBase +{ + std::string m_refFileName; +public: + const unsigned int nparticle; + static constexpr unsigned int niter = 2; + static constexpr unsigned int gpublocks = 2; + static constexpr unsigned int gputhreads = 128; + static constexpr unsigned int nevt = gpublocks * gputhreads; + + TestDriverBase( unsigned int npart, const std::string& refFileName ) + : m_refFileName( refFileName ) + , nparticle( npart ) + { + } + TestDriverBase() = delete; + virtual ~TestDriverBase() {} + const std::string& getRefFileName() { return m_refFileName; } + + // ------------------------------------------------ + // Interface for retrieving info from madgraph + // ------------------------------------------------ + virtual fptype getMomentum( std::size_t evtNo, unsigned int particleNo, unsigned int component ) const = 0; + virtual fptype getMatrixElement( std::size_t evtNo ) const = 0; + + // ------------------------------------------------ + // Interface for steering madgraph run + // ------------------------------------------------ + virtual void prepareRandomNumbers( unsigned int iiter ) = 0; + virtual void prepareMomenta( fptype energy ) = 0; + virtual void runSigmaKin( std::size_t iiter ) = 0; + + /// Print the requested event into the stream. If the reference data has enough events, it will be printed as well. + void dumpParticles( std::ostream& stream, std::size_t ievt, unsigned int numParticles, unsigned int nDigit, const ReferenceData& referenceData ) const + { + const auto width = nDigit + 8; + for( unsigned int ipar = 0; ipar < numParticles; ipar++ ) + { + // NB: 'setw' affects only the next field (of any type) + stream << std::scientific // fixed format: affects all floats (default nDigit: 6) + << std::setprecision( nDigit ) + << std::setw( 4 ) << ipar + << std::setw( width ) << getMomentum( ievt, ipar, 0 ) + << std::setw( width ) << getMomentum( ievt, ipar, 1 ) + << std::setw( width ) << getMomentum( ievt, ipar, 2 ) + << std::setw( width ) << getMomentum( ievt, ipar, 3 ) + << "\n"; + if( ievt < referenceData.momenta.size() ) + { + stream << "ref" << ipar; + stream << std::setw( width ) << referenceData.momenta[ievt][ipar][0] + << std::setw( width ) << referenceData.momenta[ievt][ipar][1] + << std::setw( width ) << referenceData.momenta[ievt][ipar][2] + << std::setw( width ) << referenceData.momenta[ievt][ipar][3] + << "\n\n"; + } + stream << std::flush << std::defaultfloat; // default format: affects all floats + } + } +}; + +/** + * Test class that's defining all tests to run with a Madgraph workflow. + * The tests are defined below using TEST_P. + * Instantiate them using: + * ``` + * INSTANTIATE_TEST_SUITE_P( TestName, + * MadgraphTest, + * testing::Values( new TestImplementation, new TestImplementation2, ... ) ); + * ``` + */ +class MadgraphTest : public testing::TestWithParam +{ +protected: + std::unique_ptr testDriver; + + MadgraphTest() + : TestWithParam(), testDriver( GetParam() ) + { + } +}; + +// Since we link both the CPU-only and GPU tests into the same executable, we prevent +// a multiply defined symbol by only compiling this in the non-CUDA phase: +#ifndef __CUDACC__ + +/// Compare momenta and matrix elements. 
+/// This uses an implementation of TestDriverBase to run a madgraph workflow, +/// and compares momenta and matrix elements with a reference file. +TEST_P( MadgraphTest, CompareMomentaAndME ) +{ + // Set to true to dump events: + constexpr bool dumpEvents = false; + constexpr fptype energy = 1500; // historical default, Ecms = 1500 GeV = 1.5 TeV (above the Z peak) + const fptype toleranceMomenta = std::is_same::value ? 1.E-10 : 3.E-2; +#ifdef __APPLE__ + const fptype toleranceMEs = std::is_same::value ? 1.E-6 : 3.E-2; // see #583 +#else + const fptype toleranceMEs = std::is_same::value ? 1.E-6 : 2.E-3; +#endif + std::string dumpFileName = std::string( "dump_" ) + testing::UnitTest::GetInstance()->current_test_info()->test_suite_name() + '.' + testing::UnitTest::GetInstance()->current_test_info()->name() + ".txt"; + while( dumpFileName.find( '/' ) != std::string::npos ) + { + dumpFileName.replace( dumpFileName.find( '/' ), 1, "_" ); + } + std::ofstream dumpFile; + if( dumpEvents ) + { + dumpFile.open( dumpFileName, std::ios::trunc ); + } + // Read reference data + const std::string refFileName = testDriver->getRefFileName(); + std::map referenceData; + if( !dumpEvents ) + { + referenceData = readReferenceData( refFileName ); + } + ASSERT_FALSE( HasFailure() ); // It doesn't make any sense to continue if we couldn't read the reference file. + // ************************************** + // *** START MAIN LOOP ON #ITERATIONS *** + // ************************************** + for( unsigned int iiter = 0; iiter < testDriver->niter; ++iiter ) + { + testDriver->prepareRandomNumbers( iiter ); + testDriver->prepareMomenta( energy ); + testDriver->runSigmaKin( iiter ); + // --- Run checks on all events produced in this iteration + for( std::size_t ievt = 0; ievt < testDriver->nevt && !HasFailure(); ++ievt ) + { + if( dumpEvents ) + { + ASSERT_TRUE( dumpFile.is_open() ) << dumpFileName; + dumpFile << "Event " << std::setw( 8 ) << ievt << " " + << "Batch " << std::setw( 4 ) << iiter << "\n"; + testDriver->dumpParticles( dumpFile, ievt, testDriver->nparticle, 15, ReferenceData() ); + // Dump matrix element + dumpFile << std::setw( 4 ) << "ME" << std::scientific << std::setw( 15 + 8 ) + << testDriver->getMatrixElement( ievt ) << "\n" + << std::endl + << std::defaultfloat; + continue; + } + // Check that we have the required reference data + ASSERT_GT( referenceData.size(), iiter ) + << "Don't have enough reference data for iteration " << iiter << ". Ref file:" << refFileName; + ASSERT_GT( referenceData[iiter].MEs.size(), ievt ) + << "Don't have enough reference MEs for iteration " << iiter << " event " << ievt << ".\nRef file: " << refFileName; + ASSERT_GT( referenceData[iiter].momenta.size(), ievt ) + << "Don't have enough reference momenta for iteration " << iiter << " event " << ievt << ".\nRef file: " << refFileName; + ASSERT_GE( referenceData[iiter].momenta[ievt].size(), testDriver->nparticle ) + << "Don't have enough reference particles for iteration " << iiter << " event " << ievt << ".\nRef file: " << refFileName; + // This trace will help to understand the event that is being checked. 
+ // It will only be printed in case of failures: + std::stringstream eventTrace; + eventTrace << "In comparing event " << ievt << " from iteration " << iiter << "\n"; + testDriver->dumpParticles( eventTrace, ievt, testDriver->nparticle, 15, referenceData[iiter] ); + eventTrace << std::setw( 4 ) << "ME" << std::scientific << std::setw( 15 + 8 ) + << testDriver->getMatrixElement( ievt ) << "\n" + << std::setw( 4 ) << "r.ME" << std::scientific << std::setw( 15 + 8 ) + << referenceData[iiter].MEs[ievt] << std::endl + << std::defaultfloat; + SCOPED_TRACE( eventTrace.str() ); + // Compare Momenta + for( unsigned int ipar = 0; ipar < testDriver->nparticle; ++ipar ) + { + std::stringstream momentumErrors; + for( unsigned int icomp = 0; icomp < mgOnGpu::np4; ++icomp ) + { + const fptype pMadg = testDriver->getMomentum( ievt, ipar, icomp ); + const fptype pOrig = referenceData[iiter].momenta[ievt][ipar][icomp]; + const fptype relDelta = fabs( ( pMadg - pOrig ) / pOrig ); + if( relDelta > toleranceMomenta ) + { + momentumErrors << std::setprecision( 15 ) << std::scientific << "\nparticle " << ipar << "\tcomponent " << icomp + << "\n\t madGraph: " << std::setw( 22 ) << pMadg + << "\n\t reference: " << std::setw( 22 ) << pOrig + << "\n\t rel delta: " << std::setw( 22 ) << relDelta << " exceeds tolerance of " << toleranceMomenta; + } + } + ASSERT_TRUE( momentumErrors.str().empty() ) << momentumErrors.str(); + } + // Compare ME: + EXPECT_NEAR( testDriver->getMatrixElement( ievt ), + referenceData[iiter].MEs[ievt], + toleranceMEs * referenceData[iiter].MEs[ievt] ); + } + } + if( dumpEvents ) + { + std::cout << "Event dump written to " << dumpFileName << std::endl; + } +} + +#endif // __CUDACC__ + +#endif /* MADGRAPHTEST_H_ */ diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/MatrixElementKernels.cc b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/MatrixElementKernels.cc new file mode 100644 index 0000000000..da81c99218 --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/MatrixElementKernels.cc @@ -0,0 +1,237 @@ +#include "MatrixElementKernels.h" + +#include "CPPProcess.h" +#include "CudaRuntime.h" +#include "MemoryAccessMomenta.h" +#include "MemoryBuffers.h" + +#include + +//============================================================================ + +#ifndef __CUDACC__ +namespace mg5amcCpu +{ + + //-------------------------------------------------------------------------- + + MatrixElementKernelHost::MatrixElementKernelHost( const BufferMomenta& momenta, // input: momenta + const BufferGs& gs, // input: gs for alphaS + const BufferRndNumHelicity& rndhel, // input: random numbers for helicity selection + const BufferRndNumColor& rndcol, // input: random numbers for color selection + BufferMatrixElements& matrixElements, // output: matrix elements + BufferSelectedHelicity& selhel, // output: helicity selection + BufferSelectedColor& selcol, // output: color selection + const size_t nevt ) + : MatrixElementKernelBase( momenta, gs, rndhel, rndcol, matrixElements, selhel, selcol ) + , NumberOfEvents( nevt ) + , m_couplings( nevt ) +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + , m_numerators( nevt ) + , m_denominators( nevt ) +#endif + { + if( m_momenta.isOnDevice() ) throw std::runtime_error( "MatrixElementKernelHost: momenta must be a host array" ); + if( m_matrixElements.isOnDevice() ) throw std::runtime_error( "MatrixElementKernelHost: matrixElements must be a host array" ); + if( this->nevt() != m_momenta.nevt() ) throw std::runtime_error( "MatrixElementKernelHost: nevt mismatch with 
momenta" ); + if( this->nevt() != m_matrixElements.nevt() ) throw std::runtime_error( "MatrixElementKernelHost: nevt mismatch with matrixElements" ); + // Sanity checks for memory access (momenta buffer) + constexpr int neppM = MemoryAccessMomenta::neppM; // AOSOA layout + static_assert( ispoweroftwo( neppM ), "neppM is not a power of 2" ); + if( nevt % neppM != 0 ) + { + std::ostringstream sstr; + sstr << "MatrixElementKernelHost: nevt should be a multiple of neppM=" << neppM; + throw std::runtime_error( sstr.str() ); + } + // Fail gently and avoid "Illegal instruction (core dumped)" if the host does not support the SIMD used in the ME calculation + // Note: this prevents a crash on pmpe04 but not on some github CI nodes? + // [NB: SIMD vectorization in mg5amc C++ code is only used in the ME calculation below MatrixElementKernelHost!] + if( !MatrixElementKernelHost::hostSupportsSIMD() ) + throw std::runtime_error( "Host does not support the SIMD implementation of MatrixElementKernelsHost" ); + } + + //-------------------------------------------------------------------------- + + int MatrixElementKernelHost::computeGoodHelicities() + { + using mgOnGpu::ncomb; // the number of helicity combinations + HostBufferHelicityMask hstIsGoodHel( ncomb ); + // ... 0d1. Compute good helicity mask on the host + computeDependentCouplings( m_gs.data(), m_couplings.data(), m_gs.size() ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + sigmaKin_getGoodHel( m_momenta.data(), m_couplings.data(), m_matrixElements.data(), m_numerators.data(), m_denominators.data(), hstIsGoodHel.data(), nevt() ); +#else + sigmaKin_getGoodHel( m_momenta.data(), m_couplings.data(), m_matrixElements.data(), hstIsGoodHel.data(), nevt() ); +#endif + // ... 0d2. Copy back good helicity list to static memory on the host + // [FIXME! REMOVE THIS STATIC THAT BREAKS MULTITHREADING?] + return sigmaKin_setGoodHel( hstIsGoodHel.data() ); + } + + //-------------------------------------------------------------------------- + + void MatrixElementKernelHost::computeMatrixElements( const unsigned int channelId ) + { + computeDependentCouplings( m_gs.data(), m_couplings.data(), m_gs.size() ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), m_matrixElements.data(), channelId, m_numerators.data(), m_denominators.data(), m_selhel.data(), m_selcol.data(), nevt() ); +#else + sigmaKin( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), m_matrixElements.data(), m_selhel.data(), m_selcol.data(), nevt() ); +#endif + } + + //-------------------------------------------------------------------------- + + // Does this host system support the SIMD used in the matrix element calculation? 
+ bool MatrixElementKernelHost::hostSupportsSIMD( const bool verbose ) + { +#if defined __AVX512VL__ + bool known = true; + bool ok = __builtin_cpu_supports( "avx512vl" ); + const std::string tag = "skylake-avx512 (AVX512VL)"; +#elif defined __AVX2__ + bool known = true; + bool ok = __builtin_cpu_supports( "avx2" ); + const std::string tag = "haswell (AVX2)"; +#elif defined __SSE4_2__ +#ifdef __PPC__ + // See https://gcc.gnu.org/onlinedocs/gcc/Basic-PowerPC-Built-in-Functions-Available-on-all-Configurations.html + bool known = true; + bool ok = __builtin_cpu_supports( "vsx" ); + const std::string tag = "powerpc vsx (128bit as in SSE4.2)"; +#elif defined __ARM_NEON__ // consider using __BUILTIN_CPU_SUPPORTS__ + bool known = false; // __builtin_cpu_supports is not supported + // See https://gcc.gnu.org/onlinedocs/gcc/Basic-PowerPC-Built-in-Functions-Available-on-all-Configurations.html + // See https://stackoverflow.com/q/62783908 + // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu + bool ok = true; // this is just an assumption! + const std::string tag = "arm neon (128bit as in SSE4.2)"; +#else + bool known = true; + bool ok = __builtin_cpu_supports( "sse4.2" ); + const std::string tag = "nehalem (SSE4.2)"; +#endif +#else + bool known = true; + bool ok = true; + const std::string tag = "none"; +#endif + if( verbose ) + { + if( tag == "none" ) + std::cout << "INFO: The application does not require the host to support any AVX feature" << std::endl; + else if( ok && known ) + std::cout << "INFO: The application is built for " << tag << " and the host supports it" << std::endl; + else if( ok ) + std::cout << "WARNING: The application is built for " << tag << " but it is unknown if the host supports it" << std::endl; + else + std::cout << "ERROR! 
The application is built for " << tag << " but the host does not support it" << std::endl; + } + return ok; + } + + //-------------------------------------------------------------------------- + +} +#endif + +//============================================================================ + +#ifdef __CUDACC__ +namespace mg5amcGpu +{ + + //-------------------------------------------------------------------------- + + MatrixElementKernelDevice::MatrixElementKernelDevice( const BufferMomenta& momenta, // input: momenta + const BufferGs& gs, // input: gs for alphaS + const BufferRndNumHelicity& rndhel, // input: random numbers for helicity selection + const BufferRndNumColor& rndcol, // input: random numbers for color selection + BufferMatrixElements& matrixElements, // output: matrix elements + BufferSelectedHelicity& selhel, // output: helicity selection + BufferSelectedColor& selcol, // output: color selection + const size_t gpublocks, + const size_t gputhreads ) + : MatrixElementKernelBase( momenta, gs, rndhel, rndcol, matrixElements, selhel, selcol ) + , NumberOfEvents( gpublocks * gputhreads ) + , m_couplings( this->nevt() ) +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + , m_numerators( this->nevt() ) + , m_denominators( this->nevt() ) +#endif + , m_gpublocks( gpublocks ) + , m_gputhreads( gputhreads ) + { + if( !m_momenta.isOnDevice() ) throw std::runtime_error( "MatrixElementKernelDevice: momenta must be a device array" ); + if( !m_matrixElements.isOnDevice() ) throw std::runtime_error( "MatrixElementKernelDevice: matrixElements must be a device array" ); + if( m_gpublocks == 0 ) throw std::runtime_error( "MatrixElementKernelDevice: gpublocks must be > 0" ); + if( m_gputhreads == 0 ) throw std::runtime_error( "MatrixElementKernelDevice: gputhreads must be > 0" ); + if( this->nevt() != m_momenta.nevt() ) throw std::runtime_error( "MatrixElementKernelDevice: nevt mismatch with momenta" ); + if( this->nevt() != m_matrixElements.nevt() ) throw std::runtime_error( "MatrixElementKernelDevice: nevt mismatch with matrixElements" ); + // Sanity checks for memory access (momenta buffer) + constexpr int neppM = MemoryAccessMomenta::neppM; // AOSOA layout + static_assert( ispoweroftwo( neppM ), "neppM is not a power of 2" ); + if( m_gputhreads % neppM != 0 ) + { + std::ostringstream sstr; + sstr << "MatrixElementKernelHost: gputhreads should be a multiple of neppM=" << neppM; + throw std::runtime_error( sstr.str() ); + } + } + + //-------------------------------------------------------------------------- + + void MatrixElementKernelDevice::setGrid( const int gpublocks, const int gputhreads ) + { + if( m_gpublocks == 0 ) throw std::runtime_error( "MatrixElementKernelDevice: gpublocks must be > 0 in setGrid" ); + if( m_gputhreads == 0 ) throw std::runtime_error( "MatrixElementKernelDevice: gputhreads must be > 0 in setGrid" ); + if( this->nevt() != m_gpublocks * m_gputhreads ) throw std::runtime_error( "MatrixElementKernelDevice: nevt mismatch in setGrid" ); + } + + //-------------------------------------------------------------------------- + + int MatrixElementKernelDevice::computeGoodHelicities() + { + using mgOnGpu::ncomb; // the number of helicity combinations + PinnedHostBufferHelicityMask hstIsGoodHel( ncomb ); + DeviceBufferHelicityMask devIsGoodHel( ncomb ); + // ... 0d1. 
Compute good helicity mask on the device + computeDependentCouplings<<<m_gpublocks, m_gputhreads>>>( m_gs.data(), m_couplings.data() ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + sigmaKin_getGoodHel<<<m_gpublocks, m_gputhreads>>>( m_momenta.data(), m_couplings.data(), m_matrixElements.data(), m_numerators.data(), m_denominators.data(), devIsGoodHel.data() ); +#else + sigmaKin_getGoodHel<<<m_gpublocks, m_gputhreads>>>( m_momenta.data(), m_couplings.data(), m_matrixElements.data(), devIsGoodHel.data() ); +#endif + checkCuda( cudaPeekAtLastError() ); + // ... 0d2. Copy back good helicity mask to the host + copyHostFromDevice( hstIsGoodHel, devIsGoodHel ); + // ... 0d3. Copy back good helicity list to constant memory on the device + return sigmaKin_setGoodHel( hstIsGoodHel.data() ); + } + + //-------------------------------------------------------------------------- + + void MatrixElementKernelDevice::computeMatrixElements( const unsigned int channelId ) + { + computeDependentCouplings<<<m_gpublocks, m_gputhreads>>>( m_gs.data(), m_couplings.data() ); +#ifndef MGONGPU_NSIGHT_DEBUG + constexpr unsigned int sharedMemSize = 0; +#else + constexpr unsigned int sharedMemSize = ntpbMAX * sizeof( float ); +#endif +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + sigmaKin<<<m_gpublocks, m_gputhreads, sharedMemSize>>>( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), m_matrixElements.data(), channelId, m_numerators.data(), m_denominators.data(), m_selhel.data(), m_selcol.data() ); +#else + sigmaKin<<<m_gpublocks, m_gputhreads, sharedMemSize>>>( m_momenta.data(), m_couplings.data(), m_rndhel.data(), m_rndcol.data(), m_matrixElements.data(), m_selhel.data(), m_selcol.data() ); +#endif + checkCuda( cudaPeekAtLastError() ); + checkCuda( cudaDeviceSynchronize() ); + } + + //-------------------------------------------------------------------------- + +} +#endif + +//============================================================================ diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/MatrixElementKernels.h b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/MatrixElementKernels.h new file mode 100644 index 0000000000..ec0fc9b18c --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/MatrixElementKernels.h @@ -0,0 +1,183 @@ +#ifndef MATRIXELEMENTKERNELS_H +#define MATRIXELEMENTKERNELS_H 1 + +#include "mgOnGpuConfig.h" + +#include "MemoryBuffers.h" + +#ifdef __CUDACC__ +namespace mg5amcGpu +#else +namespace mg5amcCpu +#endif +{ + //-------------------------------------------------------------------------- + + // A base class encapsulating matrix element calculations on a CPU host or on a GPU device + class MatrixElementKernelBase //: virtual public IMatrixElementKernel + { + protected: + + // Constructor from existing input and output buffers + MatrixElementKernelBase( const BufferMomenta& momenta, // input: momenta + const BufferGs& gs, // input: gs for alphaS + const BufferRndNumHelicity& rndhel, // input: random numbers for helicity selection + const BufferRndNumColor& rndcol, // input: random numbers for color selection + BufferMatrixElements& matrixElements, // output: matrix elements + BufferSelectedHelicity& selhel, // output: helicity selection + BufferSelectedColor& selcol ) // output: color selection + : m_momenta( momenta ) + , m_gs( gs ) + , m_rndhel( rndhel ) + , m_rndcol( rndcol ) + , m_matrixElements( matrixElements ) + , m_selhel( selhel ) + , m_selcol( selcol ) + { + } + + public: + + // Destructor + virtual ~MatrixElementKernelBase() {} + + // Compute good helicities (returns nGoodHel, the number of good helicity combinations out of ncomb) + virtual int computeGoodHelicities() = 0; + + // Compute matrix elements + virtual void computeMatrixElements(
const unsigned int channelId ) = 0; + + // Is this a host or device kernel? + virtual bool isOnDevice() const = 0; + + protected: + + // The buffer for the input momenta + const BufferMomenta& m_momenta; + + // The buffer for the gs to calculate the alphaS values + const BufferGs& m_gs; + + // The buffer for the random numbers for helicity selection + const BufferRndNumHelicity& m_rndhel; + + // The buffer for the random numbers for color selection + const BufferRndNumColor& m_rndcol; + + // The buffer for the output matrix elements + BufferMatrixElements& m_matrixElements; + + // The buffer for the output helicity selection + BufferSelectedHelicity& m_selhel; + + // The buffer for the output color selection + BufferSelectedColor& m_selcol; + }; + + //-------------------------------------------------------------------------- + +#ifndef __CUDACC__ + // A class encapsulating matrix element calculations on a CPU host + class MatrixElementKernelHost final : public MatrixElementKernelBase, public NumberOfEvents + { + public: + + // Constructor from existing input and output buffers + MatrixElementKernelHost( const BufferMomenta& momenta, // input: momenta + const BufferGs& gs, // input: gs for alphaS + const BufferRndNumHelicity& rndhel, // input: random numbers for helicity selection + const BufferRndNumColor& rndcol, // input: random numbers for color selection + BufferMatrixElements& matrixElements, // output: matrix elements + BufferSelectedHelicity& selhel, // output: helicity selection + BufferSelectedColor& selcol, // output: color selection + const size_t nevt ); + + // Destructor + virtual ~MatrixElementKernelHost() {} + + // Compute good helicities (returns nGoodHel, the number of good helicity combinations out of ncomb) + int computeGoodHelicities() override final; + + // Compute matrix elements + void computeMatrixElements( const unsigned int channelId ) override final; + + // Is this a host or device kernel? + bool isOnDevice() const override final { return false; } + + // Does this host system support the SIMD used in the matrix element calculation? + // [NB: SIMD vectorization in mg5amc C++ code is currently only used in the ME calculations below MatrixElementKernelHost!] 
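(As an aside on the hostSupportsSIMD declaration that follows: such a runtime probe can be built on the gcc/clang CPU-feature builtins. The sketch below is illustrative only; the feature strings checked and the return policy are assumptions, not necessarily those of the actual implementation in MatrixElementKernels.cc.)

  #include <iostream>
  // Minimal sketch of a host SIMD capability check using gcc/clang builtins (illustrative, not the mg5amc implementation)
  bool exampleHostSupportsSIMD( const bool verbose = true )
  {
    __builtin_cpu_init(); // initialise CPU feature detection
    const bool avx512f = __builtin_cpu_supports( "avx512f" );
    const bool avx2 = __builtin_cpu_supports( "avx2" );
    const bool sse42 = __builtin_cpu_supports( "sse4.2" );
    if( verbose ) std::cout << "avx512f=" << avx512f << " avx2=" << avx2 << " sse4.2=" << sse42 << std::endl;
    return avx2; // e.g. require AVX2 when the build was vectorised with 256-bit SIMD
  }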
+ static bool hostSupportsSIMD( const bool verbose = true ); + + private: + + // The buffer for the event-by-event couplings that depends on alphas QCD + HostBufferCouplings m_couplings; + +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // The buffer for the event-by-event numerators of multichannel factors + HostBufferNumerators m_numerators; + + // The buffer for the event-by-event denominators of multichannel factors + HostBufferDenominators m_denominators; +#endif + }; +#endif + + //-------------------------------------------------------------------------- + +#ifdef __CUDACC__ + // A class encapsulating matrix element calculations on a GPU device + class MatrixElementKernelDevice : public MatrixElementKernelBase, public NumberOfEvents + { + public: + + // Constructor from existing input and output buffers + MatrixElementKernelDevice( const BufferMomenta& momenta, // input: momenta + const BufferGs& gs, // input: gs for alphaS + const BufferRndNumHelicity& rndhel, // input: random numbers for helicity selection + const BufferRndNumColor& rndcol, // input: random numbers for color selection + BufferMatrixElements& matrixElements, // output: matrix elements + BufferSelectedHelicity& selhel, // output: helicity selection + BufferSelectedColor& selcol, // output: color selection + const size_t gpublocks, + const size_t gputhreads ); + + // Destructor + virtual ~MatrixElementKernelDevice() {} + + // Reset gpublocks and gputhreads + void setGrid( const int gpublocks, const int gputhreads ); + + // Compute good helicities (returns nGoodHel, the number of good helicity combinations out of ncomb) + int computeGoodHelicities() override final; + + // Compute matrix elements + void computeMatrixElements( const unsigned int channelId ) override final; + + // Is this a host or device kernel? 
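(For reference, the grid-size constraints enforced in the MatrixElementKernelDevice constructor above - gpublocks and gputhreads strictly positive, nevt equal to gpublocks * gputhreads, and gputhreads a multiple of neppM - can be validated on the host side before constructing the kernel. The sketch below is a self-contained illustration with hypothetical numbers and an assumed neppM value; it is not part of the class.)

  #include <cassert>
  // Illustrative sketch: choose a (gpublocks, gputhreads) pair satisfying the constructor checks
  int main()
  {
    constexpr int neppM = 4;        // assumed AOSOA page size (e.g. double precision with a 32-byte GPU cache line)
    constexpr int nevt = 16384;     // desired number of events per GPU iteration
    constexpr int gputhreads = 256; // must be > 0 and a multiple of neppM
    constexpr int gpublocks = nevt / gputhreads; // must be > 0, with nevt == gpublocks * gputhreads
    static_assert( gputhreads % neppM == 0, "gputhreads should be a multiple of neppM" );
    static_assert( gpublocks * gputhreads == nevt, "nevt mismatch" );
    assert( gpublocks > 0 && gputhreads > 0 );
    return 0;
  }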
+ bool isOnDevice() const override final { return true; } + + private: + + // The buffer for the event-by-event couplings that depends on alphas QCD + DeviceBufferCouplings m_couplings; + +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // The buffer for the event-by-event numerators of multichannel factors + DeviceBufferNumerators m_numerators; + + // The buffer for the event-by-event denominators of multichannel factors + DeviceBufferDenominators m_denominators; +#endif + + // The number of blocks in the GPU grid + size_t m_gpublocks; + + // The number of threads in the GPU grid + size_t m_gputhreads; + }; +#endif + + //-------------------------------------------------------------------------- +} +#endif // MATRIXELEMENTKERNELS_H diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/MemoryAccessAmplitudes.h b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/MemoryAccessAmplitudes.h new file mode 100644 index 0000000000..f3ab497b7a --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/MemoryAccessAmplitudes.h @@ -0,0 +1,150 @@ +#ifndef MemoryAccessAmplitudes_H +#define MemoryAccessAmplitudes_H 1 + +#include "mgOnGpuConfig.h" + +#include "mgOnGpuCxtypes.h" + +#include "MemoryAccessHelpers.h" + +#define MGONGPU_TRIVIAL_AMPLITUDES 1 + +//---------------------------------------------------------------------------- + +#ifndef MGONGPU_TRIVIAL_AMPLITUDES + +// A class describing the internal layout of memory buffers for amplitudes +// This implementation uses an AOSOA[npagA][nx2][neppA] where nevt=npagA*neppA +// [If many implementations are used, a suffix _AOSOAv1 should be appended to the class name] +class MemoryAccessAmplitudesBase //_AOSOAv1 +{ +public: + + // Number of Events Per Page in the amplitude AOSOA memory buffer layout + static constexpr int neppA = 1; // AOS (just a test...) + +private: + + friend class MemoryAccessHelper; + friend class KernelAccessHelper; + friend class KernelAccessHelper; + + // The number of floating point components of a complex number + static constexpr int nx2 = mgOnGpu::nx2; + + //-------------------------------------------------------------------------- + // NB all KernelLaunchers assume that memory access can be decomposed as "accessField = decodeRecord( accessRecord )" + // (in other words: first locate the event record for a given event, then locate an element in that record) + //-------------------------------------------------------------------------- + + // Locate an event record (output) in a memory buffer (input) from the given event number (input) + // [Signature (non-const) ===> fptype* ieventAccessRecord( fptype* buffer, const int ievt ) <===] + static __host__ __device__ inline fptype* + ieventAccessRecord( fptype* buffer, + const int ievt ) + { + const int ipagA = ievt / neppA; // #event "A-page" + const int ieppA = ievt % neppA; // #event in the current event A-page + constexpr int ix2 = 0; + return &( buffer[ipagA * nx2 * neppA + ix2 * neppA + ieppA] ); // AOSOA[ipagA][ix2][ieppA] + } + + //-------------------------------------------------------------------------- + + // Locate a field (output) of an event record (input) from the given field indexes (input) + // [Signature (non-const) ===> fptype& decodeRecord( fptype* buffer, Ts... args ) <===] + // [NB: expand variadic template "Ts... 
args" to "const int ix2" and rename "Field" as "Ix2"] + static __host__ __device__ inline fptype& + decodeRecord( fptype* buffer, + const int ix2 ) + { + constexpr int ipagA = 0; + constexpr int ieppA = 0; + return buffer[ipagA * nx2 * neppA + ix2 * neppA + ieppA]; // AOSOA[ipagA][ix2][ieppA] + } +}; + +//---------------------------------------------------------------------------- + +// A class providing access to memory buffers for a given event, based on explicit event numbers +// Its methods use the MemoryAccessHelper templates - note the use of the template keyword in template function instantiations +class MemoryAccessAmplitudes : public MemoryAccessAmplitudesBase +{ +public: + + // Locate an event record (output) in a memory buffer (input) from the given event number (input) + // [Signature (non-const) ===> fptype* ieventAccessRecord( fptype* buffer, const int ievt ) <===] + static constexpr auto ieventAccessRecord = MemoryAccessHelper::ieventAccessRecord; + + // Locate an event record (output) in a memory buffer (input) from the given event number (input) + // [Signature (const) ===> const fptype* ieventAccessRecordConst( const fptype* buffer, const int ievt ) <===] + static constexpr auto ieventAccessRecordConst = MemoryAccessHelper::ieventAccessRecordConst; + + // Locate a field (output) of an event record (input) from the given field indexes (input) + // [Signature (non-const) ===> fptype& decodeRecord( fptype* buffer, const int ix2 ) <===] + static constexpr auto decodeRecordIx2 = MemoryAccessHelper::decodeRecord; + + // Locate a field (output) of an event record (input) from the given field indexes (input) + // [Signature (const) ===> const fptype& decodeRecordConst( const fptype* buffer, const int ix2 ) <===] + static constexpr auto decodeRecordIx2Const = + MemoryAccessHelper::template decodeRecordConst; + + // Locate a field (output) in a memory buffer (input) from the given event number (input) and the given field indexes (input) + // [Signature (non-const) ===> fptype& ieventAccessIx2( fptype* buffer, const ievt, const int ix2 ) <===] + static constexpr auto ieventAccessIx2 = + MemoryAccessHelper::template ieventAccessField; + + // Locate a field (output) in a memory buffer (input) from the given event number (input) and the given field indexes (input) + // [Signature (const) ===> const fptype& ieventAccessIx2Const( const fptype* buffer, const ievt, const int ix2 ) <===] + static constexpr auto ieventAccessIx2Const = + MemoryAccessHelper::template ieventAccessFieldConst; +}; + +#endif // #ifndef MGONGPU_TRIVIAL_AMPLITUDES + +//---------------------------------------------------------------------------- + +// A class providing access to memory buffers for a given event, based on implicit kernel rules +// Its methods use the KernelAccessHelper template - note the use of the template keyword in template function instantiations +template +class KernelAccessAmplitudes +{ +public: + +#ifndef MGONGPU_TRIVIAL_AMPLITUDES + + // Locate a field (output) in a memory buffer (input) from a kernel event-indexing mechanism (internal) and the given field indexes (input) + // [Signature (non-const) ===> fptype& kernelAccessIx2( fptype* buffer, const int ix2 ) <===] + static constexpr auto kernelAccessIx2 = + KernelAccessHelper::template kernelAccessField; + + // Locate a field (output) in a memory buffer (input) from a kernel event-indexing mechanism (internal) and the given field indexes (input) + // [Signature (const) ===> const fptype& kernelAccessIx2Const( const fptype* buffer, const int 
ix2 ) <===] + static constexpr auto kernelAccessIx2Const = + KernelAccessHelper::template kernelAccessFieldConst; + +#else + + static __host__ __device__ inline cxtype_sv* + kernelAccess( fptype* buffer ) + { + return reinterpret_cast( buffer ); + } + + static __host__ __device__ inline const cxtype_sv* + kernelAccessConst( const fptype* buffer ) + { + return reinterpret_cast( buffer ); + } + +#endif // #ifndef MGONGPU_TRIVIAL_AMPLITUDES +}; + +//---------------------------------------------------------------------------- + +typedef KernelAccessAmplitudes HostAccessAmplitudes; +typedef KernelAccessAmplitudes DeviceAccessAmplitudes; + +//---------------------------------------------------------------------------- + +#endif // MemoryAccessAmplitudes_H diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/MemoryAccessCouplings.h b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/MemoryAccessCouplings.h new file mode 100644 index 0000000000..11e48b2165 --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/MemoryAccessCouplings.h @@ -0,0 +1,256 @@ +#ifndef MemoryAccessCouplings_H +#define MemoryAccessCouplings_H 1 + +#include "mgOnGpuConfig.h" + +#include "mgOnGpuCxtypes.h" + +#include "MemoryAccessHelpers.h" +#include "MemoryAccessMomenta.h" // for MemoryAccessMomentaBase::neppM +#include "MemoryBuffers.h" // for HostBufferCouplings::isaligned + +//---------------------------------------------------------------------------- + +// A class describing the internal layout of memory buffers for couplings +// This implementation uses an AOSOA[npagC][ndcoup][nx2][neppC] "super-buffer" where nevt=npagC*neppC +// From the "super-buffer" for ndcoup different couplings, use idcoupAccessBuffer to access the buffer for one specific coupling +// [If many implementations are used, a suffix _AOSOAv1 should be appended to the class name] +class MemoryAccessCouplingsBase //_AOSOAv1 +{ +public: + + // Number of Events Per Page in the coupling AOSOA memory buffer layout + static constexpr int neppC = MemoryAccessMomentaBase::neppM; // use the same AOSOA striding as for momenta + + // SANITY CHECK: check that neppC is a power of two + static_assert( ispoweroftwo( neppC ), "neppC is not a power of 2" ); + + //-------------------------------------------------------------------------- + // ** NB! A single super-buffer AOSOA[npagC][ndcoup][nx2][neppC] includes data for ndcoup different couplings ** + // ** NB! The ieventAccessRecord and kernelAccess functions refer to the buffer for one individual coupling ** + // ** NB! Use idcoupAccessBuffer to add a fixed offset and locate the buffer for one given individual coupling ** + //-------------------------------------------------------------------------- + + // Locate the buffer for a single coupling (output) in a memory super-buffer (input) from the given coupling index (input) + // [Signature (non-const) ===> fptype* idcoupAccessBuffer( fptype* buffer, const int idcoup ) <===] + // NB: keep this in public even if exposed through KernelAccessCouplings: nvcc says it is inaccesible otherwise? + static __host__ __device__ inline fptype* + idcoupAccessBuffer( fptype* buffer, // input "super-buffer" + const int idcoup ) + { + constexpr int ipagC = 0; + constexpr int ieppC = 0; + constexpr int ix2 = 0; + // NB! 
this effectively adds an offset "idcoup * nx2 * neppC" + return &( buffer[ipagC * ndcoup * nx2 * neppC + idcoup * nx2 * neppC + ix2 * neppC + ieppC] ); // AOSOA[ipagC][idcoup][ix2][ieppC] + } + + // Locate the buffer for a single coupling (output) in a memory super-buffer (input) from the given coupling index (input) + // [Signature (const) ===> const fptype* idcoupAccessBufferConst( const fptype* buffer, const int idcoup ) <===] + // NB: keep this in public even if exposed through KernelAccessCouplings: nvcc says it is inaccesible otherwise? + static __host__ __device__ inline const fptype* + idcoupAccessBufferConst( const fptype* buffer, // input "super-buffer" + const int idcoup ) + { + return idcoupAccessBuffer( const_cast( buffer ), idcoup ); + } + +private: + + friend class MemoryAccessHelper; + friend class KernelAccessHelper; + friend class KernelAccessHelper; + + // The number of couplings that dependent on the running alphas QCD in this specific process + static constexpr size_t ndcoup = Parameters_SMEFTsim_topU3l_MwScheme_UFO_dependentCouplings::ndcoup; + + // The number of floating point components of a complex number + static constexpr int nx2 = mgOnGpu::nx2; + + //-------------------------------------------------------------------------- + // NB all KernelLaunchers assume that memory access can be decomposed as "accessField = decodeRecord( accessRecord )" + // (in other words: first locate the event record for a given event, then locate an element in that record) + //-------------------------------------------------------------------------- + + // Locate an event record (output) in a memory buffer (input) from the given event number (input) + // [Signature (non-const) ===> fptype* ieventAccessRecord( fptype* buffer, const int ievt ) <===] + static __host__ __device__ inline fptype* + ieventAccessRecord( fptype* buffer, + const int ievt ) + { + const int ipagC = ievt / neppC; // #event "C-page" + const int ieppC = ievt % neppC; // #event in the current event C-page + constexpr int idcoup = 0; + constexpr int ix2 = 0; + return &( buffer[ipagC * ndcoup * nx2 * neppC + idcoup * nx2 * neppC + ix2 * neppC + ieppC] ); // AOSOA[ipagC][idcoup][ix2][ieppC] + } + + //-------------------------------------------------------------------------- + + // Locate a field (output) of an event record (input) from the given field indexes (input) + // [Signature (non-const) ===> fptype& decodeRecord( fptype* buffer, Ts... args ) <===] + // [NB: expand variadic template "Ts... args" to "const int ix2" and rename "Field" as "Ix2"] + static __host__ __device__ inline fptype& + decodeRecord( fptype* buffer, + const int ix2 ) + { + constexpr int ipagC = 0; + constexpr int ieppC = 0; + // NB! 
the offset "idcoup * nx2 * neppC" has been added in idcoupAccessBuffer + constexpr int idcoup = 0; + return buffer[ipagC * ndcoup * nx2 * neppC + idcoup * nx2 * neppC + ix2 * neppC + ieppC]; // AOSOA[ipagC][idcoup][ix2][ieppC] + } +}; + +//---------------------------------------------------------------------------- + +// A class providing access to memory buffers for a given event, based on explicit event numbers +// Its methods use the MemoryAccessHelper templates - note the use of the template keyword in template function instantiations +class MemoryAccessCouplings : public MemoryAccessCouplingsBase +{ +public: + + // Locate an event record (output) in a memory buffer (input) from the given event number (input) + // [Signature (non-const) ===> fptype* ieventAccessRecord( fptype* buffer, const int ievt ) <===] + static constexpr auto ieventAccessRecord = MemoryAccessHelper::ieventAccessRecord; + + // Locate an event record (output) in a memory buffer (input) from the given event number (input) + // [Signature (const) ===> const fptype* ieventAccessRecordConst( const fptype* buffer, const int ievt ) <===] + static constexpr auto ieventAccessRecordConst = MemoryAccessHelper::ieventAccessRecordConst; + + // Locate a field (output) of an event record (input) from the given field indexes (input) + // [Signature (non-const) ===> fptype& decodeRecord( fptype* buffer, const int ix2 ) <===] + static constexpr auto decodeRecordIx2 = MemoryAccessHelper::decodeRecord; + + // Locate a field (output) of an event record (input) from the given field indexes (input) + // [Signature (const) ===> const fptype& decodeRecordConst( const fptype* buffer, const int ix2 ) <===] + static constexpr auto decodeRecordIx2Const = + MemoryAccessHelper::template decodeRecordConst; + + // Locate a field (output) in a memory buffer (input) from the given event number (input) and the given field indexes (input) + // [Signature (non-const) ===> fptype& ieventAccessIx2( fptype* buffer, const ievt, const int ix2 ) <===] + static constexpr auto ieventAccessIx2 = + MemoryAccessHelper::template ieventAccessField; + + // Locate a field (output) in a memory buffer (input) from the given event number (input) and the given field indexes (input) + // [Signature (const) ===> const fptype& ieventAccessIx2Const( const fptype* buffer, const ievt, const int ix2 ) <===] + static constexpr auto ieventAccessIx2Const = + MemoryAccessHelper::template ieventAccessFieldConst; +}; + +//---------------------------------------------------------------------------- + +// A class providing access to memory buffers for a given event, based on implicit kernel rules +// Its methods use the KernelAccessHelper template - note the use of the template keyword in template function instantiations +template +class KernelAccessCouplings +{ +public: + + // Expose selected functions from MemoryAccessCouplingsBase + static constexpr auto idcoupAccessBuffer = MemoryAccessCouplingsBase::idcoupAccessBuffer; + static constexpr auto idcoupAccessBufferConst = MemoryAccessCouplingsBase::idcoupAccessBufferConst; + + // Expose selected functions from MemoryAccessCouplings + static constexpr auto ieventAccessRecordConst = MemoryAccessCouplings::ieventAccessRecordConst; + + // Locate a field (output) in a memory buffer (input) from a kernel event-indexing mechanism (internal) and the given field indexes (input) + // [Signature (non-const, SCALAR) ===> fptype& kernelAccessIx2( fptype* buffer, const int ix2 ) <===] + static constexpr auto kernelAccessIx2_s = + 
KernelAccessHelper::template kernelAccessField; + + // Locate a field (output) in a memory buffer (input) from a kernel event-indexing mechanism (internal) and the given field indexes (input) + // [Signature (const, SCALAR) ===> const fptype& kernelAccessIx2Const( const fptype* buffer, const int ix2 ) <===] + static constexpr auto kernelAccessIx2Const_s = + KernelAccessHelper::template kernelAccessFieldConst; + + // Locate a field (output) in a memory buffer (input) from a kernel event-indexing mechanism (internal) and the given field indexes (input) + // [Signature (non const, SCALAR OR VECTOR) ===> fptype_sv& kernelAccessIx2( fptype* buffer, const int ix2 ) <===] + static __host__ __device__ inline fptype_sv& + kernelAccessIx2( fptype* buffer, + const int ix2 ) + { + fptype& out = kernelAccessIx2_s( buffer, ix2 ); +#ifndef MGONGPU_CPPSIMD + return out; +#else + // NB: derived from MemoryAccessMomenta, restricting the implementation to contiguous aligned arrays + constexpr int neppC = MemoryAccessCouplingsBase::neppC; + static_assert( neppC >= neppV ); // ASSUME CONTIGUOUS ARRAYS + static_assert( neppC % neppV == 0 ); // ASSUME CONTIGUOUS ARRAYS + static_assert( mg5amcCpu::HostBufferCouplings::isaligned() ); // ASSUME ALIGNED ARRAYS (reinterpret_cast will segfault otherwise!) + //assert( (size_t)( buffer ) % mgOnGpu::cppAlign == 0 ); // ASSUME ALIGNED ARRAYS (reinterpret_cast will segfault otherwise!) + return mg5amcCpu::fptypevFromAlignedArray( out ); // SIMD bulk load of neppV, use reinterpret_cast +#endif + } + + // Locate a field (output) in a memory buffer (input) from a kernel event-indexing mechanism (internal) and the given field indexes (input) + // [Signature (const, SCALAR OR VECTOR) ===> const fptype_sv& kernelAccessIx2Const( const fptype* buffer, const int ix2 ) <===] + static __host__ __device__ inline const fptype_sv& + kernelAccessIx2Const( const fptype* buffer, + const int ix2 ) + { + return kernelAccessIx2( const_cast( buffer ), ix2 ); + } + + /* + // Locate a field (output) in a memory buffer (input) from a kernel event-indexing mechanism (internal) and the given field indexes (input) + // [Signature (const, SCALAR OR VECTOR) ===> const fptype_sv& kernelAccessIx2Const( const fptype* buffer, const int ix2 ) <===] + static __host__ __device__ inline const fptype_sv& + kernelAccessIx2Const( const fptype* buffer, + const int ix2 ) + { + const fptype& out = kernelAccessIx2Const_s( buffer, ix2 ); +#ifndef MGONGPU_CPPSIMD + return out; +#else + // NB: derived from MemoryAccessMomenta, restricting the implementation to contiguous aligned arrays + constexpr int neppC = MemoryAccessCouplingsBase::neppC; + static_assert( neppC >= neppV ); // ASSUME CONTIGUOUS ARRAYS + static_assert( neppC % neppV == 0 ); // ASSUME CONTIGUOUS ARRAYS + static_assert( mg5amcCpu::HostBufferCouplings::isaligned() ); // ASSUME ALIGNED ARRAYS (reinterpret_cast will segfault otherwise!) + //assert( (size_t)( buffer ) % mgOnGpu::cppAlign == 0 ); // ASSUME ALIGNED ARRAYS (reinterpret_cast will segfault otherwise!) 
+ return mg5amcCpu::fptypevFromAlignedArray( out ); // SIMD bulk load of neppV, use reinterpret_cast +#endif + } + */ + + // Locate a field (output) in a memory buffer (input) from a kernel event-indexing mechanism (internal) and the given field indexes (input) + // [Signature (non const, SCALAR OR VECTOR) ===> cxtype_sv_ref kernelAccess( fptype* buffer ) <===] + static __host__ __device__ inline cxtype_sv_ref + kernelAccess( fptype* buffer ) + { + /* + fptype_sv& real = kernelAccessIx2( buffer, 0 ); + fptype_sv& imag = kernelAccessIx2( buffer, 1 ); + printf( "C_ACCESS::kernelAccess: pbuffer=%p pr=%p pi=%p\n", buffer, &real, &imag ); + return cxtype_sv_ref( real, imag ); + */ + return cxtype_sv_ref( kernelAccessIx2( buffer, 0 ), + kernelAccessIx2( buffer, 1 ) ); + } + + // Locate a field (output) in a memory buffer (input) from a kernel event-indexing mechanism (internal) and the given field indexes (input) + // [Signature (const, SCALAR OR VECTOR) ===> cxtype_sv kernelAccessConst( const fptype* buffer ) <===] + static __host__ __device__ inline cxtype_sv + kernelAccessConst( const fptype* buffer ) + { + /* + const fptype_sv& real = kernelAccessIx2Const( buffer, 0 ); + const fptype_sv& imag = kernelAccessIx2Const( buffer, 1 ); + printf( "C_ACCESS::kernelAccessConst: pbuffer=%p pr=%p pi=%p\n", buffer, &real, &imag ); + return cxtype_sv( real, imag ); + */ + return cxtype_sv( kernelAccessIx2Const( buffer, 0 ), + kernelAccessIx2Const( buffer, 1 ) ); + } +}; + +//---------------------------------------------------------------------------- + +typedef KernelAccessCouplings HostAccessCouplings; +typedef KernelAccessCouplings DeviceAccessCouplings; + +//---------------------------------------------------------------------------- + +#endif // MemoryAccessCouplings_H diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/MemoryAccessCouplingsFixed.h b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/MemoryAccessCouplingsFixed.h new file mode 100644 index 0000000000..0f9850baf2 --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/MemoryAccessCouplingsFixed.h @@ -0,0 +1,70 @@ +#ifndef MemoryAccessCouplingsFixed_H +#define MemoryAccessCouplingsFixed_H 1 + +#include "mgOnGpuConfig.h" + +#include "mgOnGpuCxtypes.h" +#include "mgOnGpuVectors.h" + +//#include "MemoryAccessHelpers.h" + +//---------------------------------------------------------------------------- + +// A class describing the internal layout of memory buffers for fixed couplings +// This implementation uses a STRUCT[ndcoup][nx2] "super-buffer" layout: in practice, the cIPC global array +// From the "super-buffer" for ndcoup different couplings, use idcoupAccessBuffer to access the buffer for one specific coupling +// [If many implementations are used, a suffix _Sv1 should be appended to the class name] +class MemoryAccessCouplingsFixedBase //_Sv1 +{ +public: + + // Locate the buffer for a single coupling (output) in a memory super-buffer (input) from the given coupling index (input) + // [Signature (const) ===> const fptype* iicoupAccessBufferConst( const fptype* buffer, const int iicoup ) <===] + static __host__ __device__ inline const fptype* + iicoupAccessBufferConst( const fptype* buffer, // input "super-buffer": in practice, the cIPC global array + const int iicoup ) + { + constexpr int ix2 = 0; + // NB! 
this effectively adds an offset "iicoup * nx2" + return &( buffer[iicoup * nx2 + ix2] ); // STRUCT[idcoup][ix2] + } + +private: + + // The number of floating point components of a complex number + static constexpr int nx2 = mgOnGpu::nx2; +}; + +//---------------------------------------------------------------------------- + +// A class providing access to memory buffers for a given event, based on implicit kernel rules +// Its methods use the KernelAccessHelper template - note the use of the template keyword in template function instantiations +template +class KernelAccessCouplingsFixed +{ +public: + + // Expose selected functions from MemoryAccessCouplingsFixedBase + static constexpr auto iicoupAccessBufferConst = MemoryAccessCouplingsFixedBase::iicoupAccessBufferConst; + + // Locate a field (output) in a memory buffer (input) from a kernel event-indexing mechanism (internal) and the given field indexes (input) + // [Signature (const, SCALAR OR VECTOR) ===> cxtype_sv kernelAccessConst( const fptype* buffer ) <===] + static __host__ __device__ inline const cxtype_sv + kernelAccessConst( const fptype* buffer ) + { + // TRIVIAL ACCESS to fixed-couplings buffers! + //return cxmake( fptype_sv{ buffer[0] }, fptype_sv{ buffer[1] } ); // NO! BUG #339! + const fptype_sv r_sv = fptype_sv{ 0 } + buffer[0]; + const fptype_sv i_sv = fptype_sv{ 0 } + buffer[1]; + return cxmake( r_sv, i_sv ); // ugly but effective + } +}; + +//---------------------------------------------------------------------------- + +typedef KernelAccessCouplingsFixed HostAccessCouplingsFixed; +typedef KernelAccessCouplingsFixed DeviceAccessCouplingsFixed; + +//---------------------------------------------------------------------------- + +#endif // MemoryAccessCouplingsFixed_H diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/MemoryAccessDenominators.h b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/MemoryAccessDenominators.h new file mode 100644 index 0000000000..7a4a80ebd9 --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/MemoryAccessDenominators.h @@ -0,0 +1,18 @@ +#ifndef MemoryAccessDenominators_H +#define MemoryAccessDenominators_H 1 +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + +#include "MemoryAccessGs.h" + +//---------------------------------------------------------------------------- + +// A class describing the internal layout of memory buffers for denominators +// This implementation reuses the plain ARRAY[nevt] implementation of MemoryAccessGs + +typedef KernelAccessGs HostAccessDenominators; +typedef KernelAccessGs DeviceAccessDenominators; + +//---------------------------------------------------------------------------- + +#endif +#endif // MemoryAccessDenominators_H diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/MemoryAccessGs.h b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/MemoryAccessGs.h new file mode 100644 index 0000000000..f233d64b9c --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/MemoryAccessGs.h @@ -0,0 +1,148 @@ +#ifndef MemoryAccessGs_H +#define MemoryAccessGs_H 1 + +#include "mgOnGpuConfig.h" + +#include "MemoryAccessHelpers.h" +#include "MemoryAccessVectors.h" +#include "MemoryBuffers.h" // for HostBufferMatrixElements::isaligned + +//---------------------------------------------------------------------------- + +// A class describing the internal layout of memory buffers for Gs +// This implementation uses a plain ARRAY[nevt] +// [If many implementations are used, a suffix _ARRAYv1 should be appended to the class name] +class MemoryAccessGsBase //_ARRAYv1 +{ 
+private: + + friend class MemoryAccessHelper; + friend class KernelAccessHelper; + friend class KernelAccessHelper; + + //-------------------------------------------------------------------------- + // NB all KernelLaunchers assume that memory access can be decomposed as "accessField = decodeRecord( accessRecord )" + // (in other words: first locate the event record for a given event, then locate an element in that record) + //-------------------------------------------------------------------------- + + // Locate an event record (output) in a memory buffer (input) from the given event number (input) + // [Signature (non-const) ===> fptype* ieventAccessRecord( fptype* buffer, const int ievt ) <===] + static __host__ __device__ inline fptype* + ieventAccessRecord( fptype* buffer, + const int ievt ) + { + return &( buffer[ievt] ); // ARRAY[nevt] + } + + //-------------------------------------------------------------------------- + + // Locate a field (output) of an event record (input) from the given field indexes (input) + // [Signature (non-const) ===> fptype& decodeRecord( fptype* buffer, Ts... args ) <===] + // [NB: expand variadic template "Ts... args" to empty and rename "Field" as empty] + static __host__ __device__ inline fptype& + decodeRecord( fptype* buffer ) + { + constexpr int ievt = 0; + return buffer[ievt]; // ARRAY[nevt] + } +}; + +//---------------------------------------------------------------------------- + +// A class providing access to memory buffers for a given event, based on explicit event numbers +// Its methods use the MemoryAccessHelper templates - note the use of the template keyword in template function instantiations +class MemoryAccessGs : public MemoryAccessGsBase +{ +public: + + // Locate an event record (output) in a memory buffer (input) from the given event number (input) + // [Signature (non-const) ===> fptype* ieventAccessRecord( fptype* buffer, const int ievt ) <===] + static constexpr auto ieventAccessRecord = MemoryAccessHelper::ieventAccessRecord; + + // Locate an event record (output) in a memory buffer (input) from the given event number (input) + // [Signature (const) ===> const fptype* ieventAccessRecordConst( const fptype* buffer, const int ievt ) <===] + static constexpr auto ieventAccessRecordConst = MemoryAccessHelper::ieventAccessRecordConst; + + // Locate a field (output) of an event record (input) from the given field indexes (input) + // [Signature (non-const) ===> fptype& decodeRecord( fptype* buffer ) <===] + static constexpr auto decodeRecord = MemoryAccessHelper::decodeRecord; + + // Locate a field (output) of an event record (input) from the given field indexes (input) + // [Signature (const) ===> const fptype& decodeRecordConst( const fptype* buffer ) <===] + static constexpr auto decodeRecordConst = + MemoryAccessHelper::template decodeRecordConst<>; + + // Locate a field (output) in a memory buffer (input) from the given event number (input) and the given field indexes (input) + // [Signature (non-const) ===> fptype& ieventAccess( fptype* buffer, const ievt ) <===] + static constexpr auto ieventAccess = + MemoryAccessHelper::template ieventAccessField<>; + + // Locate a field (output) in a memory buffer (input) from the given event number (input) and the given field indexes (input) + // [Signature (const) ===> const fptype& ieventAccessConst( const fptype* buffer, const ievt ) <===] + static constexpr auto ieventAccessConst = + MemoryAccessHelper::template ieventAccessFieldConst<>; +}; + 
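(Before moving on to the kernel-based accessors, the two-step decomposition used throughout these classes - first locate the event record, then decode a field inside it - can be illustrated on the trivial ARRAY[nevt] layout. The sketch below is plain standalone C++ with hypothetical values and does not use the actual mgOnGpu types.)

  #include <cassert>
  #include <vector>
  typedef double fptype; // assume double precision for this sketch
  // ieventAccessRecord for ARRAY[nevt]: the record of event ievt starts at &buffer[ievt]
  fptype* ieventAccessRecord( fptype* buffer, const int ievt ) { return &buffer[ievt]; }
  // decodeRecord for ARRAY[nevt]: the record holds a single scalar, so no field index is needed
  fptype& decodeRecord( fptype* record ) { return record[0]; }
  int main()
  {
    const int nevt = 8;
    std::vector<fptype> gs( nevt, 0. );
    // accessField = decodeRecord( accessRecord ): write the Gs value of event 5
    decodeRecord( ieventAccessRecord( gs.data(), 5 ) ) = 1.218;
    assert( gs[5] == 1.218 );
    return 0;
  }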
+//---------------------------------------------------------------------------- + +// A class providing access to memory buffers for a given event, based on implicit kernel rules +// Its methods use the KernelAccessHelper template - note the use of the template keyword in template function instantiations +template +class KernelAccessGs +{ +public: + + // Expose selected functions from MemoryAccessGs + static constexpr auto ieventAccessRecord = MemoryAccessGs::ieventAccessRecord; + + // Locate a field (output) in a memory buffer (input) from a kernel event-indexing mechanism (internal) and the given field indexes (input) + // [Signature (non-const, SCALAR) ===> fptype& kernelAccess( fptype* buffer ) <===] + static constexpr auto kernelAccess_s = + KernelAccessHelper::template kernelAccessField<>; // requires cuda 11.4 + + // Locate a field (output) in a memory buffer (input) from a kernel event-indexing mechanism (internal) + // [Signature (non-const, SCALAR OR VECTOR) ===> fptype_sv& kernelAccess( fptype* buffer ) <===] + static __host__ __device__ inline fptype_sv& + kernelAccess( fptype* buffer ) + { + fptype& out = kernelAccess_s( buffer ); +#ifndef MGONGPU_CPPSIMD + return out; +#else + // NB: derived from MemoryAccessMomenta, restricting the implementation to contiguous aligned arrays (#435) + static_assert( mg5amcCpu::HostBufferGs::isaligned() ); // ASSUME ALIGNED ARRAYS (reinterpret_cast will segfault otherwise!) + //assert( (size_t)( buffer ) % mgOnGpu::cppAlign == 0 ); // ASSUME ALIGNED ARRAYS (reinterpret_cast will segfault otherwise!) + return mg5amcCpu::fptypevFromAlignedArray( out ); // SIMD bulk load of neppV, use reinterpret_cast +#endif + } + + // Locate a field (output) in a memory buffer (input) from a kernel event-indexing mechanism (internal) and the given field indexes (input) + // [Signature (const, SCALAR) ===> const fptype& kernelAccessConst( const fptype* buffer ) <===] + static constexpr auto kernelAccessConst_s = + KernelAccessHelper::template kernelAccessFieldConst<>; // requires cuda 11.4 + + // Locate a field (output) in a memory buffer (input) from a kernel event-indexing mechanism (internal) + // [Signature (const, SCALAR OR VECTOR) ===> const fptype_sv& kernelAccess( const fptype* buffer ) <===] + static __host__ __device__ inline const fptype_sv& + kernelAccessConst( const fptype* buffer ) + { + const fptype& out = kernelAccessConst_s( buffer ); +#ifndef MGONGPU_CPPSIMD + return out; +#else + // NB: derived from MemoryAccessMomenta, restricting the implementation to contiguous aligned arrays (#435) + static_assert( mg5amcCpu::HostBufferGs::isaligned() ); // ASSUME ALIGNED ARRAYS (reinterpret_cast will segfault otherwise!) + //assert( (size_t)( buffer ) % mgOnGpu::cppAlign == 0 ); // ASSUME ALIGNED ARRAYS (reinterpret_cast will segfault otherwise!) 
+ return mg5amcCpu::fptypevFromAlignedArray( out ); // SIMD bulk load of neppV, use reinterpret_cast +#endif + } +}; + +//---------------------------------------------------------------------------- + +typedef KernelAccessGs HostAccessGs; +typedef KernelAccessGs DeviceAccessGs; + +//---------------------------------------------------------------------------- + +#endif // MemoryAccessGs_H diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/MemoryAccessHelpers.h b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/MemoryAccessHelpers.h new file mode 100644 index 0000000000..aa3016c9a1 --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/MemoryAccessHelpers.h @@ -0,0 +1,152 @@ +#ifndef MemoryAccessHelpers_H +#define MemoryAccessHelpers_H 1 + +#include "mgOnGpuConfig.h" + +#include "mgOnGpuFptypes.h" + +//---------------------------------------------------------------------------- + +// A templated helper class that includes the boilerplate code for MemoryAccess classes +template +class MemoryAccessHelper +{ +public: + + //-------------------------------------------------------------------------- + + // Locate an event record (output) in a memory buffer (input) from the given event number (input) + // [Signature (non-const) ===> fptype* ieventAccessRecord( fptype* buffer, const int ievt ) <===] + static constexpr auto ieventAccessRecord = T::ieventAccessRecord; + + //-------------------------------------------------------------------------- + + // Locate an event record (output) in a memory buffer (input) from the given event number (input) + // [Signature (const) ===> const fptype* ieventAccessRecordConst( const fptype* buffer, const int ievt ) <===] + static __host__ __device__ inline const fptype* + ieventAccessRecordConst( const fptype* buffer, + const int ievt ) + { + return ieventAccessRecord( const_cast( buffer ), ievt ); + } + + //-------------------------------------------------------------------------- + + // Locate a field (output) of an event record (input) from the given field indexes (input) + // [Signature (non-const) ===> fptype& decodeRecord( fptype* buffer, Ts... args ) <===] + static constexpr auto decodeRecord = T::decodeRecord; + + //-------------------------------------------------------------------------- + + // Locate a field (output) of an event record (input) from the given field indexes (input) + // [Signature (const) ===> const fptype& decodeRecordConst( const fptype* buffer, Ts... args ) <===] + template + static __host__ __device__ inline const fptype& + decodeRecordConst( const fptype* buffer, + Ts... args ) // variadic template + { + return T::decodeRecord( const_cast( buffer ), args... ); + } + + //-------------------------------------------------------------------------- + + // Locate a field (output) in a memory buffer (input) from the given event number (input) and the given field indexes (input) + // [Signature (non-const) ===> fptype& ieventAccessField( fptype* buffer, const ievt, Ts... args ) <===] + template + static __host__ __device__ inline fptype& + ieventAccessField( fptype* buffer, + const int ievt, + Ts... args ) // variadic template + { + // NB all KernelLaunchers assume that memory access can be decomposed as "accessField = decodeRecord( accessRecord )" + // (in other words: first locate the event record for a given event, then locate an element in that record) + return T::decodeRecord( T::ieventAccessRecord( buffer, ievt ), args... 
); + } + + //-------------------------------------------------------------------------- + + // Locate a field (output) in a memory buffer (input) from the given event number (input) and the given field indexes (input) + // [Signature (const) ===> const fptype& ieventAccessFieldConst( const fptype* buffer, const ievt, Ts... args ) <===] + template + static __host__ __device__ inline const fptype& + ieventAccessFieldConst( const fptype* buffer, + const int ievt, + Ts... args ) // variadic template + { + return ieventAccessField( const_cast( buffer ), ievt, args... ); + } +}; + +//---------------------------------------------------------------------------- + +// A templated helper class that includes the boilerplate code for KernelAccess classes +template +class KernelAccessHelper : public MemoryAccessHelper +{ +public: + + //-------------------------------------------------------------------------- + + // Locate an event record (output) in a memory buffer (input) from a kernel event-indexing mechanism (internal) + // [Signature (non-const) ===> fptype* kernelAccessRecord( fptype* buffer ) <===] + static __host__ __device__ inline fptype* + kernelAccessRecord( fptype* buffer ) + { + if constexpr( !onDevice ) // requires c++17 also in CUDA (#333) + { + // FIXME #436: clarify that buffer includes all events on device, and only the record for an event subset on host! + // FIXME #436: am I not assuming that the following line is always identical to buffer for all access classes T? + return T::ieventAccessRecord( buffer, 0 ); + } + else + { +#ifdef __CUDACC__ + const int ievt = blockDim.x * blockIdx.x + threadIdx.x; // index of event (thread) in grid + //printf( "kernelAccessRecord: ievt=%d threadId=%d\n", ievt, threadIdx.x ); + return T::ieventAccessRecord( buffer, ievt ); // NB fptype and fptype_sv coincide for CUDA +#else + throw std::runtime_error( "kernelAccessRecord on device is only implemented in CUDA" ); +#endif + } + } + + //-------------------------------------------------------------------------- + + // Locate an event record (output) in a memory buffer (input) from a kernel event-indexing mechanism (internal) + // [Signature (const) ===> const fptype* kernelAccessRecordConst( const fptype* buffer ) <===] + static __host__ __device__ inline const fptype* + kernelAccessRecordConst( const fptype* buffer ) + { + return kernelAccessRecord( const_cast( buffer ) ); + } + + //-------------------------------------------------------------------------- + + // Locate a field (output) in a memory buffer (input) from a kernel event-indexing mechanism (internal) and the given field indexes (input) + // [Signature (non-const) ===> fptype& kernelAccessField( fptype* buffer, Ts... args ) <===] + template + static __host__ __device__ inline fptype& + kernelAccessField( fptype* buffer, + Ts... args ) // variadic template + { + // NB all KernelLaunchers assume that memory access can be decomposed as "accessField = decodeRecord( accessRecord )" + // (in other words: first locate the event record for a given event, then locate an element in that record) + return T::decodeRecord( kernelAccessRecord( buffer ), args... ); + } + + //-------------------------------------------------------------------------- + + // Locate a field (output) in a memory buffer (input) from a kernel event-indexing mechanism (internal) and the given field indexes (input) + // [Signature (const) ===> const fptype& kernelAccessFieldConst( const fptype* buffer, Ts... 
args ) <===] + template + static __host__ __device__ inline const fptype& + kernelAccessFieldConst( const fptype* buffer, + Ts... args ) // variadic template + { + return kernelAccessField( const_cast( buffer ), args... ); + } + + //-------------------------------------------------------------------------- +}; + +#endif // MemoryAccessHelpers_H diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/MemoryAccessMatrixElements.h b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/MemoryAccessMatrixElements.h new file mode 100644 index 0000000000..05f0810807 --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/MemoryAccessMatrixElements.h @@ -0,0 +1,132 @@ +#ifndef MemoryAccessMatrixElements_H +#define MemoryAccessMatrixElements_H 1 + +#include "mgOnGpuConfig.h" + +#include "MemoryAccessHelpers.h" +#include "MemoryAccessVectors.h" +#include "MemoryBuffers.h" // for HostBufferMatrixElements::isaligned + +//---------------------------------------------------------------------------- + +// A class describing the internal layout of memory buffers for matrix elements +// This implementation uses a plain ARRAY[nevt] +// [If many implementations are used, a suffix _ARRAYv1 should be appended to the class name] +class MemoryAccessMatrixElementsBase //_ARRAYv1 +{ +private: + + friend class MemoryAccessHelper; + friend class KernelAccessHelper; + friend class KernelAccessHelper; + + //-------------------------------------------------------------------------- + // NB all KernelLaunchers assume that memory access can be decomposed as "accessField = decodeRecord( accessRecord )" + // (in other words: first locate the event record for a given event, then locate an element in that record) + //-------------------------------------------------------------------------- + + // Locate an event record (output) in a memory buffer (input) from the given event number (input) + // [Signature (non-const) ===> fptype* ieventAccessRecord( fptype* buffer, const int ievt ) <===] + static __host__ __device__ inline fptype* + ieventAccessRecord( fptype* buffer, + const int ievt ) + { + return &( buffer[ievt] ); // ARRAY[nevt] + } + + //-------------------------------------------------------------------------- + + // Locate a field (output) of an event record (input) from the given field indexes (input) + // [Signature (non-const) ===> fptype& decodeRecord( fptype* buffer, Ts... args ) <===] + // [NB: expand variadic template "Ts... 
args" to empty and rename "Field" as empty] + static __host__ __device__ inline fptype& + decodeRecord( fptype* buffer ) + { + constexpr int ievt = 0; + return buffer[ievt]; // ARRAY[nevt] + } +}; + +//---------------------------------------------------------------------------- + +// A class providing access to memory buffers for a given event, based on explicit event numbers +// Its methods use the MemoryAccessHelper templates - note the use of the template keyword in template function instantiations +class MemoryAccessMatrixElements : public MemoryAccessMatrixElementsBase +{ +public: + + // Locate an event record (output) in a memory buffer (input) from the given event number (input) + // [Signature (non-const) ===> fptype* ieventAccessRecord( fptype* buffer, const int ievt ) <===] + static constexpr auto ieventAccessRecord = MemoryAccessHelper::ieventAccessRecord; + + // Locate an event record (output) in a memory buffer (input) from the given event number (input) + // [Signature (const) ===> const fptype* ieventAccessRecordConst( const fptype* buffer, const int ievt ) <===] + static constexpr auto ieventAccessRecordConst = MemoryAccessHelper::ieventAccessRecordConst; + + // Locate a field (output) of an event record (input) from the given field indexes (input) + // [Signature (non-const) ===> fptype& decodeRecord( fptype* buffer ) <===] + static constexpr auto decodeRecord = MemoryAccessHelper::decodeRecord; + + // Locate a field (output) of an event record (input) from the given field indexes (input) + // [Signature (const) ===> const fptype& decodeRecordConst( const fptype* buffer ) <===] + static constexpr auto decodeRecordConst = + MemoryAccessHelper::template decodeRecordConst<>; + + // Locate a field (output) in a memory buffer (input) from the given event number (input) and the given field indexes (input) + // [Signature (non-const) ===> fptype& ieventAccess( fptype* buffer, const ievt ) <===] + static constexpr auto ieventAccess = + MemoryAccessHelper::template ieventAccessField<>; + + // Locate a field (output) in a memory buffer (input) from the given event number (input) and the given field indexes (input) + // [Signature (const) ===> const fptype& ieventAccessConst( const fptype* buffer, const ievt ) <===] + static constexpr auto ieventAccessConst = + MemoryAccessHelper::template ieventAccessFieldConst<>; +}; + +//---------------------------------------------------------------------------- + +// A class providing access to memory buffers for a given event, based on implicit kernel rules +// Its methods use the KernelAccessHelper template - note the use of the template keyword in template function instantiations +template +class KernelAccessMatrixElements +{ +public: + + // Expose selected functions from MemoryAccessMatrixElements + static constexpr auto ieventAccessRecord = MemoryAccessMatrixElements::ieventAccessRecord; + + // Locate a field (output) in a memory buffer (input) from a kernel event-indexing mechanism (internal) and the given field indexes (input) + // [Signature (non-const, SCALAR) ===> fptype& kernelAccess_s( fptype* buffer ) <===] + static constexpr auto kernelAccess_s = + KernelAccessHelper::template kernelAccessField<>; // requires cuda 11.4 + + // Locate a field (output) in a memory buffer (input) from a kernel event-indexing mechanism (internal) + // [Signature (non const, SCALAR OR VECTOR) ===> fptype_sv& kernelAccess( const fptype* buffer ) <===] + static __host__ __device__ inline fptype_sv& + kernelAccess( fptype* buffer ) + { + fptype& out = 
kernelAccess_s( buffer ); +#ifndef MGONGPU_CPPSIMD + return out; +#else + // NB: derived from MemoryAccessMomenta, restricting the implementation to contiguous aligned arrays (#435) + static_assert( mg5amcCpu::HostBufferMatrixElements::isaligned() ); // ASSUME ALIGNED ARRAYS (reinterpret_cast will segfault otherwise!) + //assert( (size_t)( buffer ) % mgOnGpu::cppAlign == 0 ); // ASSUME ALIGNED ARRAYS (reinterpret_cast will segfault otherwise!) + return mg5amcCpu::fptypevFromAlignedArray( out ); // SIMD bulk load of neppV, use reinterpret_cast +#endif + } + + // Locate a field (output) in a memory buffer (input) from a kernel event-indexing mechanism (internal) and the given field indexes (input) + // [Signature (const) ===> const fptype& kernelAccessConst( const fptype* buffer ) <===] + static constexpr auto kernelAccessConst = + KernelAccessHelper::template kernelAccessFieldConst<>; // requires cuda 11.4 +}; + +//---------------------------------------------------------------------------- + +typedef KernelAccessMatrixElements HostAccessMatrixElements; +typedef KernelAccessMatrixElements DeviceAccessMatrixElements; + +//---------------------------------------------------------------------------- + +#endif // MemoryAccessMatrixElements_H diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/MemoryAccessMomenta.h b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/MemoryAccessMomenta.h new file mode 100644 index 0000000000..ace50b40e8 --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/MemoryAccessMomenta.h @@ -0,0 +1,260 @@ +#ifndef MemoryAccessMomenta_H +#define MemoryAccessMomenta_H 1 + +#include "mgOnGpuConfig.h" + +#include "MemoryAccessHelpers.h" +#include "MemoryAccessVectors.h" + +//---------------------------------------------------------------------------- + +// A class describing the internal layout of memory buffers for momenta +// This implementation uses an AOSOA[npagM][npar][np4][neppM] where nevt=npagM*neppM +// [If many implementations are used, a suffix _AOSOAv1 should be appended to the class name] +class MemoryAccessMomentaBase //_AOSOAv1 +{ +public: + + // Number of Events Per Page in the momenta AOSOA memory buffer layout + // (these are all best kept as a compile-time constants: see issue #23) +#ifdef __CUDACC__ /* clang-format off */ + // ----------------------------------------------------------------------------------------------- + // --- GPUs: neppM is best set to a power of 2 times the number of fptype's in a 32-byte cacheline + // --- This is relevant to ensure coalesced access to momenta in global memory + // --- Note that neppR is hardcoded and may differ from neppM and neppV on some platforms + // ----------------------------------------------------------------------------------------------- + //static constexpr int neppM = 64/sizeof(fptype); // 2x 32-byte GPU cache lines (512 bits): 8 (DOUBLE) or 16 (FLOAT) + static constexpr int neppM = 32/sizeof(fptype); // (DEFAULT) 32-byte GPU cache line (256 bits): 4 (DOUBLE) or 8 (FLOAT) + //static constexpr int neppM = 1; // *** NB: this is equivalent to AOS *** (slower: 1.03E9 instead of 1.11E9 in eemumu) +#else + // ----------------------------------------------------------------------------------------------- + // --- CPUs: neppM is best set equal to the number of fptype's (neppV) in a vector register + // --- This is relevant to ensure faster access to momenta from C++ memory cache lines + // --- However, neppM is now decoupled from neppV (issue #176) and can be separately hardcoded + // --- In 
practice, neppR, neppM and neppV could now (in principle) all be different + // ----------------------------------------------------------------------------------------------- +#ifdef MGONGPU_CPPSIMD + static constexpr int neppM = MGONGPU_CPPSIMD; // (DEFAULT) neppM=neppV for optimal performance + //static constexpr int neppM = 64/sizeof(fptype); // maximum CPU vector width (512 bits): 8 (DOUBLE) or 16 (FLOAT) + //static constexpr int neppM = 32/sizeof(fptype); // lower CPU vector width (256 bits): 4 (DOUBLE) or 8 (FLOAT) + //static constexpr int neppM = 1; // *** NB: this is equivalent to AOS *** (slower: 4.66E6 instead of 5.09E9 in eemumu) + //static constexpr int neppM = MGONGPU_CPPSIMD*2; // FOR TESTS +#else + static constexpr int neppM = 1; // (DEFAULT) neppM=neppV for optimal performance (NB: this is equivalent to AOS) +#endif +#endif /* clang-format on */ + + // SANITY CHECK: check that neppM is a power of two + static_assert( ispoweroftwo( neppM ), "neppM is not a power of 2" ); + +private: + + friend class MemoryAccessHelper; + friend class KernelAccessHelper; + friend class KernelAccessHelper; + + // The number of components of a 4-momentum + static constexpr int np4 = mgOnGpu::np4; + + // The number of particles in this physics process + static constexpr int npar = mgOnGpu::npar; + + //-------------------------------------------------------------------------- + // NB all KernelLaunchers assume that memory access can be decomposed as "accessField = decodeRecord( accessRecord )" + // (in other words: first locate the event record for a given event, then locate an element in that record) + //-------------------------------------------------------------------------- + + // Locate an event record (output) in a memory buffer (input) from the given event number (input) + // [Signature (non-const) ===> fptype* ieventAccessRecord( fptype* buffer, const int ievt ) <===] + static __host__ __device__ inline fptype* + ieventAccessRecord( fptype* buffer, + const int ievt ) + { + const int ipagM = ievt / neppM; // #event "M-page" + const int ieppM = ievt % neppM; // #event in the current event M-page + constexpr int ip4 = 0; + constexpr int ipar = 0; + return &( buffer[ipagM * npar * np4 * neppM + ipar * np4 * neppM + ip4 * neppM + ieppM] ); // AOSOA[ipagM][ipar][ip4][ieppM] + } + + //-------------------------------------------------------------------------- + + // Locate a field (output) of an event record (input) from the given field indexes (input) + // [Signature (non-const) ===> fptype& decodeRecord( fptype* buffer, Ts... args ) <===] + // [NB: expand variadic template "Ts... 
args" to "const int ip4, const int ipar" and rename "Field" as "Ip4Ipar"] + static __host__ __device__ inline fptype& + decodeRecord( fptype* buffer, + const int ip4, + const int ipar ) + { + constexpr int ipagM = 0; + constexpr int ieppM = 0; + return buffer[ipagM * npar * np4 * neppM + ipar * np4 * neppM + ip4 * neppM + ieppM]; // AOSOA[ipagM][ipar][ip4][ieppM] + } +}; + +//---------------------------------------------------------------------------- + +// A class providing access to memory buffers for a given event, based on explicit event numbers +// Its methods use the MemoryAccessHelper templates - note the use of the template keyword in template function instantiations +class MemoryAccessMomenta : public MemoryAccessMomentaBase +{ +public: + + // Locate an event record (output) in a memory buffer (input) from the given event number (input) + // [Signature (non-const) ===> fptype* ieventAccessRecord( fptype* buffer, const int ievt ) <===] + static constexpr auto ieventAccessRecord = MemoryAccessHelper::ieventAccessRecord; + + // Locate an event record (output) in a memory buffer (input) from the given event number (input) + // [Signature (const) ===> const fptype* ieventAccessRecordConst( const fptype* buffer, const int ievt ) <===] + static constexpr auto ieventAccessRecordConst = MemoryAccessHelper::ieventAccessRecordConst; + + // Locate a field (output) of an event record (input) from the given field indexes (input) + // [Signature (non-const) ===> fptype& decodeRecord( fptype* buffer, const int ipar, const int ipar ) <===] + static constexpr auto decodeRecordIp4Ipar = MemoryAccessHelper::decodeRecord; + + // Locate a field (output) of an event record (input) from the given field indexes (input) + // [Signature (const) ===> const fptype& decodeRecordConst( const fptype* buffer, const int ipar, const int ipar ) <===] + static constexpr auto decodeRecordIp4IparConst = + MemoryAccessHelper::template decodeRecordConst; + + // Locate a field (output) in a memory buffer (input) from the given event number (input) and the given field indexes (input) + // [Signature (non-const) ===> fptype& ieventAccessIp4Ipar( fptype* buffer, const ievt, const int ipar, const int ipar ) <===] + static constexpr auto ieventAccessIp4Ipar = + MemoryAccessHelper::template ieventAccessField; + + // Locate a field (output) in a memory buffer (input) from the given event number (input) and the given field indexes (input) + // [Signature (const) ===> const fptype& ieventAccessIp4IparConst( const fptype* buffer, const ievt, const int ipar, const int ipar ) <===] + // DEFAULT VERSION + static constexpr auto ieventAccessIp4IparConst = + MemoryAccessHelper::template ieventAccessFieldConst; + + /* + // Locate a field (output) in a memory buffer (input) from the given event number (input) and the given field indexes (input) + // [Signature (const) ===> const fptype& ieventAccessIp4IparConst( const fptype* buffer, const ievt, const int ipar, const int ipar ) <===] + // DEBUG VERSION WITH PRINTOUTS + static __host__ __device__ inline const fptype& + ieventAccessIp4IparConst( const fptype* buffer, + const int ievt, + const int ip4, + const int ipar ) + { + const fptype& out = MemoryAccessHelper::template ieventAccessFieldConst( buffer, ievt, ip4, ipar ); + printf( "ipar=%2d ip4=%2d ievt=%8d out=%8.3f\n", ipar, ip4, ievt, out ); + return out; + } + */ +}; + +//---------------------------------------------------------------------------- + +// A class providing access to memory buffers for a given event, based on implicit 
kernel rules +// Its methods use the KernelAccessHelper template - note the use of the template keyword in template function instantiations +template +class KernelAccessMomenta +{ +public: + + // Expose selected functions from MemoryAccessMomenta + static constexpr auto ieventAccessRecordConst = MemoryAccessMomenta::ieventAccessRecordConst; + + // Locate a field (output) in a memory buffer (input) from a kernel event-indexing mechanism (internal) and the given field indexes (input) + // [Signature (non-const, SCALAR) ===> fptype& kernelAccessIp4Ipar( fptype* buffer, const int ipar, const int ipar ) <===] + static constexpr auto kernelAccessIp4Ipar = + KernelAccessHelper::template kernelAccessField; + + // Locate a field (output) in a memory buffer (input) from a kernel event-indexing mechanism (internal) and the given field indexes (input) + // [Signature (const, SCALAR) ===> const fptype& kernelAccessIp4IparConst( const fptype* buffer, const int ipar, const int ipar ) <===] + // DEFAULT VERSION + static constexpr auto kernelAccessIp4IparConst_s = + KernelAccessHelper::template kernelAccessFieldConst; + + /* + // Locate a field (output) in a memory buffer (input) from a kernel event-indexing mechanism (internal) and the given field indexes (input) + // [Signature (const, SCALAR) ===> const fptype& kernelAccessIp4IparConst( const fptype* buffer, const int ipar, const int ipar ) <===] + // DEBUG VERSION WITH PRINTOUTS + static __host__ __device__ inline const fptype& + kernelAccessIp4IparConst_s( const fptype* buffer, + const int ip4, + const int ipar ) + { + const fptype& out = KernelAccessHelper::template kernelAccessFieldConst( buffer, ip4, ipar ); + printf( "ipar=%2d ip4=%2d ievt='kernel' out=%8.3f\n", ipar, ip4, out ); + return out; + } + */ + + // Locate a field (output) in a memory buffer (input) from a kernel event-indexing mechanism (internal) and the given field indexes (input) + // [Signature (const, SCALAR OR VECTOR) ===> fptype_sv kernelAccessIp4IparConst( const fptype* buffer, const int ipar, const int ipar ) <===] + // FIXME? Eventually return by const reference and support aligned arrays only? + // FIXME? Currently return by value to support also unaligned and arbitrary arrays + static __host__ __device__ inline fptype_sv + kernelAccessIp4IparConst( const fptype* buffer, + const int ip4, + const int ipar ) + { + const fptype& out = kernelAccessIp4IparConst_s( buffer, ip4, ipar ); +#ifndef MGONGPU_CPPSIMD + return out; +#else + constexpr int neppM = MemoryAccessMomentaBase::neppM; + constexpr bool useContiguousEventsIfPossible = true; // DEFAULT + //constexpr bool useContiguousEventsIfPossible = false; // FOR PERFORMANCE TESTS (treat as arbitrary array even if it is an AOSOA) + // Use c++17 "if constexpr": compile-time branching + if constexpr( useContiguousEventsIfPossible && ( neppM >= neppV ) && ( neppM % neppV == 0 ) ) + { + //constexpr bool skipAlignmentCheck = true; // FASTEST (SEGFAULTS IF MISALIGNED ACCESS, NEEDS A SANITY CHECK ELSEWHERE!) + constexpr bool skipAlignmentCheck = false; // DEFAULT: A BIT SLOWER BUT SAFER [ALLOWS MISALIGNED ACCESS] + if constexpr( skipAlignmentCheck ) + { + //static bool first=true; if( first ){ std::cout << "WARNING! assume aligned AOSOA, skip check" << std::endl; first=false; } // SLOWER (5.06E6) + // FASTEST? (5.09E6 in eemumu 512y) + // This assumes alignment for momenta1d without checking - causes segmentation fault in reinterpret_cast if not aligned! 
+ return mg5amcCpu::fptypevFromAlignedArray( out ); // use reinterpret_cast + } + else if( (size_t)( buffer ) % mgOnGpu::cppAlign == 0 ) + { + //static bool first=true; if( first ){ std::cout << "WARNING! aligned AOSOA, reinterpret cast" << std::endl; first=false; } // SLOWER (5.00E6) + // DEFAULT! A tiny bit (<1%) slower because of the alignment check (5.07E6 in eemumu 512y) + // This explicitly checks buffer alignment to avoid segmentation faults in reinterpret_cast + return mg5amcCpu::fptypevFromAlignedArray( out ); // SIMD bulk load of neppV, use reinterpret_cast + } + else + { + //static bool first=true; if( first ){ std::cout << "WARNING! AOSOA but no reinterpret cast" << std::endl; first=false; } // SLOWER (4.93E6) + // A bit (1%) slower (5.05E6 in eemumu 512y) + // This does not require buffer alignment, but it requires AOSOA with neppM>=neppV and neppM%neppV==0 + return mg5amcCpu::fptypevFromUnalignedArray( out ); // SIMD bulk load of neppV, do not use reinterpret_cast (fewer SIMD operations) + } + } + else + { + //static bool first=true; if( first ){ std::cout << "WARNING! arbitrary array" << std::endl; first=false; } // SLOWER (5.08E6) + // ?!Used to be much slower, now a tiny bit faster for AOSOA?! (5.11E6 for AOSOA, 4.64E6 for AOS in eemumu 512y) + // This does not even require AOSOA with neppM>=neppV and neppM%neppV==0 (e.g. can be used with AOS neppM==1) + constexpr int ievt0 = 0; // just make it explicit in the code that buffer refers to a given ievt0 and decoderIeppV fetches event ievt0+ieppV + auto decoderIeppv = [buffer, ip4, ipar]( int ieppV ) + -> const fptype& + { return MemoryAccessMomenta::ieventAccessIp4IparConst( buffer, ievt0 + ieppV, ip4, ipar ); }; + return mg5amcCpu::fptypevFromArbitraryArray( decoderIeppv ); // iterate over ieppV in neppV (no SIMD) + } +#endif + } + + // Is this a HostAccess or DeviceAccess class? 
+ // [this is only needed for a warning printout in rambo.h for nparf==1 #358]
+ static __host__ __device__ inline constexpr bool
+ isOnDevice()
+ {
+ return onDevice;
+ }
+};
+
+//----------------------------------------------------------------------------
+
+typedef KernelAccessMomenta<false> HostAccessMomenta;
+typedef KernelAccessMomenta<true> DeviceAccessMomenta;
+
+//----------------------------------------------------------------------------
+
+#endif // MemoryAccessMomenta_H
diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/MemoryAccessNumerators.h b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/MemoryAccessNumerators.h
new file mode 100644
index 0000000000..e5f81381a9
--- /dev/null
+++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/MemoryAccessNumerators.h
@@ -0,0 +1,18 @@
+#ifndef MemoryAccessNumerators_H
+#define MemoryAccessNumerators_H 1
+#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
+
+#include "MemoryAccessGs.h"
+
+//----------------------------------------------------------------------------
+
+// A class describing the internal layout of memory buffers for numerators
+// This implementation reuses the plain ARRAY[nevt] implementation of MemoryAccessGs
+
+typedef KernelAccessGs<false> HostAccessNumerators;
+typedef KernelAccessGs<true> DeviceAccessNumerators;
+
+//----------------------------------------------------------------------------
+
+#endif
+#endif // MemoryAccessNumerators_H
diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/MemoryAccessRandomNumbers.h b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/MemoryAccessRandomNumbers.h
new file mode 100644
index 0000000000..a7ff24243f
--- /dev/null
+++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/MemoryAccessRandomNumbers.h
@@ -0,0 +1,132 @@
+#ifndef MemoryAccessRandomNumbers_H
+#define MemoryAccessRandomNumbers_H 1
+
+#include "mgOnGpuConfig.h"
+
+#include "MemoryAccessHelpers.h"
+
+//----------------------------------------------------------------------------
+
+// A class describing the internal layout of memory buffers for random numbers
+// This implementation uses an AOSOA[npagR][nparf][np4][neppR] where nevt=npagR*neppR
+// [If many implementations are used, a suffix _AOSOAv1 should be appended to the class name]
+class MemoryAccessRandomNumbersBase //_AOSOAv1
+{
+public: /* clang-format off */
+
+ // Number of Events Per Page in the random number AOSOA memory buffer layout
+ // *** NB Different values of neppR lead to different physics results: the ***
+ // *** same 1d array is generated, but it is interpreted in different ways ***
+ static constexpr int neppR = 8; // HARDCODED TO GIVE ALWAYS THE SAME PHYSICS RESULTS!
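As a concrete illustration of the AOSOA[npagR][nparf][np4][neppR] layout described above, and of why neppR is hardcoded (the same flat array of random numbers must be decoded identically on every platform to reproduce the same physics), the flat index computed by ieventAccessRecord and decodeRecord further down in this header can be sketched in isolation. This is a minimal standalone sketch, not part of the generated patch: aosoaIndexR is a hypothetical name, and the constants are local stand-ins for mgOnGpu::np4, mgOnGpu::nparf and the hardcoded neppR; the commented-out AOS alternative (neppR = 1) follows just below in the generated header.

#include <cstddef>

// Sketch only: mirror of the AOSOA[npagR][nparf][np4][neppR] index used for random numbers
constexpr int np4 = 4;   // stand-in for mgOnGpu::np4 (components of a 4-momentum)
constexpr int nparf = 4; // stand-in for mgOnGpu::nparf (final-state particles in g g > t t~ t t~)
constexpr int neppR = 8; // events per "R-page", hardcoded as above

constexpr std::size_t aosoaIndexR( int ievt, int iparf, int ip4 )
{
  const int ipagR = ievt / neppR; // which R-page the event lives on
  const int ieppR = ievt % neppR; // position of the event within that R-page
  return ipagR * nparf * np4 * neppR + iparf * np4 * neppR + ip4 * neppR + ieppR;
}

// Event 9 sits on page 1, slot 1: its first random number comes after the 128 numbers of page 0
static_assert( aosoaIndexR( 9, 0, 0 ) == nparf * np4 * neppR + 1, "unexpected AOSOA index" );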
+ //static constexpr int neppR = 1; // AOS (tests of sectors/requests) + +private: /* clang-format on */ + + friend class MemoryAccessHelper; + friend class KernelAccessHelper; + friend class KernelAccessHelper; + + // The number of components of a 4-momentum + static constexpr int np4 = mgOnGpu::np4; + + // The number of final state particles in this physics process + static constexpr int nparf = mgOnGpu::nparf; + + //-------------------------------------------------------------------------- + // NB all KernelLaunchers assume that memory access can be decomposed as "accessField = decodeRecord( accessRecord )" + // (in other words: first locate the event record for a given event, then locate an element in that record) + //-------------------------------------------------------------------------- + + // Locate an event record (output) in a memory buffer (input) from the given event number (input) + // [Signature (non-const) ===> fptype* ieventAccessRecord( fptype* buffer, const int ievt ) <===] + static __host__ __device__ inline fptype* + ieventAccessRecord( fptype* buffer, + const int ievt ) + { + const int ipagR = ievt / neppR; // #event "R-page" + const int ieppR = ievt % neppR; // #event in the current event R-page + constexpr int ip4 = 0; + constexpr int iparf = 0; + return &( buffer[ipagR * nparf * np4 * neppR + iparf * np4 * neppR + ip4 * neppR + ieppR] ); // AOSOA[ipagR][iparf][ip4][ieppR] + } + + //-------------------------------------------------------------------------- + + // Locate a field (output) of an event record (input) from the given field indexes (input) + // [Signature (non-const) ===> fptype& decodeRecord( fptype* buffer, Ts... args ) <===] + // [NB: expand variadic template "Ts... args" to "const int ip4, const int iparf" and rename "Field" as "Ip4Iparf"] + static __host__ __device__ inline fptype& + decodeRecord( fptype* buffer, + const int ip4, + const int iparf ) + { + constexpr int ipagR = 0; + constexpr int ieppR = 0; + return buffer[ipagR * nparf * np4 * neppR + iparf * np4 * neppR + ip4 * neppR + ieppR]; // AOSOA[ipagR][iparf][ip4][ieppR] + } +}; + +//---------------------------------------------------------------------------- + +// A class providing access to memory buffers for a given event, based on explicit event numbers +// Its methods use the MemoryAccessHelper templates - note the use of the template keyword in template function instantiations +class MemoryAccessRandomNumbers : public MemoryAccessRandomNumbersBase +{ +public: + + // Locate an event record (output) in a memory buffer (input) from the given event number (input) + // [Signature (non-const) ===> fptype* ieventAccessRecord( fptype* buffer, const int ievt ) <===] + static constexpr auto ieventAccessRecord = MemoryAccessHelper::ieventAccessRecord; + + // Locate an event record (output) in a memory buffer (input) from the given event number (input) + // [Signature (const) ===> const fptype* ieventAccessRecordConst( const fptype* buffer, const int ievt ) <===] + static constexpr auto ieventAccessRecordConst = MemoryAccessHelper::ieventAccessRecordConst; + + // Locate a field (output) of an event record (input) from the given field indexes (input) + // [Signature (non-const) ===> fptype& decodeRecord( fptype* buffer, const int ipar, const int iparf ) <===] + static constexpr auto decodeRecordIp4Iparf = MemoryAccessHelper::decodeRecord; + + // Locate a field (output) of an event record (input) from the given field indexes (input) + // [Signature (const) ===> const fptype& decodeRecordConst( const 
fptype* buffer, const int ipar, const int iparf ) <===] + static constexpr auto decodeRecordIp4IparfConst = + MemoryAccessHelper::template decodeRecordConst; + + // Locate a field (output) in a memory buffer (input) from the given event number (input) and the given field indexes (input) + // [Signature (non-const) ===> fptype& ieventAccessIp4Iparf( fptype* buffer, const ievt, const int ipar, const int iparf ) <===] + static constexpr auto ieventAccessIp4Iparf = + MemoryAccessHelper::template ieventAccessField; + + // Locate a field (output) in a memory buffer (input) from the given event number (input) and the given field indexes (input) + // [Signature (const) ===> const fptype& ieventAccessIp4IparfConst( const fptype* buffer, const ievt, const int ipar, const int iparf ) <===] + static constexpr auto ieventAccessIp4IparfConst = + MemoryAccessHelper::template ieventAccessFieldConst; +}; + +//---------------------------------------------------------------------------- + +// A class providing access to memory buffers for a given event, based on implicit kernel rules +// Its methods use the KernelAccessHelper template - note the use of the template keyword in template function instantiations +template +class KernelAccessRandomNumbers +{ +public: + + // Locate a field (output) in a memory buffer (input) from a kernel event-indexing mechanism (internal) and the given field indexes (input) + // [Signature (non-const) ===> fptype& kernelAccessIp4Iparf( fptype* buffer, const int ipar, const int iparf ) <===] + static constexpr auto kernelAccessIp4Iparf = + KernelAccessHelper::template kernelAccessField; + + // Locate a field (output) in a memory buffer (input) from a kernel event-indexing mechanism (internal) and the given field indexes (input) + // [Signature (const) ===> const fptype& kernelAccessIp4IparfConst( const fptype* buffer, const int ipar, const int iparf ) <===] + static constexpr auto kernelAccessIp4IparfConst = + KernelAccessHelper::template kernelAccessFieldConst; +}; + +//---------------------------------------------------------------------------- + +typedef KernelAccessRandomNumbers HostAccessRandomNumbers; +typedef KernelAccessRandomNumbers DeviceAccessRandomNumbers; + +//---------------------------------------------------------------------------- + +#endif // MemoryAccessRandomNumbers_H diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/MemoryAccessVectors.h b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/MemoryAccessVectors.h new file mode 100644 index 0000000000..2697cdad52 --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/MemoryAccessVectors.h @@ -0,0 +1,122 @@ +#ifndef MemoryAccessVectors_H +#define MemoryAccessVectors_H 1 + +#include "mgOnGpuConfig.h" + +#include "mgOnGpuVectors.h" + +#ifndef __CUDACC__ +namespace mg5amcCpu // this is only needed for CPU SIMD vectorization +{ + +#ifdef MGONGPU_CPPSIMD + //-------------------------------------------------------------------------- + + // Cast one non-const fptype_v reference (one vector of neppV fptype values) from one non-const fptype reference (#435), + // assuming that "pointer(evt#0)+1" indicates "pointer(evt#1)", and that the arrays are aligned + inline fptype_v& fptypevFromAlignedArray( fptype& ref ) + { + return *reinterpret_cast( &ref ); + } + + // Cast one const fptype_v reference (one vector of neppV fptype values) from one const fptype reference, + // assuming that "pointer(evt#0)+1" indicates "pointer(evt#1)", and that the arrays are aligned + inline const fptype_v& fptypevFromAlignedArray( 
const fptype& ref ) + { + return *reinterpret_cast( &ref ); + } + + // Build one fptype_v (one vector of neppV fptype values) from one fptype reference, + // assuming that "pointer(evt#0)+1" indicates "pointer(evt#1)", but that the arrays are not aligned + inline fptype_v fptypevFromUnalignedArray( const fptype& ref ) + { +#if MGONGPU_CPPSIMD == 2 + return fptype_v{ *( &ref ), // explicit initialization of all array elements (2) + *( &ref + 1 ) }; +#elif MGONGPU_CPPSIMD == 4 + return fptype_v{ *( &ref ), // explicit initialization of all array elements (4) + *( &ref + 1 ), + *( &ref + 2 ), + *( &ref + 3 ) }; +#elif MGONGPU_CPPSIMD == 8 + return fptype_v{ *( &ref ), // explicit initialization of all array elements (8) + *( &ref + 1 ), + *( &ref + 2 ), + *( &ref + 3 ), + *( &ref + 4 ), + *( &ref + 5 ), + *( &ref + 6 ), + *( &ref + 7 ) }; +#elif MGONGPU_CPPSIMD == 16 + return fptype_v{ *( &ref ), // explicit initialization of all array elements (16) + *( &ref + 1 ), + *( &ref + 2 ), + *( &ref + 3 ), + *( &ref + 4 ), + *( &ref + 5 ), + *( &ref + 6 ), + *( &ref + 7 ), + *( &ref + 8 ), + *( &ref + 9 ), + *( &ref + 10 ), + *( &ref + 11 ), + *( &ref + 12 ), + *( &ref + 13 ), + *( &ref + 14 ), + *( &ref + 15 ) }; +#else +#error Internal error! Unknown MGONGPU_CPPSIMD value +#endif + } + + // Build one fptype_v (one vector of neppV fptype values) from one fptype reference, + // with no a priori assumption on how the input fptype array should be decoded + template + inline fptype_v fptypevFromArbitraryArray( Functor decoderIeppv ) + { +#if MGONGPU_CPPSIMD == 2 + return fptype_v{ decoderIeppv( 0 ), // explicit initialization of all array elements (2) + decoderIeppv( 1 ) }; +#elif MGONGPU_CPPSIMD == 4 + return fptype_v{ decoderIeppv( 0 ), // explicit initialization of all array elements (4) + decoderIeppv( 1 ), + decoderIeppv( 2 ), + decoderIeppv( 3 ) }; +#elif MGONGPU_CPPSIMD == 8 + return fptype_v{ decoderIeppv( 0 ), // explicit initialization of all array elements (8) + decoderIeppv( 1 ), + decoderIeppv( 2 ), + decoderIeppv( 3 ), + decoderIeppv( 4 ), + decoderIeppv( 5 ), + decoderIeppv( 6 ), + decoderIeppv( 7 ) }; +#elif MGONGPU_CPPSIMD == 16 + return fptype_v{ decoderIeppv( 0 ), // explicit initialization of all array elements (16) + decoderIeppv( 1 ), + decoderIeppv( 2 ), + decoderIeppv( 3 ), + decoderIeppv( 4 ), + decoderIeppv( 5 ), + decoderIeppv( 6 ), + decoderIeppv( 7 ), + decoderIeppv( 8 ), + decoderIeppv( 9 ), + decoderIeppv( 10 ), + decoderIeppv( 11 ), + decoderIeppv( 12 ), + decoderIeppv( 13 ), + decoderIeppv( 14 ), + decoderIeppv( 15 ) }; +#else +#error Internal error! 
Unknown MGONGPU_CPPSIMD value +#endif + } + + //-------------------------------------------------------------------------- +#endif + +} // end namespace +#endif + +#endif // MemoryAccessVectors_H diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/MemoryAccessWavefunctions.h b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/MemoryAccessWavefunctions.h new file mode 100644 index 0000000000..738eef9a02 --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/MemoryAccessWavefunctions.h @@ -0,0 +1,155 @@ +#ifndef MemoryAccessWavefunctions_H +#define MemoryAccessWavefunctions_H 1 + +#include "mgOnGpuConfig.h" + +#include "mgOnGpuCxtypes.h" + +#include "MemoryAccessHelpers.h" + +#define MGONGPU_TRIVIAL_WAVEFUNCTIONS 1 + +//---------------------------------------------------------------------------- + +#ifndef MGONGPU_TRIVIAL_WAVEFUNCTIONS + +// A class describing the internal layout of memory buffers for wavefunctions +// This implementation uses an AOSOA[npagW][nw6][nx2][neppW] where nevt=npagW*neppW +// [If many implementations are used, a suffix _AOSOAv1 should be appended to the class name] +class MemoryAccessWavefunctionsBase //_AOSOAv1 +{ +public: + + // Number of Events Per Page in the wavefunction AOSOA memory buffer layout + static constexpr int neppW = 1; // AOS (just a test...) + +private: + + friend class MemoryAccessHelper; + friend class KernelAccessHelper; + friend class KernelAccessHelper; + + // The number of components of a (fermion or vector) wavefunction + static constexpr int nw6 = mgOnGpu::nw6; + + // The number of floating point components of a complex number + static constexpr int nx2 = mgOnGpu::nx2; + + //-------------------------------------------------------------------------- + // NB all KernelLaunchers assume that memory access can be decomposed as "accessField = decodeRecord( accessRecord )" + // (in other words: first locate the event record for a given event, then locate an element in that record) + //-------------------------------------------------------------------------- + + // Locate an event record (output) in a memory buffer (input) from the given event number (input) + // [Signature (non-const) ===> fptype* ieventAccessRecord( fptype* buffer, const int ievt ) <===] + static __host__ __device__ inline fptype* + ieventAccessRecord( fptype* buffer, + const int ievt ) + { + const int ipagW = ievt / neppW; // #event "W-page" + const int ieppW = ievt % neppW; // #event in the current event W-page + constexpr int iw6 = 0; + constexpr int ix2 = 0; + return &( buffer[ipagW * nw6 * nx2 * neppW + iw6 * nx2 * neppW + ix2 * neppW + ieppW] ); // AOSOA[ipagW][iw6][ix2][ieppW] + } + + //-------------------------------------------------------------------------- + + // Locate a field (output) of an event record (input) from the given field indexes (input) + // [Signature (non-const) ===> fptype& decodeRecord( fptype* buffer, Ts... args ) <===] + // [NB: expand variadic template "Ts... 
args" to "const int iw6, const int ix2" and rename "Field" as "Iw6Ix2"] + static __host__ __device__ inline fptype& + decodeRecord( fptype* buffer, + const int iw6, + const int ix2 ) + { + constexpr int ipagW = 0; + constexpr int ieppW = 0; + return buffer[ipagW * nw6 * nx2 * neppW + iw6 * nx2 * neppW + ix2 * neppW + ieppW]; // AOSOA[ipagW][iw6][ix2][ieppW] + } +}; + +//---------------------------------------------------------------------------- + +// A class providing access to memory buffers for a given event, based on explicit event numbers +// Its methods use the MemoryAccessHelper templates - note the use of the template keyword in template function instantiations +class MemoryAccessWavefunctions : public MemoryAccessWavefunctionsBase +{ +public: + + // Locate an event record (output) in a memory buffer (input) from the given event number (input) + // [Signature (non-const) ===> fptype* ieventAccessRecord( fptype* buffer, const int ievt ) <===] + static constexpr auto ieventAccessRecord = MemoryAccessHelper::ieventAccessRecord; + + // Locate an event record (output) in a memory buffer (input) from the given event number (input) + // [Signature (const) ===> const fptype* ieventAccessRecordConst( const fptype* buffer, const int ievt ) <===] + static constexpr auto ieventAccessRecordConst = MemoryAccessHelper::ieventAccessRecordConst; + + // Locate a field (output) of an event record (input) from the given field indexes (input) + // [Signature (non-const) ===> fptype& decodeRecord( fptype* buffer, const int iw6, const int ix2 ) <===] + static constexpr auto decodeRecordIw6Ix2 = MemoryAccessHelper::decodeRecord; + + // Locate a field (output) of an event record (input) from the given field indexes (input) + // [Signature (const) ===> const fptype& decodeRecordConst( const fptype* buffer, const int iw6, const int ix2 ) <===] + static constexpr auto decodeRecordIw6Ix2Const = + MemoryAccessHelper::template decodeRecordConst; + + // Locate a field (output) in a memory buffer (input) from the given event number (input) and the given field indexes (input) + // [Signature (non-const) ===> fptype& ieventAccessIw6Ix2( fptype* buffer, const ievt, const int iw6, const int ix2 ) <===] + static constexpr auto ieventAccessIw6Ix2 = + MemoryAccessHelper::template ieventAccessField; + + // Locate a field (output) in a memory buffer (input) from the given event number (input) and the given field indexes (input) + // [Signature (const) ===> const fptype& ieventAccessIw6Ix2Const( const fptype* buffer, const ievt, const int iw6, const int ix2 ) <===] + static constexpr auto ieventAccessIw6Ix2Const = + MemoryAccessHelper::template ieventAccessFieldConst; +}; + +#endif // #ifndef MGONGPU_TRIVIAL_WAVEFUNCTIONS + +//---------------------------------------------------------------------------- + +// A class providing access to memory buffers for a given event, based on implicit kernel rules +// Its methods use the KernelAccessHelper template - note the use of the template keyword in template function instantiations +template +class KernelAccessWavefunctions +{ +public: + +#ifndef MGONGPU_TRIVIAL_WAVEFUNCTIONS + + // Locate a field (output) in a memory buffer (input) from a kernel event-indexing mechanism (internal) and the given field indexes (input) + // [Signature (non-const) ===> fptype& kernelAccessIw6Ix2( fptype* buffer, const int iw6, const int ix2 ) <===] + static constexpr auto kernelAccessIw6Ix2 = + KernelAccessHelper::template kernelAccessField; + + // Locate a field (output) in a memory buffer (input) 
from a kernel event-indexing mechanism (internal) and the given field indexes (input) + // [Signature (const) ===> const fptype& kernelAccessIw6Ix2Const( const fptype* buffer, const int iw6, const int ix2 ) <===] + static constexpr auto kernelAccessIw6Ix2Const = + KernelAccessHelper::template kernelAccessFieldConst; + +#else + + static __host__ __device__ inline cxtype_sv* + kernelAccess( fptype* buffer ) + { + return reinterpret_cast( buffer ); + } + + static __host__ __device__ inline const cxtype_sv* + kernelAccessConst( const fptype* buffer ) + { + return reinterpret_cast( buffer ); + } + +#endif // #ifndef MGONGPU_TRIVIAL_WAVEFUNCTIONS +}; + +//---------------------------------------------------------------------------- + +typedef KernelAccessWavefunctions HostAccessWavefunctions; +typedef KernelAccessWavefunctions DeviceAccessWavefunctions; + +//---------------------------------------------------------------------------- + +#endif // MemoryAccessWavefunctions_H diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/MemoryAccessWeights.h b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/MemoryAccessWeights.h new file mode 100644 index 0000000000..3915657657 --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/MemoryAccessWeights.h @@ -0,0 +1,135 @@ +#ifndef MemoryAccessWeights_H +#define MemoryAccessWeights_H 1 + +#include "mgOnGpuConfig.h" + +#include "MemoryAccessHelpers.h" + +//---------------------------------------------------------------------------- + +// A class describing the internal layout of memory buffers for weights +// This implementation uses a plain ARRAY[nevt] +// [If many implementations are used, a suffix _ARRAYv1 should be appended to the class name] +class MemoryAccessWeightsBase //_ARRAYv1 +{ +private: + + friend class MemoryAccessHelper; + friend class KernelAccessHelper; + friend class KernelAccessHelper; + + //-------------------------------------------------------------------------- + // NB all KernelLaunchers assume that memory access can be decomposed as "accessField = decodeRecord( accessRecord )" + // (in other words: first locate the event record for a given event, then locate an element in that record) + //-------------------------------------------------------------------------- + + // Locate an event record (output) in a memory buffer (input) from the given event number (input) + // [Signature (non-const) ===> fptype* ieventAccessRecord( fptype* buffer, const int ievt ) <===] + static __host__ __device__ inline fptype* + ieventAccessRecord( fptype* buffer, + const int ievt ) + { + return &( buffer[ievt] ); // ARRAY[nevt] + } + + //-------------------------------------------------------------------------- + + // Locate a field (output) of an event record (input) from the given field indexes (input) + // [Signature (non-const) ===> fptype& decodeRecord( fptype* buffer, Ts... args ) <===] + // [NB: expand variadic template "Ts... 
args" to empty and rename "Field" as empty] + static __host__ __device__ inline fptype& + decodeRecord( fptype* buffer ) + { + constexpr int ievt = 0; + return buffer[ievt]; // ARRAY[nevt] + } +}; + +//---------------------------------------------------------------------------- + +// A class providing access to memory buffers for a given event, based on explicit event numbers +// Its methods use the MemoryAccessHelper templates - note the use of the template keyword in template function instantiations +class MemoryAccessWeights : public MemoryAccessWeightsBase +{ +public: + + // Locate an event record (output) in a memory buffer (input) from the given event number (input) + // [Signature (non-const) ===> fptype* ieventAccessRecord( fptype* buffer, const int ievt ) <===] + static constexpr auto ieventAccessRecord = MemoryAccessHelper::ieventAccessRecord; + + // Locate an event record (output) in a memory buffer (input) from the given event number (input) + // [Signature (const) ===> const fptype* ieventAccessRecordConst( const fptype* buffer, const int ievt ) <===] + static constexpr auto ieventAccessRecordConst = MemoryAccessHelper::ieventAccessRecordConst; + + // Locate a field (output) of an event record (input) from the given field indexes (input) + // [Signature (non-const) ===> fptype& decodeRecord( fptype* buffer ) <===] + static constexpr auto decodeRecord = MemoryAccessHelper::decodeRecord; + + // Locate a field (output) of an event record (input) from the given field indexes (input) + // [Signature (const) ===> const fptype& decodeRecordConst( const fptype* buffer ) <===] + static constexpr auto decodeRecordConst = + MemoryAccessHelper::template decodeRecordConst<>; + + // Locate a field (output) in a memory buffer (input) from the given event number (input) and the given field indexes (input) + // [Signature (non-const) ===> fptype& ieventAccess( fptype* buffer, const ievt ) <===] + static constexpr auto ieventAccess = + MemoryAccessHelper::template ieventAccessField<>; + + // Locate a field (output) in a memory buffer (input) from the given event number (input) and the given field indexes (input) + // [Signature (const) ===> const fptype& ieventAccessConst( const fptype* buffer, const ievt ) <===] + static constexpr auto ieventAccessConst = + MemoryAccessHelper::template ieventAccessFieldConst<>; +}; + +//---------------------------------------------------------------------------- + +// A class providing access to memory buffers for a given event, based on implicit kernel rules +// Its methods use the KernelAccessHelper template - note the use of the template keyword in template function instantiations +template +class KernelAccessWeights +{ +public: + + /* + // Locate a field (output) in a memory buffer (input) from a kernel event-indexing mechanism (internal) and the given field indexes (input) + // [Signature (non-const) ===> fptype& kernelAccess( fptype* buffer ) <===] + // FINAL IMPLEMENTATION FOR CUDA 11.4 + static constexpr auto kernelAccess = + KernelAccessHelper::template kernelAccessField<>; + */ + + // Locate a field (output) in a memory buffer (input) from a kernel event-indexing mechanism (internal) and the given field indexes (input) + // [Signature (non-const) ===> fptype& kernelAccess( fptype* buffer ) <===] + // TEMPORARY HACK FOR CUDA 11.1 + static __host__ __device__ inline fptype& + kernelAccess( fptype* buffer ) + { + return KernelAccessHelper::template kernelAccessField<>( buffer ); + } + + /* + // Locate a field (output) in a memory buffer (input) from a 
kernel event-indexing mechanism (internal) and the given field indexes (input) + // [Signature (const) ===> const fptype& kernelAccessConst( const fptype* buffer ) <===] + // FINAL IMPLEMENTATION FOR CUDA 11.4 + static constexpr auto kernelAccessConst = + KernelAccessHelper::template kernelAccessFieldConst<>; + */ + + // Locate a field (output) in a memory buffer (input) from a kernel event-indexing mechanism (internal) and the given field indexes (input) + // [Signature (const) ===> const fptype& kernelAccessConst( const fptype* buffer ) <===] + // TEMPORARY HACK FOR CUDA 11.1 + static __host__ __device__ inline const fptype& + kernelAccessConst( const fptype* buffer ) + { + return KernelAccessHelper::template kernelAccessFieldConst<>( buffer ); + } +}; + +//---------------------------------------------------------------------------- + +typedef KernelAccessWeights HostAccessWeights; +typedef KernelAccessWeights DeviceAccessWeights; + +//---------------------------------------------------------------------------- + +#endif // MemoryAccessWeights_H diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/MemoryBuffers.h b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/MemoryBuffers.h new file mode 100644 index 0000000000..1d8f404c6d --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/MemoryBuffers.h @@ -0,0 +1,530 @@ +#ifndef MemoryBuffers_H +#define MemoryBuffers_H 1 + +#include "mgOnGpuConfig.h" + +#include "mgOnGpuCxtypes.h" + +#include "CudaRuntime.h" +#include "Parameters_SMEFTsim_topU3l_MwScheme_UFO.h" + +#include + +#ifdef __CUDACC__ +namespace mg5amcGpu +#else +namespace mg5amcCpu +#endif +{ + //-------------------------------------------------------------------------- + + // TEMPORARY? Take this from a PhysicsProcess class? Define them here directly in codegen? 
+ namespace MemoryBuffers + { + static constexpr size_t np4 = mgOnGpu::np4; + static constexpr size_t nparf = mgOnGpu::nparf; + static constexpr size_t npar = mgOnGpu::npar; + static constexpr size_t nw6 = mgOnGpu::nw6; + static constexpr size_t nx2 = mgOnGpu::nx2; + static constexpr size_t ndcoup = Parameters_SMEFTsim_topU3l_MwScheme_UFO_dependentCouplings::ndcoup; + } + + //-------------------------------------------------------------------------- + + // An abstract interface encapsulating a given number of events + class INumberOfEvents + { + public: + virtual ~INumberOfEvents() {} + virtual size_t nevt() const = 0; + }; + + //-------------------------------------------------------------------------- + + // A class encapsulating a given number of events + class NumberOfEvents : virtual public INumberOfEvents + { + public: + NumberOfEvents( const size_t nevt ) + : m_nevt( nevt ) {} + virtual ~NumberOfEvents() {} + virtual size_t nevt() const override { return m_nevt; } + private: + const size_t m_nevt; + }; + + //-------------------------------------------------------------------------- + + // A base class encapsulating a memory buffer (not necessarily an event buffer) + template + class BufferBase : virtual public INumberOfEvents + { + protected: + BufferBase( const size_t size, const bool onDevice ) + : m_size( size ), m_data( nullptr ), m_isOnDevice( onDevice ) {} + virtual ~BufferBase() {} + public: + T* data() { return m_data; } + const T* data() const { return m_data; } + T& operator[]( const size_t index ) { return m_data[index]; } + const T& operator[]( const size_t index ) const { return m_data[index]; } + size_t size() const { return m_size; } + size_t bytes() const { return m_size * sizeof( T ); } + bool isOnDevice() const { return m_isOnDevice; } + virtual size_t nevt() const override { throw std::runtime_error( "This BufferBase is not an event buffer" ); } + protected: + const size_t m_size; + T* m_data; + const bool m_isOnDevice; + }; + + //-------------------------------------------------------------------------- + +#ifndef __CUDACC__ + constexpr bool HostBufferALIGNED = false; // ismisaligned=false + constexpr bool HostBufferMISALIGNED = true; // ismisaligned=true + + // A class encapsulating a C++ host buffer + template + class HostBufferBase : public BufferBase + { + public: + HostBufferBase( const size_t size ) + : BufferBase( size, false ) + { + if constexpr( !ismisaligned ) + this->m_data = new( std::align_val_t( cppAlign ) ) T[size](); + else + this->m_data = new( std::align_val_t( cppAlign ) ) T[size + 1]() + 1; // TEST MISALIGNMENT! + } + virtual ~HostBufferBase() + { + if constexpr( !ismisaligned ) + ::operator delete[]( this->m_data, std::align_val_t( cppAlign ) ); + else + ::operator delete[]( ( this->m_data ) - 1, std::align_val_t( cppAlign ) ); // TEST MISALIGNMENT! 
+ } + static constexpr bool isaligned() { return !ismisaligned; } + public: + static constexpr size_t cppAlign = mgOnGpu::cppAlign; + }; +#endif + + //-------------------------------------------------------------------------- + +#ifdef __CUDACC__ + // A class encapsulating a CUDA pinned host buffer + template + class PinnedHostBufferBase : public BufferBase + { + public: + PinnedHostBufferBase( const size_t size ) + : BufferBase( size, false ) + { + checkCuda( cudaMallocHost( &( this->m_data ), this->bytes() ) ); + } + virtual ~PinnedHostBufferBase() + { + checkCuda( cudaFreeHost( this->m_data ) ); + } + }; +#endif + + //-------------------------------------------------------------------------- + +#ifdef __CUDACC__ + // A class encapsulating a CUDA device buffer + template + class DeviceBufferBase : public BufferBase + { + public: + DeviceBufferBase( const size_t size ) + : BufferBase( size, true ) + { + checkCuda( cudaMalloc( &( this->m_data ), this->bytes() ) ); + } + virtual ~DeviceBufferBase() + { + checkCuda( cudaFree( this->m_data ) ); + } + }; +#endif + + //-------------------------------------------------------------------------- + +#ifndef __CUDACC__ + // A class encapsulating a C++ host buffer for a given number of events + template + class HostBuffer : public HostBufferBase, virtual private NumberOfEvents + { + public: + HostBuffer( const size_t nevt ) + : NumberOfEvents( nevt ) + , HostBufferBase( sizePerEvent * nevt ) {} + virtual ~HostBuffer() {} + virtual size_t nevt() const override final { return NumberOfEvents::nevt(); } + }; +#endif + + //-------------------------------------------------------------------------- + +#ifdef __CUDACC__ + // A class encapsulating a CUDA pinned host buffer for a given number of events + template + class PinnedHostBuffer : public PinnedHostBufferBase, virtual private NumberOfEvents + { + public: + PinnedHostBuffer( const size_t nevt ) + : NumberOfEvents( nevt ) + , PinnedHostBufferBase( sizePerEvent * nevt ) {} + virtual ~PinnedHostBuffer() {} + virtual size_t nevt() const override final { return NumberOfEvents::nevt(); } + }; +#endif + + //-------------------------------------------------------------------------- + +#ifdef __CUDACC__ + // A class encapsulating a CUDA device buffer for a given number of events + template + class DeviceBuffer : public DeviceBufferBase, virtual private NumberOfEvents + { + public: + DeviceBuffer( const size_t nevt ) + : NumberOfEvents( nevt ) + , DeviceBufferBase( sizePerEvent * nevt ) {} + virtual ~DeviceBuffer() {} + virtual size_t nevt() const override final { return NumberOfEvents::nevt(); } + }; +#endif + + //-------------------------------------------------------------------------- + + // A base class encapsulating a memory buffer for momenta random numbers + typedef BufferBase BufferRndNumMomenta; + + // The size (number of elements) per event in a memory buffer for momenta random numbers + constexpr size_t sizePerEventRndNumMomenta = MemoryBuffers::np4 * MemoryBuffers::nparf; + +#ifndef __CUDACC__ + // A class encapsulating a C++ host buffer for momenta random numbers + typedef HostBuffer HostBufferRndNumMomenta; +#else + // A class encapsulating a CUDA pinned host buffer for momenta random numbers + typedef PinnedHostBuffer PinnedHostBufferRndNumMomenta; + // A class encapsulating a CUDA device buffer for momenta random numbers + typedef DeviceBuffer DeviceBufferRndNumMomenta; +#endif + + //-------------------------------------------------------------------------- + + /* + // A base class encapsulating 
a memory buffer with ONE fptype per event + typedef BufferBase BufferOneFp; + + // The size (number of elements) per event in a memory buffer with ONE fptype per event + constexpr size_t sizePerEventOneFp = 1; + +#ifndef __CUDACC__ + // A class encapsulating a C++ host buffer with ONE fptype per event + typedef HostBuffer HostBufferOneFp; +#else + // A class encapsulating a CUDA pinned host buffer for gs + typedef PinnedHostBuffer PinnedHostBufferOneFp; + // A class encapsulating a CUDA device buffer for gs + typedef DeviceBuffer DeviceBufferOneFp; +#endif + + // Memory buffers for Gs (related to the event-by-event strength of running coupling constant alphas QCD) + typedef BufferOneFp BufferGs; + typedef HostBufferOneFp HostBufferGs; + typedef PinnedHostBufferOneFp PinnedHostBufferGs; + typedef DeviceBufferOneFp DeviceBufferGs; + */ + + //-------------------------------------------------------------------------- + + // A base class encapsulating a memory buffer for Gs (related to the event-by-event strength of running coupling constant alphas QCD) + typedef BufferBase BufferGs; + + // The size (number of elements) per event in a memory buffer for Gs + constexpr size_t sizePerEventGs = 1; + +#ifndef __CUDACC__ + // A class encapsulating a C++ host buffer for gs + typedef HostBuffer HostBufferGs; +#else + // A class encapsulating a CUDA pinned host buffer for gs + typedef PinnedHostBuffer PinnedHostBufferGs; + // A class encapsulating a CUDA device buffer for gs + typedef DeviceBuffer DeviceBufferGs; +#endif + + //-------------------------------------------------------------------------- + +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // A base class encapsulating a memory buffer for numerators (of the multichannel single-diagram enhancement factors) + typedef BufferBase BufferNumerators; + + // The size (number of elements) per event in a memory buffer for numerators + constexpr size_t sizePerEventNumerators = 1; + +#ifndef __CUDACC__ + // A class encapsulating a C++ host buffer for gs + typedef HostBuffer HostBufferNumerators; +#else + // A class encapsulating a CUDA pinned host buffer for gs + typedef PinnedHostBuffer PinnedHostBufferNumerators; + // A class encapsulating a CUDA device buffer for gs + typedef DeviceBuffer DeviceBufferNumerators; +#endif +#endif + + //-------------------------------------------------------------------------- + +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // A base class encapsulating a memory buffer for denominators (of the multichannel single-diagram enhancement factors) + typedef BufferBase BufferDenominators; + + // The size (number of elements) per event in a memory buffer for denominators + constexpr size_t sizePerEventDenominators = 1; + +#ifndef __CUDACC__ + // A class encapsulating a C++ host buffer for gs + typedef HostBuffer HostBufferDenominators; +#else + // A class encapsulating a CUDA pinned host buffer for gs + typedef PinnedHostBuffer PinnedHostBufferDenominators; + // A class encapsulating a CUDA device buffer for gs + typedef DeviceBuffer DeviceBufferDenominators; +#endif +#endif + + //-------------------------------------------------------------------------- + + // A base class encapsulating a memory buffer for couplings that depend on the event-by-event running coupling constant alphas QCD + typedef BufferBase BufferCouplings; + + // The size (number of elements) per event in a memory buffer for random numbers + constexpr size_t sizePerEventCouplings = MemoryBuffers::ndcoup * MemoryBuffers::nx2; + +#ifndef __CUDACC__ + // A class encapsulating a 
C++ host buffer for gs + typedef HostBuffer HostBufferCouplings; +#else + // A class encapsulating a CUDA pinned host buffer for gs + typedef PinnedHostBuffer PinnedHostBufferCouplings; + // A class encapsulating a CUDA device buffer for gs + typedef DeviceBuffer DeviceBufferCouplings; +#endif + + //-------------------------------------------------------------------------- + + // A base class encapsulating a memory buffer for momenta + typedef BufferBase BufferMomenta; + + // The size (number of elements) per event in a memory buffer for momenta + constexpr size_t sizePerEventMomenta = MemoryBuffers::np4 * MemoryBuffers::npar; + +#ifndef __CUDACC__ + // A class encapsulating a C++ host buffer for momenta + typedef HostBuffer HostBufferMomenta; + //typedef HostBuffer HostBufferMomenta; // TEST MISALIGNMENT! +#else + // A class encapsulating a CUDA pinned host buffer for momenta + typedef PinnedHostBuffer PinnedHostBufferMomenta; + // A class encapsulating a CUDA device buffer for momenta + typedef DeviceBuffer DeviceBufferMomenta; +#endif + + //-------------------------------------------------------------------------- + + // A base class encapsulating a memory buffer for sampling weights + typedef BufferBase BufferWeights; + + // The size (number of elements) per event in a memory buffer for sampling weights + constexpr size_t sizePerEventWeights = 1; + +#ifndef __CUDACC__ + // A class encapsulating a C++ host buffer for sampling weights + typedef HostBuffer HostBufferWeights; +#else + // A class encapsulating a CUDA pinned host buffer for sampling weights + typedef PinnedHostBuffer PinnedHostBufferWeights; + // A class encapsulating a CUDA device buffer for sampling weights + typedef DeviceBuffer DeviceBufferWeights; +#endif + + //-------------------------------------------------------------------------- + + // A base class encapsulating a memory buffer for matrix elements + typedef BufferBase BufferMatrixElements; + + // The size (number of elements) per event in a memory buffer for matrix elements + constexpr size_t sizePerEventMatrixElements = 1; + +#ifndef __CUDACC__ + // A class encapsulating a C++ host buffer for matrix elements + typedef HostBuffer HostBufferMatrixElements; +#else + // A class encapsulating a CUDA pinned host buffer for matrix elements + typedef PinnedHostBuffer PinnedHostBufferMatrixElements; + // A class encapsulating a CUDA device buffer for matrix elements + typedef DeviceBuffer DeviceBufferMatrixElements; +#endif + + //-------------------------------------------------------------------------- + + // A base class encapsulating a memory buffer for the helicity mask + typedef BufferBase BufferHelicityMask; + +#ifndef __CUDACC__ + // A class encapsulating a C++ host buffer for the helicity mask + typedef HostBufferBase HostBufferHelicityMask; +#else + // A class encapsulating a CUDA pinned host buffer for the helicity mask + typedef PinnedHostBufferBase PinnedHostBufferHelicityMask; + // A class encapsulating a CUDA device buffer for the helicity mask + typedef DeviceBufferBase DeviceBufferHelicityMask; +#endif + + //-------------------------------------------------------------------------- + + // A base class encapsulating a memory buffer for wavefunctions + typedef BufferBase BufferWavefunctions; + + // The size (number of elements) per event in a memory buffer for wavefunctions + constexpr size_t sizePerEventWavefunctions = MemoryBuffers::nw6 * MemoryBuffers::nx2; + +#ifndef __CUDACC__ + // A class encapsulating a C++ host buffer for wavefunctions + typedef 
HostBuffer HostBufferWavefunctions; +#else + // A class encapsulating a CUDA pinned host buffer for wavefunctions + typedef PinnedHostBuffer PinnedHostBufferWavefunctions; + // A class encapsulating a CUDA device buffer for wavefunctions + typedef DeviceBuffer DeviceBufferWavefunctions; +#endif + + //-------------------------------------------------------------------------- + + // A base class encapsulating a memory buffer for helicity random numbers + typedef BufferBase BufferRndNumHelicity; + + // The size (number of elements) per event in a memory buffer for helicity random numbers + constexpr size_t sizePerEventRndNumHelicity = 1; + +#ifndef __CUDACC__ + // A class encapsulating a C++ host buffer for helicity random numbers + typedef HostBuffer HostBufferRndNumHelicity; +#else + // A class encapsulating a CUDA pinned host buffer for helicity random numbers + typedef PinnedHostBuffer PinnedHostBufferRndNumHelicity; + // A class encapsulating a CUDA device buffer for helicity random numbers + typedef DeviceBuffer DeviceBufferRndNumHelicity; +#endif + + //-------------------------------------------------------------------------- + + // A base class encapsulating a memory buffer for color random numbers + typedef BufferBase BufferRndNumColor; + + // The size (number of elements) per event in a memory buffer for color random numbers + constexpr size_t sizePerEventRndNumColor = 1; + +#ifndef __CUDACC__ + // A class encapsulating a C++ host buffer for color random numbers + typedef HostBuffer HostBufferRndNumColor; +#else + // A class encapsulating a CUDA pinned host buffer for color random numbers + typedef PinnedHostBuffer PinnedHostBufferRndNumColor; + // A class encapsulating a CUDA device buffer for color random numbers + typedef DeviceBuffer DeviceBufferRndNumColor; +#endif + + //-------------------------------------------------------------------------- + + // A base class encapsulating a memory buffer for helicity selection + typedef BufferBase BufferSelectedHelicity; + + // The size (number of elements) per event in a memory buffer for helicity selection + constexpr size_t sizePerEventSelectedHelicity = 1; + +#ifndef __CUDACC__ + // A class encapsulating a C++ host buffer for helicity selection + typedef HostBuffer HostBufferSelectedHelicity; +#else + // A class encapsulating a CUDA pinned host buffer for helicity selection + typedef PinnedHostBuffer PinnedHostBufferSelectedHelicity; + // A class encapsulating a CUDA device buffer for helicity selection + typedef DeviceBuffer DeviceBufferSelectedHelicity; +#endif + + //-------------------------------------------------------------------------- + + // A base class encapsulating a memory buffer for color selection + typedef BufferBase BufferSelectedColor; + + // The size (number of elements) per event in a memory buffer for color selection + constexpr size_t sizePerEventSelectedColor = 1; + +#ifndef __CUDACC__ + // A class encapsulating a C++ host buffer for color selection + typedef HostBuffer HostBufferSelectedColor; +#else + // A class encapsulating a CUDA pinned host buffer for color selection + typedef PinnedHostBuffer PinnedHostBufferSelectedColor; + // A class encapsulating a CUDA device buffer for color selection + typedef DeviceBuffer DeviceBufferSelectedColor; +#endif + + //-------------------------------------------------------------------------- + +#ifdef __CUDACC__ + template + void copyDeviceFromHost( Tdst& dst, const Tsrc& src ) // keep the same order of arguments as in memcpy + { + if( dst.size() != src.size() ) + { + 
std::ostringstream sstr; + sstr << "Size (#elements) mismatch in copyDeviceFromHost: dst=" << dst.size() << ", src=" << src.size(); + throw std::runtime_error( sstr.str() ); + } + if( dst.bytes() != src.bytes() ) + { + std::ostringstream sstr; + sstr << "Size (#bytes) mismatch in copyDeviceFromHost: dst=" << dst.bytes() << ", src=" << src.bytes(); + throw std::runtime_error( sstr.str() ); + } + // NB (PR #45): cudaMemcpy involves an intermediate memcpy to pinned memory if host array is a not a pinned host array + checkCuda( cudaMemcpy( dst.data(), src.data(), src.bytes(), cudaMemcpyHostToDevice ) ); + } +#endif + + //-------------------------------------------------------------------------- + +#ifdef __CUDACC__ + template + void copyHostFromDevice( Tdst& dst, const Tsrc& src ) // keep the same order of arguments as in memcpy + { + if( dst.size() != src.size() ) + { + std::ostringstream sstr; + sstr << "Size (#elements) mismatch in copyHostFromDevice: dst=" << dst.size() << ", src=" << src.size(); + throw std::runtime_error( sstr.str() ); + } + if( dst.bytes() != src.bytes() ) + { + std::ostringstream sstr; + sstr << "Size (#bytes) mismatch in copyHostFromDevice: dst=" << dst.bytes() << ", src=" << src.bytes(); + throw std::runtime_error( sstr.str() ); + } + // NB (PR #45): cudaMemcpy involves an intermediate memcpy to pinned memory if host array is a not a pinned host array + checkCuda( cudaMemcpy( dst.data(), src.data(), src.bytes(), cudaMemcpyDeviceToHost ) ); + } +#endif + + //-------------------------------------------------------------------------- +} + +#endif // MemoryBuffers_H diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/Bridge.h b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/Bridge.h new file mode 120000 index 0000000000..7afe008f47 --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/Bridge.h @@ -0,0 +1 @@ +../Bridge.h \ No newline at end of file diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/BridgeKernels.cc b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/BridgeKernels.cc new file mode 120000 index 0000000000..4c8697458f --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/BridgeKernels.cc @@ -0,0 +1 @@ +../BridgeKernels.cc \ No newline at end of file diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/BridgeKernels.h b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/BridgeKernels.h new file mode 120000 index 0000000000..f21b556a84 --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/BridgeKernels.h @@ -0,0 +1 @@ +../BridgeKernels.h \ No newline at end of file diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/CMakeLists.txt b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/CMakeLists.txt new file mode 100644 index 0000000000..4ac6c179d3 --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/CMakeLists.txt @@ -0,0 +1,24 @@ +get_filename_component(basename ${CMAKE_CURRENT_SOURCE_DIR} NAME) +string(TOLOWER ${basename} targadd) + 
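Stepping back briefly to MemoryBuffers.h above, before the subprocess build files: the buffer classes and the two copy helpers are meant to be used in matched host/device pairs, with the element and byte checks turning any mismatch into a std::runtime_error. Below is a minimal usage sketch, not part of the generated patch: it assumes a CUDA build (so the mg5amcGpu namespace and the pinned/device typedefs are available) and an include path that resolves MemoryBuffers.h; transferOneBatch is a hypothetical function name.

#include "MemoryBuffers.h"

void transferOneBatch( const size_t nevt )
{
  using namespace mg5amcGpu;
  // Pinned host buffer and device buffer for momenta: nevt * np4 * npar fptype elements each
  PinnedHostBufferMomenta hstMomenta( nevt );
  DeviceBufferMomenta devMomenta( nevt );
  // ... fill hstMomenta on the host (e.g. from a phase-space sampler) ...
  copyDeviceFromHost( devMomenta, hstMomenta ); // throws std::runtime_error on any size mismatch

  // Matrix elements travel the other way once the kernel has filled the device buffer
  PinnedHostBufferMatrixElements hstMEs( nevt );
  DeviceBufferMatrixElements devMEs( nevt );
  // ... launch the matrix-element kernel writing into devMEs.data() ...
  copyHostFromDevice( hstMEs, devMEs );
}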
+file(GLOB_RECURSE HEADERS "../*.h" CPPProcess.h) +set(SOURCES ../BridgeKernels.cc CPPProcess.cc ../CrossSectionKernels.cc + ../MatrixElementKernels.cc ../RamboSamplingKernels.cc + ../RandomNumberKernels.cc) + +set(libname mg5amc_cxx_${targadd}) +add_library(${libname} ${SOURCES} ${HEADERS}) +target_include_directories(${libname} PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}" + "${PROJECT_SOURCE_DIR}/src" + "${PROJECT_GITROOT_DIR}/tools") + +set(execname check_${targadd}.exe) +add_executable(${execname} check_sa.cc) +target_link_libraries(${execname} PUBLIC mg5amc_common ${libname}) +target_include_directories(${execname} PRIVATE "${PROJECT_SOURCE_DIR}/src") + +# some XCode specific stuff to make the executable run +set_property(TARGET ${libname} PROPERTY XCODE_GENERATE_SCHEME TRUE) +set_property(TARGET ${execname} PROPERTY XCODE_GENERATE_SCHEME TRUE) +set_property(TARGET ${execname} PROPERTY XCODE_SCHEME_ARGUMENTS "--bridge" "8" "8" "32") +set_property(TARGET ${execname} PROPERTY XCODE_SCHEME_WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}") diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/CPPProcess.cc b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/CPPProcess.cc new file mode 100644 index 0000000000..6fd28880a0 --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/CPPProcess.cc @@ -0,0 +1,2123 @@ +//========================================================================== +// This file has been automatically generated for CUDA/C++ standalone by +// MadGraph5_aMC@NLO v. 3.5.0_lo_vect, 2023-01-26 +// By the MadGraph5_aMC@NLO Development Team +// Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch +//========================================================================== + +#include "CPPProcess.h" + +#include "mgOnGpuConfig.h" + +#include "CudaRuntime.h" +#include "HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h" +#include "MemoryAccessAmplitudes.h" +#include "MemoryAccessCouplings.h" +#include "MemoryAccessCouplingsFixed.h" +#include "MemoryAccessGs.h" +#include "MemoryAccessMatrixElements.h" +#include "MemoryAccessMomenta.h" +#include "MemoryAccessWavefunctions.h" + +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL +#include "MemoryAccessDenominators.h" +#include "MemoryAccessNumerators.h" +#include "coloramps.h" +#endif + +#include +#include +#include +#include +#include + +// Test ncu metrics for CUDA thread divergence +#undef MGONGPU_TEST_DIVERGENCE +//#define MGONGPU_TEST_DIVERGENCE 1 + +//========================================================================== +// Class member functions for calculating the matrix elements for +// Process: g g > t t~ t t~ WEIGHTED<=4 @1 + +#ifdef __CUDACC__ +namespace mg5amcGpu +#else +namespace mg5amcCpu +#endif +{ + using mgOnGpu::np4; // dimensions of 4-momenta (E,px,py,pz) + using mgOnGpu::npar; // #particles in total (external = initial + final): e.g. 4 for e+ e- -> mu+ mu- + using mgOnGpu::ncomb; // #helicity combinations: e.g. 16 for e+ e- -> mu+ mu- (2**4 = fermion spin up/down ** npar) + + using mgOnGpu::nwf; // #wavefunctions = #external (npar) + #internal: e.g. 5 for e+ e- -> mu+ mu- (1 internal is gamma or Z) + using mgOnGpu::nw6; // dimensions of each wavefunction (HELAS KEK 91-11): e.g. 
6 for e+ e- -> mu+ mu- (fermions and vectors) + + using Parameters_SMEFTsim_topU3l_MwScheme_UFO_dependentCouplings::ndcoup; // #couplings that vary event by event (depend on running alphas QCD) + using Parameters_SMEFTsim_topU3l_MwScheme_UFO_independentCouplings::nicoup; // #couplings that are fixed for all events (do not depend on running alphas QCD) + + // The number of colors + constexpr int ncolor = 12; + + // The number of SIMD vectors of events processed by calculate_wavefunction +#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT + constexpr int nParity = 2; +#else + constexpr int nParity = 1; +#endif + + // Physics parameters (masses, coupling, etc...) + // For CUDA performance, hardcoded constexpr's would be better: fewer registers and a tiny throughput increase + // However, physics parameters are user-defined through card files: use CUDA constant memory instead (issue #39) + // [NB if hardcoded parameters are used, it's better to define them here to avoid silent shadowing (issue #263)] +#ifdef MGONGPU_HARDCODE_PARAM + __device__ const fptype cIPD[2] = { (fptype)Parameters_SMEFTsim_topU3l_MwScheme_UFO::mdl_MT, (fptype)Parameters_SMEFTsim_topU3l_MwScheme_UFO::mdl_WT }; + __device__ const fptype* cIPC = nullptr; // unused as nicoup=0 +#else +#ifdef __CUDACC__ + __device__ __constant__ fptype cIPD[2]; + __device__ __constant__ fptype* cIPC = nullptr; // unused as nicoup=0 +#else + static fptype cIPD[2]; + static fptype* cIPC = nullptr; // unused as nicoup=0 +#endif +#endif + + // Helicity combinations (and filtering of "good" helicity combinations) +#ifdef __CUDACC__ + __device__ __constant__ short cHel[ncomb][npar]; + __device__ __constant__ int cNGoodHel; + __device__ __constant__ int cGoodHel[ncomb]; +#else + static short cHel[ncomb][npar]; + static int cNGoodHel; + static int cGoodHel[ncomb]; +#endif + + //-------------------------------------------------------------------------- + + // Evaluate |M|^2 for each subprocess + // NB: calculate_wavefunctions ADDS |M|^2 for a given ihel to the running sum of |M|^2 over helicities for the given event(s) + // (similarly, it also ADDS the numerator and denominator for a given ihel to their running sums over helicities) + // In CUDA, this device function computes the ME for a single event + // In C++, this function computes the ME for a single event "page" or SIMD vector (or for two in "mixed" precision mode, nParity=2) + __device__ INLINE void /* clang-format off */ + calculate_wavefunctions( int ihel, + const fptype* allmomenta, // input: momenta[nevt*npar*4] + const fptype* allcouplings, // input: couplings[nevt*ndcoup*2] + fptype* allMEs, // output: allMEs[nevt], |M|^2 running_sum_over_helicities +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + const unsigned int channelId, // input: multichannel channel id (1 to #diagrams); 0 to disable channel enhancement + fptype* allNumerators, // output: multichannel numerators[nevt], running_sum_over_helicities + fptype* allDenominators, // output: multichannel denominators[nevt], running_sum_over_helicities +#endif + fptype_sv* jamp2_sv // output: jamp2[nParity][ncolor][neppV] for color choice (nullptr if disabled) +#ifndef __CUDACC__ + , const int ievt00 // input: first event number in current C++ event page (for CUDA, ievt depends on threadid) +#endif + ) + //ALWAYS_INLINE // attributes are not permitted in a function definition + { +#ifdef __CUDACC__ + using namespace mg5amcGpu; + using M_ACCESS = DeviceAccessMomenta; // non-trivial access: buffer includes 
all events + using E_ACCESS = DeviceAccessMatrixElements; // non-trivial access: buffer includes all events + using W_ACCESS = DeviceAccessWavefunctions; // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event + using A_ACCESS = DeviceAccessAmplitudes; // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event + using CD_ACCESS = DeviceAccessCouplings; // non-trivial access (dependent couplings): buffer includes all events + using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + using NUM_ACCESS = DeviceAccessNumerators; // non-trivial access: buffer includes all events + using DEN_ACCESS = DeviceAccessDenominators; // non-trivial access: buffer includes all events +#endif +#else + using namespace mg5amcCpu; + using M_ACCESS = HostAccessMomenta; // non-trivial access: buffer includes all events + using E_ACCESS = HostAccessMatrixElements; // non-trivial access: buffer includes all events + using W_ACCESS = HostAccessWavefunctions; // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event + using A_ACCESS = HostAccessAmplitudes; // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event + using CD_ACCESS = HostAccessCouplings; // non-trivial access (dependent couplings): buffer includes all events + using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + using NUM_ACCESS = HostAccessNumerators; // non-trivial access: buffer includes all events + using DEN_ACCESS = HostAccessDenominators; // non-trivial access: buffer includes all events +#endif +#endif /* clang-format on */ + mgDebug( 0, __FUNCTION__ ); + //printf( "calculate_wavefunctions: ihel=%2d\n", ihel ); +#ifndef __CUDACC__ + //printf( "calculate_wavefunctions: ievt00=%d\n", ievt00 ); +#endif + + // Local TEMPORARY variables for a subset of Feynman diagrams in the given CUDA event (ievt) or C++ event page (ipagV) + // [NB these variables are reused several times (and re-initialised each time) within the same event or event page] + // ** NB: in other words, amplitudes and wavefunctions still have TRIVIAL ACCESS: there is currently no need + // ** NB: to have large memory structures for wavefunctions/amplitudes in all events (no kernel splitting yet)! + //MemoryBufferWavefunctions w_buffer[nwf]{ neppV }; + cxtype_sv w_sv[nwf][nw6]; // particle wavefunctions within Feynman diagrams (nw6 is often 6, the dimension of spin 1/2 or spin 1 particles) + cxtype_sv amp_sv[1]; // invariant amplitude for one given Feynman diagram + + // Proof of concept for using fptype* in the interface + fptype* w_fp[nwf]; + for( int iwf = 0; iwf < nwf; iwf++ ) w_fp[iwf] = reinterpret_cast<fptype*>( w_sv[iwf] ); + fptype* amp_fp; + amp_fp = reinterpret_cast<fptype*>( amp_sv ); + + // Local variables for the given CUDA event (ievt) or C++ event page (ipagV) + // [jamp: sum (for one event or event page) of the invariant amplitudes for all Feynman diagrams in a given color combination] + cxtype_sv jamp_sv[ncolor] = {}; // all zeros (NB: vector cxtype_v IS initialized to 0, but scalar cxtype is NOT, if "= {}" is missing!)
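The "proof of concept for using fptype* in the interface" above works because each cxtype_sv buffer is a contiguous block of fptype values, two (real, imaginary) per complex component, so the reinterpret_cast exposes the same storage through a flat pointer. The standalone sketch below illustrates this for a scalar build in which cxtype_sv is a plain complex number (neppV == 1); it is not part of the generated file, and toy_fptype, toy_cxtype, NWF and NW6 are hypothetical stand-ins for fptype, cxtype_sv, nwf and nw6.

#include <complex>
#include <cstdio>

using toy_fptype = double;
using toy_cxtype = std::complex<toy_fptype>; // stand-in for cxtype_sv in a scalar (neppV == 1) build

int main()
{
  constexpr int NWF = 2; // stand-in for nwf
  constexpr int NW6 = 6; // stand-in for nw6
  toy_cxtype w_sv[NWF][NW6] = {}; // per-event wavefunction buffers, as in the kernel above
  toy_fptype* w_fp[NWF];          // flat fptype* views passed to the helas-like interface
  for( int iwf = 0; iwf < NWF; iwf++ )
    w_fp[iwf] = reinterpret_cast<toy_fptype*>( w_sv[iwf] ); // two fptypes (re, im) per complex component
  w_fp[0][0] = 1.; // writes the real part of w_sv[0][0]
  w_fp[0][1] = 2.; // writes the imaginary part of w_sv[0][0]
  std::printf( "w_sv[0][0] = (%f, %f)\n", w_sv[0][0].real(), w_sv[0][0].imag() ); // prints (1.000000, 2.000000)
  return 0;
}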
+ + // === Calculate wavefunctions and amplitudes for all diagrams in all processes === + // === (for one event in CUDA, for one - or two in mixed mode - SIMD event pages in C++ === +#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT + // Mixed fptypes #537: float for color algebra and double elsewhere + // Delay color algebra and ME updates (only on even pages) + cxtype_sv jamp_sv_previous[ncolor] = {}; + fptype* MEs_previous = 0; +#endif + for( int iParity = 0; iParity < nParity; ++iParity ) + { // START LOOP ON IPARITY +#ifndef __CUDACC__ + const int ievt0 = ievt00 + iParity * neppV; +#endif + constexpr size_t nxcoup = ndcoup + nicoup; // both dependent and independent couplings + const fptype* allCOUPs[nxcoup]; +#ifdef __CUDACC__ +#pragma nv_diagnostic push +#pragma nv_diag_suppress 186 // e.g. <> +#endif + for( size_t idcoup = 0; idcoup < ndcoup; idcoup++ ) + allCOUPs[idcoup] = CD_ACCESS::idcoupAccessBufferConst( allcouplings, idcoup ); // dependent couplings, vary event-by-event + for( size_t iicoup = 0; iicoup < nicoup; iicoup++ ) + allCOUPs[ndcoup + iicoup] = CI_ACCESS::iicoupAccessBufferConst( cIPC, iicoup ); // independent couplings, fixed for all events +#ifdef __CUDACC__ +#pragma nv_diagnostic pop + // CUDA kernels take input/output buffers with momenta/MEs for all events + const fptype* momenta = allmomenta; + const fptype* COUPs[nxcoup]; + for( size_t ixcoup = 0; ixcoup < nxcoup; ixcoup++ ) COUPs[ixcoup] = allCOUPs[ixcoup]; + fptype* MEs = allMEs; +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + fptype* numerators = allNumerators; + fptype* denominators = allDenominators; +#endif +#else + // C++ kernels take input/output buffers with momenta/MEs for one specific event (the first in the current event page) + const fptype* momenta = M_ACCESS::ieventAccessRecordConst( allmomenta, ievt0 ); + const fptype* COUPs[nxcoup]; + for( size_t idcoup = 0; idcoup < ndcoup; idcoup++ ) + COUPs[idcoup] = CD_ACCESS::ieventAccessRecordConst( allCOUPs[idcoup], ievt0 ); // dependent couplings, vary event-by-event + for( size_t iicoup = 0; iicoup < nicoup; iicoup++ ) + COUPs[ndcoup + iicoup] = allCOUPs[ndcoup + iicoup]; // independent couplings, fixed for all events + fptype* MEs = E_ACCESS::ieventAccessRecord( allMEs, ievt0 ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + fptype* numerators = NUM_ACCESS::ieventAccessRecord( allNumerators, ievt0 ); + fptype* denominators = DEN_ACCESS::ieventAccessRecord( allDenominators, ievt0 ); +#endif +#endif + + // Reset color flows (reset jamp_sv) at the beginning of a new event or event page + for( int i = 0; i < ncolor; i++ ) { jamp_sv[i] = cxzero_sv(); } + +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Numerators and denominators for the current event (CUDA) or SIMD event page (C++) + fptype_sv& numerators_sv = NUM_ACCESS::kernelAccess( numerators ); + fptype_sv& denominators_sv = DEN_ACCESS::kernelAccess( denominators ); +#endif + + // *** DIAGRAM 1 OF 72 *** + + // Wavefunction(s) for diagram number 1 + vxxxxx( momenta, 0., cHel[ihel][0], -1, w_fp[0], 0 ); + + vxxxxx( momenta, 0., cHel[ihel][1], -1, w_fp[1], 1 ); + + oxxxxx( momenta, cIPD[0], cHel[ihel][2], +1, w_fp[2], 2 ); + + ixxxxx( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 ); + + oxxxxx( momenta, cIPD[0], cHel[ihel][4], +1, w_fp[4], 4 ); + + ixxxxx( momenta, cIPD[0], cHel[ihel][5], -1, w_fp[5], 5 ); + + VVV5P0_1( w_fp[0], w_fp[1], COUPs[0], 0., 0., w_fp[6] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[7] ); + FFV1_1( w_fp[4], w_fp[6], COUPs[1], cIPD[0], 
cIPD[1], w_fp[8] ); + + // Amplitude(s) for diagram number 1 + FFV1_0( w_fp[5], w_fp[8], w_fp[7], COUPs[1], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[2] -= 1. / 2. * cxtype( 0, 1 ) * amp_sv[0]; + jamp_sv[3] += 1. / 6. * cxtype( 0, 1 ) * amp_sv[0]; + jamp_sv[10] += 1. / 2. * cxtype( 0, 1 ) * amp_sv[0]; + jamp_sv[11] -= 1. / 6. * cxtype( 0, 1 ) * amp_sv[0]; + + // *** DIAGRAM 2 OF 72 *** + + // Wavefunction(s) for diagram number 2 + FFV1_2( w_fp[5], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); + + // Amplitude(s) for diagram number 2 + FFV1_0( w_fp[9], w_fp[4], w_fp[7], COUPs[1], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[1] -= 1. / 2. * cxtype( 0, 1 ) * amp_sv[0]; + jamp_sv[3] += 1. / 6. * cxtype( 0, 1 ) * amp_sv[0]; + jamp_sv[9] += 1. / 2. * cxtype( 0, 1 ) * amp_sv[0]; + jamp_sv[11] -= 1. / 6. * cxtype( 0, 1 ) * amp_sv[0]; + + // *** DIAGRAM 3 OF 72 *** + + // Wavefunction(s) for diagram number 3 + FFV1P0_3( w_fp[5], w_fp[4], COUPs[1], 0., 0., w_fp[10] ); + + // Amplitude(s) for diagram number 3 + VVV5_0( w_fp[6], w_fp[7], w_fp[10], COUPs[0], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[1] += 1. / 2. * amp_sv[0]; + jamp_sv[2] -= 1. / 2. * amp_sv[0]; + jamp_sv[9] -= 1. / 2. * amp_sv[0]; + jamp_sv[10] += 1. / 2. * amp_sv[0]; + + // *** DIAGRAM 4 OF 72 *** + + // Wavefunction(s) for diagram number 4 + FFV1P0_3( w_fp[5], w_fp[2], COUPs[1], 0., 0., w_fp[11] ); + FFV1_2( w_fp[3], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[12] ); + + // Amplitude(s) for diagram number 4 + FFV1_0( w_fp[12], w_fp[4], w_fp[11], COUPs[1], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[0] += 1. / 2. * cxtype( 0, 1 ) * amp_sv[0]; + jamp_sv[2] -= 1. / 6. * cxtype( 0, 1 ) * amp_sv[0]; + jamp_sv[8] -= 1. / 2. * cxtype( 0, 1 ) * amp_sv[0]; + jamp_sv[10] += 1. / 6. * cxtype( 0, 1 ) * amp_sv[0]; + + // *** DIAGRAM 5 OF 72 *** + + // Wavefunction(s) for diagram number 5 + // (none) + + // Amplitude(s) for diagram number 5 + FFV1_0( w_fp[3], w_fp[8], w_fp[11], COUPs[1], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[2] -= 1. / 6. * cxtype( 0, 1 ) * amp_sv[0]; + jamp_sv[3] += 1. / 2. * cxtype( 0, 1 ) * amp_sv[0]; + jamp_sv[10] += 1. / 6. * cxtype( 0, 1 ) * amp_sv[0]; + jamp_sv[11] -= 1. / 2. * cxtype( 0, 1 ) * amp_sv[0]; + + // *** DIAGRAM 6 OF 72 *** + + // Wavefunction(s) for diagram number 6 + FFV1P0_3( w_fp[3], w_fp[4], COUPs[1], 0., 0., w_fp[8] ); + + // Amplitude(s) for diagram number 6 + VVV5_0( w_fp[6], w_fp[11], w_fp[8], COUPs[0], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[0] -= 1. / 2. * amp_sv[0]; + jamp_sv[3] += 1. / 2. * amp_sv[0]; + jamp_sv[8] += 1. / 2. * amp_sv[0]; + jamp_sv[11] -= 1. / 2. 
* amp_sv[0]; + + // *** DIAGRAM 7 OF 72 *** + + // Wavefunction(s) for diagram number 7 + FFV1_1( w_fp[2], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[13] ); + + // Amplitude(s) for diagram number 7 + FFV1_0( w_fp[5], w_fp[13], w_fp[8], COUPs[1], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[0] += 1. / 2. * cxtype( 0, 1 ) * amp_sv[0]; + jamp_sv[1] -= 1. / 6. * cxtype( 0, 1 ) * amp_sv[0]; + jamp_sv[8] -= 1. / 2. * cxtype( 0, 1 ) * amp_sv[0]; + jamp_sv[9] += 1. / 6. * cxtype( 0, 1 ) * amp_sv[0]; + + // *** DIAGRAM 8 OF 72 *** + + // Wavefunction(s) for diagram number 8 + // (none) + + // Amplitude(s) for diagram number 8 + FFV1_0( w_fp[9], w_fp[2], w_fp[8], COUPs[1], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[1] -= 1. / 6. * cxtype( 0, 1 ) * amp_sv[0]; + jamp_sv[3] += 1. / 2. * cxtype( 0, 1 ) * amp_sv[0]; + jamp_sv[9] += 1. / 6. * cxtype( 0, 1 ) * amp_sv[0]; + jamp_sv[11] -= 1. / 2. * cxtype( 0, 1 ) * amp_sv[0]; + + // *** DIAGRAM 9 OF 72 *** + + // Wavefunction(s) for diagram number 9 + // (none) + + // Amplitude(s) for diagram number 9 + FFV1_0( w_fp[3], w_fp[13], w_fp[10], COUPs[1], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[0] += 1. / 6. * cxtype( 0, 1 ) * amp_sv[0]; + jamp_sv[1] -= 1. / 2. * cxtype( 0, 1 ) * amp_sv[0]; + jamp_sv[8] -= 1. / 6. * cxtype( 0, 1 ) * amp_sv[0]; + jamp_sv[9] += 1. / 2. * cxtype( 0, 1 ) * amp_sv[0]; + + // *** DIAGRAM 10 OF 72 *** + + // Wavefunction(s) for diagram number 10 + // (none) + + // Amplitude(s) for diagram number 10 + FFV1_0( w_fp[12], w_fp[2], w_fp[10], COUPs[1], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[0] += 1. / 6. * cxtype( 0, 1 ) * amp_sv[0]; + jamp_sv[2] -= 1. / 2. * cxtype( 0, 1 ) * amp_sv[0]; + jamp_sv[8] -= 1. / 6. * cxtype( 0, 1 ) * amp_sv[0]; + jamp_sv[10] += 1. / 2. * cxtype( 0, 1 ) * amp_sv[0]; + + // *** DIAGRAM 11 OF 72 *** + + // Wavefunction(s) for diagram number 11 + FFV1_1( w_fp[2], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[12] ); + FFV1_2( w_fp[3], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[13] ); + FFV1P0_3( w_fp[5], w_fp[12], COUPs[1], 0., 0., w_fp[9] ); + + // Amplitude(s) for diagram number 11 + FFV1_0( w_fp[13], w_fp[4], w_fp[9], COUPs[1], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[0] -= 1. / 2. * amp_sv[0]; + jamp_sv[5] += 1. / 6. * amp_sv[0]; + + // *** DIAGRAM 12 OF 72 *** + + // Wavefunction(s) for diagram number 12 + // (none) + + // Amplitude(s) for diagram number 12 + FFV1_0( w_fp[13], w_fp[12], w_fp[10], COUPs[1], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[0] -= 1. / 6. * amp_sv[0]; + jamp_sv[5] += 1. / 2. 
* amp_sv[0]; + + // *** DIAGRAM 13 OF 72 *** + + // Wavefunction(s) for diagram number 13 + FFV1_1( w_fp[4], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[6] ); + FFV1P0_3( w_fp[3], w_fp[12], COUPs[1], 0., 0., w_fp[14] ); + + // Amplitude(s) for diagram number 13 + FFV1_0( w_fp[5], w_fp[6], w_fp[14], COUPs[1], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[4] -= 1. / 6. * amp_sv[0]; + jamp_sv[5] += 1. / 2. * amp_sv[0]; + + // *** DIAGRAM 14 OF 72 *** + + // Wavefunction(s) for diagram number 14 + // (none) + + // Amplitude(s) for diagram number 14 + FFV1_0( w_fp[3], w_fp[6], w_fp[9], COUPs[1], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[4] -= 1. / 2. * amp_sv[0]; + jamp_sv[5] += 1. / 6. * amp_sv[0]; + + // *** DIAGRAM 15 OF 72 *** + + // Wavefunction(s) for diagram number 15 + FFV1_2( w_fp[5], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[15] ); + + // Amplitude(s) for diagram number 15 + FFV1_0( w_fp[15], w_fp[4], w_fp[14], COUPs[1], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[1] += 1. / 2. * amp_sv[0]; + jamp_sv[4] -= 1. / 6. * amp_sv[0]; + + // *** DIAGRAM 16 OF 72 *** + + // Wavefunction(s) for diagram number 16 + // (none) + + // Amplitude(s) for diagram number 16 + FFV1_0( w_fp[15], w_fp[12], w_fp[8], COUPs[1], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[1] += 1. / 6. * amp_sv[0]; + jamp_sv[4] -= 1. / 2. * amp_sv[0]; + + // *** DIAGRAM 17 OF 72 *** + + // Wavefunction(s) for diagram number 17 + FFV1_1( w_fp[12], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[16] ); + + // Amplitude(s) for diagram number 17 + FFV1_0( w_fp[5], w_fp[16], w_fp[8], COUPs[1], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[0] -= 1. / 2. * amp_sv[0]; + jamp_sv[1] += 1. / 6. * amp_sv[0]; + + // *** DIAGRAM 18 OF 72 *** + + // Wavefunction(s) for diagram number 18 + // (none) + + // Amplitude(s) for diagram number 18 + VVV5_0( w_fp[1], w_fp[8], w_fp[9], COUPs[0], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[0] += 1. / 2. * cxtype( 0, 1 ) * amp_sv[0]; + jamp_sv[4] -= 1. / 2. * cxtype( 0, 1 ) * amp_sv[0]; + + // *** DIAGRAM 19 OF 72 *** + + // Wavefunction(s) for diagram number 19 + // (none) + + // Amplitude(s) for diagram number 19 + FFV1_0( w_fp[3], w_fp[16], w_fp[10], COUPs[1], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[0] -= 1. / 6. * amp_sv[0]; + jamp_sv[1] += 1. / 2. 
* amp_sv[0]; + + // *** DIAGRAM 20 OF 72 *** + + // Wavefunction(s) for diagram number 20 + // (none) + + // Amplitude(s) for diagram number 20 + VVV5_0( w_fp[1], w_fp[10], w_fp[14], COUPs[0], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[1] -= 1. / 2. * cxtype( 0, 1 ) * amp_sv[0]; + jamp_sv[5] += 1. / 2. * cxtype( 0, 1 ) * amp_sv[0]; + + // *** DIAGRAM 21 OF 72 *** + + // Wavefunction(s) for diagram number 21 + FFV1_2( w_fp[3], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[14] ); + FFV1_1( w_fp[2], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[16] ); + FFV1P0_3( w_fp[14], w_fp[4], COUPs[1], 0., 0., w_fp[9] ); + + // Amplitude(s) for diagram number 21 + FFV1_0( w_fp[5], w_fp[16], w_fp[9], COUPs[1], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[6] += 1. / 6. * amp_sv[0]; + jamp_sv[8] -= 1. / 2. * amp_sv[0]; + + // *** DIAGRAM 22 OF 72 *** + + // Wavefunction(s) for diagram number 22 + // (none) + + // Amplitude(s) for diagram number 22 + FFV1_0( w_fp[14], w_fp[16], w_fp[10], COUPs[1], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[6] += 1. / 2. * amp_sv[0]; + jamp_sv[8] -= 1. / 6. * amp_sv[0]; + + // *** DIAGRAM 23 OF 72 *** + + // Wavefunction(s) for diagram number 23 + FFV1P0_3( w_fp[14], w_fp[2], COUPs[1], 0., 0., w_fp[12] ); + + // Amplitude(s) for diagram number 23 + FFV1_0( w_fp[5], w_fp[6], w_fp[12], COUPs[1], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[4] -= 1. / 6. * amp_sv[0]; + jamp_sv[10] += 1. / 2. * amp_sv[0]; + + // *** DIAGRAM 24 OF 72 *** + + // Wavefunction(s) for diagram number 24 + // (none) + + // Amplitude(s) for diagram number 24 + FFV1_0( w_fp[14], w_fp[6], w_fp[11], COUPs[1], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[4] -= 1. / 2. * amp_sv[0]; + jamp_sv[10] += 1. / 6. * amp_sv[0]; + + // *** DIAGRAM 25 OF 72 *** + + // Wavefunction(s) for diagram number 25 + // (none) + + // Amplitude(s) for diagram number 25 + FFV1_0( w_fp[15], w_fp[4], w_fp[12], COUPs[1], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[4] -= 1. / 6. * amp_sv[0]; + jamp_sv[6] += 1. / 2. * amp_sv[0]; + + // *** DIAGRAM 26 OF 72 *** + + // Wavefunction(s) for diagram number 26 + // (none) + + // Amplitude(s) for diagram number 26 + FFV1_0( w_fp[15], w_fp[2], w_fp[9], COUPs[1], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[4] -= 1. / 2. * amp_sv[0]; + jamp_sv[6] += 1. / 6. 
* amp_sv[0]; + + // *** DIAGRAM 27 OF 72 *** + + // Wavefunction(s) for diagram number 27 + FFV1_2( w_fp[14], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[17] ); + + // Amplitude(s) for diagram number 27 + FFV1_0( w_fp[17], w_fp[4], w_fp[11], COUPs[1], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[8] -= 1. / 2. * amp_sv[0]; + jamp_sv[10] += 1. / 6. * amp_sv[0]; + + // *** DIAGRAM 28 OF 72 *** + + // Wavefunction(s) for diagram number 28 + // (none) + + // Amplitude(s) for diagram number 28 + VVV5_0( w_fp[1], w_fp[11], w_fp[9], COUPs[0], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[4] += 1. / 2. * cxtype( 0, 1 ) * amp_sv[0]; + jamp_sv[8] -= 1. / 2. * cxtype( 0, 1 ) * amp_sv[0]; + + // *** DIAGRAM 29 OF 72 *** + + // Wavefunction(s) for diagram number 29 + // (none) + + // Amplitude(s) for diagram number 29 + FFV1_0( w_fp[17], w_fp[2], w_fp[10], COUPs[1], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[8] -= 1. / 6. * amp_sv[0]; + jamp_sv[10] += 1. / 2. * amp_sv[0]; + + // *** DIAGRAM 30 OF 72 *** + + // Wavefunction(s) for diagram number 30 + // (none) + + // Amplitude(s) for diagram number 30 + VVV5_0( w_fp[1], w_fp[10], w_fp[12], COUPs[0], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[6] -= 1. / 2. * cxtype( 0, 1 ) * amp_sv[0]; + jamp_sv[10] += 1. / 2. * cxtype( 0, 1 ) * amp_sv[0]; + + // *** DIAGRAM 31 OF 72 *** + + // Wavefunction(s) for diagram number 31 + FFV1_1( w_fp[4], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[12] ); + FFV1P0_3( w_fp[3], w_fp[12], COUPs[1], 0., 0., w_fp[17] ); + + // Amplitude(s) for diagram number 31 + FFV1_0( w_fp[5], w_fp[16], w_fp[17], COUPs[1], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[6] += 1. / 6. * amp_sv[0]; + jamp_sv[7] -= 1. / 2. * amp_sv[0]; + + // *** DIAGRAM 32 OF 72 *** + + // Wavefunction(s) for diagram number 32 + FFV1P0_3( w_fp[5], w_fp[12], COUPs[1], 0., 0., w_fp[9] ); + + // Amplitude(s) for diagram number 32 + FFV1_0( w_fp[3], w_fp[16], w_fp[9], COUPs[1], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[6] += 1. / 2. * amp_sv[0]; + jamp_sv[7] -= 1. / 6. * amp_sv[0]; + + // *** DIAGRAM 33 OF 72 *** + + // Wavefunction(s) for diagram number 33 + // (none) + + // Amplitude(s) for diagram number 33 + FFV1_0( w_fp[13], w_fp[2], w_fp[9], COUPs[1], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[2] += 1. / 2. * amp_sv[0]; + jamp_sv[7] -= 1. / 6. 
* amp_sv[0]; + + // *** DIAGRAM 34 OF 72 *** + + // Wavefunction(s) for diagram number 34 + // (none) + + // Amplitude(s) for diagram number 34 + FFV1_0( w_fp[13], w_fp[12], w_fp[11], COUPs[1], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[2] += 1. / 6. * amp_sv[0]; + jamp_sv[7] -= 1. / 2. * amp_sv[0]; + + // *** DIAGRAM 35 OF 72 *** + + // Wavefunction(s) for diagram number 35 + // (none) + + // Amplitude(s) for diagram number 35 + FFV1_0( w_fp[15], w_fp[2], w_fp[17], COUPs[1], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[3] -= 1. / 2. * amp_sv[0]; + jamp_sv[6] += 1. / 6. * amp_sv[0]; + + // *** DIAGRAM 36 OF 72 *** + + // Wavefunction(s) for diagram number 36 + // (none) + + // Amplitude(s) for diagram number 36 + FFV1_0( w_fp[15], w_fp[12], w_fp[7], COUPs[1], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[3] -= 1. / 6. * amp_sv[0]; + jamp_sv[6] += 1. / 2. * amp_sv[0]; + + // *** DIAGRAM 37 OF 72 *** + + // Wavefunction(s) for diagram number 37 + FFV1_1( w_fp[12], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[14] ); + + // Amplitude(s) for diagram number 37 + FFV1_0( w_fp[5], w_fp[14], w_fp[7], COUPs[1], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[2] += 1. / 2. * amp_sv[0]; + jamp_sv[3] -= 1. / 6. * amp_sv[0]; + + // *** DIAGRAM 38 OF 72 *** + + // Wavefunction(s) for diagram number 38 + // (none) + + // Amplitude(s) for diagram number 38 + VVV5_0( w_fp[1], w_fp[7], w_fp[9], COUPs[0], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[2] -= 1. / 2. * cxtype( 0, 1 ) * amp_sv[0]; + jamp_sv[6] += 1. / 2. * cxtype( 0, 1 ) * amp_sv[0]; + + // *** DIAGRAM 39 OF 72 *** + + // Wavefunction(s) for diagram number 39 + // (none) + + // Amplitude(s) for diagram number 39 + FFV1_0( w_fp[3], w_fp[14], w_fp[11], COUPs[1], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[2] += 1. / 6. * amp_sv[0]; + jamp_sv[3] -= 1. / 2. * amp_sv[0]; + + // *** DIAGRAM 40 OF 72 *** + + // Wavefunction(s) for diagram number 40 + // (none) + + // Amplitude(s) for diagram number 40 + VVV5_0( w_fp[1], w_fp[11], w_fp[17], COUPs[0], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[3] += 1. / 2. * cxtype( 0, 1 ) * amp_sv[0]; + jamp_sv[7] -= 1. / 2. 
* cxtype( 0, 1 ) * amp_sv[0]; + + // *** DIAGRAM 41 OF 72 *** + + // Wavefunction(s) for diagram number 41 + FFV1_2( w_fp[5], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[17] ); + FFV1P0_3( w_fp[17], w_fp[4], COUPs[1], 0., 0., w_fp[14] ); + + // Amplitude(s) for diagram number 41 + FFV1_0( w_fp[3], w_fp[16], w_fp[14], COUPs[1], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[7] -= 1. / 6. * amp_sv[0]; + jamp_sv[9] += 1. / 2. * amp_sv[0]; + + // *** DIAGRAM 42 OF 72 *** + + // Wavefunction(s) for diagram number 42 + // (none) + + // Amplitude(s) for diagram number 42 + FFV1_0( w_fp[17], w_fp[16], w_fp[8], COUPs[1], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[7] -= 1. / 2. * amp_sv[0]; + jamp_sv[9] += 1. / 6. * amp_sv[0]; + + // *** DIAGRAM 43 OF 72 *** + + // Wavefunction(s) for diagram number 43 + FFV1P0_3( w_fp[17], w_fp[2], COUPs[1], 0., 0., w_fp[9] ); + + // Amplitude(s) for diagram number 43 + FFV1_0( w_fp[13], w_fp[4], w_fp[9], COUPs[1], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[5] += 1. / 6. * amp_sv[0]; + jamp_sv[7] -= 1. / 2. * amp_sv[0]; + + // *** DIAGRAM 44 OF 72 *** + + // Wavefunction(s) for diagram number 44 + // (none) + + // Amplitude(s) for diagram number 44 + FFV1_0( w_fp[13], w_fp[2], w_fp[14], COUPs[1], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[5] += 1. / 2. * amp_sv[0]; + jamp_sv[7] -= 1. / 6. * amp_sv[0]; + + // *** DIAGRAM 45 OF 72 *** + + // Wavefunction(s) for diagram number 45 + // (none) + + // Amplitude(s) for diagram number 45 + FFV1_0( w_fp[3], w_fp[6], w_fp[9], COUPs[1], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[5] += 1. / 6. * amp_sv[0]; + jamp_sv[11] -= 1. / 2. * amp_sv[0]; + + // *** DIAGRAM 46 OF 72 *** + + // Wavefunction(s) for diagram number 46 + // (none) + + // Amplitude(s) for diagram number 46 + FFV1_0( w_fp[17], w_fp[6], w_fp[7], COUPs[1], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[5] += 1. / 2. * amp_sv[0]; + jamp_sv[11] -= 1. / 6. * amp_sv[0]; + + // *** DIAGRAM 47 OF 72 *** + + // Wavefunction(s) for diagram number 47 + FFV1_2( w_fp[17], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[12] ); + + // Amplitude(s) for diagram number 47 + FFV1_0( w_fp[12], w_fp[4], w_fp[7], COUPs[1], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[9] += 1. / 2. * amp_sv[0]; + jamp_sv[11] -= 1. / 6. * amp_sv[0]; + + // *** DIAGRAM 48 OF 72 *** + + // Wavefunction(s) for diagram number 48 + // (none) + + // Amplitude(s) for diagram number 48 + VVV5_0( w_fp[1], w_fp[7], w_fp[14], COUPs[0], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[5] -= 1. 
/ 2. * cxtype( 0, 1 ) * amp_sv[0]; + jamp_sv[9] += 1. / 2. * cxtype( 0, 1 ) * amp_sv[0]; + + // *** DIAGRAM 49 OF 72 *** + + // Wavefunction(s) for diagram number 49 + // (none) + + // Amplitude(s) for diagram number 49 + FFV1_0( w_fp[12], w_fp[2], w_fp[8], COUPs[1], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[9] += 1. / 6. * amp_sv[0]; + jamp_sv[11] -= 1. / 2. * amp_sv[0]; + + // *** DIAGRAM 50 OF 72 *** + + // Wavefunction(s) for diagram number 50 + // (none) + + // Amplitude(s) for diagram number 50 + VVV5_0( w_fp[1], w_fp[8], w_fp[9], COUPs[0], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[7] += 1. / 2. * cxtype( 0, 1 ) * amp_sv[0]; + jamp_sv[11] -= 1. / 2. * cxtype( 0, 1 ) * amp_sv[0]; + + // *** DIAGRAM 51 OF 72 *** + + // Wavefunction(s) for diagram number 51 + FFV1_1( w_fp[16], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); + + // Amplitude(s) for diagram number 51 + FFV1_0( w_fp[5], w_fp[9], w_fp[8], COUPs[1], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[8] -= 1. / 2. * amp_sv[0]; + jamp_sv[9] += 1. / 6. * amp_sv[0]; + + // *** DIAGRAM 52 OF 72 *** + + // Wavefunction(s) for diagram number 52 + VVV5P0_1( w_fp[0], w_fp[8], COUPs[0], 0., 0., w_fp[12] ); + + // Amplitude(s) for diagram number 52 + FFV1_0( w_fp[5], w_fp[16], w_fp[12], COUPs[1], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[7] -= 1. / 2. * cxtype( 0, 1 ) * amp_sv[0]; + jamp_sv[8] += 1. / 2. * cxtype( 0, 1 ) * amp_sv[0]; + + // *** DIAGRAM 53 OF 72 *** + + // Wavefunction(s) for diagram number 53 + // (none) + + // Amplitude(s) for diagram number 53 + FFV1_0( w_fp[3], w_fp[9], w_fp[10], COUPs[1], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[8] -= 1. / 6. * amp_sv[0]; + jamp_sv[9] += 1. / 2. * amp_sv[0]; + + // *** DIAGRAM 54 OF 72 *** + + // Wavefunction(s) for diagram number 54 + VVV5P0_1( w_fp[0], w_fp[10], COUPs[0], 0., 0., w_fp[9] ); + + // Amplitude(s) for diagram number 54 + FFV1_0( w_fp[3], w_fp[16], w_fp[9], COUPs[1], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[6] += 1. / 2. * cxtype( 0, 1 ) * amp_sv[0]; + jamp_sv[9] -= 1. / 2. * cxtype( 0, 1 ) * amp_sv[0]; + + // *** DIAGRAM 55 OF 72 *** + + // Wavefunction(s) for diagram number 55 + FFV1_2( w_fp[13], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[16] ); + + // Amplitude(s) for diagram number 55 + FFV1_0( w_fp[16], w_fp[4], w_fp[11], COUPs[1], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[0] -= 1. / 2. * amp_sv[0]; + jamp_sv[2] += 1. / 6. 
* amp_sv[0]; + + // *** DIAGRAM 56 OF 72 *** + + // Wavefunction(s) for diagram number 56 + VVV5P0_1( w_fp[0], w_fp[11], COUPs[0], 0., 0., w_fp[14] ); + + // Amplitude(s) for diagram number 56 + FFV1_0( w_fp[13], w_fp[4], w_fp[14], COUPs[1], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[0] -= 1. / 2. * cxtype( 0, 1 ) * amp_sv[0]; + jamp_sv[7] += 1. / 2. * cxtype( 0, 1 ) * amp_sv[0]; + + // *** DIAGRAM 57 OF 72 *** + + // Wavefunction(s) for diagram number 57 + // (none) + + // Amplitude(s) for diagram number 57 + FFV1_0( w_fp[16], w_fp[2], w_fp[10], COUPs[1], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[0] -= 1. / 6. * amp_sv[0]; + jamp_sv[2] += 1. / 2. * amp_sv[0]; + + // *** DIAGRAM 58 OF 72 *** + + // Wavefunction(s) for diagram number 58 + // (none) + + // Amplitude(s) for diagram number 58 + FFV1_0( w_fp[13], w_fp[2], w_fp[9], COUPs[1], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[2] += 1. / 2. * cxtype( 0, 1 ) * amp_sv[0]; + jamp_sv[5] -= 1. / 2. * cxtype( 0, 1 ) * amp_sv[0]; + + // *** DIAGRAM 59 OF 72 *** + + // Wavefunction(s) for diagram number 59 + FFV1_1( w_fp[6], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[13] ); + + // Amplitude(s) for diagram number 59 + FFV1_0( w_fp[5], w_fp[13], w_fp[7], COUPs[1], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[10] += 1. / 2. * amp_sv[0]; + jamp_sv[11] -= 1. / 6. * amp_sv[0]; + + // *** DIAGRAM 60 OF 72 *** + + // Wavefunction(s) for diagram number 60 + VVV5P0_1( w_fp[0], w_fp[7], COUPs[0], 0., 0., w_fp[16] ); + + // Amplitude(s) for diagram number 60 + FFV1_0( w_fp[5], w_fp[6], w_fp[16], COUPs[1], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[5] += 1. / 2. * cxtype( 0, 1 ) * amp_sv[0]; + jamp_sv[10] -= 1. / 2. * cxtype( 0, 1 ) * amp_sv[0]; + + // *** DIAGRAM 61 OF 72 *** + + // Wavefunction(s) for diagram number 61 + // (none) + + // Amplitude(s) for diagram number 61 + FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[10] += 1. / 6. * amp_sv[0]; + jamp_sv[11] -= 1. / 2. * amp_sv[0]; + + // *** DIAGRAM 62 OF 72 *** + + // Wavefunction(s) for diagram number 62 + // (none) + + // Amplitude(s) for diagram number 62 + FFV1_0( w_fp[3], w_fp[6], w_fp[14], COUPs[1], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[4] -= 1. / 2. * cxtype( 0, 1 ) * amp_sv[0]; + jamp_sv[11] += 1. / 2. 
* cxtype( 0, 1 ) * amp_sv[0]; + + // *** DIAGRAM 63 OF 72 *** + + // Wavefunction(s) for diagram number 63 + FFV1_2( w_fp[15], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[6] ); + + // Amplitude(s) for diagram number 63 + FFV1_0( w_fp[6], w_fp[4], w_fp[7], COUPs[1], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[1] += 1. / 2. * amp_sv[0]; + jamp_sv[3] -= 1. / 6. * amp_sv[0]; + + // *** DIAGRAM 64 OF 72 *** + + // Wavefunction(s) for diagram number 64 + // (none) + + // Amplitude(s) for diagram number 64 + FFV1_0( w_fp[15], w_fp[4], w_fp[16], COUPs[1], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[1] += 1. / 2. * cxtype( 0, 1 ) * amp_sv[0]; + jamp_sv[6] -= 1. / 2. * cxtype( 0, 1 ) * amp_sv[0]; + + // *** DIAGRAM 65 OF 72 *** + + // Wavefunction(s) for diagram number 65 + // (none) + + // Amplitude(s) for diagram number 65 + FFV1_0( w_fp[6], w_fp[2], w_fp[8], COUPs[1], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[1] += 1. / 6. * amp_sv[0]; + jamp_sv[3] -= 1. / 2. * amp_sv[0]; + + // *** DIAGRAM 66 OF 72 *** + + // Wavefunction(s) for diagram number 66 + // (none) + + // Amplitude(s) for diagram number 66 + FFV1_0( w_fp[15], w_fp[2], w_fp[12], COUPs[1], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[3] -= 1. / 2. * cxtype( 0, 1 ) * amp_sv[0]; + jamp_sv[4] += 1. / 2. * cxtype( 0, 1 ) * amp_sv[0]; + + // *** DIAGRAM 67 OF 72 *** + + // Wavefunction(s) for diagram number 67 + // (none) + + // Amplitude(s) for diagram number 67 + VVVV1_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[1] += 1. / 2. * amp_sv[0]; + jamp_sv[2] -= 1. / 2. * amp_sv[0]; + jamp_sv[9] -= 1. / 2. * amp_sv[0]; + jamp_sv[10] += 1. / 2. * amp_sv[0]; + VVVV9_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[1] += 1. / 2. * amp_sv[0]; + jamp_sv[5] -= 1. / 2. * amp_sv[0]; + jamp_sv[6] -= 1. / 2. * amp_sv[0]; + jamp_sv[10] += 1. / 2. * amp_sv[0]; + VVVV10_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[2] += 1. / 2. * amp_sv[0]; + jamp_sv[5] -= 1. / 2. * amp_sv[0]; + jamp_sv[6] -= 1. / 2. * amp_sv[0]; + jamp_sv[9] += 1. / 2. * amp_sv[0]; + + // *** DIAGRAM 68 OF 72 *** + + // Wavefunction(s) for diagram number 68 + // (none) + + // Amplitude(s) for diagram number 68 + VVV5_0( w_fp[1], w_fp[10], w_fp[16], COUPs[0], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[1] += 1. / 2. * amp_sv[0]; + jamp_sv[5] -= 1. / 2. * amp_sv[0]; + jamp_sv[6] -= 1. / 2. 
* amp_sv[0]; + jamp_sv[10] += 1. / 2. * amp_sv[0]; + + // *** DIAGRAM 69 OF 72 *** + + // Wavefunction(s) for diagram number 69 + // (none) + + // Amplitude(s) for diagram number 69 + VVV5_0( w_fp[1], w_fp[7], w_fp[9], COUPs[0], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[2] += 1. / 2. * amp_sv[0]; + jamp_sv[5] -= 1. / 2. * amp_sv[0]; + jamp_sv[6] -= 1. / 2. * amp_sv[0]; + jamp_sv[9] += 1. / 2. * amp_sv[0]; + + // *** DIAGRAM 70 OF 72 *** + + // Wavefunction(s) for diagram number 70 + // (none) + + // Amplitude(s) for diagram number 70 + VVVV1_0( w_fp[0], w_fp[1], w_fp[11], w_fp[8], COUPs[2], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[0] -= 1. / 2. * amp_sv[0]; + jamp_sv[3] += 1. / 2. * amp_sv[0]; + jamp_sv[8] += 1. / 2. * amp_sv[0]; + jamp_sv[11] -= 1. / 2. * amp_sv[0]; + VVVV9_0( w_fp[0], w_fp[1], w_fp[11], w_fp[8], COUPs[2], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[0] -= 1. / 2. * amp_sv[0]; + jamp_sv[4] += 1. / 2. * amp_sv[0]; + jamp_sv[7] += 1. / 2. * amp_sv[0]; + jamp_sv[11] -= 1. / 2. * amp_sv[0]; + VVVV10_0( w_fp[0], w_fp[1], w_fp[11], w_fp[8], COUPs[2], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[3] -= 1. / 2. * amp_sv[0]; + jamp_sv[4] += 1. / 2. * amp_sv[0]; + jamp_sv[7] += 1. / 2. * amp_sv[0]; + jamp_sv[8] -= 1. / 2. * amp_sv[0]; + + // *** DIAGRAM 71 OF 72 *** + + // Wavefunction(s) for diagram number 71 + // (none) + + // Amplitude(s) for diagram number 71 + VVV5_0( w_fp[1], w_fp[8], w_fp[14], COUPs[0], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[0] -= 1. / 2. * amp_sv[0]; + jamp_sv[4] += 1. / 2. * amp_sv[0]; + jamp_sv[7] += 1. / 2. * amp_sv[0]; + jamp_sv[11] -= 1. / 2. * amp_sv[0]; + + // *** DIAGRAM 72 OF 72 *** + + // Wavefunction(s) for diagram number 72 + // (none) + + // Amplitude(s) for diagram number 72 + VVV5_0( w_fp[1], w_fp[11], w_fp[12], COUPs[0], &_fp[0] ); +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) +#endif + jamp_sv[3] -= 1. / 2. * amp_sv[0]; + jamp_sv[4] += 1. / 2. * amp_sv[0]; + jamp_sv[7] += 1. / 2. * amp_sv[0]; + jamp_sv[8] -= 1. / 2. * amp_sv[0]; + + // *** COLOR CHOICE BELOW *** + // Store the leading color flows for choice of color + if( jamp2_sv ) // disable color choice if nullptr + for( int icolC = 0; icolC < ncolor; icolC++ ) + jamp2_sv[ncolor * iParity + icolC] += cxabs2( jamp_sv[icolC] ); + + // *** COLOR MATRIX BELOW *** + // (This method used to be called CPPProcess::matrix_1_gg_ttxttx()?) 
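The jamp2_sv weights accumulated just above (one |jamp|^2 per colour flow, summed over helicities) are meant to let the caller draw one leading-colour flow with probability proportional to its weight. The sketch below only illustrates such a cumulative-sum draw under that assumption; pickColorFlow and its arguments are hypothetical names, not part of the generated code, where the actual colour choice is made by the caller of calculate_wavefunctions.

#include <cstdio>

// Draw a colour flow index with probability proportional to jamp2[icol] (illustrative only)
int pickColorFlow( const double* jamp2, int ncol, double rndcol ) // rndcol: uniform random number in [0,1)
{
  double total = 0.;
  for( int icol = 0; icol < ncol; icol++ ) total += jamp2[icol];
  double cumulative = 0.;
  for( int icol = 0; icol < ncol; icol++ )
  {
    cumulative += jamp2[icol] / total;
    if( rndcol < cumulative ) return icol;
  }
  return ncol - 1; // guard against floating-point rounding
}

int main()
{
  const double jamp2[3] = { 0.1, 0.7, 0.2 }; // hypothetical weights for three colour flows
  std::printf( "chosen flow = %d\n", pickColorFlow( jamp2, 3, 0.5 ) ); // prints 1
  return 0;
}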
+ + // The color denominators (initialize all array elements, with ncolor=12) + // [NB do keep 'static' for these constexpr arrays, see issue #283] + static constexpr fptype2 denom[ncolor] = { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 }; // 1-D array[12] + + // The color matrix (initialize all array elements, with ncolor=12) + // [NB do keep 'static' for these constexpr arrays, see issue #283] + static constexpr fptype2 cf[ncolor][ncolor] = { + { 48, 16, 16, 6, 0, 16, -2, 0, -6, -2, -2, 6 }, + { 16, 48, 6, 16, 16, 0, 0, -2, -2, -6, 6, -2 }, + { 16, 6, 48, 16, -2, 0, 0, 16, -2, 6, -6, -2 }, + { 6, 16, 16, 48, 0, -2, 16, 0, 6, -2, -2, -6 }, + { 0, 16, -2, 0, 48, 16, 16, 6, 0, -2, 16, 0 }, + { 16, 0, 0, -2, 16, 48, 6, 16, -2, 0, 0, 16 }, + { -2, 0, 0, 16, 16, 6, 48, 16, 16, 0, 0, -2 }, + { 0, -2, 16, 0, 6, 16, 16, 48, 0, 16, -2, 0 }, + { -6, -2, -2, 6, 0, -2, 16, 0, 48, 16, 16, 6 }, + { -2, -6, 6, -2, -2, 0, 0, 16, 16, 48, 6, 16 }, + { -2, 6, -6, -2, 16, 0, 0, -2, 16, 6, 48, 16 }, + { 6, -2, -2, -6, 0, 16, -2, 0, 6, 16, 16, 48 } }; // 2-D array[12][12] + +#ifndef __CUDACC__ + // Pre-compute a constexpr triangular color matrix properly normalized #475 + struct TriangularNormalizedColorMatrix + { + // See https://stackoverflow.com/a/34465458 + __host__ __device__ constexpr TriangularNormalizedColorMatrix() + : value() + { + for( int icol = 0; icol < ncolor; icol++ ) + { + // Diagonal terms + value[icol][icol] = cf[icol][icol] / denom[icol]; + // Off-diagonal terms + for( int jcol = icol + 1; jcol < ncolor; jcol++ ) + value[icol][jcol] = 2 * cf[icol][jcol] / denom[icol]; + } + } + fptype2 value[ncolor][ncolor]; + }; + static constexpr auto cf2 = TriangularNormalizedColorMatrix(); +#endif + +#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT + if( iParity == 0 ) // NB: first page is 0! skip even pages, compute on odd pages + { + // Mixed fptypes: delay color algebra and ME updates to next (odd) ipagV + for( int icol = 0; icol < ncolor; icol++ ) + jamp_sv_previous[icol] = jamp_sv[icol]; + MEs_previous = MEs; + continue; // go to next iParity in the loop: skip color algebra and ME update on odd pages + } + fptype_sv deltaMEs_previous = { 0 }; +#endif + + // Sum and square the color flows to get the matrix element + // (compute |M|^2 by squaring |M|, taking into account colours) + // Sum and square the color flows to get the matrix element + // (compute |M|^2 by squaring |M|, taking into account colours) + fptype_sv deltaMEs = { 0 }; // all zeros https://en.cppreference.com/w/c/language/array_initialization#Notes + + // Use the property that M is a real matrix (see #475): + // we can rewrite the quadratic form (A-iB)(M)(A+iB) as AMA - iBMA + iBMA + BMB = AMA + BMB + // In addition, on C++ use the property that M is symmetric (see #475), + // and also use constexpr to compute "2*" and "/denom[icol]" once and for all at compile time: + // we gain (not a factor 2...) in speed here as we only loop over the up diagonal part of the matrix. + // Strangely, CUDA is slower instead, so keep the old implementation for the moment. 
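The rewrite motivated in the comments above (cf is real and symmetric, so the quadratic form reduces to AMA + BMB, and the "2*" and "/denom[icol]" factors can be folded into a pre-normalized upper-triangular matrix) can be checked on a toy example. The sketch below uses a hypothetical 2-colour matrix with equal denominators, like the arrays above where denom is 3 for every colour, and verifies numerically that the triangular form reproduces the naive double sum; it is an illustration, not part of the generated kernel.

#include <complex>
#include <cstdio>

int main()
{
  constexpr int ncol = 2;
  const double cf[ncol][ncol] = { { 48, 16 }, { 16, 48 } }; // hypothetical symmetric colour matrix
  const double denom[ncol] = { 3, 3 };                      // equal denominators, as in the arrays above
  const std::complex<double> jamp[ncol] = { { 1., 2. }, { -0.5, 0.25 } }; // hypothetical colour amplitudes

  // Naive form: sum_ij conj(J_i) * cf[i][j] * J_j / denom[i]
  double me2Naive = 0.;
  for( int i = 0; i < ncol; i++ )
    for( int j = 0; j < ncol; j++ )
      me2Naive += ( std::conj( jamp[i] ) * cf[i][j] * jamp[j] ).real() / denom[i];

  // Triangular form: A*M*A + B*M*B with A = Re(J), B = Im(J) and M pre-normalized
  // (diagonal cf[i][i]/denom[i], off-diagonal 2*cf[i][j]/denom[i] for j > i)
  double me2Tri = 0.;
  for( int i = 0; i < ncol; i++ )
  {
    double ztempR = ( cf[i][i] / denom[i] ) * jamp[i].real();
    double ztempI = ( cf[i][i] / denom[i] ) * jamp[i].imag();
    for( int j = i + 1; j < ncol; j++ )
    {
      ztempR += ( 2 * cf[i][j] / denom[i] ) * jamp[j].real();
      ztempI += ( 2 * cf[i][j] / denom[i] ) * jamp[j].imag();
    }
    me2Tri += jamp[i].real() * ztempR + jamp[i].imag() * ztempI;
  }
  std::printf( "naive = %f, triangular = %f\n", me2Naive, me2Tri ); // both print 82.333333
  return 0;
}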
+#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT + fptype2_sv jampR_sv[ncolor] = { 0 }; + fptype2_sv jampI_sv[ncolor] = { 0 }; + for( int icol = 0; icol < ncolor; icol++ ) + { + jampR_sv[icol] = fpvmerge( cxreal( jamp_sv_previous[icol] ), cxreal( jamp_sv[icol] ) ); + jampI_sv[icol] = fpvmerge( cximag( jamp_sv_previous[icol] ), cximag( jamp_sv[icol] ) ); + } +#endif + for( int icol = 0; icol < ncolor; icol++ ) + { +#ifndef __CUDACC__ + // === C++ START === + // Diagonal terms +#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT + fptype2_sv& jampRi_sv = jampR_sv[icol]; + fptype2_sv& jampIi_sv = jampI_sv[icol]; +#else + fptype2_sv jampRi_sv = (fptype2_sv)( cxreal( jamp_sv[icol] ) ); + fptype2_sv jampIi_sv = (fptype2_sv)( cximag( jamp_sv[icol] ) ); +#endif + fptype2_sv ztempR_sv = cf2.value[icol][icol] * jampRi_sv; + fptype2_sv ztempI_sv = cf2.value[icol][icol] * jampIi_sv; + // Off-diagonal terms + for( int jcol = icol + 1; jcol < ncolor; jcol++ ) + { +#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT + fptype2_sv& jampRj_sv = jampR_sv[jcol]; + fptype2_sv& jampIj_sv = jampI_sv[jcol]; +#else + fptype2_sv jampRj_sv = (fptype2_sv)( cxreal( jamp_sv[jcol] ) ); + fptype2_sv jampIj_sv = (fptype2_sv)( cximag( jamp_sv[jcol] ) ); +#endif + ztempR_sv += cf2.value[icol][jcol] * jampRj_sv; + ztempI_sv += cf2.value[icol][jcol] * jampIj_sv; + } + fptype2_sv deltaMEs2 = ( jampRi_sv * ztempR_sv + jampIi_sv * ztempI_sv ); +#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT + deltaMEs_previous += fpvsplit0( deltaMEs2 ); + deltaMEs += fpvsplit1( deltaMEs2 ); +#else + deltaMEs += deltaMEs2; +#endif + // === C++ END === +#else + // === CUDA START === + fptype2_sv ztempR_sv = { 0 }; + fptype2_sv ztempI_sv = { 0 }; + for( int jcol = 0; jcol < ncolor; jcol++ ) + { + fptype2_sv jampRj_sv = cxreal( jamp_sv[jcol] ); + fptype2_sv jampIj_sv = cximag( jamp_sv[jcol] ); + ztempR_sv += cf[icol][jcol] * jampRj_sv; + ztempI_sv += cf[icol][jcol] * jampIj_sv; + } + deltaMEs += ( ztempR_sv * cxreal( jamp_sv[icol] ) + ztempI_sv * cximag( jamp_sv[icol] ) ) / denom[icol]; + // === CUDA END === +#endif + } + + // *** STORE THE RESULTS *** + + // NB: calculate_wavefunctions ADDS |M|^2 for a given ihel to the running sum of |M|^2 over helicities for the given event(s) + fptype_sv& MEs_sv = E_ACCESS::kernelAccess( MEs ); + MEs_sv += deltaMEs; // fix #435 +#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT + fptype_sv& MEs_sv_previous = E_ACCESS::kernelAccess( MEs_previous ); + MEs_sv_previous += deltaMEs_previous; +#endif + /* +#ifdef __CUDACC__ + if ( cNGoodHel > 0 ) printf( "calculate_wavefunctions: ievt=%6d ihel=%2d me_running=%f\n", blockDim.x * blockIdx.x + threadIdx.x, ihel, MEs_sv ); +#else +#ifdef MGONGPU_CPPSIMD + if( cNGoodHel > 0 ) + for( int ieppV = 0; ieppV < neppV; ieppV++ ) + printf( "calculate_wavefunctions: ievt=%6d ihel=%2d me_running=%f\n", ievt0 + ieppV, ihel, MEs_sv[ieppV] ); +#else + if ( cNGoodHel > 0 ) printf( "calculate_wavefunctions: ievt=%6d ihel=%2d me_running=%f\n", ievt0, ihel, MEs_sv ); +#endif +#endif + */ + } // END LOOP ON IPARITY + mgDebug( 1, __FUNCTION__ ); + return; + } + + //-------------------------------------------------------------------------- + + CPPProcess::CPPProcess( bool verbose, + bool debug ) + : m_verbose( verbose ) + , m_debug( debug ) +#ifndef 
MGONGPU_HARDCODE_PARAM + , m_pars( 0 ) +#endif + , m_masses() + { + // Helicities for the process [NB do keep 'static' for this constexpr array, see issue #283] + // *** NB There is no automatic check yet that these are in the same order as Fortran! #569 *** + static constexpr short tHel[ncomb][mgOnGpu::npar] = { + { -1, -1, -1, 1, -1, 1 }, + { -1, -1, -1, 1, -1, -1 }, + { -1, -1, -1, 1, 1, 1 }, + { -1, -1, -1, 1, 1, -1 }, + { -1, -1, -1, -1, -1, 1 }, + { -1, -1, -1, -1, -1, -1 }, + { -1, -1, -1, -1, 1, 1 }, + { -1, -1, -1, -1, 1, -1 }, + { -1, -1, 1, 1, -1, 1 }, + { -1, -1, 1, 1, -1, -1 }, + { -1, -1, 1, 1, 1, 1 }, + { -1, -1, 1, 1, 1, -1 }, + { -1, -1, 1, -1, -1, 1 }, + { -1, -1, 1, -1, -1, -1 }, + { -1, -1, 1, -1, 1, 1 }, + { -1, -1, 1, -1, 1, -1 }, + { -1, 1, -1, 1, -1, 1 }, + { -1, 1, -1, 1, -1, -1 }, + { -1, 1, -1, 1, 1, 1 }, + { -1, 1, -1, 1, 1, -1 }, + { -1, 1, -1, -1, -1, 1 }, + { -1, 1, -1, -1, -1, -1 }, + { -1, 1, -1, -1, 1, 1 }, + { -1, 1, -1, -1, 1, -1 }, + { -1, 1, 1, 1, -1, 1 }, + { -1, 1, 1, 1, -1, -1 }, + { -1, 1, 1, 1, 1, 1 }, + { -1, 1, 1, 1, 1, -1 }, + { -1, 1, 1, -1, -1, 1 }, + { -1, 1, 1, -1, -1, -1 }, + { -1, 1, 1, -1, 1, 1 }, + { -1, 1, 1, -1, 1, -1 }, + { 1, -1, -1, 1, -1, 1 }, + { 1, -1, -1, 1, -1, -1 }, + { 1, -1, -1, 1, 1, 1 }, + { 1, -1, -1, 1, 1, -1 }, + { 1, -1, -1, -1, -1, 1 }, + { 1, -1, -1, -1, -1, -1 }, + { 1, -1, -1, -1, 1, 1 }, + { 1, -1, -1, -1, 1, -1 }, + { 1, -1, 1, 1, -1, 1 }, + { 1, -1, 1, 1, -1, -1 }, + { 1, -1, 1, 1, 1, 1 }, + { 1, -1, 1, 1, 1, -1 }, + { 1, -1, 1, -1, -1, 1 }, + { 1, -1, 1, -1, -1, -1 }, + { 1, -1, 1, -1, 1, 1 }, + { 1, -1, 1, -1, 1, -1 }, + { 1, 1, -1, 1, -1, 1 }, + { 1, 1, -1, 1, -1, -1 }, + { 1, 1, -1, 1, 1, 1 }, + { 1, 1, -1, 1, 1, -1 }, + { 1, 1, -1, -1, -1, 1 }, + { 1, 1, -1, -1, -1, -1 }, + { 1, 1, -1, -1, 1, 1 }, + { 1, 1, -1, -1, 1, -1 }, + { 1, 1, 1, 1, -1, 1 }, + { 1, 1, 1, 1, -1, -1 }, + { 1, 1, 1, 1, 1, 1 }, + { 1, 1, 1, 1, 1, -1 }, + { 1, 1, 1, -1, -1, 1 }, + { 1, 1, 1, -1, -1, -1 }, + { 1, 1, 1, -1, 1, 1 }, + { 1, 1, 1, -1, 1, -1 } }; +#ifdef __CUDACC__ + checkCuda( cudaMemcpyToSymbol( cHel, tHel, ncomb * mgOnGpu::npar * sizeof( short ) ) ); +#else + memcpy( cHel, tHel, ncomb * mgOnGpu::npar * sizeof( short ) ); +#endif + } + + //-------------------------------------------------------------------------- + + CPPProcess::~CPPProcess() {} + + //-------------------------------------------------------------------------- + +#ifndef MGONGPU_HARDCODE_PARAM + // Initialize process (with parameters read from user cards) + void + CPPProcess::initProc( const std::string& param_card_name ) + { + // Instantiate the model class and set parameters that stay fixed during run + m_pars = Parameters_SMEFTsim_topU3l_MwScheme_UFO::getInstance(); + SLHAReader slha( param_card_name, m_verbose ); + m_pars->setIndependentParameters( slha ); + m_pars->setIndependentCouplings(); + //m_pars->setDependentParameters(); // now computed event-by-event (running alphas #373) + //m_pars->setDependentCouplings(); // now computed event-by-event (running alphas #373) + if( m_verbose ) + { + m_pars->printIndependentParameters(); + m_pars->printIndependentCouplings(); + //m_pars->printDependentParameters(); // now computed event-by-event (running alphas #373) + //m_pars->printDependentCouplings(); // now computed event-by-event (running alphas #373) + } + // Set external particle masses for this matrix element + m_masses.push_back( m_pars->ZERO ); + m_masses.push_back( m_pars->ZERO ); + m_masses.push_back( m_pars->mdl_MT ); + m_masses.push_back( 
m_pars->mdl_MT ); + m_masses.push_back( m_pars->mdl_MT ); + m_masses.push_back( m_pars->mdl_MT ); + // Read physics parameters like masses and couplings from user configuration files (static: initialize once) + // Then copy them to CUDA constant memory (issue #39) or its C++ emulation in file-scope static memory + const fptype tIPD[2] = { (fptype)m_pars->mdl_MT, (fptype)m_pars->mdl_WT }; + //const cxtype tIPC[0] = { ... }; // nicoup=0 +#ifdef __CUDACC__ + checkCuda( cudaMemcpyToSymbol( cIPD, tIPD, 2 * sizeof( fptype ) ) ); + //checkCuda( cudaMemcpyToSymbol( cIPC, tIPC, 0 * sizeof( cxtype ) ) ); // nicoup=0 +#else + memcpy( cIPD, tIPD, 2 * sizeof( fptype ) ); + //memcpy( cIPC, tIPC, 0 * sizeof( cxtype ) ); // nicoup=0 +#endif + //for ( i=0; i<2; i++ ) std::cout << std::setprecision(17) << "tIPD[i] = " << tIPD[i] << std::endl; + } +#else + // Initialize process (with hardcoded parameters) + void + CPPProcess::initProc( const std::string& /*param_card_name*/ ) + { + // Use hardcoded physics parameters + if( m_verbose ) + { + Parameters_SMEFTsim_topU3l_MwScheme_UFO::printIndependentParameters(); + Parameters_SMEFTsim_topU3l_MwScheme_UFO::printIndependentCouplings(); + //Parameters_SMEFTsim_topU3l_MwScheme_UFO::printDependentParameters(); // now computed event-by-event (running alphas #373) + //Parameters_SMEFTsim_topU3l_MwScheme_UFO::printDependentCouplings(); // now computed event-by-event (running alphas #373) + } + // Set external particle masses for this matrix element + m_masses.push_back( Parameters_SMEFTsim_topU3l_MwScheme_UFO::ZERO ); + m_masses.push_back( Parameters_SMEFTsim_topU3l_MwScheme_UFO::ZERO ); + m_masses.push_back( Parameters_SMEFTsim_topU3l_MwScheme_UFO::mdl_MT ); + m_masses.push_back( Parameters_SMEFTsim_topU3l_MwScheme_UFO::mdl_MT ); + m_masses.push_back( Parameters_SMEFTsim_topU3l_MwScheme_UFO::mdl_MT ); + m_masses.push_back( Parameters_SMEFTsim_topU3l_MwScheme_UFO::mdl_MT ); + } +#endif + + //-------------------------------------------------------------------------- + + // Retrieve the compiler that was used to build this module + const std::string + CPPProcess::getCompiler() + { + std::stringstream out; + // CUDA version (NVCC) + // [Use __NVCC__ instead of __CUDACC__ here!] + // [This tests if 'nvcc' was used even to build a .cc file, even if not necessarily 'nvcc -x cu' for a .cu file] + // [Check 'nvcc --compiler-options -dM -E dummy.c | grep CUDA': see https://stackoverflow.com/a/53713712] +#ifdef __NVCC__ +#if defined __CUDACC_VER_MAJOR__ && defined __CUDACC_VER_MINOR__ && defined __CUDACC_VER_BUILD__ + out << "nvcc " << __CUDACC_VER_MAJOR__ << "." << __CUDACC_VER_MINOR__ << "." << __CUDACC_VER_BUILD__; +#else + out << "nvcc UNKNOWN"; +#endif + out << " ("; +#endif + // ICX version (either as CXX or as host compiler inside NVCC) +#if defined __INTEL_COMPILER +#error "icc is no longer supported: please use icx" +#elif defined __INTEL_LLVM_COMPILER // alternative: __INTEL_CLANG_COMPILER + out << "icx " << __INTEL_LLVM_COMPILER; +#ifdef __NVCC__ + out << ", "; +#else + out << " ("; +#endif +#endif + // CLANG version (either as CXX or as host compiler inside NVCC or inside ICX) +#if defined __clang__ +#if defined __clang_major__ && defined __clang_minor__ && defined __clang_patchlevel__ +#ifdef __APPLE__ + out << "Apple clang " << __clang_major__ << "." << __clang_minor__ << "." << __clang_patchlevel__; +#else + out << "clang " << __clang_major__ << "." << __clang_minor__ << "." 
<< __clang_patchlevel__; + // GCC toolchain version inside CLANG + std::string tchainout; + std::string tchaincmd = "readelf -p .comment $(${CXX} -print-libgcc-file-name) |& grep 'GCC: (GNU)' | grep -v Warning | sort -u | awk '{print $5}'"; + std::unique_ptr<FILE, decltype( &pclose )> tchainpipe( popen( tchaincmd.c_str(), "r" ), pclose ); + if( !tchainpipe ) throw std::runtime_error( "`readelf ...` failed?" ); + std::array<char, 128> tchainbuf; + while( fgets( tchainbuf.data(), tchainbuf.size(), tchainpipe.get() ) != nullptr ) tchainout += tchainbuf.data(); + tchainout.pop_back(); // remove trailing newline +#if defined __NVCC__ or defined __INTEL_LLVM_COMPILER + out << ", gcc " << tchainout; +#else + out << " (gcc " << tchainout << ")"; +#endif +#endif +#else + out << "clang UNKNOWN"; +#endif +#else + // GCC version (either as CXX or as host compiler inside NVCC) +#if defined __GNUC__ && defined __GNUC_MINOR__ && defined __GNUC_PATCHLEVEL__ + out << "gcc " << __GNUC__ << "." << __GNUC_MINOR__ << "." << __GNUC_PATCHLEVEL__; +#else + out << "gcc UNKNOWN"; +#endif +#endif +#if defined __NVCC__ or defined __INTEL_LLVM_COMPILER + out << ")"; +#endif + return out.str(); + } + + //-------------------------------------------------------------------------- + + __global__ void /* clang-format off */ + computeDependentCouplings( const fptype* allgs, // input: Gs[nevt] + fptype* allcouplings // output: couplings[nevt*ndcoup*2] +#ifndef __CUDACC__ + , const int nevt // input: #events (for cuda: nevt == ndim == gpublocks*gputhreads) +#endif + ) /* clang-format on */ + { +#ifdef __CUDACC__ + using namespace mg5amcGpu; + using G_ACCESS = DeviceAccessGs; + using C_ACCESS = DeviceAccessCouplings; + G2COUP<G_ACCESS, C_ACCESS>( allgs, allcouplings ); +#else + using namespace mg5amcCpu; + using G_ACCESS = HostAccessGs; + using C_ACCESS = HostAccessCouplings; + for( int ipagV = 0; ipagV < nevt / neppV; ++ipagV ) + { + const int ievt0 = ipagV * neppV; + const fptype* gs = MemoryAccessGs::ieventAccessRecordConst( allgs, ievt0 ); + fptype* couplings = MemoryAccessCouplings::ieventAccessRecord( allcouplings, ievt0 ); + G2COUP<G_ACCESS, C_ACCESS>( gs, couplings ); + } +#endif + } + + //-------------------------------------------------------------------------- + +#ifdef __CUDACC__ /* clang-format off */ + __global__ void + sigmaKin_getGoodHel( const fptype* allmomenta, // input: momenta[nevt*npar*4] + const fptype* allcouplings, // input: couplings[nevt*ndcoup*2] + fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + fptype* allNumerators, // output: multichannel numerators[nevt], running_sum_over_helicities + fptype* allDenominators, // output: multichannel denominators[nevt], running_sum_over_helicities +#endif + bool* isGoodHel ) // output: isGoodHel[ncomb] - device array (CUDA implementation) + { /* clang-format on */ + fptype allMEsLast = 0; + const int ievt = blockDim.x * blockIdx.x + threadIdx.x; // index of event (thread) in grid + allMEs[ievt] = 0; + for( int ihel = 0; ihel < ncomb; ihel++ ) + { + // NB: calculate_wavefunctions ADDS |M|^2 for a given ihel to the running sum of |M|^2 over helicities for the given event(s) + constexpr fptype_sv* jamp2_sv = nullptr; // no need for color selection during helicity filtering +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + constexpr unsigned int channelId = 0; // disable single-diagram channel enhancement + calculate_wavefunctions( ihel, allmomenta, allcouplings, allMEs, channelId, allNumerators, allDenominators, jamp2_sv ); +#else + calculate_wavefunctions( ihel, allmomenta, 
allcouplings, allMEs, jamp2_sv ); +#endif + if( allMEs[ievt] != allMEsLast ) + { + //if ( !isGoodHel[ihel] ) std::cout << "sigmaKin_getGoodHel ihel=" << ihel << " TRUE" << std::endl; + isGoodHel[ihel] = true; + } + allMEsLast = allMEs[ievt]; // running sum up to helicity ihel for event ievt + } + } +#else + void + sigmaKin_getGoodHel( const fptype* allmomenta, // input: momenta[nevt*npar*4] + const fptype* allcouplings, // input: couplings[nevt*ndcoup*2] + fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + fptype* allNumerators, // output: multichannel numerators[nevt], running_sum_over_helicities + fptype* allDenominators, // output: multichannel denominators[nevt], running_sum_over_helicities +#endif + bool* isGoodHel, // output: isGoodHel[ncomb] - host array (C++ implementation) + const int nevt ) // input: #events (for cuda: nevt == ndim == gpublocks*gputhreads) + { + //assert( (size_t)(allmomenta) % mgOnGpu::cppAlign == 0 ); // SANITY CHECK: require SIMD-friendly alignment [COMMENT OUT TO TEST MISALIGNED ACCESS] + //assert( (size_t)(allMEs) % mgOnGpu::cppAlign == 0 ); // SANITY CHECK: require SIMD-friendly alignment [COMMENT OUT TO TEST MISALIGNED ACCESS] + // Allocate arrays at build time to contain at least 16 events (or at least neppV events if neppV>16, e.g. in future VPUs) + constexpr int maxtry0 = std::max( 16, neppV ); // 16, but at least neppV (otherwise the npagV loop does not even start) + fptype allMEsLast[maxtry0] = { 0 }; // allocated at build time: maxtry0 must be a constexpr + // Loop over only nevt events if nevt is < 16 (note that nevt is always >= neppV) + assert( nevt >= neppV ); + const int maxtry = std::min( maxtry0, nevt ); // 16, but at most nevt (avoid invalid memory access if nevt 0 ) allMEs[ievt] *= allNumerators[ievt] / allDenominators[ievt]; +#endif +#else + for( int ipagV = 0; ipagV < npagV; ++ipagV ) + { + const int ievt0 = ipagV * neppV; + fptype* MEs = E_ACCESS::ieventAccessRecord( allMEs, ievt0 ); + fptype_sv& MEs_sv = E_ACCESS::kernelAccess( MEs ); + MEs_sv /= helcolDenominators[0]; +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + if( channelId > 0 ) + { + fptype* numerators = NUM_ACCESS::ieventAccessRecord( allNumerators, ievt0 ); + fptype* denominators = DEN_ACCESS::ieventAccessRecord( allDenominators, ievt0 ); + fptype_sv& numerators_sv = NUM_ACCESS::kernelAccess( numerators ); + fptype_sv& denominators_sv = DEN_ACCESS::kernelAccess( denominators ); + MEs_sv *= numerators_sv / denominators_sv; + } +#endif + //for( int ieppV = 0; ieppV < neppV; ieppV++ ) + //{ + // const unsigned int ievt = ipagV * neppV + ieppV; + // printf( "sigmaKin: ievt=%2d me=%f\n", ievt, allMEs[ievt] ); + //} + } +#endif + mgDebugFinalise(); + } + + //-------------------------------------------------------------------------- + +} // end namespace + +//========================================================================== diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/CPPProcess.h b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/CPPProcess.h new file mode 100644 index 0000000000..8e1aa66442 --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/CPPProcess.h @@ -0,0 +1,166 @@ +//========================================================================== +// This file has been automatically generated for CUDA/C++ standalone by +// MadGraph5_aMC@NLO v. 
3.5.0_lo_vect, 2023-01-26 +// By the MadGraph5_aMC@NLO Development Team +// Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch +//========================================================================== + +#ifndef MG5_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx_H +#define MG5_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx_H 1 + +#include "mgOnGpuConfig.h" + +#include "mgOnGpuVectors.h" + +#include "Parameters_SMEFTsim_topU3l_MwScheme_UFO.h" + +#include <vector> + +//-------------------------------------------------------------------------- + +#ifdef __CUDACC__ +namespace mg5amcGpu +#else +namespace mg5amcCpu +#endif +{ + //========================================================================== + // A class for calculating the matrix elements for + // Process: g g > t t~ t t~ WEIGHTED<=4 @1 + //-------------------------------------------------------------------------- + + class CPPProcess + { + public: /* clang-format off */ + + // Constructor (from command line arguments) + CPPProcess( bool verbose = false, bool debug = false ); + + // Destructor + ~CPPProcess(); + + // Initialize process (read model parameters from file) + virtual void initProc( const std::string& param_card_name ); + + // Retrieve the compiler that was used to build this module + static const std::string getCompiler(); + + // Other methods of this instance (???) + //const std::vector<fptype>& getMasses() const { return m_masses; } + //virtual int code() const{ return 1; } + //void setInitial( int inid1, int inid2 ){ id1 = inid1; id2 = inid2; } + //int getDim() const { return dim; } + //int getNIOParticles() const { return nexternal; } // nexternal was nioparticles + + // Accessors (unused so far: add four of them only to fix a clang build warning) + //bool verbose() const { return m_verbose; } + bool debug() const { return m_debug; } + + public: /* clang-format on */ + + // Hardcoded parameters for this process (constant class variables) + // [NB: this class assumes nprocesses==1 i.e. a single DSIG1 and no DSIG2 in Fortran (#272 and #343)] + //static const int ninitial = mgOnGpu::npari; + //static const int nexternal = 6; // mgOnGpu::npar (nexternal was nioparticles) + //static const int nwavefuncs = 6; // mgOnGpu::nwf + //static const int namplitudes = 76; + //static const int ncomb = 64; // mgOnGpu::ncomb + + private: + + // Command line arguments (constructor) + bool m_verbose; + bool m_debug; + + // Physics model parameters to be read from file (initProc function) +#ifndef MGONGPU_HARDCODE_PARAM + Parameters_SMEFTsim_topU3l_MwScheme_UFO* m_pars; +#endif + std::vector<fptype> m_masses; // external particle masses + + // Other variables of this instance (???) + //int id1, id2; // initial particle ids + //cxtype** amp; // ???
+ }; + + //-------------------------------------------------------------------------- + +#ifdef __CUDACC__ + __global__ void + computeDependentCouplings( const fptype* allgs, // input: Gs[nevt] + fptype* allcouplings ); // output: couplings[nevt*ndcoup*2] +#else + __global__ void + computeDependentCouplings( const fptype* allgs, // input: Gs[nevt] + fptype* allcouplings, // output: couplings[nevt*ndcoup*2] + const int nevt ); // input: #events (for cuda: nevt == ndim == gpublocks*gputhreads) +#endif + + //-------------------------------------------------------------------------- + +#ifdef __CUDACC__ /* clang-format off */ + __global__ void + sigmaKin_getGoodHel( const fptype* allmomenta, // input: momenta[nevt*npar*4] + const fptype* allcouplings, // input: couplings[nevt*ndcoup*2] + fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + fptype* allNumerators, // output: multichannel numerators[nevt], running_sum_over_helicities + fptype* allDenominators, // output: multichannel denominators[nevt], running_sum_over_helicities +#endif + bool* isGoodHel ); // output: isGoodHel[ncomb] - device array (CUDA implementation) +#else + __global__ void + sigmaKin_getGoodHel( const fptype* allmomenta, // input: momenta[nevt*npar*4] + const fptype* allcouplings, // input: couplings[nevt*ndcoup*2] + fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + fptype* allNumerators, // output: multichannel numerators[nevt], running_sum_over_helicities + fptype* allDenominators, // output: multichannel denominators[nevt], running_sum_over_helicities +#endif + bool* isGoodHel, // output: isGoodHel[ncomb] - host array (C++ implementation) + const int nevt ); // input: #events (for cuda: nevt == ndim == gpublocks*gputhreads) +#endif /* clang-format on */ + + //-------------------------------------------------------------------------- + + int // output: nGoodHel (the number of good helicity combinations out of ncomb) + sigmaKin_setGoodHel( const bool* isGoodHel ); // input: isGoodHel[ncomb] - host array + + //-------------------------------------------------------------------------- + +#ifdef __CUDACC__ /* clang-format off */ + __global__ void + sigmaKin( const fptype* allmomenta, // input: momenta[nevt*npar*4] + const fptype* allcouplings, // input: couplings[nevt*ndcoup*2] + const fptype* allrndhel, // input: random numbers[nevt] for helicity selection + const fptype* allrndcol, // input: random numbers[nevt] for color selection + fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + const unsigned int channelId, // input: multichannel channel id (1 to #diagrams); 0 to disable channel enhancement + fptype* allNumerators, // output: multichannel numerators[nevt], running_sum_over_helicities + fptype* allDenominators, // output: multichannel denominators[nevt], running_sum_over_helicities +#endif + int* allselhel, // output: helicity selection[nevt] + int* allselcol // output: helicity selection[nevt] + ); +#else + __global__ void + sigmaKin( const fptype* allmomenta, // input: momenta[nevt*npar*4] + const fptype* allcouplings, // input: couplings[nevt*ndcoup*2] + const fptype* allrndhel, // input: random numbers[nevt] for helicity selection + const fptype* allrndcol, // input: random numbers[nevt] for color selection + fptype* allMEs, // output: allMEs[nevt], |M|^2 final_avg_over_helicities +#ifdef MGONGPU_SUPPORTS_MULTICHANNEL + const 
unsigned int channelId, // input: multichannel channel id (1 to #diagrams); 0 to disable channel enhancement + fptype* allNumerators, // output: multichannel numerators[nevt], running_sum_over_helicities + fptype* allDenominators, // output: multichannel denominators[nevt], running_sum_over_helicities +#endif + int* allselhel, // output: helicity selection[nevt] + int* allselcol, // output: helicity selection[nevt] + const int nevt ); // input: #events (for cuda: nevt == ndim == gpublocks*gputhreads) +#endif /* clang-format on */ + + //-------------------------------------------------------------------------- +} + +#endif // MG5_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx_H diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/CrossSectionKernels.cc b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/CrossSectionKernels.cc new file mode 120000 index 0000000000..d9cb57c4bb --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/CrossSectionKernels.cc @@ -0,0 +1 @@ +../CrossSectionKernels.cc \ No newline at end of file diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/CrossSectionKernels.h b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/CrossSectionKernels.h new file mode 120000 index 0000000000..125b8758e4 --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/CrossSectionKernels.h @@ -0,0 +1 @@ +../CrossSectionKernels.h \ No newline at end of file diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/CudaRuntime.h b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/CudaRuntime.h new file mode 120000 index 0000000000..ce9e1a487a --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/CudaRuntime.h @@ -0,0 +1 @@ +../CudaRuntime.h \ No newline at end of file diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/EventStatistics.h b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/EventStatistics.h new file mode 120000 index 0000000000..34c1a31129 --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/EventStatistics.h @@ -0,0 +1 @@ +../EventStatistics.h \ No newline at end of file diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/MadgraphTest.h b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/MadgraphTest.h new file mode 120000 index 0000000000..13942d64c4 --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/MadgraphTest.h @@ -0,0 +1 @@ +../MadgraphTest.h \ No newline at end of file diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/MatrixElementKernels.cc b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/MatrixElementKernels.cc new file mode 120000 index 0000000000..f800cb9638 --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/MatrixElementKernels.cc @@ -0,0 +1 @@ 
+../MatrixElementKernels.cc \ No newline at end of file diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/MatrixElementKernels.h b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/MatrixElementKernels.h new file mode 120000 index 0000000000..ac47855d4f --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/MatrixElementKernels.h @@ -0,0 +1 @@ +../MatrixElementKernels.h \ No newline at end of file diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/MemoryAccessAmplitudes.h b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/MemoryAccessAmplitudes.h new file mode 120000 index 0000000000..448995d3e5 --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/MemoryAccessAmplitudes.h @@ -0,0 +1 @@ +../MemoryAccessAmplitudes.h \ No newline at end of file diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/MemoryAccessCouplings.h b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/MemoryAccessCouplings.h new file mode 120000 index 0000000000..388f907580 --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/MemoryAccessCouplings.h @@ -0,0 +1 @@ +../MemoryAccessCouplings.h \ No newline at end of file diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/MemoryAccessCouplingsFixed.h b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/MemoryAccessCouplingsFixed.h new file mode 120000 index 0000000000..c795c16465 --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/MemoryAccessCouplingsFixed.h @@ -0,0 +1 @@ +../MemoryAccessCouplingsFixed.h \ No newline at end of file diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/MemoryAccessDenominators.h b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/MemoryAccessDenominators.h new file mode 120000 index 0000000000..4ab752bdad --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/MemoryAccessDenominators.h @@ -0,0 +1 @@ +../MemoryAccessDenominators.h \ No newline at end of file diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/MemoryAccessGs.h b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/MemoryAccessGs.h new file mode 120000 index 0000000000..9d5e237faf --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/MemoryAccessGs.h @@ -0,0 +1 @@ +../MemoryAccessGs.h \ No newline at end of file diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/MemoryAccessHelpers.h b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/MemoryAccessHelpers.h new file mode 120000 index 0000000000..3692f9e4da --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/MemoryAccessHelpers.h @@ -0,0 +1 @@ 
+../MemoryAccessHelpers.h \ No newline at end of file diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/MemoryAccessMatrixElements.h b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/MemoryAccessMatrixElements.h new file mode 120000 index 0000000000..b04a26e4f6 --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/MemoryAccessMatrixElements.h @@ -0,0 +1 @@ +../MemoryAccessMatrixElements.h \ No newline at end of file diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/MemoryAccessMomenta.h b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/MemoryAccessMomenta.h new file mode 120000 index 0000000000..4a5e8b375d --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/MemoryAccessMomenta.h @@ -0,0 +1 @@ +../MemoryAccessMomenta.h \ No newline at end of file diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/MemoryAccessNumerators.h b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/MemoryAccessNumerators.h new file mode 120000 index 0000000000..a525b6607d --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/MemoryAccessNumerators.h @@ -0,0 +1 @@ +../MemoryAccessNumerators.h \ No newline at end of file diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/MemoryAccessRandomNumbers.h b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/MemoryAccessRandomNumbers.h new file mode 120000 index 0000000000..844de324e7 --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/MemoryAccessRandomNumbers.h @@ -0,0 +1 @@ +../MemoryAccessRandomNumbers.h \ No newline at end of file diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/MemoryAccessVectors.h b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/MemoryAccessVectors.h new file mode 120000 index 0000000000..d890503974 --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/MemoryAccessVectors.h @@ -0,0 +1 @@ +../MemoryAccessVectors.h \ No newline at end of file diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/MemoryAccessWavefunctions.h b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/MemoryAccessWavefunctions.h new file mode 120000 index 0000000000..61a331899b --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/MemoryAccessWavefunctions.h @@ -0,0 +1 @@ +../MemoryAccessWavefunctions.h \ No newline at end of file diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/MemoryAccessWeights.h b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/MemoryAccessWeights.h new file mode 120000 index 0000000000..ec10cd2e17 --- /dev/null +++ 
b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/MemoryAccessWeights.h @@ -0,0 +1 @@ +../MemoryAccessWeights.h \ No newline at end of file diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/MemoryBuffers.h b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/MemoryBuffers.h new file mode 120000 index 0000000000..600b7ad779 --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/MemoryBuffers.h @@ -0,0 +1 @@ +../MemoryBuffers.h \ No newline at end of file diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/RamboSamplingKernels.cc b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/RamboSamplingKernels.cc new file mode 120000 index 0000000000..033b20955e --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/RamboSamplingKernels.cc @@ -0,0 +1 @@ +../RamboSamplingKernels.cc \ No newline at end of file diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/RamboSamplingKernels.h b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/RamboSamplingKernels.h new file mode 120000 index 0000000000..ca354ce496 --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/RamboSamplingKernels.h @@ -0,0 +1 @@ +../RamboSamplingKernels.h \ No newline at end of file diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/RandomNumberKernels.cc b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/RandomNumberKernels.cc new file mode 120000 index 0000000000..09a0e03a16 --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/RandomNumberKernels.cc @@ -0,0 +1 @@ +../RandomNumberKernels.cc \ No newline at end of file diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/RandomNumberKernels.h b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/RandomNumberKernels.h new file mode 120000 index 0000000000..5e8526a6ae --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/RandomNumberKernels.h @@ -0,0 +1 @@ +../RandomNumberKernels.h \ No newline at end of file diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/check_sa.cc b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/check_sa.cc new file mode 100644 index 0000000000..41367fd70b --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/check_sa.cc @@ -0,0 +1,1120 @@ +#include "mgOnGpuConfig.h" + +#include "BridgeKernels.h" +#include "CPPProcess.h" +#include "CrossSectionKernels.h" +#include "MatrixElementKernels.h" +#include "MemoryAccessMatrixElements.h" +#include "MemoryAccessMomenta.h" +#include "MemoryAccessRandomNumbers.h" +#include "MemoryAccessWeights.h" +#include "MemoryBuffers.h" +#include "RamboSamplingKernels.h" +#include "RandomNumberKernels.h" +#include "epoch_process_id.h" +#include "ompnumthreads.h" +#include 
"timermap.h" + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define STRINGIFY( s ) #s +#define XSTRINGIFY( s ) STRINGIFY( s ) + +#define SEP79 79 + +bool +is_number( const char* s ) +{ + const char* t = s; + while( *t != '\0' && isdigit( *t ) ) + ++t; + return (int)strlen( s ) == t - s; +} + +int +usage( char* argv0, int ret = 1 ) +{ + std::cout << "Usage: " << argv0 + << " [--verbose|-v] [--debug|-d] [--performance|-p] [--json|-j] [--curhst|--curdev|--common] [--rmbhst|--rmbdev] [--bridge]" + << " [#gpuBlocksPerGrid #gpuThreadsPerBlock] #iterations" << std::endl; + std::cout << std::endl; + std::cout << "The number of events per iteration is #gpuBlocksPerGrid * #gpuThreadsPerBlock" << std::endl; + std::cout << "(also in CPU/C++ code, where only the product of these two parameters counts)" << std::endl; + std::cout << std::endl; + std::cout << "Summary stats are always computed: '-p' and '-j' only control their printout" << std::endl; + std::cout << "The '-d' flag only enables NaN/abnormal warnings and OMP debugging" << std::endl; +#ifndef __CUDACC__ +#ifdef _OPENMP + std::cout << std::endl; + std::cout << "Use the OMP_NUM_THREADS environment variable to control OMP multi-threading" << std::endl; + std::cout << "(OMP multithreading will be disabled if OMP_NUM_THREADS is not set)" << std::endl; +#endif +#endif + return ret; +} + +int +main( int argc, char** argv ) +{ + // Namespaces for CUDA and C++ (FIXME - eventually use the same namespace everywhere...) +#ifdef __CUDACC__ + using namespace mg5amcGpu; +#else + using namespace mg5amcCpu; +#endif + + // DEFAULTS FOR COMMAND LINE ARGUMENTS + bool verbose = false; + bool debug = false; + bool perf = false; + bool json = false; + unsigned int niter = 0; + unsigned int gpublocks = 1; + unsigned int gputhreads = 32; + unsigned int jsondate = 0; + unsigned int jsonrun = 0; + unsigned int numvec[5] = { 0, 0, 0, 0, 0 }; + int nnum = 0; + // Random number mode + enum class RandomNumberMode + { + CommonRandom = 0, + CurandHost = 1, + CurandDevice = 2 + }; +#ifdef __CUDACC__ + RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU +#elif not defined MGONGPU_HAS_NO_CURAND + RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand +#else + RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // default on CPU if build has no curand +#endif + // Rambo sampling mode (NB RamboHost implies CommonRandom or CurandHost!) + enum class RamboSamplingMode + { + RamboHost = 1, + RamboDevice = 2 + }; +#ifdef __CUDACC__ + RamboSamplingMode rmbsmp = RamboSamplingMode::RamboDevice; // default on GPU +#else + RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU +#endif + // Bridge emulation mode (NB Bridge implies RamboHost!) 
+ bool bridge = false; + + // READ COMMAND LINE ARGUMENTS + for( int argn = 1; argn < argc; ++argn ) + { + std::string arg = argv[argn]; + if( ( arg == "--verbose" ) || ( arg == "-v" ) ) + { + verbose = true; + } + else if( ( arg == "--debug" ) || ( arg == "-d" ) ) + { + debug = true; + } + else if( ( arg == "--performance" ) || ( arg == "-p" ) ) + { + perf = true; + } + else if( ( arg == "--json" ) || ( arg == "-j" ) ) + { + json = true; + } + else if( arg == "--curdev" ) + { +#ifdef __CUDACC__ + rndgen = RandomNumberMode::CurandDevice; +#else + throw std::runtime_error( "CurandDevice is not supported on CPUs" ); +#endif + } + else if( arg == "--curhst" ) + { +#ifndef MGONGPU_HAS_NO_CURAND + rndgen = RandomNumberMode::CurandHost; +#else + throw std::runtime_error( "CurandHost is not supported because this application was built without Curand support" ); +#endif + } + else if( arg == "--common" ) + { + rndgen = RandomNumberMode::CommonRandom; + } + else if( arg == "--rmbdev" ) + { +#ifdef __CUDACC__ + rmbsmp = RamboSamplingMode::RamboDevice; +#else + throw std::runtime_error( "RamboDevice is not supported on CPUs" ); +#endif + } + else if( arg == "--rmbhst" ) + { + rmbsmp = RamboSamplingMode::RamboHost; + } + else if( arg == "--bridge" ) + { + bridge = true; + } + else if( is_number( argv[argn] ) && nnum < 5 ) + { + numvec[nnum++] = strtoul( argv[argn], NULL, 0 ); + } + else + { + return usage( argv[0] ); + } + } + + if( nnum == 3 || nnum == 5 ) + { + gpublocks = numvec[0]; + gputhreads = numvec[1]; + niter = numvec[2]; + if( nnum == 5 ) + { + jsondate = numvec[3]; + jsonrun = numvec[4]; + } + } + else if( nnum == 1 ) + { + niter = numvec[0]; + } + else + { + return usage( argv[0] ); + } + + if( niter == 0 ) + return usage( argv[0] ); + + if( bridge && rmbsmp == RamboSamplingMode::RamboDevice ) + { + std::cout << "WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost" << std::endl; + rmbsmp = RamboSamplingMode::RamboHost; + } + + if( rmbsmp == RamboSamplingMode::RamboHost && rndgen == RandomNumberMode::CurandDevice ) + { +#if not defined MGONGPU_HAS_NO_CURAND + std::cout << "WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost" << std::endl; + rndgen = RandomNumberMode::CurandHost; +#else + std::cout << "WARNING! RamboHost selected: cannot use CurandDevice, will use CommonRandom" << std::endl; + rndgen = RandomNumberMode::CommonRandom; +#endif + } + + constexpr int neppM = MemoryAccessMomenta::neppM; // AOSOA layout + constexpr int neppR = MemoryAccessRandomNumbers::neppR; // AOSOA layout + + using mgOnGpu::ntpbMAX; + if( gputhreads > ntpbMAX ) + { + std::cout << "ERROR! #threads/block should be <= " << ntpbMAX << std::endl; + return usage( argv[0] ); + } + +#ifndef __CUDACC__ +#ifdef _OPENMP + ompnumthreadsNotSetMeansOneThread( debug ? 1 : 0 ); // quiet(-1), info(0), debug(1) +#endif +#endif + +#ifndef __CUDACC__ + // Fail gently and avoid "Illegal instruction (core dumped)" if the host does not support the SIMD used in the ME calculation + // Note: this prevents a crash on pmpe04 but not on some github CI nodes? + // [NB: SIMD vectorization in mg5amc C++ code is only used in the ME calculation below MatrixElementKernelHost!] 
+ if( !MatrixElementKernelHost::hostSupportsSIMD() ) return 1; +#endif + + const unsigned int ndim = gpublocks * gputhreads; // number of threads in one GPU grid + const unsigned int nevt = ndim; // number of events in one iteration == number of GPU threads + + if( verbose ) + std::cout << "# iterations: " << niter << std::endl; + + // *** START THE NEW TIMERS *** + mgOnGpu::TimerMap timermap; + + // === STEP 0 - INITIALISE + +#ifdef __CUDACC__ + + // --- 00. Initialise cuda + // Instantiate a CudaRuntime at the beginnining of the application's main to + // invoke cudaSetDevice(0) in the constructor and book a cudaDeviceReset() call in the destructor + const std::string cdinKey = "00 CudaInit"; + timermap.start( cdinKey ); + CudaRuntime cudaRuntime( debug ); +#endif + + // --- 0a. Initialise physics process + const std::string procKey = "0a ProcInit"; + timermap.start( procKey ); + + // Create a process object + CPPProcess process( verbose ); + + // Read param_card and set parameters + process.initProc( "../../Cards/param_card.dat" ); + const fptype energy = 1500; // historical default, Ecms = 1500 GeV = 1.5 TeV (above the Z peak) + //const fptype energy = 91.2; // Ecms = 91.2 GeV (Z peak) + //const fptype energy = 0.100; // Ecms = 100 MeV (well below the Z peak, pure em scattering) + const int meGeVexponent = -( 2 * mgOnGpu::npar - 8 ); + + // --- 0b. Allocate memory structures + const std::string alloKey = "0b MemAlloc"; + timermap.start( alloKey ); + + // Memory buffers for random numbers for momenta +#ifndef __CUDACC__ + HostBufferRndNumMomenta hstRndmom( nevt ); +#else + PinnedHostBufferRndNumMomenta hstRndmom( nevt ); + DeviceBufferRndNumMomenta devRndmom( nevt ); +#endif + + // Memory buffers for sampling weights +#ifndef __CUDACC__ + HostBufferWeights hstWeights( nevt ); +#else + PinnedHostBufferWeights hstWeights( nevt ); + DeviceBufferWeights devWeights( nevt ); +#endif + + // Memory buffers for momenta +#ifndef __CUDACC__ + HostBufferMomenta hstMomenta( nevt ); +#else + PinnedHostBufferMomenta hstMomenta( nevt ); + DeviceBufferMomenta devMomenta( nevt ); +#endif + + // Memory buffers for Gs +#ifndef __CUDACC__ + HostBufferGs hstGs( nevt ); +#else + PinnedHostBufferGs hstGs( nevt ); + DeviceBufferGs devGs( nevt ); +#endif + + // Hardcode Gs for now (eventually they should come from Fortran MadEvent) + for( unsigned int i = 0; i < nevt; ++i ) + { + constexpr fptype fixedG = 1.2177157847767195; // fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) + hstGs[i] = fixedG; + //if ( i > 0 ) hstGs[i] = 0; // try hardcoding G only for event 0 + //hstGs[i] = i; + } + + // Memory buffers for matrix elements +#ifndef __CUDACC__ + HostBufferMatrixElements hstMatrixElements( nevt ); +#else + PinnedHostBufferMatrixElements hstMatrixElements( nevt ); + DeviceBufferMatrixElements devMatrixElements( nevt ); +#endif + + // Memory buffers for random numbers for helicity selection + // *** NB #403 these buffers always remain initialised at 0: no need for helicity choice in gcheck/check (no LHE produced) *** +#ifndef __CUDACC__ + HostBufferRndNumHelicity hstRndHel( nevt ); +#else + PinnedHostBufferRndNumHelicity hstRndHel( nevt ); + DeviceBufferRndNumHelicity devRndHel( nevt ); +#endif + + // Memory buffers for random numbers for color selection + // *** NB #402 these buffers always remain initialised at 0: no need for color choice in gcheck/check (no LHE produced) *** +#ifndef __CUDACC__ + HostBufferRndNumColor hstRndCol( nevt ); +#else + PinnedHostBufferRndNumColor 
hstRndCol( nevt ); + DeviceBufferRndNumColor devRndCol( nevt ); +#endif + + // Memory buffers for helicity selection +#ifndef __CUDACC__ + HostBufferSelectedHelicity hstSelHel( nevt ); +#else + PinnedHostBufferSelectedHelicity hstSelHel( nevt ); + DeviceBufferSelectedHelicity devSelHel( nevt ); +#endif + + // Memory buffers for color selection +#ifndef __CUDACC__ + HostBufferSelectedColor hstSelCol( nevt ); +#else + PinnedHostBufferSelectedColor hstSelCol( nevt ); + DeviceBufferSelectedColor devSelCol( nevt ); +#endif + + std::unique_ptr<double[]> genrtimes( new double[niter] ); + std::unique_ptr<double[]> rambtimes( new double[niter] ); + std::unique_ptr<double[]> wavetimes( new double[niter] ); + std::unique_ptr<double[]> wv3atimes( new double[niter] ); + + // --- 0c. Create curand or common generator + const std::string cgenKey = "0c GenCreat"; + timermap.start( cgenKey ); + // Allocate the appropriate RandomNumberKernel + std::unique_ptr<RandomNumberKernelBase> prnk; + if( rndgen == RandomNumberMode::CommonRandom ) + { + prnk.reset( new CommonRandomNumberKernel( hstRndmom ) ); + } +#ifndef MGONGPU_HAS_NO_CURAND + else if( rndgen == RandomNumberMode::CurandHost ) + { + const bool onDevice = false; + prnk.reset( new CurandRandomNumberKernel( hstRndmom, onDevice ) ); + } +#ifdef __CUDACC__ + else + { + const bool onDevice = true; + prnk.reset( new CurandRandomNumberKernel( devRndmom, onDevice ) ); + } +#else + else + { + throw std::logic_error( "CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) + } +#endif +#else + else + { + throw std::logic_error( "This application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) + } +#endif + + // --- 0c. Create rambo sampling kernel [keep this in 0c for the moment] + std::unique_ptr<SamplingKernelBase> prsk; + if( rmbsmp == RamboSamplingMode::RamboHost ) + { + prsk.reset( new RamboSamplingKernelHost( energy, hstRndmom, hstMomenta, hstWeights, nevt ) ); + } + else + { +#ifdef __CUDACC__ + prsk.reset( new RamboSamplingKernelDevice( energy, devRndmom, devMomenta, devWeights, gpublocks, gputhreads ) ); +#else + throw std::logic_error( "RamboDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) +#endif + } + + // --- 0c. Create matrix element kernel [keep this in 0c for the moment] + std::unique_ptr<MatrixElementKernelBase> pmek; + if( !bridge ) + { +#ifdef __CUDACC__ + pmek.reset( new MatrixElementKernelDevice( devMomenta, devGs, devRndHel, devRndCol, devMatrixElements, devSelHel, devSelCol, gpublocks, gputhreads ) ); +#else + pmek.reset( new MatrixElementKernelHost( hstMomenta, hstGs, hstRndHel, hstRndCol, hstMatrixElements, hstSelHel, hstSelCol, nevt ) ); +#endif + } + else + { +#ifdef __CUDACC__ + pmek.reset( new BridgeKernelDevice( hstMomenta, hstGs, hstRndHel, hstRndCol, hstMatrixElements, hstSelHel, hstSelCol, gpublocks, gputhreads ) ); +#else + pmek.reset( new BridgeKernelHost( hstMomenta, hstGs, hstRndHel, hstRndCol, hstMatrixElements, hstSelHel, hstSelCol, nevt ) ); +#endif + } + int nGoodHel = 0; // the number of good helicities (out of ncomb) + + // --- 0c.
Create cross section kernel [keep this in 0c for the moment] + EventStatistics hstStats; + CrossSectionKernelHost xsk( hstWeights, hstMatrixElements, hstStats, nevt ); + + // ************************************** + // *** START MAIN LOOP ON #ITERATIONS *** + // ************************************** + + for( unsigned long int iiter = 0; iiter < niter; ++iiter ) + { + //std::cout << "Iteration #" << iiter+1 << " of " << niter << std::endl; + + // === STEP 1 OF 3 + + // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** + double genrtime = 0; + + // --- 1a. Seed rnd generator (to get same results on host and device in curand) + // [NB This should not be necessary using the host API: "Generation functions + // can be called multiple times on the same generator to generate successive + // blocks of results. For pseudorandom generators, multiple calls to generation + // functions will yield the same result as a single call with a large size."] + const unsigned long long seed = 20200805; + const std::string sgenKey = "1a GenSeed "; + timermap.start( sgenKey ); + prnk->seedGenerator( seed + iiter ); + genrtime += timermap.stop(); + + // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host + const std::string rngnKey = "1b GenRnGen"; + timermap.start( rngnKey ); + prnk->generateRnarray(); + //std::cout << "Got random numbers" << std::endl; + +#ifdef __CUDACC__ + if( rndgen != RandomNumberMode::CurandDevice && rmbsmp == RamboSamplingMode::RamboDevice ) + { + // --- 1c. Copy rndmom from host to device + const std::string htodKey = "1c CpHTDrnd"; + genrtime += timermap.start( htodKey ); + copyDeviceFromHost( devRndmom, hstRndmom ); + } +#endif + + // *** STOP THE OLD-STYLE TIMER FOR RANDOM GEN *** + genrtime += timermap.stop(); + + // === STEP 2 OF 3 + // Fill in particle momenta for each of nevt events on the device + + // *** START THE OLD-STYLE TIMER FOR RAMBO *** + double rambtime = 0; + + // --- 2a. Fill in momenta of initial state particles on the device + const std::string riniKey = "2a RamboIni"; + timermap.start( riniKey ); + prsk->getMomentaInitial(); + //std::cout << "Got initial momenta" << std::endl; + + // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device + // (i.e. map random numbers to final-state particle momenta for each of nevt events) + const std::string rfinKey = "2b RamboFin"; + rambtime += timermap.start( rfinKey ); + prsk->getMomentaFinal(); + //std::cout << "Got final momenta" << std::endl; + +#ifdef __CUDACC__ + if( rmbsmp == RamboSamplingMode::RamboDevice ) + { + // --- 2c. CopyDToH Weights + const std::string cwgtKey = "2c CpDTHwgt"; + rambtime += timermap.start( cwgtKey ); + copyHostFromDevice( hstWeights, devWeights ); + + // --- 2d. CopyDToH Momenta + const std::string cmomKey = "2d CpDTHmom"; + rambtime += timermap.start( cmomKey ); + copyHostFromDevice( hstMomenta, devMomenta ); + } + else // only if ( ! bridge ) ??? + { + // --- 2c. CopyHToD Weights + const std::string cwgtKey = "2c CpHTDwgt"; + rambtime += timermap.start( cwgtKey ); + copyDeviceFromHost( devWeights, hstWeights ); + + // --- 2d. CopyHToD Momenta + const std::string cmomKey = "2d CpHTDmom"; + rambtime += timermap.start( cmomKey ); + copyDeviceFromHost( devMomenta, hstMomenta ); + } +#endif + + // *** STOP THE OLD-STYLE TIMER FOR RAMBO *** + rambtime += timermap.stop(); + + // === STEP 3 OF 3 + // Evaluate matrix elements for all nevt events + // 0d. 
For Bridge only, transpose C2F [renamed as 0d: this is not initialisation, but I want it out of the ME timers (#371)] + // 0e. (Only on the first iteration) Get good helicities [renamed as 0e: this IS initialisation!] + // 3a. Evaluate MEs on the device (include transpose F2C for Bridge) + // 3b. Copy MEs back from device to host + + // --- 0d. TransC2F + if( bridge ) + { + const std::string tc2fKey = "0d TransC2F"; + timermap.start( tc2fKey ); + dynamic_cast<BridgeKernelBase*>( pmek.get() )->transposeInputMomentaC2F(); + } + +#ifdef __CUDACC__ + // --- 2d. CopyHToD Momenta + const std::string gKey = "0.. CpHTDg"; + rambtime += timermap.start( gKey ); // FIXME! NOT A RAMBO TIMER! + copyDeviceFromHost( devGs, hstGs ); +#endif + + // --- 0e. SGoodHel + if( iiter == 0 ) + { + const std::string ghelKey = "0e SGoodHel"; + timermap.start( ghelKey ); + nGoodHel = pmek->computeGoodHelicities(); + } + + // *** START THE OLD-STYLE TIMERS FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** + double wavetime = 0; // calc plus copy + double wv3atime = 0; // calc only + + // --- 3a. SigmaKin + const std::string skinKey = "3a SigmaKin"; + timermap.start( skinKey ); + constexpr unsigned int channelId = 0; // TEMPORARY? disable multi-channel in check.exe and gcheck.exe #466 + pmek->computeMatrixElements( channelId ); + + // *** STOP THE NEW OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** + wv3atime += timermap.stop(); // calc only + wavetime += wv3atime; // calc plus copy + +#ifdef __CUDACC__ + if( !bridge ) + { + // --- 3b. CopyDToH MEs + const std::string cmesKey = "3b CpDTHmes"; + timermap.start( cmesKey ); + copyHostFromDevice( hstMatrixElements, devMatrixElements ); + // *** STOP THE OLD OLD-STYLE TIMER FOR MATRIX ELEMENTS (WAVEFUNCTIONS) *** + wavetime += timermap.stop(); // calc plus copy + } +#endif + + // === STEP 4 FINALISE LOOP + // --- 4@ Update event statistics + const std::string updtKey = "4@ UpdtStat"; + timermap.start( updtKey ); + xsk.updateEventStatistics(); + + // --- 4a Dump within the loop + const std::string loopKey = "4a DumpLoop"; + timermap.start( loopKey ); + genrtimes[iiter] = genrtime; + rambtimes[iiter] = rambtime; + wavetimes[iiter] = wavetime; + wv3atimes[iiter] = wv3atime; + + if( verbose ) + { + std::cout << std::string( SEP79, '*' ) << std::endl + << "Iteration #" << iiter + 1 << " of " << niter << std::endl; + if( perf ) std::cout << "Wave function time: " << wavetime << std::endl; + } + + for( unsigned int ievt = 0; ievt < nevt; ++ievt ) // Loop over all events in this iteration + { + if( verbose ) + { + // Display momenta + std::cout << "Momenta:" << std::endl; + for( int ipar = 0; ipar < mgOnGpu::npar; ipar++ ) + { + // NB: 'setw' affects only the next field (of any type) + std::cout << std::scientific // fixed format: affects all floats (default precision: 6) + << std::setw( 4 ) << ipar + 1 + << std::setw( 14 ) << MemoryAccessMomenta::ieventAccessIp4IparConst( hstMomenta.data(), ievt, 0, ipar ) + << std::setw( 14 ) << MemoryAccessMomenta::ieventAccessIp4IparConst( hstMomenta.data(), ievt, 1, ipar ) + << std::setw( 14 ) << MemoryAccessMomenta::ieventAccessIp4IparConst( hstMomenta.data(), ievt, 2, ipar ) + << std::setw( 14 ) << MemoryAccessMomenta::ieventAccessIp4IparConst( hstMomenta.data(), ievt, 3, ipar ) + << std::endl + << std::defaultfloat; // default format: affects all floats + } + std::cout << std::string( SEP79, '-' ) << std::endl; + // Display matrix elements + std::cout << " Matrix element = " << MemoryAccessMatrixElements::ieventAccessConst( hstMatrixElements.data(), ievt ) + <<
" GeV^" << meGeVexponent << std::endl; + std::cout << std::string( SEP79, '-' ) << std::endl; + } + } + + if( !( verbose || debug || perf ) ) + { + std::cout << "."; + } + } + + // ************************************** + // *** END MAIN LOOP ON #ITERATIONS *** + // ************************************** + + // === STEP 8 ANALYSIS + // --- 8a Analysis: compute stats after the loop + const std::string statKey = "8a CompStat"; + timermap.start( statKey ); + + double sumgtim = 0; + //double sqsgtim = 0; + double mingtim = genrtimes[0]; + double maxgtim = genrtimes[0]; + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + sumgtim += genrtimes[iiter]; + //sqsgtim += genrtimes[iiter]*genrtimes[iiter]; + mingtim = std::min( mingtim, genrtimes[iiter] ); + maxgtim = std::max( maxgtim, genrtimes[iiter] ); + } + + double sumrtim = 0; + //double sqsrtim = 0; + double minrtim = rambtimes[0]; + double maxrtim = rambtimes[0]; + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + sumrtim += rambtimes[iiter]; + //sqsrtim += rambtimes[iiter]*rambtimes[iiter]; + minrtim = std::min( minrtim, rambtimes[iiter] ); + maxrtim = std::max( maxrtim, rambtimes[iiter] ); + } + + double sumwtim = 0; + //double sqswtim = 0; + double minwtim = wavetimes[0]; + double maxwtim = wavetimes[0]; + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + sumwtim += wavetimes[iiter]; + //sqswtim += wavetimes[iiter]*wavetimes[iiter]; + minwtim = std::min( minwtim, wavetimes[iiter] ); + maxwtim = std::max( maxwtim, wavetimes[iiter] ); + } + double meanwtim = sumwtim / niter; + //double stdwtim = std::sqrt( sqswtim / niter - meanwtim * meanwtim ); + + double sumw3atim = 0; + //double sqsw3atim = 0; + double minw3atim = wv3atimes[0]; + double maxw3atim = wv3atimes[0]; + for( unsigned int iiter = 0; iiter < niter; ++iiter ) + { + sumw3atim += wv3atimes[iiter]; + //sqsw3atim += wv3atimes[iiter]*wv3atimes[iiter]; + minw3atim = std::min( minw3atim, wv3atimes[iiter] ); + maxw3atim = std::max( maxw3atim, wv3atimes[iiter] ); + } + double meanw3atim = sumw3atim / niter; + //double stdw3atim = std::sqrt( sqsw3atim / niter - meanw3atim * meanw3atim ); + + const unsigned int nevtALL = hstStats.nevtALL; // total number of ALL events in all iterations + if( nevtALL != niter * nevt ) + std::cout << "ERROR! nevtALL mismatch " << nevtALL << " != " << niter * nevt << std::endl; // SANITY CHECK + int nabn = hstStats.nevtABN; + int nzero = hstStats.nevtZERO; + + // === STEP 9 FINALISE + + std::string rndgentxt; + if( rndgen == RandomNumberMode::CommonRandom ) + rndgentxt = "COMMON RANDOM HOST"; + else if( rndgen == RandomNumberMode::CurandHost ) + rndgentxt = "CURAND HOST"; + else if( rndgen == RandomNumberMode::CurandDevice ) + rndgentxt = "CURAND DEVICE"; +#ifdef __CUDACC__ + rndgentxt += " (CUDA code)"; +#else + rndgentxt += " (C++ code)"; +#endif + + // Workflow description summary + std::string wrkflwtxt; + // -- CUDA or C++? +#ifdef __CUDACC__ + wrkflwtxt += "CUD:"; +#else + wrkflwtxt += "CPP:"; +#endif + // -- DOUBLE or FLOAT? +#if defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT + wrkflwtxt += "MIX+"; // mixed fptypes (single precision color algebra #537) +#elif defined MGONGPU_FPTYPE_DOUBLE + wrkflwtxt += "DBL+"; +#elif defined MGONGPU_FPTYPE_FLOAT + wrkflwtxt += "FLT+"; +#else + wrkflwtxt += "???+"; // no path to this statement +#endif + // -- CUCOMPLEX or THRUST or STD complex numbers? 
+#ifdef __CUDACC__ +#if defined MGONGPU_CUCXTYPE_CUCOMPLEX + wrkflwtxt += "CUX:"; +#elif defined MGONGPU_CUCXTYPE_THRUST + wrkflwtxt += "THX:"; +#elif defined MGONGPU_CUCXTYPE_CXSMPL + wrkflwtxt += "CXS:"; +#else + wrkflwtxt += "???:"; // no path to this statement +#endif +#else +#if defined MGONGPU_CPPCXTYPE_STDCOMPLEX + wrkflwtxt += "STX:"; +#elif defined MGONGPU_CPPCXTYPE_CXSMPL + wrkflwtxt += "CXS:"; +#else + wrkflwtxt += "???:"; // no path to this statement +#endif +#endif + // -- COMMON or CURAND HOST or CURAND DEVICE random numbers? + if( rndgen == RandomNumberMode::CommonRandom ) + wrkflwtxt += "COMMON+"; + else if( rndgen == RandomNumberMode::CurandHost ) + wrkflwtxt += "CURHST+"; + else if( rndgen == RandomNumberMode::CurandDevice ) + wrkflwtxt += "CURDEV+"; + else + wrkflwtxt += "??????+"; // no path to this statement + // -- HOST or DEVICE rambo sampling? + if( rmbsmp == RamboSamplingMode::RamboHost ) + wrkflwtxt += "RMBHST+"; + else if( rmbsmp == RamboSamplingMode::RamboDevice ) + wrkflwtxt += "RMBDEV+"; + else + wrkflwtxt += "??????+"; // no path to this statement +#ifdef __CUDACC__ + // -- HOST or DEVICE matrix elements? Standalone MEs or BRIDGE? + if( !bridge ) + wrkflwtxt += "MESDEV"; + else + wrkflwtxt += "BRDDEV"; +#else + if( !bridge ) + wrkflwtxt += "MESHST"; // FIXME! allow this also in CUDA (eventually with various simd levels) + else + wrkflwtxt += "BRDHST"; +#endif + // -- SIMD matrix elements? +#if !defined MGONGPU_CPPSIMD + wrkflwtxt += "/none"; +#elif defined __AVX512VL__ +#ifdef MGONGPU_PVW512 + wrkflwtxt += "/512z"; +#else + wrkflwtxt += "/512y"; +#endif +#elif defined __AVX2__ + wrkflwtxt += "/avx2"; +#elif defined __SSE4_2__ +#ifdef __PPC__ + wrkflwtxt += "/ppcv"; +#elif defined __ARM_NEON__ + wrkflwtxt += "/neon"; +#else + wrkflwtxt += "/sse4"; +#endif +#else + wrkflwtxt += "/????"; // no path to this statement +#endif + // -- Has cxtype_v::operator[] bracket with non-const reference? +#if defined MGONGPU_CPPSIMD +#ifdef MGONGPU_HAS_CPPCXTYPEV_BRK + wrkflwtxt += "+CXVBRK"; +#else + wrkflwtxt += "+NOVBRK"; +#endif +#else + wrkflwtxt += "+NAVBRK"; // N/A +#endif + + // --- 9a Dump to screen + const std::string dumpKey = "9a DumpScrn"; + timermap.start( dumpKey ); + + if( !( verbose || debug || perf ) ) + { + std::cout << std::endl; + } + + if( perf ) + { +#ifndef __CUDACC__ +#ifdef _OPENMP + // Get the output of "nproc --all" (https://stackoverflow.com/a/478960) + std::string nprocall; + std::unique_ptr nprocpipe( popen( "nproc --all", "r" ), pclose ); + if( !nprocpipe ) throw std::runtime_error( "`nproc --all` failed?" 
); + std::array nprocbuf; + while( fgets( nprocbuf.data(), nprocbuf.size(), nprocpipe.get() ) != nullptr ) nprocall += nprocbuf.data(); +#endif +#endif +#ifdef MGONGPU_CPPSIMD +#ifdef MGONGPU_HAS_CPPCXTYPEV_BRK + const std::string cxtref = " [cxtype_ref=YES]"; +#else + const std::string cxtref = " [cxtype_ref=NO]"; +#endif +#endif + // Dump all configuration parameters and all results + std::cout << std::string( SEP79, '*' ) << std::endl +#ifdef __CUDACC__ + << "Process = " << XSTRINGIFY( MG_EPOCH_PROCESS_ID ) << "_CUDA" +#else + << "Process = " << XSTRINGIFY( MG_EPOCH_PROCESS_ID ) << "_CPP" +#endif + << " [" << process.getCompiler() << "]" +#ifdef MGONGPU_INLINE_HELAMPS + << " [inlineHel=1]" +#else + << " [inlineHel=0]" +#endif +#ifdef MGONGPU_HARDCODE_PARAM + << " [hardcodePARAM=1]" << std::endl +#else + << " [hardcodePARAM=0]" << std::endl +#endif + << "NumBlocksPerGrid = " << gpublocks << std::endl + << "NumThreadsPerBlock = " << gputhreads << std::endl + << "NumIterations = " << niter << std::endl + << std::string( SEP79, '-' ) << std::endl; + std::cout << "Workflow summary = " << wrkflwtxt << std::endl +#if defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT + << "FP precision = MIXED (NaN/abnormal=" << nabn << ", zero=" << nzero << ")" << std::endl +#elif defined MGONGPU_FPTYPE_DOUBLE + << "FP precision = DOUBLE (NaN/abnormal=" << nabn << ", zero=" << nzero << ")" << std::endl +#elif defined MGONGPU_FPTYPE_FLOAT + << "FP precision = FLOAT (NaN/abnormal=" << nabn << ", zero=" << nzero << ")" << std::endl +#endif +#ifdef __CUDACC__ +#if defined MGONGPU_CUCXTYPE_CUCOMPLEX + << "Complex type = CUCOMPLEX" << std::endl +#elif defined MGONGPU_CUCXTYPE_THRUST + << "Complex type = THRUST::COMPLEX" << std::endl +#endif +#else + << "Complex type = STD::COMPLEX" << std::endl +#endif + << "RanNumb memory layout = AOSOA[" << neppR << "]" + << ( neppR == 1 ? " == AOS" : "" ) + << " [HARDCODED FOR REPRODUCIBILITY]" << std::endl + << "Momenta memory layout = AOSOA[" << neppM << "]" + << ( neppM == 1 ? 
" == AOS" : "" ) << std::endl +#ifdef __CUDACC__ + //<< "Wavefunction GPU memory = LOCAL" << std::endl +#else +#if !defined MGONGPU_CPPSIMD + << "Internal loops fptype_sv = SCALAR ('none': ~vector[" << neppV + << "], no SIMD)" << std::endl +#elif defined __AVX512VL__ +#ifdef MGONGPU_PVW512 + << "Internal loops fptype_sv = VECTOR[" << neppV + << "] ('512z': AVX512, 512bit)" << cxtref << std::endl +#else + << "Internal loops fptype_sv = VECTOR[" << neppV + << "] ('512y': AVX512, 256bit)" << cxtref << std::endl +#endif +#elif defined __AVX2__ + << "Internal loops fptype_sv = VECTOR[" << neppV + << "] ('avx2': AVX2, 256bit)" << cxtref << std::endl +#elif defined __SSE4_2__ + << "Internal loops fptype_sv = VECTOR[" << neppV +#ifdef __PPC__ + << "] ('sse4': PPC VSX, 128bit)" << cxtref << std::endl +#elif defined __ARM_NEON__ + << "] ('sse4': ARM NEON, 128bit)" << cxtref << std::endl +#else + << "] ('sse4': SSE4.2, 128bit)" << cxtref << std::endl +#endif +#else +#error Internal error: unknown SIMD build configuration +#endif +#endif + << "Random number generation = " << rndgentxt << std::endl +#ifndef __CUDACC__ +#ifdef _OPENMP + << "OMP threads / `nproc --all` = " << omp_get_max_threads() << " / " << nprocall // includes a newline +#endif +#endif + //<< "MatrixElements compiler = " << process.getCompiler() << std::endl + << std::string( SEP79, '-' ) << std::endl + << "HelicityComb Good/Tot = " << nGoodHel << "/" << mgOnGpu::ncomb << std::endl + << std::string( SEP79, '-' ) << std::endl + << "NumberOfEntries = " << niter << std::endl + << std::scientific // fixed format: affects all floats (default precision: 6) + << "TotalTime[Rnd+Rmb+ME] (123) = ( " << sumgtim + sumrtim + sumwtim << std::string( 16, ' ' ) << " ) sec" << std::endl + << "TotalTime[Rambo+ME] (23) = ( " << sumrtim + sumwtim << std::string( 16, ' ' ) << " ) sec" << std::endl + << "TotalTime[RndNumGen] (1) = ( " << sumgtim << std::string( 16, ' ' ) << " ) sec" << std::endl + << "TotalTime[Rambo] (2) = ( " << sumrtim << std::string( 16, ' ' ) << " ) sec" << std::endl + << "TotalTime[MatrixElems] (3) = ( " << sumwtim << std::string( 16, ' ' ) << " ) sec" << std::endl + << "MeanTimeInMatrixElems = ( " << meanwtim << std::string( 16, ' ' ) << " ) sec" << std::endl + << "[Min,Max]TimeInMatrixElems = [ " << minwtim + << " , " << maxwtim << " ] sec" << std::endl + //<< "StdDevTimeInMatrixElems = ( " << stdwtim << std::string(16, ' ') << " ) sec" << std::endl + << "TotalTime[MECalcOnly] (3a) = ( " << sumw3atim << std::string( 16, ' ' ) << " ) sec" << std::endl + << "MeanTimeInMECalcOnly = ( " << meanw3atim << std::string( 16, ' ' ) << " ) sec" << std::endl + << "[Min,Max]TimeInMECalcOnly = [ " << minw3atim + << " , " << maxw3atim << " ] sec" << std::endl + //<< "StdDevTimeInMECalcOnly = ( " << stdw3atim << std::string(16, ' ') << " ) sec" << std::endl + << std::string( SEP79, '-' ) << std::endl + //<< "ProcessID: = " << getpid() << std::endl + //<< "NProcesses = " << process.nprocesses << std::endl // assume nprocesses == 1 (#272 and #343) + << "TotalEventsComputed = " << nevtALL << std::endl + << "EvtsPerSec[Rnd+Rmb+ME](123) = ( " << nevtALL / ( sumgtim + sumrtim + sumwtim ) + << std::string( 16, ' ' ) << " ) sec^-1" << std::endl + << "EvtsPerSec[Rmb+ME] (23) = ( " << nevtALL / ( sumrtim + sumwtim ) + << std::string( 16, ' ' ) << " ) sec^-1" << std::endl + //<< "EvtsPerSec[RndNumGen] (1) = ( " << nevtALL/sumgtim + //<< std::string(16, ' ') << " ) sec^-1" << std::endl + //<< "EvtsPerSec[Rambo] (2) = ( " << nevtALL/sumrtim + //<< 
std::string(16, ' ') << " ) sec^-1" << std::endl + << "EvtsPerSec[MatrixElems] (3) = ( " << nevtALL / sumwtim + << std::string( 16, ' ' ) << " ) sec^-1" << std::endl + << "EvtsPerSec[MECalcOnly] (3a) = ( " << nevtALL / sumw3atim + << std::string( 16, ' ' ) << " ) sec^-1" << std::endl + << std::defaultfloat; // default format: affects all floats + std::cout << std::string( SEP79, '*' ) << std::endl + << hstStats; + } + + // --- 9b Dump to json + const std::string jsonKey = "9b DumpJson"; + timermap.start( jsonKey ); + + if( json ) + { + std::string jsonFileName = std::to_string( jsondate ) + "-perf-test-run" + std::to_string( jsonrun ) + ".json"; + jsonFileName = "./perf/data/" + jsonFileName; + + //Checks if file exists + std::ifstream fileCheck; + bool fileExists = false; + fileCheck.open( jsonFileName ); + if( fileCheck ) + { + fileExists = true; + fileCheck.close(); + } + + std::ofstream jsonFile; + jsonFile.open( jsonFileName, std::ios_base::app ); + if( !fileExists ) + { + jsonFile << "[" << std::endl; + } + else + { + //deleting the last bracket and outputting a ", " + std::string temp = "truncate -s-1 " + jsonFileName; + const char* command = temp.c_str(); + if( system( command ) != 0 ) + std::cout << "WARNING! Command '" << temp << "' failed" << std::endl; + jsonFile << ", " << std::endl; + } + + jsonFile << "{" << std::endl + << "\"NumIterations\": " << niter << ", " << std::endl + << "\"NumThreadsPerBlock\": " << gputhreads << ", " << std::endl + << "\"NumBlocksPerGrid\": " << gpublocks << ", " << std::endl +#if defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT + << "\"FP precision\": " + << "\"MIXED (NaN/abnormal=" << nabn << ")\"," << std::endl +#elif defined MGONGPU_FPTYPE_DOUBLE + << "\"FP precision\": " + << "\"DOUBLE (NaN/abnormal=" << nabn << ")\"," << std::endl +#elif defined MGONGPU_FPTYPE_FLOAT + << "\"FP precision\": " + << "\"FLOAT (NaN/abnormal=" << nabn << ")\"," << std::endl +#endif + << "\"Complex type\": " +#ifdef __CUDACC__ +#if defined MGONGPU_CUCXTYPE_CUCOMPLEX + << "\"CUCOMPLEX\"," << std::endl +#elif defined MGONGPU_CUCXTYPE_THRUST + << "\"THRUST::COMPLEX\"," << std::endl +#endif +#else + << "\"STD::COMPLEX\"," << std::endl +#endif + << "\"RanNumb memory layout\": " + << "\"AOSOA[" << neppR << "]\"" + << ( neppR == 1 ? " == AOS" : "" ) << ", " << std::endl + << "\"Momenta memory layout\": " + << "\"AOSOA[" << neppM << "]\"" + << ( neppM == 1 ? 
" == AOS" : "" ) << ", " << std::endl +#ifdef __CUDACC__ + //<< "\"Wavefunction GPU memory\": " << "\"LOCAL\"," << std::endl +#endif + << "\"Curand generation\": " + << "\"" << rndgentxt << "\"," << std::endl; + + double minelem = hstStats.minME; + double maxelem = hstStats.maxME; + double meanelem = hstStats.meanME(); + double stdelem = hstStats.stdME(); + + jsonFile << "\"NumberOfEntries\": " << niter << "," << std::endl + //<< std::scientific // Not sure about this + << "\"TotalTime[Rnd+Rmb+ME] (123)\": \"" + << std::to_string( sumgtim + sumrtim + sumwtim ) << " sec\"," + << std::endl + << "\"TotalTime[Rambo+ME] (23)\": \"" + << std::to_string( sumrtim + sumwtim ) << " sec\"," << std::endl + << "\"TotalTime[RndNumGen] (1)\": \"" + << std::to_string( sumgtim ) << " sec\"," << std::endl + << "\"TotalTime[Rambo] (2)\": \"" + << std::to_string( sumrtim ) << " sec\"," << std::endl + << "\"TotalTime[MatrixElems] (3)\": \"" + << std::to_string( sumwtim ) << " sec\"," << std::endl + << "\"MeanTimeInMatrixElems\": \"" + << std::to_string( meanwtim ) << " sec\"," << std::endl + << "\"MinTimeInMatrixElems\": \"" + << std::to_string( minwtim ) << " sec\"," << std::endl + << "\"MaxTimeInMatrixElems\": \"" + << std::to_string( maxwtim ) << " sec\"," << std::endl + //<< "ProcessID: = " << getpid() << std::endl + //<< "NProcesses = " << process.nprocesses << std::endl // assume nprocesses == 1 (#272 and #343) + << "\"TotalEventsComputed\": " << nevtALL << "," << std::endl + << "\"EvtsPerSec[Rnd+Rmb+ME](123)\": \"" + << std::to_string( nevtALL / ( sumgtim + sumrtim + sumwtim ) ) << " sec^-1\"," << std::endl + << "\"EvtsPerSec[Rmb+ME] (23)\": \"" + << std::to_string( nevtALL / ( sumrtim + sumwtim ) ) << " sec^-1\"," << std::endl + << "\"EvtsPerSec[MatrixElems] (3)\": \"" + << std::to_string( nevtALL / sumwtim ) << " sec^-1\"," << std::endl + << "\"EvtsPerSec[MECalcOnly] (3)\": \"" + << std::to_string( nevtALL / sumw3atim ) << " sec^-1\"," << std::endl + << "\"NumMatrixElems(notAbnormal)\": " << nevtALL - nabn << "," << std::endl + << std::scientific + << "\"MeanMatrixElemValue\": " + << "\"" << std::to_string( meanelem ) << " GeV^" + << std::to_string( meGeVexponent ) << "\"," << std::endl + << "\"StdErrMatrixElemValue\": " + << "\"" << std::to_string( stdelem / sqrt( nevtALL ) ) << " GeV^" + << std::to_string( meGeVexponent ) << "\"," << std::endl + << "\"StdDevMatrixElemValue\": " + << "\"" << std::to_string( stdelem ) + << " GeV^" << std::to_string( meGeVexponent ) << "\"," << std::endl + << "\"MinMatrixElemValue\": " + << "\"" << std::to_string( minelem ) << " GeV^" + << std::to_string( meGeVexponent ) << "\"," << std::endl + << "\"MaxMatrixElemValue\": " + << "\"" << std::to_string( maxelem ) << " GeV^" + << std::to_string( meGeVexponent ) << "\"," << std::endl; + + timermap.dump( jsonFile, true ); // NB For the active json timer this dumps a partial total + + jsonFile << "}" << std::endl; + jsonFile << "]"; + jsonFile.close(); + } + + // *** STOP THE NEW TIMERS *** + timermap.stop(); + if( perf ) + { + std::cout << std::string( SEP79, '*' ) << std::endl; + timermap.dump(); + std::cout << std::string( SEP79, '*' ) << std::endl; + } + + // [NB some resources like curand generators will be deleted here when stack-allocated classes go out of scope] + //std::cout << "ALL OK" << std::endl; + return 0; +} diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/cudacpp.mk 
b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/cudacpp.mk new file mode 120000 index 0000000000..252b38e27a --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/cudacpp.mk @@ -0,0 +1 @@ +../cudacpp.mk \ No newline at end of file diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/epoch_process_id.h b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/epoch_process_id.h new file mode 100644 index 0000000000..064373c38c --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/epoch_process_id.h @@ -0,0 +1,11 @@ +#ifndef EPOCH_PROCESS_ID_H +#define EPOCH_PROCESS_ID_H 1 + +// No need to indicate EPOCHX_ any longer for auto-generated code +// However, keep the name of the file as it may be useful again for new manual developments +#define MG_EPOCH_PROCESS_ID SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX + +// For simplicity, define here the name of the process-dependent reference file for tests +#define MG_EPOCH_REFERENCE_FILE_NAME "../../../../../test/ref/dump_CPUTest.Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx.txt" + +#endif // EPOCH_PROCESS_ID_H diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/fbridge.cc b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/fbridge.cc new file mode 120000 index 0000000000..cbcc1f579f --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/fbridge.cc @@ -0,0 +1 @@ +../fbridge.cc \ No newline at end of file diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/fbridge.inc b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/fbridge.inc new file mode 120000 index 0000000000..69598a6d2f --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/fbridge.inc @@ -0,0 +1 @@ +../fbridge.inc \ No newline at end of file diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/fcheck_sa.f b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/fcheck_sa.f new file mode 100644 index 0000000000..0320b590a7 --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/fcheck_sa.f @@ -0,0 +1,84 @@ + PROGRAM FCHECK_SA + IMPLICIT NONE + INCLUDE 'fsampler.inc' + INCLUDE 'fbridge.inc' + INTEGER*8 SAMPLER, BRIDGE ! 64bit memory addresses + INTEGER NEVTMAX, NEXTERNAL, NP4 + PARAMETER(NEVTMAX=2048*256, NEXTERNAL=6, NP4=4) + CHARACTER*32 ARG0, ARG1, ARG2, ARG3 + INTEGER NARG1, NARG2, NARG3 + INTEGER NEVT, NITER + INTEGER IEVT, IITER +c INTEGER IEXTERNAL + DOUBLE PRECISION MOMENTA(0:NP4-1, NEXTERNAL, NEVTMAX) ! c-array momenta[nevt][nexternal][np4] + DOUBLE PRECISION GS(NEVTMAX) + DOUBLE PRECISION RNDHEL(NEVTMAX) ! not yet used + DOUBLE PRECISION RNDCOL(NEVTMAX) ! not yet used + INTEGER*4 CHANID + PARAMETER(CHANID=0) ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 + DOUBLE PRECISION MES(NEVTMAX) + INTEGER*4 SELHEL(NEVTMAX) ! not yet used + INTEGER*4 SELCOL(NEVTMAX) ! not yet used + DOUBLE PRECISION MES_SUM ! use REAL*16 for quadruple precision + INTEGER NEVTOK ! 
exclude nan/abnormal MEs +C +C READ COMMAND LINE ARGUMENTS +C (NB: most errors will crash the program !) +C + IF ( COMMAND_ARGUMENT_COUNT() == 3 ) THEN + CALL GET_COMMAND_ARGUMENT(1,ARG1) + CALL GET_COMMAND_ARGUMENT(2,ARG2) + CALL GET_COMMAND_ARGUMENT(3,ARG3) + READ (ARG1,'(I4)') NARG1 + READ (ARG2,'(I4)') NARG2 + READ (ARG3,'(I4)') NARG3 + WRITE(6,*) "GPUBLOCKS= ", NARG1 + WRITE(6,*) "GPUTHREADS= ", NARG2 + WRITE(6,*) "NITERATIONS=", NARG3 + NEVT = NARG1 * NARG2 + NITER = NARG3 + IF ( NEVT > NEVTMAX ) THEN + WRITE(6,*) "ERROR! NEVT>NEVTMAX" + STOP + ENDIF + ELSE + CALL GET_COMMAND_ARGUMENT(0,ARG0) + WRITE(6,*) "Usage: ", TRIM(ARG0), + & " gpublocks gputhreads niterations" + STOP + ENDIF +C +C USE SAMPLER AND BRIDGE +C + NEVTOK = 0 + MES_SUM = 0 + CALL FBRIDGECREATE(BRIDGE, NEVT, NEXTERNAL, NP4) ! this must be at the beginning as it initialises the CUDA device + CALL FSAMPLERCREATE(SAMPLER, NEVT, NEXTERNAL, NP4) + DO IITER = 1, NITER + CALL FSAMPLERSEQUENCE(SAMPLER, MOMENTA) + DO IEVT = 1, NEVT + GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) + END DO + CALL FBRIDGESEQUENCE(BRIDGE, MOMENTA, GS, + & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + DO IEVT = 1, NEVT +c DO IEXTERNAL = 1, NEXTERNAL +c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, +c & MOMENTA(0, IEXTERNAL, IEVT), +c & MOMENTA(1, IEXTERNAL, IEVT), +c & MOMENTA(2, IEXTERNAL, IEVT), +c & MOMENTA(3, IEXTERNAL, IEVT) +c END DO +c WRITE(6,*) 'MES ', IEVT, MES(IEVT) +c WRITE(6,*) + IF ( .NOT. ISNAN(MES(IEVT)) ) THEN + NEVTOK = NEVTOK + 1 + MES_SUM = MES_SUM + MES(IEVT) + ENDIF + END DO + END DO + CALL FSAMPLERDELETE(SAMPLER) + CALL FBRIDGEDELETE(BRIDGE) ! this must be at the end as it shuts down the CUDA device + WRITE(6,*) 'Average Matrix Element:', MES_SUM/NEVT/NITER + WRITE(6,*) 'Abnormal MEs:', NEVT*NITER - NEVTOK + END PROGRAM FCHECK_SA diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/fsampler.cc b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/fsampler.cc new file mode 120000 index 0000000000..521c828d41 --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/fsampler.cc @@ -0,0 +1 @@ +../fsampler.cc \ No newline at end of file diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/fsampler.inc b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/fsampler.inc new file mode 120000 index 0000000000..4b0f3c2656 --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/fsampler.inc @@ -0,0 +1 @@ +../fsampler.inc \ No newline at end of file diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/gBridgeKernels.cu b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/gBridgeKernels.cu new file mode 120000 index 0000000000..12c1d49d13 --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/gBridgeKernels.cu @@ -0,0 +1 @@ +BridgeKernels.cc \ No newline at end of file diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/gCPPProcess.cu b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/gCPPProcess.cu new file mode 120000 index 
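The hard-coded coupling GS(IEVT) = 1.2177157847767195 above is simply g_s = sqrt(4*pi*alpha_s) evaluated at alpha_s = 0.118, as the inline comment notes; a standalone check of that arithmetic (not part of the generated code):

```cpp
#include <cmath>
#include <cstdio>

int main()
{
  const double alphas = 0.118;                    // fixed aS used by the drivers
  const double pi = std::acos( -1. );
  const double gs = std::sqrt( 4. * pi * alphas ); // g_s = sqrt(4*pi*alpha_s)
  std::printf( "%.16f\n", gs );                    // ~1.2177157847767195, the value hard-coded above
  return 0;
}
```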
0000000000..1fc8661d4e --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/gCPPProcess.cu @@ -0,0 +1 @@ +CPPProcess.cc \ No newline at end of file diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/gCrossSectionKernels.cu b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/gCrossSectionKernels.cu new file mode 120000 index 0000000000..9a05a7b55a --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/gCrossSectionKernels.cu @@ -0,0 +1 @@ +CrossSectionKernels.cc \ No newline at end of file diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/gMatrixElementKernels.cu b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/gMatrixElementKernels.cu new file mode 120000 index 0000000000..82415576cc --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/gMatrixElementKernels.cu @@ -0,0 +1 @@ +MatrixElementKernels.cc \ No newline at end of file diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/gRamboSamplingKernels.cu b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/gRamboSamplingKernels.cu new file mode 120000 index 0000000000..8dbfaa6493 --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/gRamboSamplingKernels.cu @@ -0,0 +1 @@ +RamboSamplingKernels.cc \ No newline at end of file diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/gRandomNumberKernels.cu b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/gRandomNumberKernels.cu new file mode 120000 index 0000000000..26580cf106 --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/gRandomNumberKernels.cu @@ -0,0 +1 @@ +RandomNumberKernels.cc \ No newline at end of file diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/gcheck_sa.cu b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/gcheck_sa.cu new file mode 120000 index 0000000000..b99171c25e --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/gcheck_sa.cu @@ -0,0 +1 @@ +check_sa.cc \ No newline at end of file diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/makefile b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/makefile new file mode 120000 index 0000000000..cd937e1d9e --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/makefile @@ -0,0 +1 @@ +cudacpp.mk \ No newline at end of file diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/nvtx.h b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/nvtx.h new file mode 120000 index 0000000000..a2f268fa94 --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/nvtx.h @@ -0,0 +1 @@ +../nvtx.h \ No newline at 
end of file diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/ompnumthreads.h b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/ompnumthreads.h new file mode 120000 index 0000000000..4385e53fca --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/ompnumthreads.h @@ -0,0 +1 @@ +../ompnumthreads.h \ No newline at end of file diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/perf.py b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/perf.py new file mode 120000 index 0000000000..b7d410aefa --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/perf.py @@ -0,0 +1 @@ +../perf.py \ No newline at end of file diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/profile.sh b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/profile.sh new file mode 120000 index 0000000000..01080a084d --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/profile.sh @@ -0,0 +1 @@ +../profile.sh \ No newline at end of file diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/runTest.cc b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/runTest.cc new file mode 120000 index 0000000000..32afd3ca34 --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/runTest.cc @@ -0,0 +1 @@ +../runTest.cc \ No newline at end of file diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/testmisc.cc b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/testmisc.cc new file mode 120000 index 0000000000..3b553cf3f8 --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/testmisc.cc @@ -0,0 +1 @@ +../testmisc.cc \ No newline at end of file diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/testxxx.cc b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/testxxx.cc new file mode 120000 index 0000000000..045b2f10ea --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/testxxx.cc @@ -0,0 +1 @@ +../testxxx.cc \ No newline at end of file diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/testxxx_cc_ref.txt b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/testxxx_cc_ref.txt new file mode 120000 index 0000000000..51764d98ac --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/testxxx_cc_ref.txt @@ -0,0 +1 @@ +../testxxx_cc_ref.txt \ No newline at end of file diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/timer.h b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/timer.h new file mode 120000 index 0000000000..e161ad9e27 --- /dev/null +++ 
b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/timer.h @@ -0,0 +1 @@ +../timer.h \ No newline at end of file diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/timermap.h b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/timermap.h new file mode 120000 index 0000000000..1479de7fc0 --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/timermap.h @@ -0,0 +1 @@ +../timermap.h \ No newline at end of file diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/RamboSamplingKernels.cc b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/RamboSamplingKernels.cc new file mode 100644 index 0000000000..ed2e042427 --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/RamboSamplingKernels.cc @@ -0,0 +1,178 @@ +#include "RamboSamplingKernels.h" + +#include "CudaRuntime.h" +#include "MemoryAccessMomenta.h" +#include "MemoryAccessRandomNumbers.h" +#include "MemoryAccessWeights.h" +#include "MemoryBuffers.h" +#include "rambo.h" // inline implementation of RAMBO algorithms and kernels + +#include <sstream> + +#ifdef __CUDACC__ +namespace mg5amcGpu +#else +namespace mg5amcCpu +#endif +{ + //-------------------------------------------------------------------------- + + RamboSamplingKernelHost::RamboSamplingKernelHost( const fptype energy, // input: energy + const BufferRndNumMomenta& rndmom, // input: random numbers in [0,1] + BufferMomenta& momenta, // output: momenta + BufferWeights& weights, // output: weights + const size_t nevt ) + : SamplingKernelBase( energy, rndmom, momenta, weights ) + , NumberOfEvents( nevt ) + { + if( m_rndmom.isOnDevice() ) throw std::runtime_error( "RamboSamplingKernelHost: rndmom must be a host array" ); + if( m_momenta.isOnDevice() ) throw std::runtime_error( "RamboSamplingKernelHost: momenta must be a host array" ); + if( m_weights.isOnDevice() ) throw std::runtime_error( "RamboSamplingKernelHost: weights must be a host array" ); + if( this->nevt() != m_rndmom.nevt() ) throw std::runtime_error( "RamboSamplingKernelHost: nevt mismatch with rndmom" ); + if( this->nevt() != m_momenta.nevt() ) throw std::runtime_error( "RamboSamplingKernelHost: nevt mismatch with momenta" ); + if( this->nevt() != m_weights.nevt() ) throw std::runtime_error( "RamboSamplingKernelHost: nevt mismatch with weights" ); + // Sanity checks for memory access (momenta buffer) + constexpr int neppM = MemoryAccessMomenta::neppM; // AOSOA layout + static_assert( ispoweroftwo( neppM ), "neppM is not a power of 2" ); + if( nevt % neppM != 0 ) + { + std::ostringstream sstr; + sstr << "RamboSamplingKernelHost: nevt should be a multiple of neppM=" << neppM; + throw std::runtime_error( sstr.str() ); + } + // Sanity checks for memory access (random number buffer) + constexpr int neppR = MemoryAccessRandomNumbers::neppR; // AOSOA layout + static_assert( ispoweroftwo( neppR ), "neppR is not a power of 2" ); + if( nevt % neppR != 0 ) + { + std::ostringstream sstr; + sstr << "RamboSamplingKernelHost: nevt should be a multiple of neppR=" << neppR; + throw std::runtime_error( sstr.str() ); + } + } + + //-------------------------------------------------------------------------- + + void + RamboSamplingKernelHost::getMomentaInitial() + { + constexpr auto getMomentaInitial = ramboGetMomentaInitial; + // ** START LOOP ON IEVT ** + for( size_t ievt = 0; ievt < nevt(); ++ievt ) + { + // NB all KernelLaunchers assume that memory
access can be decomposed as "accessField = decodeRecord( accessRecord )" + fptype* ievtMomenta = MemoryAccessMomenta::ieventAccessRecord( m_momenta.data(), ievt ); + getMomentaInitial( m_energy, ievtMomenta ); + } + // ** END LOOP ON IEVT ** + } + + //-------------------------------------------------------------------------- + + void + RamboSamplingKernelHost::getMomentaFinal() + { + constexpr auto getMomentaFinal = ramboGetMomentaFinal; + // ** START LOOP ON IEVT ** + for( size_t ievt = 0; ievt < nevt(); ++ievt ) + { + // NB all KernelLaunchers assume that memory access can be decomposed as "accessField = decodeRecord( accessRecord )" + const fptype* ievtRndmom = MemoryAccessRandomNumbers::ieventAccessRecordConst( m_rndmom.data(), ievt ); + fptype* ievtMomenta = MemoryAccessMomenta::ieventAccessRecord( m_momenta.data(), ievt ); + fptype* ievtWeights = MemoryAccessWeights::ieventAccessRecord( m_weights.data(), ievt ); + getMomentaFinal( m_energy, ievtRndmom, ievtMomenta, ievtWeights ); + } + // ** END LOOP ON IEVT ** + } + + //-------------------------------------------------------------------------- + +#ifdef __CUDACC__ + RamboSamplingKernelDevice::RamboSamplingKernelDevice( const fptype energy, // input: energy + const BufferRndNumMomenta& rndmom, // input: random numbers in [0,1] + BufferMomenta& momenta, // output: momenta + BufferWeights& weights, // output: weights + const size_t gpublocks, + const size_t gputhreads ) + : SamplingKernelBase( energy, rndmom, momenta, weights ) + , NumberOfEvents( gpublocks * gputhreads ) + , m_gpublocks( gpublocks ) + , m_gputhreads( gputhreads ) + { + if( !m_rndmom.isOnDevice() ) throw std::runtime_error( "RamboSamplingKernelDevice: rndmom must be a device array" ); + if( !m_momenta.isOnDevice() ) throw std::runtime_error( "RamboSamplingKernelDevice: momenta must be a device array" ); + if( !m_weights.isOnDevice() ) throw std::runtime_error( "RamboSamplingKernelDevice: weights must be a device array" ); + if( m_gpublocks == 0 ) throw std::runtime_error( "RamboSamplingKernelDevice: gpublocks must be > 0" ); + if( m_gputhreads == 0 ) throw std::runtime_error( "RamboSamplingKernelDevice: gputhreads must be > 0" ); + if( this->nevt() != m_rndmom.nevt() ) throw std::runtime_error( "RamboSamplingKernelDevice: nevt mismatch with rndmom" ); + if( this->nevt() != m_momenta.nevt() ) throw std::runtime_error( "RamboSamplingKernelDevice: nevt mismatch with momenta" ); + if( this->nevt() != m_weights.nevt() ) throw std::runtime_error( "RamboSamplingKernelDevice: nevt mismatch with weights" ); + // Sanity checks for memory access (momenta buffer) + constexpr int neppM = MemoryAccessMomenta::neppM; // AOSOA layout + static_assert( ispoweroftwo( neppM ), "neppM is not a power of 2" ); + if( m_gputhreads % neppM != 0 ) + { + std::ostringstream sstr; + sstr << "RamboSamplingKernelHost: gputhreads should be a multiple of neppM=" << neppM; + throw std::runtime_error( sstr.str() ); + } + // Sanity checks for memory access (random number buffer) + constexpr int neppR = MemoryAccessRandomNumbers::neppR; // AOSOA layout + static_assert( ispoweroftwo( neppR ), "neppR is not a power of 2" ); + if( m_gputhreads % neppR != 0 ) + { + std::ostringstream sstr; + sstr << "RamboSamplingKernelDevice: gputhreads should be a multiple of neppR=" << neppR; + throw std::runtime_error( sstr.str() ); + } + } +#endif + + //-------------------------------------------------------------------------- + +#ifdef __CUDACC__ + __global__ void + getMomentaInitialDevice( const fptype energy, + fptype* 
momenta ) + { + constexpr auto getMomentaInitial = ramboGetMomentaInitial; + return getMomentaInitial( energy, momenta ); + } +#endif + + //-------------------------------------------------------------------------- + +#ifdef __CUDACC__ + void + RamboSamplingKernelDevice::getMomentaInitial() + { + getMomentaInitialDevice<<<m_gpublocks, m_gputhreads>>>( m_energy, m_momenta.data() ); + } +#endif + + //-------------------------------------------------------------------------- + +#ifdef __CUDACC__ + __global__ void + getMomentaFinalDevice( const fptype energy, + const fptype* rndmom, + fptype* momenta, + fptype* wgts ) + { + constexpr auto getMomentaFinal = ramboGetMomentaFinal; + return getMomentaFinal( energy, rndmom, momenta, wgts ); + } +#endif + + //-------------------------------------------------------------------------- + +#ifdef __CUDACC__ + void + RamboSamplingKernelDevice::getMomentaFinal() + { + getMomentaFinalDevice<<<m_gpublocks, m_gputhreads>>>( m_energy, m_rndmom.data(), m_momenta.data(), m_weights.data() ); + } +#endif + + //-------------------------------------------------------------------------- +} diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/RamboSamplingKernels.h b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/RamboSamplingKernels.h new file mode 100644 index 0000000000..f40433af4a --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/RamboSamplingKernels.h @@ -0,0 +1,129 @@ +#ifndef RAMBOSAMPLINGKERNELS_H +#define RAMBOSAMPLINGKERNELS_H 1 + +#include "mgOnGpuConfig.h" + +#include "MemoryBuffers.h" + +#ifdef __CUDACC__ +namespace mg5amcGpu +#else +namespace mg5amcCpu +#endif +{ + //-------------------------------------------------------------------------- + + // A base class encapsulating phase space sampling on a CPU host or on a GPU device + class SamplingKernelBase //: virtual public ISamplingKernel + { + protected: + + // Constructor from existing input and output buffers + SamplingKernelBase( const fptype energy, // input: energy + const BufferRndNumMomenta& rndmom, // input: random numbers in [0,1] + BufferMomenta& momenta, // output: momenta + BufferWeights& weights ) // output: weights + : m_energy( energy ) + , m_rndmom( rndmom ) + , m_momenta( momenta ) + , m_weights( weights ) + { + } + + public: + + // Destructor + virtual ~SamplingKernelBase() {} + + // Get momenta of initial state particles + virtual void getMomentaInitial() = 0; + + // Get momenta of final state particles and weights + virtual void getMomentaFinal() = 0; + + // Is this a host or device kernel?
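The device kernels above are launched over a grid of m_gpublocks x m_gputhreads threads, and the NumberOfEvents( gpublocks * gputhreads ) base in the device kernel's constructor implies one event per GPU thread. A self-contained sketch of that launch shape with a toy kernel (the kernel body is illustrative only, it is not the RAMBO sampling code):

```cpp
#include <cstdio>
#include <cuda_runtime.h>

__global__ void initWeightsKernel( double* wgts ) // toy stand-in for getMomentaFinalDevice
{
  const int ievt = blockDim.x * blockIdx.x + threadIdx.x; // one event per thread
  wgts[ievt] = 1.; // a real kernel would compute the phase-space weight here
}

int main()
{
  const int gpublocks = 4, gputhreads = 256;
  const int nevt = gpublocks * gputhreads;
  double* devWgts = nullptr;
  cudaMalloc( &devWgts, nevt * sizeof( double ) );
  initWeightsKernel<<<gpublocks, gputhreads>>>( devWgts ); // same <<<blocks, threads>>> shape as above
  cudaDeviceSynchronize();
  cudaFree( devWgts );
  std::printf( "launched %d x %d = %d threads\n", gpublocks, gputhreads, nevt );
  return 0;
}
```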
+ virtual bool isOnDevice() const = 0; + + protected: + + // The energy + const fptype m_energy; + + // The buffer for the input random numbers + const BufferRndNumMomenta& m_rndmom; + + // The buffer for the output momenta + BufferMomenta& m_momenta; + + // The buffer for the output weights + BufferWeights& m_weights; + }; + + //-------------------------------------------------------------------------- + + // A class encapsulating RAMBO phase space sampling on a CPU host + class RamboSamplingKernelHost final : public SamplingKernelBase, public NumberOfEvents + { + public: + + // Constructor from existing input and output buffers + RamboSamplingKernelHost( const fptype energy, // input: energy + const BufferRndNumMomenta& rndmom, // input: random numbers in [0,1] + BufferMomenta& momenta, // output: momenta + BufferWeights& weights, // output: weights + const size_t nevt ); + + // Destructor + virtual ~RamboSamplingKernelHost() {} + + // Get momenta of initial state particles + void getMomentaInitial() override final; + + // Get momenta of final state particles and weights + void getMomentaFinal() override final; + + // Is this a host or device kernel? + bool isOnDevice() const override final { return false; } + }; + + //-------------------------------------------------------------------------- + +#ifdef __CUDACC__ + // A class encapsulating RAMBO phase space sampling on a GPU device + class RamboSamplingKernelDevice final : public SamplingKernelBase, public NumberOfEvents + { + public: + + // Constructor from existing input and output buffers + RamboSamplingKernelDevice( const fptype energy, // input: energy + const BufferRndNumMomenta& rndmom, // input: random numbers in [0,1] + BufferMomenta& momenta, // output: momenta + BufferWeights& weights, // output: weights + const size_t gpublocks, + const size_t gputhreads ); + + // Destructor + virtual ~RamboSamplingKernelDevice() {} + + // Get momenta of initial state particles + void getMomentaInitial() override final; + + // Get momenta of final state particles and weights + void getMomentaFinal() override final; + + // Is this a host or device kernel? 
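The sampling-kernel constructors above insist that the number of events (or gputhreads on the device) is a multiple of neppM and neppR because the momenta and random-number buffers are AOSOA arrays built from fixed-size "pages" of events. A sketch of the kind of index decomposition such a layout implies (the ordering, names and neppM value are illustrative assumptions, not the actual MemoryAccessMomenta implementation):

```cpp
#include <cstdio>

// Hypothetical flat index for an AOSOA buffer momenta[npagM][npar][np4][neppM]:
// the event index splits into a "page" index and an offset inside the page.
inline int aosoaIndex( int ievt, int ipar, int ip4, int npar, int np4, int neppM )
{
  const int ipagM = ievt / neppM; // page containing this event
  const int ieppM = ievt % neppM; // position of this event inside the page
  return ipagM * npar * np4 * neppM + ipar * np4 * neppM + ip4 * neppM + ieppM;
}

int main()
{
  const int npar = 6, np4 = 4, neppM = 4; // 6 external particles for gg->ttxttx; neppM=4 is an illustrative value
  // with this layout the same component of neppM consecutive events is contiguous,
  // enabling SIMD loads on the host and coalesced accesses on the GPU -
  // hence the requirement that the number of events be a multiple of neppM
  std::printf( "event 5, particle 2, component 0 -> flat index %d\n", aosoaIndex( 5, 2, 0, npar, np4, neppM ) );
  return 0;
}
```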
+ bool isOnDevice() const override final { return true; } + + private: + + // The number of blocks in the GPU grid + size_t m_gpublocks; + + // The number of threads in the GPU grid + size_t m_gputhreads; + }; +#endif + + //-------------------------------------------------------------------------- +} +#endif // RAMBOSAMPLINGKERNELS_H diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/RandomNumberKernels.cc b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/RandomNumberKernels.cc new file mode 100644 index 0000000000..eb8bc09ea9 --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/RandomNumberKernels.cc @@ -0,0 +1,149 @@ +#include "RandomNumberKernels.h" + +#include "CommonRandomNumbers.h" +#include "CudaRuntime.h" +#include "MemoryBuffers.h" + +#include <cassert> + +#ifndef MGONGPU_HAS_NO_CURAND /* clang-format off */ +#define checkCurand( code ){ assertCurand( code, __FILE__, __LINE__ ); } +inline void assertCurand( curandStatus_t code, const char *file, int line, bool abort = true ) +{ + if ( code != CURAND_STATUS_SUCCESS ) + { + printf( "CurandAssert: %s:%d code=%d\n", file, line, code ); + if ( abort ) assert( code == CURAND_STATUS_SUCCESS ); + } +} +#endif /* clang-format on */ + +#ifdef __CUDACC__ +namespace mg5amcGpu +#else +namespace mg5amcCpu +#endif +{ + //-------------------------------------------------------------------------- + + CommonRandomNumberKernel::CommonRandomNumberKernel( BufferRndNumMomenta& rnarray ) + : RandomNumberKernelBase( rnarray ) + , m_seed( 20211220 ) + { + if( m_rnarray.isOnDevice() ) + throw std::runtime_error( "CommonRandomNumberKernel on host with a device random number array" ); + } + + //-------------------------------------------------------------------------- + + void CommonRandomNumberKernel::generateRnarray() + { + std::vector<double> rnd = CommonRandomNumbers::generate<double>( m_rnarray.size(), m_seed ); // NB: generate as double (HARDCODED) + std::copy( rnd.begin(), rnd.end(), m_rnarray.data() ); // NB: copy may imply a double-to-float conversion + } + + //-------------------------------------------------------------------------- + +#ifndef MGONGPU_HAS_NO_CURAND + CurandRandomNumberKernel::CurandRandomNumberKernel( BufferRndNumMomenta& rnarray, const bool onDevice ) + : RandomNumberKernelBase( rnarray ) + , m_isOnDevice( onDevice ) + { + if( m_isOnDevice ) + { +#ifdef __CUDACC__ + if( !m_rnarray.isOnDevice() ) + throw std::runtime_error( "CurandRandomNumberKernel on device with a host random number array" ); +#else + throw std::runtime_error( "CurandRandomNumberKernel does not support CurandDevice on CPU host" ); +#endif + } + else + { + if( m_rnarray.isOnDevice() ) + throw std::runtime_error( "CurandRandomNumberKernel on host with a device random number array" ); + } + createGenerator(); + } + + //-------------------------------------------------------------------------- + + CurandRandomNumberKernel::~CurandRandomNumberKernel() + { + destroyGenerator(); + } + + //-------------------------------------------------------------------------- + + void CurandRandomNumberKernel::seedGenerator( const unsigned int seed ) + { + if( m_isOnDevice ) + { + destroyGenerator(); // workaround for #429 + createGenerator(); // workaround for #429 + } + //printf( "seedGenerator: seed %d\n", seed ); + checkCurand( curandSetPseudoRandomGeneratorSeed( m_rnGen, seed ) ); + } + + //-------------------------------------------------------------------------- + + void CurandRandomNumberKernel::createGenerator() + { + // [NB Timings are for GenRnGen host|device (cpp|cuda) generation
of 256*32*1 events with nproc=1: rn(0) is host=0.0012s] + const curandRngType_t type = CURAND_RNG_PSEUDO_MTGP32; // 0.00082s | 0.00064s (FOR FAST TESTS) + //const curandRngType_t type = CURAND_RNG_PSEUDO_XORWOW; // 0.049s | 0.0016s + //const curandRngType_t type = CURAND_RNG_PSEUDO_MRG32K3A; // 0.71s | 0.0012s (better but slower, especially in c++) + //const curandRngType_t type = CURAND_RNG_PSEUDO_MT19937; // 21s | 0.021s + //const curandRngType_t type = CURAND_RNG_PSEUDO_PHILOX4_32_10; // 0.024s | 0.00026s (used to segfault?) + if( m_isOnDevice ) + { + checkCurand( curandCreateGenerator( &m_rnGen, type ) ); + } + else + { + checkCurand( curandCreateGeneratorHost( &m_rnGen, type ) ); + } + //checkCurand( curandSetGeneratorOrdering( *&m_rnGen, CURAND_ORDERING_PSEUDO_LEGACY ) ); // fails with code=104 (see #429) + checkCurand( curandSetGeneratorOrdering( *&m_rnGen, CURAND_ORDERING_PSEUDO_BEST ) ); + //checkCurand( curandSetGeneratorOrdering( *&m_rnGen, CURAND_ORDERING_PSEUDO_DYNAMIC ) ); // fails with code=104 (see #429) + //checkCurand( curandSetGeneratorOrdering( *&m_rnGen, CURAND_ORDERING_PSEUDO_SEEDED ) ); // fails with code=104 (see #429) + } + + //-------------------------------------------------------------------------- + + void CurandRandomNumberKernel::destroyGenerator() + { + checkCurand( curandDestroyGenerator( m_rnGen ) ); + } + + //-------------------------------------------------------------------------- + + void CurandRandomNumberKernel::generateRnarray() + { +#if defined MGONGPU_FPTYPE_DOUBLE + checkCurand( curandGenerateUniformDouble( m_rnGen, m_rnarray.data(), m_rnarray.size() ) ); +#elif defined MGONGPU_FPTYPE_FLOAT + checkCurand( curandGenerateUniform( m_rnGen, m_rnarray.data(), m_rnarray.size() ) ); +#endif + /* + printf( "\nCurandRandomNumberKernel::generateRnarray size = %d\n", (int)m_rnarray.size() ); + fptype* data = m_rnarray.data(); +#ifdef __CUDACC__ + if( m_rnarray.isOnDevice() ) + { + data = new fptype[m_rnarray.size()](); + checkCuda( cudaMemcpy( data, m_rnarray.data(), m_rnarray.bytes(), cudaMemcpyDeviceToHost ) ); + } +#endif + for( int i = 0; i < ( (int)m_rnarray.size() / 4 ); i++ ) + printf( "[%4d] %f %f %f %f\n", i * 4, data[i * 4], data[i * 4 + 2], data[i * 4 + 2], data[i * 4 + 3] ); +#ifdef __CUDACC__ + if( m_rnarray.isOnDevice() ) delete[] data; +#endif + */ + } + + //-------------------------------------------------------------------------- +#endif +} diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/RandomNumberKernels.h b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/RandomNumberKernels.h new file mode 100644 index 0000000000..4d55f3d449 --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/RandomNumberKernels.h @@ -0,0 +1,146 @@ +#ifndef RANDOMNUMBERKERNELS_H +#define RANDOMNUMBERKERNELS_H 1 + +#include "mgOnGpuConfig.h" + +// NB This must come AFTER mgOnGpuConfig.h which contains our definition of __global__ when __CUDACC__ is not defined +#ifndef MGONGPU_HAS_NO_CURAND +#include "curand.h" +#endif + +#include "MemoryBuffers.h" + +#ifdef __CUDACC__ +namespace mg5amcGpu +#else +namespace mg5amcCpu +#endif +{ + //-------------------------------------------------------------------------- + + /* + // An interface encapsulating random number generation on a CPU host or on a GPU device + class IRandomNumberKernel + { + public: + + // Destructor + virtual ~IRandomNumberKernel(){} + + // Seed the random number generator + virtual void seedGenerator( const unsigned int seed ) = 0; + + // Generate the random number array + virtual void 
generateRnarray() = 0; + + // Is this a host or device kernel? + virtual bool isOnDevice() const = 0; + + }; + */ + + //-------------------------------------------------------------------------- + + // A base class encapsulating random number generation on a CPU host or on a GPU device + class RandomNumberKernelBase //: virtual public IRandomNumberKernel + { + + protected: + + // Constructor from an existing output buffer + RandomNumberKernelBase( BufferRndNumMomenta& rnarray ) + : m_rnarray( rnarray ) {} + + public: + + // Destructor + virtual ~RandomNumberKernelBase() {} + + // Seed the random number generator + virtual void seedGenerator( const unsigned int seed ) = 0; + + // Generate the random number array + virtual void generateRnarray() = 0; + + // Is this a host or device kernel? + virtual bool isOnDevice() const = 0; + + protected: + + // The buffer for the output random numbers + BufferRndNumMomenta& m_rnarray; + }; + + //-------------------------------------------------------------------------- + + // A class encapsulating common random number generation on a CPU host + class CommonRandomNumberKernel final : public RandomNumberKernelBase + { + public: + + // Constructor from an existing output buffer + CommonRandomNumberKernel( BufferRndNumMomenta& rnarray ); + + // Destructor + ~CommonRandomNumberKernel() {} + + // Seed the random number generator + void seedGenerator( const unsigned int seed ) override final { m_seed = seed; }; + + // Generate the random number array + void generateRnarray() override final; + + // Is this a host or device kernel? + bool isOnDevice() const override final { return false; } + + private: + + // The generator seed + unsigned int m_seed; + }; + + //-------------------------------------------------------------------------- + +#ifndef MGONGPU_HAS_NO_CURAND + // A class encapsulating CURAND random number generation on a CPU host or on a GPU device + class CurandRandomNumberKernel final : public RandomNumberKernelBase + { + public: + + // Constructor from an existing output buffer + CurandRandomNumberKernel( BufferRndNumMomenta& rnarray, const bool onDevice ); + + // Destructor + ~CurandRandomNumberKernel(); + + // Seed the random number generator + void seedGenerator( const unsigned int seed ) override final; + + // Generate the random number array + void generateRnarray() override final; + + // Is this a host or device kernel? + bool isOnDevice() const override final { return m_isOnDevice; } + + private: + + // Create the generator (workaround for #429: do this in every seedGenerator call rather than only in the ctor) + void createGenerator(); + + // Destroy the generator (workaround for #429: do this in every seedGenerator call rather than only in the ctor) + void destroyGenerator(); + + private: + + // Is this a host or device kernel? + const bool m_isOnDevice; + + // The curand generator + curandGenerator_t m_rnGen; + }; + +#endif + + //-------------------------------------------------------------------------- +} +#endif // RANDOMNUMBERKERNELS_H diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/cudacpp.mk new file mode 100644 index 0000000000..2155495366 --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/cudacpp.mk @@ -0,0 +1,798 @@ +#=== Determine the name of this makefile (https://ftp.gnu.org/old-gnu/Manuals/make-3.80/html_node/make_17.html) +#=== NB: different names (e.g. 
cudacpp.mk and cudacpp_src.mk) are used in the Subprocess and src directories + +CUDACPP_MAKEFILE = $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)) +CUDACPP_SRC_MAKEFILE = cudacpp_src.mk + +#------------------------------------------------------------------------------- + +#=== Use bash in the Makefile (https://www.gnu.org/software/make/manual/html_node/Choosing-the-Shell.html) + +SHELL := /bin/bash + +#------------------------------------------------------------------------------- + +#=== Detect O/S and architecture (assuming uname is available, https://en.wikipedia.org/wiki/Uname) + +# Detect O/S kernel (Linux, Darwin...) +UNAME_S := $(shell uname -s) +###$(info UNAME_S='$(UNAME_S)') + +# Detect architecture (x86_64, ppc64le...) +UNAME_P := $(shell uname -p) +###$(info UNAME_P='$(UNAME_P)') + +#------------------------------------------------------------------------------- + +#=== Configure common compiler flags for C++ and CUDA + +INCFLAGS = -I. +OPTFLAGS = -O3 # this ends up in CUFLAGS too (should it?), cannot add -Ofast or -ffast-math here + +# Dependency on src directory +MG5AMC_COMMONLIB = mg5amc_common +LIBFLAGS = -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +INCFLAGS += -I../../src + +# Dependency on tools directory +TOOLSDIR = ../../../../../tools +INCFLAGS += -I$(TOOLSDIR) + +# Dependency on test directory +TESTDIR = ../../../../../test +GTESTLIBDIR = $(TESTDIR)/googletest/build/lib/ +GTESTLIBS = $(GTESTLIBDIR)/libgtest.a $(GTESTLIBDIR)/libgtest_main.a + +#------------------------------------------------------------------------------- + +#=== Configure the C++ compiler + +CXXFLAGS = $(OPTFLAGS) -std=c++17 $(INCFLAGS) $(USE_NVTX) -Wall -Wshadow -Wextra +ifeq ($(shell $(CXX) --version | grep ^nvc++),) +CXXFLAGS+= -ffast-math # see issue #117 +endif +###CXXFLAGS+= -Ofast # performance is not different from --fast-math +###CXXFLAGS+= -g # FOR DEBUGGING ONLY + +# Optionally add debug flags to display the full list of flags (eg on Darwin) +###CXXFLAGS+= -v + +# Note: AR, CXX and FC are implicitly defined if not set externally +# See https://www.gnu.org/software/make/manual/html_node/Implicit-Variables.html + +#------------------------------------------------------------------------------- + +#=== Configure the CUDA compiler + +# If CXX is not a single word (example "clang++ --gcc-toolchain...") then disable CUDA builds (issue #505) +# This is because it is impossible to pass this to "CUFLAGS += -ccbin " below +ifneq ($(words $(subst ccache ,,$(CXX))),1) # allow at most "CXX=ccache " from outside + $(warning CUDA builds are not supported for multi-word CXX "$(CXX)") + override CUDA_HOME=disabled +endif + +# If CUDA_HOME is not set, try to set it from the location of nvcc +ifndef CUDA_HOME + CUDA_HOME = $(patsubst %bin/nvcc,%,$(shell which nvcc 2>/dev/null)) + $(warning CUDA_HOME was not set: using "$(CUDA_HOME)") +endif + +# Set NVCC as $(CUDA_HOME)/bin/nvcc if it exists +ifneq ($(wildcard $(CUDA_HOME)/bin/nvcc),) + NVCC = $(CUDA_HOME)/bin/nvcc + USE_NVTX ?=-DUSE_NVTX + # See https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html + # See https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/ + # Default: use compute capability 70 for V100 (CERN lxbatch, CERN itscrd, Juwels Cluster). + # Embed device code for 70, and PTX for 70+. + # Export MADGRAPH_CUDA_ARCHITECTURE (comma-separated list) to use another value or list of values (see #533). + # Examples: use 60 for P100 (Piz Daint), 80 for A100 (Juwels Booster, NVidia raplab/Curiosity). 
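The MADGRAPH_CUDA_ARCHITECTURE selection described above controls which -gencode SASS/PTX targets get embedded in the binaries; at run time the device's compute capability must be covered by one of those targets (or be JIT-compilable from the embedded PTX). A standalone snippet, not part of cudacpp.mk or of the generated code, that prints what the current device reports:

```cpp
#include <cstdio>
#include <cuda_runtime.h>

int main()
{
  cudaDeviceProp prop;
  if( cudaGetDeviceProperties( &prop, 0 ) != cudaSuccess )
  {
    std::printf( "no CUDA device visible\n" );
    return 1;
  }
  // e.g. prop.major=7, prop.minor=0 on a V100 matches the default MADGRAPH_CUDA_ARCHITECTURE=70
  std::printf( "compute capability %d.%d -> include %d%d in MADGRAPH_CUDA_ARCHITECTURE\n",
               prop.major, prop.minor, prop.major, prop.minor );
  return 0;
}
```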
+ MADGRAPH_CUDA_ARCHITECTURE ?= 70 + ###CUARCHFLAGS = -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=compute_$(MADGRAPH_CUDA_ARCHITECTURE) -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=sm_$(MADGRAPH_CUDA_ARCHITECTURE) # Older implementation (AV): go back to this one for multi-GPU support #533 + ###CUARCHFLAGS = --gpu-architecture=compute_$(MADGRAPH_CUDA_ARCHITECTURE) --gpu-code=sm_$(MADGRAPH_CUDA_ARCHITECTURE),compute_$(MADGRAPH_CUDA_ARCHITECTURE) # Newer implementation (SH): cannot use this as-is for multi-GPU support #533 + comma:=, + CUARCHFLAGS = $(foreach arch,$(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)),-gencode arch=compute_$(arch),code=compute_$(arch) -gencode arch=compute_$(arch),code=sm_$(arch)) + CUINC = -I$(CUDA_HOME)/include/ + CULIBFLAGS = -L$(CUDA_HOME)/lib64/ -lcurand # NB: -lcuda is not needed here! + CUOPTFLAGS = -lineinfo + CUFLAGS = $(OPTFLAGS) $(CUOPTFLAGS) $(INCFLAGS) $(CUINC) $(USE_NVTX) $(CUARCHFLAGS) -use_fast_math + ###CUFLAGS += -Xcompiler -Wall -Xcompiler -Wextra -Xcompiler -Wshadow + ###NVCC_VERSION = $(shell $(NVCC) --version | grep 'Cuda compilation tools' | cut -d' ' -f5 | cut -d, -f1) + CUFLAGS += -std=c++17 # need CUDA >= 11.2 (see #333): this is enforced in mgOnGpuConfig.h + # Without -maxrregcount: baseline throughput: 6.5E8 (16384 32 12) up to 7.3E8 (65536 128 12) + ###CUFLAGS+= --maxrregcount 160 # improves throughput: 6.9E8 (16384 32 12) up to 7.7E8 (65536 128 12) + ###CUFLAGS+= --maxrregcount 128 # improves throughput: 7.3E8 (16384 32 12) up to 7.6E8 (65536 128 12) + ###CUFLAGS+= --maxrregcount 96 # degrades throughput: 4.1E8 (16384 32 12) up to 4.5E8 (65536 128 12) + ###CUFLAGS+= --maxrregcount 64 # degrades throughput: 1.7E8 (16384 32 12) flat at 1.7E8 (65536 128 12) +else ifneq ($(origin REQUIRE_CUDA),undefined) + # If REQUIRE_CUDA is set but no cuda is found, stop here (e.g. for CI tests on GPU #443) + $(error No cuda installation found (set CUDA_HOME or make nvcc visible in PATH)) +else + # No cuda. Switch cuda compilation off and go to common random numbers in C++ + $(warning CUDA_HOME is not set or is invalid: export CUDA_HOME to compile with cuda) + override NVCC= + override USE_NVTX= + override CULIBFLAGS= +endif + +# Set the host C++ compiler for nvcc via "-ccbin " +# (NB issue #505: this must be a single word, "clang++ --gcc-toolchain..." 
is not supported) +CUFLAGS += -ccbin $(shell which $(subst ccache ,,$(CXX))) + +# Allow newer (unsupported) C++ compilers with older versions of CUDA if ALLOW_UNSUPPORTED_COMPILER_IN_CUDA is set (#504) +ifneq ($(origin ALLOW_UNSUPPORTED_COMPILER_IN_CUDA),undefined) +CUFLAGS += -allow-unsupported-compiler +endif + +#------------------------------------------------------------------------------- + +#=== Configure ccache for C++ and CUDA builds + +# Enable ccache if USECCACHE=1 +ifeq ($(USECCACHE)$(shell echo $(CXX) | grep ccache),1) + override CXX:=ccache $(CXX) +endif +#ifeq ($(USECCACHE)$(shell echo $(AR) | grep ccache),1) +# override AR:=ccache $(AR) +#endif +ifneq ($(NVCC),) + ifeq ($(USECCACHE)$(shell echo $(NVCC) | grep ccache),1) + override NVCC:=ccache $(NVCC) + endif +endif + +#------------------------------------------------------------------------------- + +#=== Configure PowerPC-specific compiler flags for C++ and CUDA + +# PowerPC-specific CXX compiler flags (being reviewed) +ifeq ($(UNAME_P),ppc64le) + CXXFLAGS+= -mcpu=power9 -mtune=power9 # gains ~2-3% both for none and sse4 + # Throughput references without the extra flags below: none=1.41-1.42E6, sse4=2.15-2.19E6 + ###CXXFLAGS+= -DNO_WARN_X86_INTRINSICS # no change + ###CXXFLAGS+= -fpeel-loops # no change + ###CXXFLAGS+= -funroll-loops # gains ~1% for none, loses ~1% for sse4 + ###CXXFLAGS+= -ftree-vectorize # no change + ###CXXFLAGS+= -flto # would increase to none=4.08-4.12E6, sse4=4.99-5.03E6! +else + ###CXXFLAGS+= -flto # also on Intel this would increase throughputs by a factor 2 to 4... + ######CXXFLAGS+= -fno-semantic-interposition # no benefit (neither alone, nor combined with -flto) +endif + +# PowerPC-specific CUDA compiler flags (to be reviewed!) +ifeq ($(UNAME_P),ppc64le) + CUFLAGS+= -Xcompiler -mno-float128 +endif + +#------------------------------------------------------------------------------- + +#=== Configure defaults and check if user-defined choices exist for OMPFLAGS, AVX, FPTYPE, HELINL, HRDCOD, RNDGEN + +# Set the default OMPFLAGS choice +ifneq ($(shell $(CXX) --version | egrep '^Intel'),) +override OMPFLAGS = -fopenmp +###override OMPFLAGS = # disable OpenMP MT on Intel (was ok without nvcc but not ok with nvcc before #578) +else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) +override OMPFLAGS = -fopenmp +###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) +else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) +override OMPFLAGS = # disable OpenMP MT on Apple clang (builds fail in the CI #578) +else +override OMPFLAGS = -fopenmp +###override OMPFLAGS = # disable OpenMP MT (default before #575) +endif + +# Set the default AVX (vectorization) choice +ifeq ($(AVX),) + ifeq ($(UNAME_P),ppc64le) + ###override AVX = none + override AVX = sse4 + else ifeq ($(UNAME_P),arm) + ###override AVX = none + override AVX = sse4 + else ifeq ($(wildcard /proc/cpuinfo),) + override AVX = none + $(warning Using AVX='$(AVX)' because host SIMD features cannot be read from /proc/cpuinfo) + else ifeq ($(shell grep -m1 -c avx512vl /proc/cpuinfo)$(shell $(CXX) --version | grep ^clang),1) + override AVX = 512y + ###$(info Using AVX='$(AVX)' as no user input exists) + else + override AVX = avx2 + ifneq ($(shell grep -m1 -c avx512vl /proc/cpuinfo),1) + $(warning Using AVX='$(AVX)' because host does not support avx512vl) + else + $(warning Using AVX='$(AVX)' because this is faster than avx512vl for clang) + endif + endif +else + ###$(info Using AVX='$(AVX)' according 
to user input) +endif + +# Set the default FPTYPE (floating point type) choice +ifeq ($(FPTYPE),) + override FPTYPE = d +endif + +# Set the default HELINL (inline helicities?) choice +ifeq ($(HELINL),) + override HELINL = 0 +endif + +# Set the default HRDCOD (hardcode cIPD physics parameters?) choice +ifeq ($(HRDCOD),) + override HRDCOD = 0 +endif + +# Set the default RNDGEN (random number generator) choice +ifeq ($(NVCC),) + override RNDGEN = hasNoCurand +else ifeq ($(RNDGEN),) + override RNDGEN = hasCurand +endif + +# Export AVX, FPTYPE, HELINL, HRDCOD, RNDGEN, OMPFLAGS so that it is not necessary to pass them to the src Makefile too +export AVX +export FPTYPE +export HELINL +export HRDCOD +export RNDGEN +export OMPFLAGS + +#------------------------------------------------------------------------------- + +#=== Set the CUDA/C++ compiler flags appropriate to user-defined choices of AVX, FPTYPE, HELINL, HRDCOD, RNDGEN + +# Set the build flags appropriate to OMPFLAGS +$(info OMPFLAGS=$(OMPFLAGS)) +CXXFLAGS += $(OMPFLAGS) + +# Set the build flags appropriate to each AVX choice (example: "make AVX=none") +# [NB MGONGPU_PVW512 is needed because "-mprefer-vector-width=256" is not exposed in a macro] +# [See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96476] +$(info AVX=$(AVX)) +ifeq ($(UNAME_P),ppc64le) + ifeq ($(AVX),sse4) + override AVXFLAGS = -D__SSE4_2__ # Power9 VSX with 128 width (VSR registers) + else ifneq ($(AVX),none) + $(error Unknown AVX='$(AVX)': only 'none' and 'sse4' are supported on PowerPC for the moment) + endif +else ifeq ($(UNAME_P),arm) + ifeq ($(AVX),sse4) + override AVXFLAGS = -D__SSE4_2__ # ARM NEON with 128 width (Q/quadword registers) + else ifneq ($(AVX),none) + $(error Unknown AVX='$(AVX)': only 'none' and 'sse4' are supported on ARM for the moment) + endif +else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 + ifeq ($(AVX),none) + override AVXFLAGS = -mno-sse3 # no SIMD + else ifeq ($(AVX),sse4) + override AVXFLAGS = -mno-avx # SSE4.2 with 128 width (xmm registers) + else ifeq ($(AVX),avx2) + override AVXFLAGS = -march=haswell # AVX2 with 256 width (ymm registers) [DEFAULT for clang] + else ifeq ($(AVX),512y) + override AVXFLAGS = -march=skylake -mprefer-vector-width=256 # AVX512 with 256 width (ymm registers) [DEFAULT for gcc] + else ifeq ($(AVX),512z) + override AVXFLAGS = -march=skylake -DMGONGPU_PVW512 # AVX512 with 512 width (zmm registers) + else + $(error Unknown AVX='$(AVX)': only 'none', 'sse4', 'avx2', '512y' and '512z' are supported) + endif +else + ifeq ($(AVX),none) + override AVXFLAGS = -march=x86-64 # no SIMD (see #588) + else ifeq ($(AVX),sse4) + override AVXFLAGS = -march=nehalem # SSE4.2 with 128 width (xmm registers) + else ifeq ($(AVX),avx2) + override AVXFLAGS = -march=haswell # AVX2 with 256 width (ymm registers) [DEFAULT for clang] + else ifeq ($(AVX),512y) + override AVXFLAGS = -march=skylake-avx512 -mprefer-vector-width=256 # AVX512 with 256 width (ymm registers) [DEFAULT for gcc] + else ifeq ($(AVX),512z) + override AVXFLAGS = -march=skylake-avx512 -DMGONGPU_PVW512 # AVX512 with 512 width (zmm registers) + else + $(error Unknown AVX='$(AVX)': only 'none', 'sse4', 'avx2', '512y' and '512z' are supported) + endif +endif +# For the moment, use AVXFLAGS everywhere: eventually, use them only in encapsulated implementations? 
+CXXFLAGS+= $(AVXFLAGS) + +# Set the build flags appropriate to each FPTYPE choice (example: "make FPTYPE=f") +$(info FPTYPE=$(FPTYPE)) +ifeq ($(FPTYPE),d) + CXXFLAGS += -DMGONGPU_FPTYPE_DOUBLE -DMGONGPU_FPTYPE2_DOUBLE + CUFLAGS += -DMGONGPU_FPTYPE_DOUBLE -DMGONGPU_FPTYPE2_DOUBLE +else ifeq ($(FPTYPE),f) + CXXFLAGS += -DMGONGPU_FPTYPE_FLOAT -DMGONGPU_FPTYPE2_FLOAT + CUFLAGS += -DMGONGPU_FPTYPE_FLOAT -DMGONGPU_FPTYPE2_FLOAT +else ifeq ($(FPTYPE),m) + CXXFLAGS += -DMGONGPU_FPTYPE_DOUBLE -DMGONGPU_FPTYPE2_FLOAT + CUFLAGS += -DMGONGPU_FPTYPE_DOUBLE -DMGONGPU_FPTYPE2_FLOAT +else + $(error Unknown FPTYPE='$(FPTYPE)': only 'd', 'f' and 'm' are supported) +endif + +# Set the build flags appropriate to each HELINL choice (example: "make HELINL=1") +$(info HELINL=$(HELINL)) +ifeq ($(HELINL),1) + CXXFLAGS += -DMGONGPU_INLINE_HELAMPS + CUFLAGS += -DMGONGPU_INLINE_HELAMPS +else ifneq ($(HELINL),0) + $(error Unknown HELINL='$(HELINL)': only '0' and '1' are supported) +endif + +# Set the build flags appropriate to each HRDCOD choice (example: "make HRDCOD=1") +$(info HRDCOD=$(HRDCOD)) +ifeq ($(HRDCOD),1) + CXXFLAGS += -DMGONGPU_HARDCODE_PARAM + CUFLAGS += -DMGONGPU_HARDCODE_PARAM +else ifneq ($(HRDCOD),0) + $(error Unknown HRDCOD='$(HRDCOD)': only '0' and '1' are supported) +endif + +# Set the build flags appropriate to each RNDGEN choice (example: "make RNDGEN=hasNoCurand") +$(info RNDGEN=$(RNDGEN)) +ifeq ($(RNDGEN),hasNoCurand) + CXXFLAGS += -DMGONGPU_HAS_NO_CURAND +else ifneq ($(RNDGEN),hasCurand) + $(error Unknown RNDGEN='$(RNDGEN)': only 'hasCurand' and 'hasNoCurand' are supported) +endif + +#------------------------------------------------------------------------------- + +#=== Configure build directories and build lockfiles === + +# Build directory "short" tag (defines target and path to the optional build directory) +# (Rationale: keep directory names shorter, e.g. do not include random number generator choice) +override DIRTAG = $(AVX)_$(FPTYPE)_inl$(HELINL)_hrd$(HRDCOD) + +# Build lockfile "full" tag (defines full specification of build options that cannot be intermixed) +# (Rationale: avoid mixing of CUDA and no-CUDA environment builds with different random number generators) +override TAG = $(AVX)_$(FPTYPE)_inl$(HELINL)_hrd$(HRDCOD)_$(RNDGEN) + +# Build directory: current directory by default, or build.$(DIRTAG) if USEBUILDDIR==1 +ifeq ($(USEBUILDDIR),1) + override BUILDDIR = build.$(DIRTAG) + override LIBDIR = ../../lib/$(BUILDDIR) + override LIBDIRRPATH = '$$ORIGIN/../$(LIBDIR)' + $(info Building in BUILDDIR=$(BUILDDIR) for tag=$(TAG) (USEBUILDDIR is set = 1)) +else + override BUILDDIR = . 
+ override LIBDIR = ../../lib + override LIBDIRRPATH = '$$ORIGIN/$(LIBDIR)' + $(info Building in BUILDDIR=$(BUILDDIR) for tag=$(TAG) (USEBUILDDIR is not set)) +endif +###override INCDIR = ../../include +###$(info Building in BUILDDIR=$(BUILDDIR) for tag=$(TAG)) + +# On Linux, set rpath to LIBDIR to make it unnecessary to use LD_LIBRARY_PATH +# Use relative paths with respect to the executables or shared libraries ($ORIGIN on Linux) +# On Darwin, building libraries with absolute paths in LIBDIR makes this unnecessary +ifeq ($(UNAME_S),Darwin) + override CXXLIBFLAGSRPATH = + override CULIBFLAGSRPATH = + override CXXLIBFLAGSRPATH2 = + override CULIBFLAGSRPATH2 = +else + # RPATH to cuda/cpp libs when linking executables + override CXXLIBFLAGSRPATH = -Wl,-rpath,$(LIBDIRRPATH) + override CULIBFLAGSRPATH = -Xlinker -rpath,$(LIBDIRRPATH) + # RPATH to common lib when linking cuda/cpp libs + override CXXLIBFLAGSRPATH2 = -Wl,-rpath,'$$ORIGIN' + override CULIBFLAGSRPATH2 = -Xlinker -rpath,'$$ORIGIN' +endif + +# Setting LD_LIBRARY_PATH or DYLD_LIBRARY_PATH in the RUNTIME is no longer necessary (neither on Linux nor on Mac) +override RUNTIME = + +#=============================================================================== +#=== Makefile TARGETS and build rules below +#=============================================================================== + +cxx_main=$(BUILDDIR)/check.exe +fcxx_main=$(BUILDDIR)/fcheck.exe + +ifneq ($(NVCC),) +cu_main=$(BUILDDIR)/gcheck.exe +fcu_main=$(BUILDDIR)/fgcheck.exe +else +cu_main= +fcu_main= +endif + +testmain=$(BUILDDIR)/runTest.exe + +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cu_main) $(cxx_main) $(testmain) $(fcu_main) $(fcxx_main) + +# Target (and build options): debug +MAKEDEBUG= +debug: OPTFLAGS = -g -O0 -DDEBUG2 +debug: CUOPTFLAGS = -G +debug: MAKEDEBUG := debug +debug: all.$(TAG) + +# Target: tag-specific build lockfiles +override oldtagsb=`if [ -d $(BUILDDIR) ]; then find $(BUILDDIR) -maxdepth 1 -name '.build.*' ! -name '.build.$(TAG)' -exec echo $(shell pwd)/{} \; ; fi` +$(BUILDDIR)/.build.$(TAG): + @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi + @if [ "$(oldtagsb)" != "" ]; then echo "Cannot build for tag=$(TAG) as old builds exist for other tags:"; echo " $(oldtagsb)"; echo "Please run 'make clean' first\nIf 'make clean' is not enough: run 'make clean USEBUILDDIR=1 AVX=$(AVX) FPTYPE=$(FPTYPE)' or 'make cleanall'"; exit 1; fi + @touch $(BUILDDIR)/.build.$(TAG) + +# Generic target and build rules: objects from CUDA compilation +ifneq ($(NVCC),) +$(BUILDDIR)/%.o : %.cu *.h ../../src/*.h + @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi + $(NVCC) $(CPPFLAGS) $(CUFLAGS) -Xcompiler -fPIC -c $< -o $@ + +$(BUILDDIR)/%_cu.o : %.cc *.h ../../src/*.h + @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi + $(NVCC) $(CPPFLAGS) $(CUFLAGS) -Xcompiler -fPIC -c -x cu $< -o $@ +endif + +# Generic target and build rules: objects from C++ compilation +$(BUILDDIR)/%.o : %.cc *.h ../../src/*.h + @if [ ! 
-d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi + $(CXX) $(CPPFLAGS) $(CXXFLAGS) $(CUINC) -fPIC -c $< -o $@ + +# Apply special build flags only to CrossSectionKernel.cc and gCrossSectionKernel.cu (no fast math, see #117) +ifeq ($(shell $(CXX) --version | grep ^nvc++),) +$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS += -fno-fast-math +$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS += -fno-fast-math +ifneq ($(NVCC),) +$(BUILDDIR)/gCrossSectionKernels.o: CUFLAGS += -Xcompiler -fno-fast-math +endif +endif + +# Avoid "warning: builtin __has_trivial_... is deprecated; use __is_trivially_... instead" in nvcc with icx2023 (#592) +ifneq ($(shell $(CXX) --version | egrep '^(Intel)'),) +ifneq ($(NVCC),) +CUFLAGS += -Xcompiler -Wno-deprecated-builtins +endif +endif + +# Avoid clang warning "overriding '-ffp-contract=fast' option with '-ffp-contract=on'" (#516) +# This patch does remove the warning, but I prefer to keep it disabled for the moment... +###ifneq ($(shell $(CXX) --version | egrep '^(clang|Apple clang|Intel)'),) +###$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS += -Wno-overriding-t-option +###ifneq ($(NVCC),) +###$(BUILDDIR)/gCrossSectionKernels.o: CUFLAGS += -Xcompiler -Wno-overriding-t-option +###endif +###endif + +#### Apply special build flags only to CPPProcess.cc (-flto) +###$(BUILDDIR)/CPPProcess.o: CXXFLAGS += -flto + +#### Apply special build flags only to CPPProcess.cc (AVXFLAGS) +###$(BUILDDIR)/CPPProcess.o: CXXFLAGS += $(AVXFLAGS) + +#------------------------------------------------------------------------------- + +# Target (and build rules): common (src) library +commonlib : $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so + +$(LIBDIR)/lib$(MG5AMC_COMMONLIB).so: ../../src/*.h ../../src/*.cc + $(MAKE) -C ../../src $(MAKEDEBUG) -f $(CUDACPP_SRC_MAKEFILE) + +#------------------------------------------------------------------------------- + +processid_short=$(shell basename $(CURDIR) | awk -F_ '{print $$(NF-1)"_"$$NF}') +###$(info processid_short=$(processid_short)) + +MG5AMC_CXXLIB = mg5amc_$(processid_short)_cpp +cxx_objects_lib=$(BUILDDIR)/CPPProcess.o $(BUILDDIR)/MatrixElementKernels.o $(BUILDDIR)/BridgeKernels.o $(BUILDDIR)/CrossSectionKernels.o +cxx_objects_exe=$(BUILDDIR)/RandomNumberKernels.o $(BUILDDIR)/RamboSamplingKernels.o + +ifneq ($(NVCC),) +MG5AMC_CULIB = mg5amc_$(processid_short)_cuda +cu_objects_lib=$(BUILDDIR)/gCPPProcess.o $(BUILDDIR)/gMatrixElementKernels.o $(BUILDDIR)/gBridgeKernels.o $(BUILDDIR)/gCrossSectionKernels.o +cu_objects_exe=$(BUILDDIR)/gRandomNumberKernels.o $(BUILDDIR)/gRamboSamplingKernels.o +endif + +# Target (and build rules): C++ and CUDA shared libraries +$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o +$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o +$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) + $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) + +ifneq ($(NVCC),) +$(LIBDIR)/lib$(MG5AMC_CULIB).so: $(BUILDDIR)/fbridge_cu.o +$(LIBDIR)/lib$(MG5AMC_CULIB).so: cu_objects_lib += $(BUILDDIR)/fbridge_cu.o +$(LIBDIR)/lib$(MG5AMC_CULIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cu_objects_lib) + $(NVCC) --shared -o $@ $(cu_objects_lib) $(CULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +endif + +#------------------------------------------------------------------------------- + +# Target (and build rules): Fortran include files +###$(INCDIR)/%.inc : ../%.inc +### @if [ ! 
-d $(INCDIR) ]; then echo "mkdir -p $(INCDIR)"; mkdir -p $(INCDIR); fi +### \cp $< $@ + +#------------------------------------------------------------------------------- + +# Target (and build rules): C++ and CUDA standalone executables +$(cxx_main): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_main): $(BUILDDIR)/check_sa.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) + $(CXX) -o $@ $(BUILDDIR)/check_sa.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) $(CULIBFLAGS) + +ifneq ($(NVCC),) +ifneq ($(shell $(CXX) --version | grep ^Intel),) +$(cu_main): LIBFLAGS += -lintlc # compile with icpx and link with nvcc (undefined reference to `_intel_fast_memcpy') +$(cu_main): LIBFLAGS += -lsvml # compile with icpx and link with nvcc (undefined reference to `__svml_cos4_l9') +else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 +$(cu_main): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc +endif +$(cu_main): LIBFLAGS += $(CULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cu_main): $(BUILDDIR)/gcheck_sa.o $(LIBDIR)/lib$(MG5AMC_CULIB).so $(cu_objects_exe) + $(NVCC) -o $@ $(BUILDDIR)/gcheck_sa.o $(CUARCHFLAGS) $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CULIB) $(cu_objects_exe) $(CULIBFLAGS) +endif + +#------------------------------------------------------------------------------- + +# Generic target and build rules: objects from Fortran compilation +$(BUILDDIR)/%.o : %.f *.inc + @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi + $(FC) -I. -c $< -o $@ + +# Generic target and build rules: objects from Fortran compilation +###$(BUILDDIR)/%.o : %.f *.inc +### @if [ ! -d $(INCDIR) ]; then echo "mkdir -p $(INCDIR)"; mkdir -p $(INCDIR); fi +### @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi +### $(FC) -I. 
-I$(INCDIR) -c $< -o $@ + +# Target (and build rules): Fortran standalone executables +###$(BUILDDIR)/fcheck_sa.o : $(INCDIR)/fbridge.inc + +ifeq ($(UNAME_S),Darwin) +$(fcxx_main): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 +endif +$(fcxx_main): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(fcxx_main): $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) + $(CXX) -o $@ $(BUILDDIR)/fcheck_sa.o $(OMPFLAGS) $(BUILDDIR)/fsampler.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) $(CULIBFLAGS) + +ifneq ($(NVCC),) +ifneq ($(shell $(CXX) --version | grep ^Intel),) +$(fcu_main): LIBFLAGS += -lintlc # compile with icpx and link with nvcc (undefined reference to `_intel_fast_memcpy') +$(fcu_main): LIBFLAGS += -lsvml # compile with icpx and link with nvcc (undefined reference to `__svml_cos4_l9') +endif +ifeq ($(UNAME_S),Darwin) +$(fcu_main): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 +endif +$(fcu_main): LIBFLAGS += $(CULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(fcu_main): $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_cu.o $(LIBDIR)/lib$(MG5AMC_CULIB).so $(cu_objects_exe) + $(NVCC) -o $@ $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_cu.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CULIB) $(cu_objects_exe) $(CULIBFLAGS) +endif + +#------------------------------------------------------------------------------- + +# Target (and build rules): test objects and test executable +$(BUILDDIR)/testxxx.o: $(GTESTLIBS) +$(BUILDDIR)/testxxx.o: INCFLAGS += -I$(TESTDIR)/googletest/googletest/include +$(BUILDDIR)/testxxx.o: testxxx_cc_ref.txt +$(testmain): $(BUILDDIR)/testxxx.o +$(testmain): cxx_objects_exe += $(BUILDDIR)/testxxx.o # Comment out this line to skip the C++ test of xxx functions + +ifneq ($(NVCC),) +$(BUILDDIR)/testxxx_cu.o: $(GTESTLIBS) +$(BUILDDIR)/testxxx_cu.o: INCFLAGS += -I$(TESTDIR)/googletest/googletest/include +$(BUILDDIR)/testxxx_cu.o: testxxx_cc_ref.txt +$(testmain): $(BUILDDIR)/testxxx_cu.o +$(testmain): cu_objects_exe += $(BUILDDIR)/testxxx_cu.o # Comment out this line to skip the CUDA test of xxx functions +endif + +$(BUILDDIR)/testmisc.o: $(GTESTLIBS) +$(BUILDDIR)/testmisc.o: INCFLAGS += -I$(TESTDIR)/googletest/googletest/include +$(testmain): $(BUILDDIR)/testmisc.o +$(testmain): cxx_objects_exe += $(BUILDDIR)/testmisc.o # Comment out this line to skip the C++ miscellaneous tests + +ifneq ($(NVCC),) +$(BUILDDIR)/testmisc_cu.o: $(GTESTLIBS) +$(BUILDDIR)/testmisc_cu.o: INCFLAGS += -I$(TESTDIR)/googletest/googletest/include +$(testmain): $(BUILDDIR)/testmisc_cu.o +$(testmain): cu_objects_exe += $(BUILDDIR)/testmisc_cu.o # Comment out this line to skip the CUDA miscellaneous tests +endif + +$(BUILDDIR)/runTest.o: $(GTESTLIBS) +$(BUILDDIR)/runTest.o: INCFLAGS += -I$(TESTDIR)/googletest/googletest/include +$(testmain): $(BUILDDIR)/runTest.o +$(testmain): cxx_objects_exe += $(BUILDDIR)/runTest.o + +ifneq ($(NVCC),) +$(BUILDDIR)/runTest_cu.o: $(GTESTLIBS) +$(BUILDDIR)/runTest_cu.o: INCFLAGS += -I$(TESTDIR)/googletest/googletest/include +ifneq ($(shell $(CXX) --version | grep ^Intel),) +$(testmain): LIBFLAGS += -lintlc # compile with icpx and link with nvcc (undefined reference to `_intel_fast_memcpy') +$(testmain): LIBFLAGS += -lsvml # compile with icpx and link with nvcc (undefined reference to `__svml_cos4_l9') +else ifneq ($(shell 
$(CXX) --version | grep ^nvc++),) # support nvc++ #531 +$(testmain): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc +endif +$(testmain): $(BUILDDIR)/runTest_cu.o +$(testmain): cu_objects_exe += $(BUILDDIR)/runTest_cu.o +endif + +$(testmain): $(GTESTLIBS) +$(testmain): INCFLAGS += -I$(TESTDIR)/googletest/googletest/include +$(testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest -lgtest_main + +ifneq ($(OMPFLAGS),) +ifneq ($(shell $(CXX) --version | egrep '^Intel'),) +$(testmain): LIBFLAGS += -liomp5 # see #578 (not '-qopenmp -static-intel' as in https://stackoverflow.com/questions/45909648) +else ifneq ($(shell $(CXX) --version | egrep '^clang'),) +$(testmain): LIBFLAGS += -L $(shell dirname $(shell $(CXX) -print-file-name=libc++.so)) -lomp # see #604 +###else ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +###$(testmain): LIBFLAGS += ???? # OMP is not supported yet by cudacpp for Apple clang (see #578 and #604) +else +$(testmain): LIBFLAGS += -lgomp +endif +endif + +ifeq ($(NVCC),) # link only runTest.o +$(testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS) + $(CXX) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) -ldl -pthread $(LIBFLAGS) $(CULIBFLAGS) +else # link both runTest.o and runTest_cu.o +$(testmain): LIBFLAGS += $(CULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(cu_objects_lib) $(cu_objects_exe) $(GTESTLIBS) + $(NVCC) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) $(cu_objects_lib) $(cu_objects_exe) -ldl $(LIBFLAGS) -lcuda $(CULIBFLAGS) +endif + +# Use flock (Linux only, no Mac) to allow 'make -j' if googletest has not yet been downloaded https://stackoverflow.com/a/32666215 +$(GTESTLIBS): +ifneq ($(shell which flock 2>/dev/null),) + flock $(BUILDDIR)/.make_test.lock $(MAKE) -C $(TESTDIR) +else + $(MAKE) -C $(TESTDIR) +endif + +#------------------------------------------------------------------------------- + +# Target: build all targets in all AVX modes (each AVX mode in a separate build directory) +# Split the avxall target into five separate targets to allow parallel 'make -j avxall' builds +# (Hack: add a fbridge.inc dependency to avxall, to ensure it is only copied once for all AVX modes) +avxnone: + @echo + $(MAKE) USEBUILDDIR=1 AVX=none -f $(CUDACPP_MAKEFILE) + +avxsse4: + @echo + $(MAKE) USEBUILDDIR=1 AVX=sse4 -f $(CUDACPP_MAKEFILE) + +avxavx2: + @echo + $(MAKE) USEBUILDDIR=1 AVX=avx2 -f $(CUDACPP_MAKEFILE) + +avx512y: + @echo + $(MAKE) USEBUILDDIR=1 AVX=512y -f $(CUDACPP_MAKEFILE) + +avx512z: + @echo + $(MAKE) USEBUILDDIR=1 AVX=512z -f $(CUDACPP_MAKEFILE) + +ifeq ($(UNAME_P),ppc64le) +###avxall: $(INCDIR)/fbridge.inc avxnone avxsse4 +avxall: avxnone avxsse4 +else ifeq ($(UNAME_P),arm) +###avxall: $(INCDIR)/fbridge.inc avxnone avxsse4 +avxall: avxnone avxsse4 +else +###avxall: $(INCDIR)/fbridge.inc avxnone avxsse4 avxavx2 avx512y avx512z +avxall: avxnone avxsse4 avxavx2 avx512y avx512z +endif + +#------------------------------------------------------------------------------- + +# Target: clean the builds +.PHONY: clean + +clean: +ifeq ($(USEBUILDDIR),1) + rm -rf $(BUILDDIR) +else + rm -f $(BUILDDIR)/.build.* $(BUILDDIR)/*.o $(BUILDDIR)/*.exe + rm -f $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(LIBDIR)/lib$(MG5AMC_CULIB).so +endif + $(MAKE) -C ../../src clean -f $(CUDACPP_SRC_MAKEFILE) +### rm -rf $(INCDIR) + 
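For context on the FPTYPE switch handled near the top of this makefile: the build system only adds -D defines to CXXFLAGS and CUFLAGS, and the C++ sources decide what those defines mean. A minimal standalone sketch of that pattern is shown below (illustrative only, not code from this patch; the project's own mapping in ../../src/mgOnGpuConfig.h is richer and also covers the MGONGPU_FPTYPE2_* defines used for mixed precision):

// fptype_sketch.cc (hypothetical file name, for illustration only)
// Compile e.g. with "g++ -DMGONGPU_FPTYPE_FLOAT fptype_sketch.cc" or with no -D flag at all
#include <iostream>

#if defined MGONGPU_FPTYPE_FLOAT
typedef float fptype; // selected by 'make FPTYPE=f'
#else
typedef double fptype; // selected by 'make FPTYPE=d' or 'make FPTYPE=m' (and by default here if no flag is passed)
#endif

int main()
{
  std::cout << "sizeof(fptype) = " << sizeof( fptype ) << std::endl; // 4 for float, 8 for double
  return 0;
}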
+cleanall: + @echo + $(MAKE) USEBUILDDIR=0 clean -f $(CUDACPP_MAKEFILE) + @echo + $(MAKE) USEBUILDDIR=0 -C ../../src cleanall -f $(CUDACPP_SRC_MAKEFILE) + rm -rf build.* + +# Target: clean the builds as well as the googletest installation +distclean: cleanall + $(MAKE) -C $(TESTDIR) clean + +#------------------------------------------------------------------------------- + +# Target: show system and compiler information +info: + @echo "" + @uname -spn # e.g. Linux nodename.cern.ch x86_64 +ifeq ($(UNAME_S),Darwin) + @sysctl -a | grep -i brand + @sysctl -a | grep machdep.cpu | grep features || true + @sysctl -a | grep hw.physicalcpu: + @sysctl -a | grep hw.logicalcpu: +else + @cat /proc/cpuinfo | grep "model name" | sort -u + @cat /proc/cpuinfo | grep "flags" | sort -u + @cat /proc/cpuinfo | grep "cpu cores" | sort -u + @cat /proc/cpuinfo | grep "physical id" | sort -u +endif + @echo "" +ifneq ($(shell which nvidia-smi 2>/dev/null),) + nvidia-smi -L + @echo "" +endif + @echo USECCACHE=$(USECCACHE) +ifeq ($(USECCACHE),1) + ccache --version | head -1 +endif + @echo "" + @echo NVCC=$(NVCC) +ifneq ($(NVCC),) + $(NVCC) --version +endif + @echo "" + @echo CXX=$(CXX) +ifneq ($(shell $(CXX) --version | grep ^clang),) + @echo $(CXX) -v + @$(CXX) -v |& egrep -v '(Found|multilib)' + @readelf -p .comment `$(CXX) -print-libgcc-file-name` |& grep 'GCC: (GNU)' | grep -v Warning | sort -u | awk '{print "GCC toolchain:",$$5}' +else + $(CXX) --version +endif + @echo "" + @echo FC=$(FC) + $(FC) --version + +#------------------------------------------------------------------------------- + +# Target: check (run the C++ test executable) +# [NB THIS IS WHAT IS USED IN THE GITHUB CI!] +ifneq ($(NVCC),) +check: runTest cmpFcheck cmpFGcheck +else +check: runTest cmpFcheck +endif + +# Target: runTest (run the C++ test executable runTest.exe) +runTest: all.$(TAG) + $(RUNTIME) $(BUILDDIR)/runTest.exe + +# Target: runCheck (run the C++ standalone executable check.exe, with a small number of events) +runCheck: all.$(TAG) + $(RUNTIME) $(BUILDDIR)/check.exe -p 2 32 2 + +# Target: runGcheck (run the CUDA standalone executable gcheck.exe, with a small number of events) +runGcheck: all.$(TAG) + $(RUNTIME) $(BUILDDIR)/gcheck.exe -p 2 32 2 + +# Target: runFcheck (run the Fortran standalone executable - with C++ MEs - fcheck.exe, with a small number of events) +runFcheck: all.$(TAG) + $(RUNTIME) $(BUILDDIR)/fcheck.exe 2 32 2 + +# Target: runFGcheck (run the Fortran standalone executable - with CUDA MEs - fgcheck.exe, with a small number of events) +runFGcheck: all.$(TAG) + $(RUNTIME) $(BUILDDIR)/fgcheck.exe 2 32 2 + +# Target: cmpFcheck (compare ME results from the C++ and Fortran with C++ MEs standalone executables, with a small number of events) +cmpFcheck: all.$(TAG) + @echo + @echo "$(BUILDDIR)/check.exe --common -p 2 32 2" + @echo "$(BUILDDIR)/fcheck.exe 2 32 2" + @me1=$(shell $(RUNTIME) $(BUILDDIR)/check.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/C++) = $${me1}"; echo "Avg ME (F77/C++) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/C++) returned NaN"; elif [ "$${me2}" == "" ]; then echo "ERROR! 
Fortran calculation (F77/C++) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi + +# Target: cmpFGcheck (compare ME results from the CUDA and Fortran with CUDA MEs standalone executables, with a small number of events) +cmpFGcheck: all.$(TAG) + @echo + @echo "$(BUILDDIR)/gcheck.exe --common -p 2 32 2" + @echo "$(BUILDDIR)/fgcheck.exe 2 32 2" + @me1=$(shell $(RUNTIME) $(BUILDDIR)/gcheck.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fgcheck.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/CUDA) = $${me1}"; echo "Avg ME (F77/CUDA) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/CUDA) returned NaN"; elif [ "$${me2}" == "" ]; then echo "ERROR! Fortran calculation (F77/CUDA) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%s (relative difference %s 2E-4)' % ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi + +# Target: memcheck (run the CUDA standalone executable gcheck.exe with a small number of events through cuda-memcheck) +memcheck: all.$(TAG) + $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/gcheck.exe -p 2 32 2 + +#------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/fbridge.cc b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/fbridge.cc new file mode 100644 index 0000000000..9c9287e0c5 --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/fbridge.cc @@ -0,0 +1,126 @@ +#include "Bridge.h" +#include "CPPProcess.h" +#include "CudaRuntime.h" + +extern "C" +{ + /** + * The namespace where the Bridge class is taken from. + * + * In the current implementation, two separate shared libraries are created for the GPU/CUDA and CPU/C++ implementations. + * Actually, two shared libraries for GPU and CPU are created for each of the five SIMD implementations on CPUs (none, sse4, avx2, 512y, 512z). + * A single fcreatebridge_ symbol is created in each library with the same name, connected to the appropriate Bridge on CPU or GPU. + * The Fortran MadEvent code is always the same: the choice whether to use a CPU or GPU implementation is done by linking the appropriate library. + * As the names of the two CPU/GPU libraries are the same in the five SIMD implementations, the choice of SIMD is done by setting LD_LIBRARY_PATH. + * + * In a future implementation, a single heterogeneous shared library may be created, with the same interface. + * Using the same Fortran MadEvent code, linking to the heterogeneous library would allow access to both CPU and GPU implementations. + * The specific heterogeneous configuration (how many GPUs, how many threads on each CPU, etc) could be loaded in CUDA/C++ from a data file. + */ +#ifdef __CUDACC__ + using namespace mg5amcGpu; +#else + using namespace mg5amcCpu; +#endif + + /** + * The floating point precision used in Fortran arrays. + * This is presently hardcoded to double precision (REAL*8).
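+ * Note that the internal cudacpp floating point type ('fptype', selected by the FPTYPE build flag) may differ from FORTRANFPTYPE: any conversion between the two is performed inside the Bridge implementation.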
+ */ + using FORTRANFPTYPE = double; // for Fortran double precision (REAL*8) arrays + //using FORTRANFPTYPE = float; // for Fortran single precision (REAL*4) arrays + + /** + * Create a Bridge and return its pointer. + * This is a C symbol that should be called from the Fortran code (in auto_dsig1.f). + * + * @param ppbridge the pointer to the Bridge pointer (the Bridge pointer is handled in Fortran as an INTEGER*8 variable) + * @param nevtF the pointer to the number of events in the Fortran arrays + * @param nparF the pointer to the number of external particles in the Fortran arrays (KEPT FOR SANITY CHECKS ONLY) + * @param np4F the pointer to the number of momenta components, usually 4, in the Fortran arrays (KEPT FOR SANITY CHECKS ONLY) + */ + void fbridgecreate_( CppObjectInFortran** ppbridge, const int* pnevtF, const int* pnparF, const int* pnp4F ) + { +#ifdef __CUDACC__ + CudaRuntime::setUp(); +#endif + // Create a process object, read parm card and set parameters + // FIXME: the process instance can happily go out of scope because it is only needed to read parameters? + // FIXME: the CPPProcess should really be a singleton? what if fbridgecreate is called from several Fortran threads? + CPPProcess process( /*verbose=*/false ); + process.initProc( "../../Cards/param_card.dat" ); + // FIXME: disable OMP in Bridge when called from Fortran + *ppbridge = new Bridge( *pnevtF, *pnparF, *pnp4F ); + } + + /** + * Delete a Bridge. + * This is a C symbol that should be called from the Fortran code (in auto_dsig1.f). + * + * @param ppbridge the pointer to the Bridge pointer (the Bridge pointer is handled in Fortran as an INTEGER*8 variable) + */ + void fbridgedelete_( CppObjectInFortran** ppbridge ) + { + Bridge* pbridge = dynamic_cast*>( *ppbridge ); + if( pbridge == 0 ) throw std::runtime_error( "fbridgedelete_: invalid Bridge address" ); + delete pbridge; +#ifdef __CUDACC__ + CudaRuntime::tearDown(); +#endif + } + + /** + * Execute the matrix-element calculation "sequence" via a Bridge on GPU/CUDA or CUDA/C++. + * This is a C symbol that should be called from the Fortran code (in auto_dsig1.f). 
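+ * For illustration only, the intended call order, transliterated to C++ with placeholder sizes (the array arguments are not declared here; in production these calls are issued from Fortran through the interfaces in fbridge.inc):
+ *   CppObjectInFortran* pbridge = nullptr;
+ *   int nevt = 16, npar = 6, np4 = 4; // placeholder values
+ *   unsigned int channelId = 0; // 0 disables multi-channel
+ *   fbridgecreate_( &pbridge, &nevt, &npar, &np4 ); // once per run
+ *   fbridgesequence_( &pbridge, momenta, gs, rndhel, rndcol, &channelId, mes, selhel, selcol ); // once per event batch
+ *   fbridgedelete_( &pbridge ); // once at the end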
+ * + * @param ppbridge the pointer to the Bridge pointer (the Bridge pointer is handled in Fortran as an INTEGER*8 variable) + * @param momenta the pointer to the input 4-momenta + * @param gs the pointer to the input Gs (running QCD coupling constant alphas) + * @param rndhel the pointer to the input random numbers for helicity selection + * @param rndcol the pointer to the input random numbers for color selection + * @param channelId the pointer to the input Feynman diagram to enhance in multi-channel mode if 1 to n (disable multi-channel if 0) + * @param mes the pointer to the output matrix elements + * @param selhel the pointer to the output selected helicities + * @param selcol the pointer to the output selected colors + */ + void fbridgesequence_( CppObjectInFortran** ppbridge, + const FORTRANFPTYPE* momenta, + const FORTRANFPTYPE* gs, + const FORTRANFPTYPE* rndhel, + const FORTRANFPTYPE* rndcol, + const unsigned int* pchannelId, + FORTRANFPTYPE* mes, + int* selhel, + int* selcol ) + { + Bridge* pbridge = dynamic_cast*>( *ppbridge ); + if( pbridge == 0 ) throw std::runtime_error( "fbridgesequence_: invalid Bridge address" ); +#ifdef __CUDACC__ + // Use the device/GPU implementation in the CUDA library + // (there is also a host implementation in this library) + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, *pchannelId, mes, selhel, selcol ); +#else + // Use the host/CPU implementation in the C++ library + // (there is no device implementation in this library) + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, *pchannelId, mes, selhel, selcol ); +#endif + } + + /** + * Retrieve the number of good helicities for helicity filtering in the Bridge. + * This is a C symbol that should be called from the Fortran code (in auto_dsig1.f). + * + * @param ppbridge the pointer to the Bridge pointer (the Bridge pointer is handled in Fortran as an INTEGER*8 variable) + * @param pngoodhel the pointer to the output number of good helicities + * @param pntothel the pointer to the output total number of helicities + */ + void fbridgegetngoodhel_( CppObjectInFortran** ppbridge, + unsigned int* pngoodhel, + unsigned int* pntothel ) + { + Bridge* pbridge = dynamic_cast*>( *ppbridge ); + if( pbridge == 0 ) throw std::runtime_error( "fbridgegetngoodhel_: invalid Bridge address" ); + *pngoodhel = pbridge->nGoodHel(); + *pntothel = pbridge->nTotHel(); + } +} diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/fbridge.inc b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/fbridge.inc new file mode 100644 index 0000000000..f140b660fc --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/fbridge.inc @@ -0,0 +1,66 @@ +C +C Create a Bridge and return its pointer +C - PBRIDGE: the memory address of the C++ Bridge +C - NEVT: the number of events in the Fortran arrays +C - NPAR: the number of external particles in the Fortran arrays (KEPT FOR SANITY CHECKS ONLY: remove it?) +C - NP4: the number of momenta components, usually 4, in the Fortran arrays (KEPT FOR SANITY CHECKS ONLY: remove it?) +C + INTERFACE + SUBROUTINE FBRIDGECREATE(PBRIDGE, NEVT, NPAR, NP4) + INTEGER*8 PBRIDGE + INTEGER*4 NEVT + INTEGER*4 NPAR + INTEGER*4 NP4 + END SUBROUTINE FBRIDGECREATE + END INTERFACE + +C +C Delete a Bridge. +C - PBRIDGE: the memory address of the C++ Bridge +C + INTERFACE + SUBROUTINE FBRIDGEDELETE(PBRIDGE) + INTEGER*8 PBRIDGE + END SUBROUTINE FBRIDGEDELETE + END INTERFACE + +C +C Execute the matrix-element calculation "sequence" via a Bridge on GPU/CUDA or CUDA/C++. 
+C - PBRIDGE: the memory address of the C++ Bridge +C - MOMENTA: the input 4-momenta Fortran array +C - GS: the input Gs (running QCD coupling constant alphas) Fortran array +C - RNDHEL: the input random number Fortran array for helicity selection +C - RNDCOL: the input random number Fortran array for color selection +C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to n (disable multi-channel if 0) +C - MES: the output matrix element Fortran array +C - SELHEL: the output selected helicity Fortran array +C - SELCOL: the output selected color Fortran array +C + INTERFACE + SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, + & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + INTEGER*8 PBRIDGE + DOUBLE PRECISION MOMENTA(*) + DOUBLE PRECISION GS(*) + DOUBLE PRECISION RNDHEL(*) + DOUBLE PRECISION RNDCOL(*) + INTEGER*4 CHANID + DOUBLE PRECISION MES(*) + INTEGER*4 SELHEL(*) + INTEGER*4 SELCOL(*) + END SUBROUTINE FBRIDGESEQUENCE + END INTERFACE + +C +C Retrieve the number of good helicities for helicity filtering in the Bridge. +C - PBRIDGE: the memory address of the C++ Bridge +C - NGOODHEL: the output number of good helicities +C - NTOTHEL: the output total number of helicities in cudacpp (aka NCOMB in Fortran) +C + INTERFACE + SUBROUTINE FBRIDGEGETNGOODHEL(PBRIDGE, NGOODHEL, NTOTHEL) + INTEGER*8 PBRIDGE + INTEGER*4 NGOODHEL + INTEGER*4 NTOTHEL + END SUBROUTINE FBRIDGEGETNGOODHEL + END INTERFACE diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/fsampler.cc b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/fsampler.cc new file mode 100644 index 0000000000..bc90937f47 --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/fsampler.cc @@ -0,0 +1,159 @@ +#include "mgOnGpuConfig.h" + +#include "Bridge.h" +#include "MemoryBuffers.h" +#include "RamboSamplingKernels.h" +#include "RandomNumberKernels.h" + +//-------------------------------------------------------------------------- + +#ifdef __CUDACC__ +namespace mg5amcGpu +#else +namespace mg5amcCpu +#endif +{ + template + class Sampler final : public CppObjectInFortran + { + public: + // Constructor + // @param nevtF (VECSIZE_USED, vector.inc) number of events in Fortran arrays + // @param nparF (NEXTERNAL, nexternal.inc) number of external particles in Fortran arrays (KEPT FOR SANITY CHECKS ONLY: remove it?) + // @param np4F number of momenta components, usually 4, in Fortran arrays (KEPT FOR SANITY CHECKS ONLY: remove it?) 
+ Sampler( int nevtF, int nparF, int np4F ); + // Destructor + virtual ~Sampler() {} + // Delete copy/move constructors and assignment operators + Sampler( const Sampler& ) = delete; + Sampler( Sampler&& ) = delete; + Sampler& operator=( const Sampler& ) = delete; + Sampler& operator=( Sampler&& ) = delete; + // Draw random numbers and convert them to momenta in C++, then transpose them to Fortran momenta + void samplerHostSequence( FORTRANFPTYPE* fortranMomenta ); + private: + const int m_nevt; // The number of events in each iteration + int m_iiter; // The iteration counter (for random number seeding) +#ifndef __CUDACC__ + HostBufferRndNumMomenta m_hstRndmom; // Memory buffers for random numbers + HostBufferMomenta m_hstMomenta; // Memory buffers for momenta + HostBufferWeights m_hstWeights; // Memory buffers for sampling weights +#else + PinnedHostBufferRndNumMomenta m_hstRndmom; // Memory buffers for random numbers + PinnedHostBufferMomenta m_hstMomenta; // Memory buffers for momenta + PinnedHostBufferWeights m_hstWeights; // Memory buffers for sampling weights +#endif + std::unique_ptr m_prnk; // The appropriate RandomNumberKernel + std::unique_ptr m_prsk; // The appropriate SamplingKernel + // HARDCODED DEFAULTS + static constexpr fptype energy = 1500; // historical default, Ecms = 1500 GeV = 1.5 TeV (above the Z peak) + }; + + template + Sampler::Sampler( int nevtF, int nparF, int np4F ) + : m_nevt( nevtF ) + , m_iiter( 0 ) + , m_hstRndmom( nevtF ) + , m_hstMomenta( nevtF ) + , m_hstWeights( nevtF ) + , m_prnk( new CommonRandomNumberKernel( m_hstRndmom ) ) + , m_prsk( new RamboSamplingKernelHost( energy, m_hstRndmom, m_hstMomenta, m_hstWeights, nevtF ) ) + { + if( nparF != mgOnGpu::npar ) throw std::runtime_error( "Sampler constructor: npar mismatch" ); + if( np4F != mgOnGpu::np4 ) throw std::runtime_error( "Sampler constructor: np4 mismatch" ); + std::cout << "WARNING! Instantiate host Sampler (nevt=" << m_nevt << ")" << std::endl; + } + + // Draw random numbers and convert them to momenta in C++, then transpose them to Fortran momenta + template + void Sampler::samplerHostSequence( FORTRANFPTYPE* fortranMomenta ) + { + std::cout << "Iteration #" << m_iiter + 1 << std::endl; + // === STEP 1 OF 3 + // --- 1a. Seed rnd generator (to get same results on host and device in curand) + // [NB This should not be necessary using the host API: "Generation functions + // can be called multiple times on the same generator to generate successive + // blocks of results. For pseudorandom generators, multiple calls to generation + // functions will yield the same result as a single call with a large size."] + // *** NB! REMEMBER THAT THE FORTRAN SAMPLER ALWAYS USES COMMON RANDOM NUMBERS! *** + constexpr unsigned long long seed = 20200805; + m_prnk->seedGenerator( seed + m_iiter ); + m_iiter++; + // --- 1b. Generate all relevant numbers to build nevt events (i.e. nevt phase space points) on the host + m_prnk->generateRnarray(); + //std::cout << "Got random numbers" << std::endl; + // === STEP 2 OF 3 + // --- 2a. Fill in momenta of initial state particles on the device + m_prsk->getMomentaInitial(); + //std::cout << "Got initial momenta" << std::endl; + // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device + // (i.e. map random numbers to final-state particle momenta for each of nevt events) + m_prsk->getMomentaFinal(); + //std::cout << "Got final momenta" << std::endl; + // --- 2c. 
TransposeC2F + hst_transposeMomentaC2F( m_hstMomenta.data(), fortranMomenta, m_nevt ); + } +} + +//-------------------------------------------------------------------------- + +extern "C" +{ +#ifdef __CUDACC__ + using namespace mg5amcGpu; +#else + using namespace mg5amcCpu; +#endif + + /** + * The floating point precision used in Fortran arrays. + * This is presently hardcoded to double precision (REAL*8). + */ + using FORTRANFPTYPE = double; // for Fortran double precision (REAL*8) arrays + //using FORTRANFPTYPE = float; // for Fortran single precision (REAL*4) arrays + + /** + * Create a Sampler and return its pointer. + * This is a C symbol that should be called from the Fortran code (in auto_dsig1.f). + * + * @param ppsampler the pointer to the Sampler pointer (the Sampler pointer is handled in Fortran as an INTEGER*8 variable) + * @param nevtF the pointer to the number of events in the Fortran arrays + * @param nparF the pointer to the number of external particles in the Fortran arrays (KEPT FOR SANITY CHECKS ONLY) + * @param np4F the pointer to the number of momenta components, usually 4, in the Fortran arrays (KEPT FOR SANITY CHECKS ONLY) + */ + void fsamplercreate_( CppObjectInFortran** ppsampler, const int* pnevtF, const int* pnparF, const int* pnp4F ) + { + *ppsampler = new Sampler( *pnevtF, *pnparF, *pnp4F ); + } + + /** + * Delete a Sampler. + * This is a C symbol that should be called from the Fortran code (in auto_dsig1.f). + * + * @param ppsampler the pointer to the Sampler pointer (the Sampler pointer is handled in Fortran as an INTEGER*8 variable) + */ + void fsamplerdelete_( CppObjectInFortran** ppsampler ) + { + Sampler* psampler = dynamic_cast*>( *ppsampler ); + if( psampler == 0 ) throw std::runtime_error( "fsamplerdelete_: invalid Sampler address" ); + delete psampler; + } + + /** + * Execute the matrix-element calculation "sequence" via a Sampler on GPU/CUDA or CUDA/C++. + * This is a C symbol that should be called from the Fortran code (in auto_dsig1.f). + * + * @param ppsampler the pointer to the Sampler pointer (the Sampler pointer is handled in Fortran as an INTEGER*8 variable) + * @param momenta the pointer to the input 4-momenta + * @param mes the pointer to the output matrix elements + */ + void fsamplersequence_( CppObjectInFortran** ppsampler, FORTRANFPTYPE* momenta ) + { + Sampler* psampler = dynamic_cast*>( *ppsampler ); + if( psampler == 0 ) throw std::runtime_error( "fsamplersequence_: invalid Sampler address" ); + // Use the host/CPU implementation (there is no device implementation) + psampler->samplerHostSequence( momenta ); + } +} + +//-------------------------------------------------------------------------- diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/fsampler.inc b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/fsampler.inc new file mode 100644 index 0000000000..d4895df206 --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/fsampler.inc @@ -0,0 +1,37 @@ +C +C Create a Sampler and return its pointer +C - PSAMPLER: the memory address of the C++ Sampler +C - NEVT: the number of events in the Fortran arrays +C - NPAR: the number of external particles in the Fortran arrays (KEPT FOR SANITY CHECKS ONLY: remove it?) +C - NP4: the number of momenta components, usually 4, in the Fortran arrays (KEPT FOR SANITY CHECKS ONLY: remove it?) 
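+C - (Typical call order, for illustration: FSAMPLERCREATE once per run, then FSAMPLERSEQUENCE
+C   once per iteration to fill MOMENTA, then FSAMPLERDELETE at the end, e.g. in the fcheck_sa.f driver)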
+C + INTERFACE + SUBROUTINE FSAMPLERCREATE(PSAMPLER, NEVT, NPAR, NP4) + INTEGER*8 PSAMPLER + INTEGER*4 NEVT + INTEGER*4 NPAR + INTEGER*4 NP4 + END SUBROUTINE FSAMPLERCREATE + END INTERFACE + +C +C Delete a Sampler. +C - PSAMPLER: the memory address of the C++ Sampler +C + INTERFACE + SUBROUTINE FSAMPLERDELETE(PSAMPLER) + INTEGER*8 PSAMPLER + END SUBROUTINE FSAMPLERDELETE + END INTERFACE + +C +C Execute the matrix-element calculation "sequence" via a Sampler on GPU/CUDA or CUDA/C++. +C - PSAMPLER: the memory address of the C++ Sampler +C - MOMENTA: the output 4-momenta Fortran array +C + INTERFACE + SUBROUTINE FSAMPLERSEQUENCE(PSAMPLER, MOMENTA) + INTEGER*8 PSAMPLER + DOUBLE PRECISION MOMENTA(*) + END SUBROUTINE FSAMPLERSEQUENCE + END INTERFACE diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/nvtx.h b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/nvtx.h new file mode 100644 index 0000000000..e206b8e075 --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/nvtx.h @@ -0,0 +1,69 @@ +#ifndef MGONGPUNVTX_H +#define MGONGPUNVTX_H 1 + +// Provides macros for simply use of NVTX, if a compiler macro USE_NVTX is defined. +// Original author Peter Heywood +// With a few modifications by Andrea Valassi + +//------------------------------------------- +// NVTX is enabled +//------------------------------------------- + +#ifdef USE_NVTX + +#include + +// This assumes CUDA 10.0+ +#include "nvtx3/nvToolsExt.h" + +// Scope some things into a namespace +namespace nvtx +{ + + // Colour palette (RGB): https://colorbrewer2.org/#type=qualitative&scheme=Paired&n=12 + const uint32_t palette[] = { 0xffa6cee3, 0xff1f78b4, 0xffb2df8a, 0xff33a02c, 0xfffb9a99, 0xffe31a1c, 0xfffdbf6f, 0xffff7f00, 0xffcab2d6, 0xff6a3d9a, 0xffffff99, 0xffb15928 }; + const uint32_t colourCount = sizeof( palette ) / sizeof( uint32_t ); + + // Inline method to push an nvtx range + inline void push( const char* str, const uint32_t nextColourIdx ) + { + // Get the wrapped colour index + uint32_t colourIdx = nextColourIdx % colourCount; + // Build/populate the struct of nvtx event attributes + nvtxEventAttributes_t eventAttrib = { 0 }; // zero-out the struct (see https://nvidia.github.io/NVTX/doxygen/structnvtx_event_attributes__v2.html) + eventAttrib.version = NVTX_VERSION; + eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE; + eventAttrib.colorType = NVTX_COLOR_ARGB; + eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII; + eventAttrib.color = palette[colourIdx]; + eventAttrib.message.ascii = str; + // Push the custom event. 
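+    // (each push must later be matched by a pop, via the NVTX_POP macro below, to close the range)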
+ nvtxRangePushEx( &eventAttrib ); + } + + // Inline method to pop an nvtx range + inline void pop() + { + nvtxRangePop(); + } + +} + +// Macro to push an arbitrary nvtx marker +#define NVTX_PUSH( str, idx ) nvtx::push( str, idx ) + +// Macro to pop an arbitrary nvtx marker +#define NVTX_POP() nvtx::pop() + +//------------------------------------------- +// NVTX is not enabled +//------------------------------------------- + +#else + +#define NVTX_PUSH( str, idx ) +#define NVTX_POP() + +#endif + +#endif // MGONGPUNVTX_H 1 diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/ompnumthreads.h b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/ompnumthreads.h new file mode 100644 index 0000000000..9f8dbbb7f9 --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/ompnumthreads.h @@ -0,0 +1,58 @@ +#ifndef OMPNUMTHREADS_H +#define OMPNUMTHREADS_H 1 + +#ifdef _OPENMP + +#include + +#include + +// The OMP_NUM_THREADS environment variable is used to control OMP multi-threading +// By default, all available $(nproc) threads are used if OMP_NUM_THREADS is not set: +// if ompnumthreadsNotSetMeansOneThread is called, only one thread is used instead +inline void +ompnumthreadsNotSetMeansOneThread( int debuglevel ) // quiet(-1), info(0), debug(1) +{ + // Set OMP_NUM_THREADS equal to 1 if it is not yet set + char* ompnthr = getenv( "OMP_NUM_THREADS" ); + if( debuglevel == 1 ) + { + std::cout << "DEBUG: entering ompnumthreadsNotSetMeansOneThread" << std::endl; + std::cout << "DEBUG: omp_get_num_threads() = " + << omp_get_num_threads() << std::endl; // always == 1 here! + std::cout << "DEBUG: omp_get_max_threads() = " + << omp_get_max_threads() << std::endl; + std::cout << "DEBUG: ${OMP_NUM_THREADS} = '" + << ( ompnthr == 0 ? "[not set]" : ompnthr ) << "'" << std::endl; + } + if( ompnthr == NULL || + std::string( ompnthr ).find_first_not_of( "0123456789" ) != std::string::npos || + atol( ompnthr ) == 0 ) + { + if( ompnthr != NULL ) + std::cout << "(ompnumthreadsNotSetMeansOneThread) " + << "WARNING! OMP_NUM_THREADS is invalid: will use only 1 thread" << std::endl; + else if( debuglevel >= 0 ) + std::cout << "(ompnumthreadsNotSetMeansOneThread) " + << "DEBUG: OMP_NUM_THREADS is not set: will use only 1 thread" << std::endl; + omp_set_num_threads( 1 ); // https://stackoverflow.com/a/22816325 + if( debuglevel == 1 ) + { + std::cout << "DEBUG: omp_get_num_threads() = " + << omp_get_num_threads() << std::endl; // always == 1 here! 
+ std::cout << "DEBUG: omp_get_max_threads() = " + << omp_get_max_threads() << std::endl; + } + } + else if( debuglevel >= 0 ) + std::cout << "(ompnumthreadsNotSetMeansOneThread) " + << "DEBUG: OMP_NUM_THREADS = " << ompnthr << std::endl; + if( debuglevel >= 0 ) + std::cout << "(ompnumthreadsNotSetMeansOneThread) " + << "omp_get_max_threads() = " << omp_get_max_threads() << std::endl; + if( debuglevel == 1 ) + std::cout << "DEBUG: exiting ompnumthreadsNotSetMeansOneThread" << std::endl; +} +#endif + +#endif // OMPNUMTHREADS_H diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/perf.py b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/perf.py new file mode 100644 index 0000000000..63f4c714a7 --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/perf.py @@ -0,0 +1,346 @@ +#!/usr/bin/env python3 + +from optparse import OptionParser +from datetime import datetime +from mpl_toolkits.mplot3d import Axes3D # noqa: F401 +import matplotlib.pyplot as plt +from matplotlib import cm +from matplotlib.ticker import ScalarFormatter +import numpy as np +import copy +import sys +import json +from operator import itemgetter + + +class Perf(): + + def __init__(self, date, run, x, y, z, xrem, yrem, loc): + perffile = '%s/%s-perf-test-run%s.json' % (loc, date, run) + data = open(perffile, 'r') + readJson = json.loads(data.read()) + data.close() + self.axesn = [x, y, z] + self.axesr = [xrem, yrem] # remove outer bands from axes + self.axesv = [[], [], []] + self.data = self.prepData(readJson) + + def prepData(self, jsonData): + for data in jsonData: + for i in data: + if isinstance(data[i], type('test')): + idx = -1 + if data[i].find("sec") != -1: + idx = data[i].find("sec") + elif data[i].find("GEV") != -1: + idx = data[i].find("GeV") + + if idx != -1: + data[i] = float(data[i][:idx - 1]) + return jsonData + + def prepAxes3D(self): + for d in self.data: + ks = list(d.keys()) + for ax in self.axesn: + idx = self.axesn.index(ax) + axlist = self.axesv[idx] + if ax in ks: + axval = d[ax] + if axval not in axlist: + axlist.append(axval) + else: + print('Error: cannot find axes name %s in %s' % (ax, d)) + if len(self.axesv[0]) * len(self.axesv[1]) != len(self.axesv[2]): + print('Error: axes don\'t match x * y != z (%d * %d != %d' % + (len(self.axesv[0]), len(self.axesv[1]), len(self.axesv[2]))) + self.axesv[0].sort() + self.axesv[1].sort() + self.axesv[0] = self.axesv[0][self.axesr[0]:] # sr + self.axesv[1] = self.axesv[1][self.axesr[1]:] # sr + + def prepData3D(self): + xlen = len(self.axesv[0]) + ylen = len(self.axesv[1]) + self.data2d = [] + ylist = [0] * ylen + for i in range(xlen): + self.data2d.append(copy.deepcopy(ylist)) + for d in self.data: + xpos = -1 + ypos = -1 + if d[self.axesn[0]] in self.axesv[0]: + xpos = self.axesv[0].index(d[self.axesn[0]]) + if d[self.axesn[1]] in self.axesv[1]: + ypos = self.axesv[1].index(d[self.axesn[1]]) + if xpos != -1 and ypos != -1: + zval = d[self.axesn[2]] + self.data2d[xpos][ypos] = zval + + def plot3D(self): + self.prepAxes3D() + self.prepData3D() + + data_array = np.array(self.data2d) + fig = plt.figure() + ax = fig.add_subplot(111, projection='3d') + x_data, y_data = np.meshgrid(np.arange(data_array.shape[1]), + np.arange(data_array.shape[0])) + xticks = x_data[0] + yticks = np.array(list(range(len(y_data)))) + x_data = x_data.flatten() + y_data = y_data.flatten() + z_data = data_array.flatten() + ax.set_xlabel(self.axesn[1], {'fontsize': 'small'}) + ax.set_xticks(xticks) + # consider 'fontsize': 'small' for dict also yticklabels + 
ax.set_xticklabels(self.axesv[1], {'rotation': 45, 'fontsize': 'small'}) + ax.set_ylabel(self.axesn[0], {'fontsize': 'small'}) + ax.set_yticks(yticks) + # consider 'fontsize': 'small' for dict + ax.set_yticklabels(self.axesv[0], {'rotation': 45, 'fontsize': 'small'}) + ax.set_zlabel(self.axesn[2], {'fontsize': 'small'}) + # ax.set_zscale('log') + # z_data = np.log10(z_data) + ax.bar3d(x_data, y_data, np.zeros(len(z_data)), 1, 1, z_data) + plt.show() + + def prepData2D(self): + self.dataDict2D = {} + xname = self.axesn[0] + yname = self.axesn[1] + zname = self.axesn[2] + + for d in self.data: + xval = d[xname] + yval = d[yname] + zval = d[zname] + dim = xval * yval + tick = '%s/%s' % (str(xval), str(yval)) + vallist = [float(str(zval).split()[0]), tick] + if dim not in self.dataDict2D: + self.dataDict2D[dim] = [vallist] + else: + self.dataDict2D[dim].append(vallist) + + def plot2D(self): + self.prepData2D() + + # use this value to plot a flat line for the cpu values to compare with + cpuval = 0 + # cpuval = 79766.84 # tot + # cpuval = 427251.1 # rmb + me + # cpuval = 472578.7 # me + + cmap = {'32': 'red', '64': 'orange', '128': 'blue', '256': 'green'} + smap = {'32': 20, '64': 40, '128': 80, '256': 160} + + dims = list(self.dataDict2D.keys()) + dims.sort() + xlist = list(range(1, len(dims) + 1)) + ylist = [] + clist = [] + slist = [] + ylabels = [] + for d in dims: + ysublist = [] + for y in self.dataDict2D[d]: + ysublist.append(y) # y[0] + ysublist = sorted(ysublist, key=itemgetter(0), reverse=True) + clist.append([cmap[x[1].split('/')[0]] for x in ysublist]) + slist.append([smap[x[1].split('/')[0]] for x in ysublist]) + # Temporary conversion for total time for events -> events per sec + # ysublist[0][0] = d / ysublist[0][0] + ylabels.append([x[1] for x in ysublist]) + ylist.append([x[0] for x in ysublist]) + + fig, ax = plt.subplots() + print(xlist) + print(ylist) + for xe, ye, ce, se in zip(xlist, ylist, clist, slist): + print([xe] * len(ye)) + ax.scatter([xe] * len(ye), ye, s=se, facecolors='none', + edgecolors=ce) + if cpuval: + ax.scatter(xe, cpuval, marker='+', c='dimgrey') + + ax.set_xticks(xlist) + ax.set_xlabel('%s * %s' % (self.axesn[0], self.axesn[1])) + ax.set_ylabel('%s' % (self.axesn[2])) + ax.set_yscale('log') + ax.set_xticklabels(dims, rotation=45) + ax.yaxis.set_major_formatter(ScalarFormatter()) + plt.ticklabel_format(axis="y", style="sci", scilimits=(0, 0)) + # Commenting only for the current example due to an overlap of the + # product labels + # xpos = 1 + # for y in ylabels: + # xstr = '' + # for x in y: + # # xstr += x.replace('/', '\n') + # xstr += x + # xstr += '\n' + # ax.text(xpos, 1, xstr, {'fontsize': 'xx-small', + # 'ha': 'center', + # 'va': 'bottom'}) + # xpos += 1 + + handlelist = [] + for k in cmap: + handlelist.append(plt.scatter([], [], s=smap[k], marker='o', + color=cmap[k], facecolor='none')) + + print(handlelist) + plt.legend(handlelist, [str(x) for x in cmap.keys()], + title="# threads / block") + + plt.show() + + def plotStack(self, threads=32): + collist = ['Purples', 'Blues', 'Greens', 'Oranges', 'Reds', 'Greys'] + # collist = ['tab20b', 'tab20c'] + + bars = {} + blocks = [] + for d in self.data: + if d['NumThreadsPerBlock'] == threads: + blocks.append(d['NumBlocksPerGrid']) + for k in d: + if k[0].isdigit(): + if k not in bars: + bars[k] = [] + + barks = list(bars.keys()) + barks.sort() + blocks.sort() + + for d in self.data: + if d['NumThreadsPerBlock'] == threads: + for b in barks: + if b in d: + bars[b].append(d[b]) + else: + bars[b].append(0) 
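+        # at this point each entry of 'bars' holds one value per run with the selected NumThreadsPerBlock; the code below plots them as stacked bars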
+ + ind = np.arange(len(bars[barks[0]])) + width = 0.35 + + plts = [] + ci = -1 + cj = 0.5 + plts.append(plt.bar(ind, bars[barks[0]], width, edgecolor='black', + color='white')) + bot = [0] * len(bars[barks[0]]) + for i in range(1, len(barks)): + colcod = barks[i][:2] + if colcod[1] == 'a': + ci += 1 + cj = 0.5 + else: + cj += 0.1 + print(colcod, ci, cj, bot[-1], barks[i]) + col = cm.get_cmap(collist[ci])(cj) + sumlist = [] + for (l1, l2) in zip(bot, bars[barks[i - 1]]): + sumlist.append(l1 + l2) + bot = sumlist + plts.append(plt.bar(ind, bars[barks[i]], width, + bottom=bot, color=col, edgecolor=col)) + + plt.ylabel('seconds') + plts.reverse() + barks.reverse() + plt.xticks(ind, [str(x) for x in blocks], rotation=45) + plt.legend([x[0] for x in plts], barks) + + plt.show() + + +# import numpy as np +# import matplotlib.pyplot as plt +# +# N = 5 +# menMeans = (20, 35, 30, 35, 27) +# womenMeans = (25, 32, 34, 20, 25) +# menStd = (2, 3, 4, 1, 2) +# womenStd = (3, 5, 2, 3, 3) +# ind = np.arange(N) # the x locations for the groups +# width = 0.35 # the width of the bars: can also be len(x) sequence +# +# p1 = plt.bar(ind, menMeans, width, yerr=menStd) +# p2 = plt.bar(ind, womenMeans, width, +# bottom=menMeans, yerr=womenStd) +# +# plt.ylabel('Scores') +# plt.title('Scores by group and gender') +# plt.xticks(ind, ('G1', 'G2', 'G3', 'G4', 'G5')) +# plt.yticks(np.arange(0, 81, 10)) +# plt.legend((p1[0], p2[0]), ('Men', 'Women')) +# +# plt.show() + +def print_keys(loc, date, run): + perffile = '%s/%s-perf-test-run%s.json' % (loc, date, run) + data = open(perffile, 'r') + readJson = json.loads(data.read()) + data.close() + for k in list(readJson[0].keys()): + print(k) + + +if __name__ == '__main__': + + n = datetime.now() + today = str(n.year) + str(n.month).rjust(2, '0') + str(n.day).rjust(2, '0') + parser = OptionParser() + parser.add_option('-l', '--location', dest='dir', default='data', + help='directory with data (default: data)') + parser.add_option('-d', '--date', dest='date', default=today, + help='date of data files YYYYMMDD (default: today)') + parser.add_option('-r', '--run', default='1', dest='run', + help='run number (default: 1)') + parser.add_option('-x', dest='xax', default='NumThreadsPerBlock', + help='variable name for x axis \ + (default: NumThreadsPerBlock)') + parser.add_option('-y', dest='yax', default='NumBlocksPerGrid', + help='variable name for y axis \ + (default: NumBlocksPerGrid)') + parser.add_option('-z', dest='zax', default='TotalTimeInWaveFuncs', + help='variable name for z axis \ + (default: TotalTimeInWaveFuncs)') + parser.add_option('--xrm', dest='xrm', default=0, + help='# of outer x dimensions to remove') + parser.add_option('--yrm', dest='yrm', default=0, + help='# of outer y dimensions to remove') + parser.add_option('-k', '--keys', dest='keys', action='store_true', + help='print available keys from data') + + (op, ar) = parser.parse_args() + + plotnames = ['2D', '3D', 'STACK'] + plot = '2D' + + xrm = 0 + yrm = 0 + if op.xrm: + xrm = int(op.xrm) + if op.yrm: + yrm = int(op.yrm) + + if op.keys: + print_keys(op.dir, op.date, op.run) + sys.exit(0) + + if (len(ar) == 1 and ar[0].upper() not in plotnames) or len(ar) > 1: + print(parser.print_help()) + sys.exit(1) + elif len(ar) == 1: + plot = ar[0].upper() + + p = Perf(op.date, op.run, op.xax, op.yax, op.zax, xrm, yrm, op.dir) + if plot == '3D': + p.plot3D() + if plot == '2D': + p.plot2D() + if plot == 'STACK': + p.plotStack() diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/profile.sh 
b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/profile.sh new file mode 100755 index 0000000000..1d60fa3542 --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/profile.sh @@ -0,0 +1,182 @@ +#!/bin/bash + +usage(){ + echo "Usage (GUI analysis): $0 -l label [-cc] [-p #blocks #threads #iterations]" + echo "Usage (CL analysis): $0 -nogui [-p #blocks #threads #iterations]" + exit 1 +} + +# Default options +tag=cu +###cuargs="16384 32 12" # NEW DEFAULT 2020.08.10 (faster on local, and allows comparison to global and shared memory) +###ccargs=" 256 32 12" # Similar to cuda config, but faster than using "16384 32 12" +##cuargs="16384 32 2" # faster tests +##ccargs=" 256 32 2" # faster tests +cuargs="2048 256 1" # NEW DEFAULT 2021.04.06 (matches "-p 2048 256 12" but only one iteration) +ccargs="2048 256 1" # NEW DEFAULT 2021.04.06 (matches "-p 2048 256 12" but only one iteration) +args= +label= + +# Command line arguments +while [ "$1" != "" ]; do + # Profile C++ instead of cuda + if [ "$1" == "-cc" ]; then + if [ "$tag" != "nogui" ]; then + tag=cc + shift + else + echo "ERROR! Incompatible options -gui and -cc" + usage + fi + # Fast no-GUI profiling with ncu + elif [ "$1" == "-nogui" ]; then + if [ "$tag" != "cc" ]; then + tag=nogui + shift + else + echo "ERROR! Incompatible options -gui and -cc" + usage + fi + # Override blocks/threads/iterations + # (NB do not exceed 12 iterations: profiling overhead per iteration is huge) + elif [ "$1" == "-p" ]; then + if [ "$4" != "" ]; then + args="$2 $3 $4" + shift 4 + else + usage + fi + # Label + elif [ "$1" == "-l" ]; then + if [ "$2" != "" ]; then + label="$2" + shift 2 + else + usage + fi + # Invalid arguments + else + usage + fi +done + +if [ "$tag" == "cc" ]; then + if [ "$args" == "" ]; then args=$ccargs; fi + cmd="./check.exe -p $args" + make +else + if [ "$args" == "" ]; then args=$cuargs; fi + cmd="./gcheck.exe -p $args" + make +fi + +ncu="ncu" +nsys="nsys" +ncugui="ncu-ui &" +nsysgui="nsight-sys &" + +# Settings specific to CERN condor/batch nodes +###host=$(hostname) +###if [ "${host%%cern.ch}" != "${host}" ] && [ "${host##b}" != "${host}" ]; then +### ncu=/usr/local/cuda-11.0/bin/ncu +### ###nsys=/usr/local/cuda-10.1/bin/nsys +### ###nsys=/usr/local/cuda-10.2/bin/nsys +### nsys=/cvmfs/sft.cern.ch/lcg/releases/cuda/11.0RC-d9c38/x86_64-centos7-gcc62-opt/bin/nsys +### ncugui="Launch the Nsight Compute GUI from Windows" +### nsysgui="Launch the Nsight System GUI from Windows" +###fi + +# Settings specific to CERN IT/SC nodes +# (nsys 11.4 and 11.5 fail with 'boost::wrapexcept') +host=$(hostname) +if [ "${host%%cern.ch}" != "${host}" ] && [ "${host##itsc}" != "${host}" ]; then + CUDA_NSIGHT_HOME=/usr/local/cuda-11.1 + echo "Using Nsight from ${CUDA_NSIGHT_HOME}" + ncu=${CUDA_NSIGHT_HOME}/bin/ncu + nsys=${CUDA_NSIGHT_HOME}/bin/nsys + ncugui="${CUDA_NSIGHT_HOME}/bin/ncu-ui &" + nsysgui="${CUDA_NSIGHT_HOME}/bin/nsight-sys &" +fi + +# Set the ncu sampling period (default is auto) +# The value is in the range [0..31], the actual period is 2**(5+value) cycles. 
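+# (e.g. value 0 gives 2**(5+0) = 32 cycles between samples, i.e. the maximum sampling frequency, while value 31 gives 2**36 cycles, i.e. the minimum)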
+###ncu="${ncu} --sampling-interval 0" # MAX sampling frequency +###ncu="${ncu} --sampling-interval 31" # MIN sampling frequency + +# METRICS FOR COALESCED MEMORY ACCESS (AOSOA etc) +# See https://developer.nvidia.com/blog/using-nsight-compute-to-inspect-your-kernels/ +# These used to be called gld_transactions and global_load_requests +# See also https://docs.nvidia.com/nsight-compute/2019.5/NsightComputeCli/index.html#nvprof-metric-comparison +# See also https://stackoverflow.com/questions/60535867 +metrics=l1tex__t_sectors_pipe_lsu_mem_global_op_ld.sum,l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum + +# METRICS FOR REGISTER PRESSURE +metrics+=,launch__registers_per_thread + +# METRICS FOR DIVERGENCE +metrics+=,sm__sass_average_branch_targets_threads_uniform.pct + +# GUI analysis +if [ "$tag" != "nogui" ]; then + + if [ "$label" == "" ]; then + echo "ERROR! You must specify a label" + usage + fi + + arg1=$(echo $args | cut -d' ' -f1) + arg2=$(echo $args | cut -d' ' -f2) + arg3=$(echo $args | cut -d' ' -f3) + + ###if [ "${host%%raplab*}" != "${host}" ]; then + ### logs=nsight_logs_raplab + ###elif [ "${host%%cern.ch}" != "${host}" ] && [ "${host##b}" != "${host}" ]; then + ### logs=nsight_logs_lxbatch + ###else + ### logs=nsight_logs + ###fi + logs=nsight_logs + + if [ ! -d $logs ]; then mkdir -p $logs; fi + trace=$logs/Sigma_sm_gg_ttxgg_${tag}_`date +%m%d_%H%M`_b${arg1}_t${arg2}_i${arg3} + if [ "$label" != "" ]; then trace=${trace}_${label}; fi + + echo + echo "PROFILING: ${cmd}" + echo "OUTPUT: ${trace}.*" + echo + + \rm -f ${trace}.* + + hostname > ${trace}.txt + echo "nproc=$(nproc)" >> ${trace}.txt + echo >> ${trace}.txt + ( time ${cmd} ) 2>&1 | tee -a ${trace}.txt + nvidia-smi -q -d CLOCK >> ${trace}.txt + + if [ "$tag" == "cu" ]; then + echo + echo "${ncu} --set full --metrics ${metrics} -o ${trace} ${cmd}" + echo + ${ncu} --set full --metrics ${metrics} -o ${trace} ${cmd} + fi + echo + echo "${nsys} profile -o ${trace} ${cmd}" + echo + ${nsys} profile -o ${trace} ${cmd} + echo "" + echo "TO ANALYSE TRACE FILES:" + echo " ${ncugui}" + echo " ${nsysgui}" + +# NO-GUI analysis +else + + echo + echo "PROFILING: ${cmd}" + echo "${ncu} --metrics ${metrics} ${cmd}" + echo + echo sudo LD_LIBRARY_PATH=${LD_LIBRARY_PATH} $(which ${ncu}) --metrics ${metrics} --target-processes all ${cmd} + sudo LD_LIBRARY_PATH=${LD_LIBRARY_PATH} $(which ${ncu}) --metrics ${metrics} --target-processes all ${cmd} + +fi diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/runTest.cc b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/runTest.cc new file mode 100644 index 0000000000..a1cec39ced --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/runTest.cc @@ -0,0 +1,251 @@ +#include "mgOnGpuConfig.h" + +#include "CPPProcess.h" +#include "MadgraphTest.h" +#include "MatrixElementKernels.h" +#include "MemoryAccessMatrixElements.h" +#include "MemoryAccessMomenta.h" +#include "MemoryBuffers.h" +#include "RamboSamplingKernels.h" +#include "RandomNumberKernels.h" +#include "epoch_process_id.h" + +#ifdef __CUDACC__ +using namespace mg5amcGpu; +#else +using namespace mg5amcCpu; +#endif + +struct CUDA_CPU_TestBase : public TestDriverBase +{ + static constexpr int neppM = MemoryAccessMomenta::neppM; // AOSOA layout + static constexpr int np4 = mgOnGpu::np4; + static constexpr int npar = mgOnGpu::npar; + static_assert( gputhreads % neppM == 0, "ERROR! #threads/block should be a multiple of neppM" ); + static_assert( gputhreads <= mgOnGpu::ntpbMAX, "ERROR! 
#threads/block should be <= ntpbMAX" ); + CUDA_CPU_TestBase( const std::string& refFileName ) + : TestDriverBase( npar, refFileName ) {} +}; + +#ifndef __CUDACC__ +struct CPUTest : public CUDA_CPU_TestBase +{ + // Struct data members (process, and memory structures for random numbers, momenta, matrix elements and weights on host and device) + // [NB the hst/dev memory arrays must be initialised in the constructor, see issue #290] + CPPProcess process; + HostBufferRndNumMomenta hstRndmom; + HostBufferMomenta hstMomenta; + HostBufferGs hstGs; + HostBufferRndNumHelicity hstRndHel; + HostBufferRndNumColor hstRndCol; + HostBufferWeights hstWeights; + HostBufferMatrixElements hstMatrixElements; + HostBufferSelectedHelicity hstSelHel; + HostBufferSelectedColor hstSelCol; + HostBufferHelicityMask hstIsGoodHel; + + // Create a process object + // Read param_card and set parameters + // ** WARNING EVIL EVIL ** + // The CPPProcess constructor has side effects on the globals Proc::cHel, which is needed in ME calculations. + // Don't remove! + CPUTest( const std::string& refFileName ) + : CUDA_CPU_TestBase( refFileName ) + , process( /*verbose=*/false ) + , hstRndmom( nevt ) + , hstMomenta( nevt ) + , hstGs( nevt ) + , hstRndHel( nevt ) + , hstRndCol( nevt ) + , hstWeights( nevt ) + , hstMatrixElements( nevt ) + , hstSelHel( nevt ) + , hstSelCol( nevt ) + , hstIsGoodHel( mgOnGpu::ncomb ) + { + process.initProc( "../../Cards/param_card.dat" ); + } + + virtual ~CPUTest() {} + + void prepareRandomNumbers( unsigned int iiter ) override + { + CommonRandomNumberKernel rnk( hstRndmom ); + rnk.seedGenerator( 1337 + iiter ); + rnk.generateRnarray(); + } + + void prepareMomenta( fptype energy ) override + { + RamboSamplingKernelHost rsk( energy, hstRndmom, hstMomenta, hstWeights, nevt ); + // --- 2a. Fill in momenta of initial state particles on the device + rsk.getMomentaInitial(); + // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device + // (i.e. map random numbers to final-state particle momenta for each of nevt events) + rsk.getMomentaFinal(); + } + + void runSigmaKin( std::size_t iiter ) override + { + constexpr fptype fixedG = 1.2177157847767195; // fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) + for( unsigned int i = 0; i < nevt; ++i ) hstGs[i] = fixedG; + MatrixElementKernelHost mek( hstMomenta, hstGs, hstRndHel, hstRndCol, hstMatrixElements, hstSelHel, hstSelCol, nevt ); + if( iiter == 0 ) mek.computeGoodHelicities(); + constexpr unsigned int channelId = 0; // TEMPORARY? disable multi-channel in runTest.exe #466 + mek.computeMatrixElements( channelId ); + } + + fptype getMomentum( std::size_t ievt, unsigned int ipar, unsigned int ip4 ) const override + { + assert( ipar < npar ); + assert( ip4 < np4 ); + return MemoryAccessMomenta::ieventAccessIp4IparConst( hstMomenta.data(), ievt, ip4, ipar ); + } + + fptype getMatrixElement( std::size_t ievt ) const override + { + return MemoryAccessMatrixElements::ieventAccessConst( hstMatrixElements.data(), ievt ); + } +}; +#endif + +#ifdef __CUDACC__ +struct CUDATest : public CUDA_CPU_TestBase +{ + // Reset the device when our test goes out of scope. Note that this should happen after + // the frees, i.e. be declared before the pointers to device memory. 
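+  // (C++ destroys data members in reverse order of declaration, so a member declared first is destroyed last)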
+ struct DeviceReset + { + ~DeviceReset() + { + checkCuda( cudaDeviceReset() ); // this is needed by cuda-memcheck --leak-check full + } + } deviceResetter; + + // Struct data members (process, and memory structures for random numbers, momenta, matrix elements and weights on host and device) + // [NB the hst/dev memory arrays must be initialised in the constructor, see issue #290] + CPPProcess process; + PinnedHostBufferRndNumMomenta hstRndmom; + PinnedHostBufferMomenta hstMomenta; + PinnedHostBufferGs hstGs; + PinnedHostBufferRndNumHelicity hstRndHel; + PinnedHostBufferRndNumColor hstRndCol; + PinnedHostBufferWeights hstWeights; + PinnedHostBufferMatrixElements hstMatrixElements; + PinnedHostBufferSelectedHelicity hstSelHel; + PinnedHostBufferSelectedColor hstSelCol; + PinnedHostBufferHelicityMask hstIsGoodHel; + DeviceBufferRndNumMomenta devRndmom; + DeviceBufferMomenta devMomenta; + DeviceBufferGs devGs; + DeviceBufferRndNumHelicity devRndHel; + DeviceBufferRndNumColor devRndCol; + DeviceBufferWeights devWeights; + DeviceBufferMatrixElements devMatrixElements; + DeviceBufferSelectedHelicity devSelHel; + DeviceBufferSelectedColor devSelCol; + DeviceBufferHelicityMask devIsGoodHel; + + // Create a process object + // Read param_card and set parameters + // ** WARNING EVIL EVIL ** + // The CPPProcess constructor has side effects on the globals Proc::cHel, which is needed in ME calculations. + // Don't remove! + CUDATest( const std::string& refFileName ) + : CUDA_CPU_TestBase( refFileName ) + , process( /*verbose=*/false ) + , hstRndmom( nevt ) + , hstMomenta( nevt ) + , hstGs( nevt ) + , hstRndHel( nevt ) + , hstRndCol( nevt ) + , hstWeights( nevt ) + , hstMatrixElements( nevt ) + , hstSelHel( nevt ) + , hstSelCol( nevt ) + , hstIsGoodHel( mgOnGpu::ncomb ) + , devRndmom( nevt ) + , devMomenta( nevt ) + , devGs( nevt ) + , devRndHel( nevt ) + , devRndCol( nevt ) + , devWeights( nevt ) + , devMatrixElements( nevt ) + , devSelHel( nevt ) + , devSelCol( nevt ) + , devIsGoodHel( mgOnGpu::ncomb ) + { + process.initProc( "../../Cards/param_card.dat" ); + } + + virtual ~CUDATest() {} + + void prepareRandomNumbers( unsigned int iiter ) override + { + CommonRandomNumberKernel rnk( hstRndmom ); + rnk.seedGenerator( 1337 + iiter ); + rnk.generateRnarray(); + copyDeviceFromHost( devRndmom, hstRndmom ); + } + + void prepareMomenta( fptype energy ) override + { + RamboSamplingKernelDevice rsk( energy, devRndmom, devMomenta, devWeights, gpublocks, gputhreads ); + // --- 2a. Fill in momenta of initial state particles on the device + rsk.getMomentaInitial(); + // --- 2b. Fill in momenta of final state particles using the RAMBO algorithm on the device + // (i.e. map random numbers to final-state particle momenta for each of nevt events) + rsk.getMomentaFinal(); + // --- 2c. CopyDToH Weights + copyHostFromDevice( hstWeights, devWeights ); + // --- 2d. CopyDToH Momenta + copyHostFromDevice( hstMomenta, devMomenta ); + } + + void runSigmaKin( std::size_t iiter ) override + { + constexpr fptype fixedG = 1.2177157847767195; // fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) + for( unsigned int i = 0; i < nevt; ++i ) hstGs[i] = fixedG; + copyDeviceFromHost( devGs, hstGs ); // BUG FIX #566 + MatrixElementKernelDevice mek( devMomenta, devGs, devRndHel, devRndCol, devMatrixElements, devSelHel, devSelCol, gpublocks, gputhreads ); + if( iiter == 0 ) mek.computeGoodHelicities(); + constexpr unsigned int channelId = 0; // TEMPORARY? 
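The hard-coded value 1.2177157847767195 used for fixedG in runSigmaKin is, as the "aS=0.118" comment suggests, the QCD coupling g_s = sqrt( 4 * pi * alpha_s ) evaluated at alpha_s = 0.118. A small stand-alone check, not part of the patch, reproduces the constant:

#include <cmath>
#include <cstdio>

// Stand-alone check (assumption: fixedG is g_s = sqrt( 4 * pi * alpha_s ) at alpha_s = 0.118)
int main()
{
  const double pi = 3.141592653589793;
  const double aS = 0.118;
  const double fixedG = std::sqrt( 4.0 * pi * aS );
  std::printf( "%.16f\n", fixedG ); // ~1.2177157847767195
  return 0;
}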
disable multi-channel in runTest.exe #466 + mek.computeMatrixElements( channelId ); + copyHostFromDevice( hstMatrixElements, devMatrixElements ); + } + + fptype getMomentum( std::size_t ievt, unsigned int ipar, unsigned int ip4 ) const override + { + assert( ipar < npar ); + assert( ip4 < np4 ); + return MemoryAccessMomenta::ieventAccessIp4IparConst( hstMomenta.data(), ievt, ip4, ipar ); + } + + fptype getMatrixElement( std::size_t ievt ) const override + { + return MemoryAccessMatrixElements::ieventAccessConst( hstMatrixElements.data(), ievt ); + } +}; +#endif + +// Use two levels of macros to force stringification at the right level +// (see https://gcc.gnu.org/onlinedocs/gcc-3.0.1/cpp_3.html#SEC17 and https://stackoverflow.com/a/3419392) +// Google macro is in https://github.com/google/googletest/blob/master/googletest/include/gtest/gtest-param-test.h +#define TESTID_CPU( s ) s##_CPU +#define XTESTID_CPU( s ) TESTID_CPU( s ) +#define MG_INSTANTIATE_TEST_SUITE_CPU( prefix, test_suite_name ) \ +INSTANTIATE_TEST_SUITE_P( prefix, \ + test_suite_name, \ + testing::Values( new CPUTest( MG_EPOCH_REFERENCE_FILE_NAME ) ) ); +#define TESTID_GPU( s ) s##_GPU +#define XTESTID_GPU( s ) TESTID_GPU( s ) +#define MG_INSTANTIATE_TEST_SUITE_GPU( prefix, test_suite_name ) \ +INSTANTIATE_TEST_SUITE_P( prefix, \ + test_suite_name, \ + testing::Values( new CUDATest( MG_EPOCH_REFERENCE_FILE_NAME ) ) ); + +#ifdef __CUDACC__ +MG_INSTANTIATE_TEST_SUITE_GPU( XTESTID_GPU( MG_EPOCH_PROCESS_ID ), MadgraphTest ); +#else +MG_INSTANTIATE_TEST_SUITE_CPU( XTESTID_CPU( MG_EPOCH_PROCESS_ID ), MadgraphTest ); +#endif diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/testmisc.cc b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/testmisc.cc new file mode 100644 index 0000000000..5fa8ac70fe --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/testmisc.cc @@ -0,0 +1,217 @@ +// Use ./runTest.exe --gtest_filter=*misc to run only this test + +#include "mgOnGpuConfig.h" + +#include "mgOnGpuVectors.h" + +#include "epoch_process_id.h" + +#include + +#include +#include + +#ifdef __CUDACC__ +#define TESTID( s ) s##_GPU_MISC +#else +#define TESTID( s ) s##_CPU_MISC +#endif + +#define XTESTID( s ) TESTID( s ) + +#ifdef MGONGPU_CPPSIMD /* clang-format off */ +bool maskand( const bool_v& mask ){ bool out = true; for ( int i=0; i=1] + EXPECT_TRUE( ( f[i] == 0 ) ); // equals 0, not 1 + } +#endif + } + +#ifdef MGONGPU_CPPSIMD + // Vector initialization for cxtype_sv - demonstrate fix for bug #339 + { + fptype_sv f1 = fptype_v{ 0 } + 1; + EXPECT_TRUE_sv( f1 == 1 ); + cxtype_v c12 = cxmake( f1, 2 ); + //std::cout << c12 << std::endl << boolTF( c12.real() == 1 ) << std::endl << boolTF( c12.imag() == 2 ) << std::endl; + EXPECT_TRUE_sv( c12.real() == 1 ); + EXPECT_TRUE_sv( c12.imag() == 2 ); + cxtype_v c21 = cxmake( 2, f1 ); + //std::cout << c21 << std::endl << boolTF( c21.real() == 2 ) << std::endl << boolTF( c21.imag() == 1 ) << std::endl; + EXPECT_TRUE_sv( c21.real() == 2 ); + EXPECT_TRUE_sv( c21.imag() == 1 ); + } +#endif + + // Vector initialization for cxtype_sv + { + cxtype_sv c = cxzero_sv(); + EXPECT_TRUE_sv( c.real() == 0 ); + EXPECT_TRUE_sv( c.imag() == 0 ); + } + { + cxtype_sv c = cxmake( 1, fptype_sv{ 0 } ); // here was a bug #339 + EXPECT_TRUE_sv( c.real() == 1 ); + EXPECT_TRUE_sv( c.imag() == 0 ); + } + { + cxtype_sv c = cxmake( fptype_sv{ 0 }, 1 ); // here was a bug #339 + EXPECT_TRUE_sv( c.real() == 0 ); + EXPECT_TRUE_sv( c.imag() == 1 ); + } + + // Array initialization for cxtype_sv array (example: jamp_sv in 
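The "= {}" and "{}" initializers exercised in the next two blocks matter because a local array with no initializer is default-initialized, leaving plain scalar members with indeterminate values, whereas an empty brace initializer value-initializes every element to zero. A minimal stand-alone illustration, using a hypothetical MyCx aggregate as a stand-in for a scalar complex type rather than the project's own cxtype:

#include <cstdio>

// Hypothetical stand-in for a scalar complex type (not the project's cxtype)
struct MyCx { double re; double im; };

int main()
{
  MyCx a[2];      // default-initialized: re/im hold indeterminate values
  MyCx b[2] = {}; // value-initialized: every element is (0,0)
  MyCx c[2]{};    // same effect with the alternative brace syntax
  std::printf( "b[0]=(%g,%g) c[1]=(%g,%g)\n", b[0].re, b[0].im, c[1].re, c[1].im ); // all zeros
  (void)a; // reading a[] here would be undefined behaviour, so it is deliberately not printed
  return 0;
}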
CPPProcess.cc) + { + cxtype_sv array[2] = {}; // all zeros (NB: vector cxtype_v IS initialized to 0, but scalar cxype is NOT, if "= {}" is missing!) + //std::cout << array[0].real() << std::endl; std::cout << boolTF( array[0].real() == 0 ) << std::endl; + EXPECT_TRUE_sv( array[0].real() == 0 ); + EXPECT_TRUE_sv( array[0].imag() == 0 ); + EXPECT_TRUE_sv( array[1].real() == 0 ); + EXPECT_TRUE_sv( array[1].imag() == 0 ); + } + + // Alternative array initialization for cxtype_sv array (example: was used for outwf in testxxx.cc) + { + cxtype_sv array[2]{}; // all zeros (NB: vector cxtype_v IS initialized to 0, but scalar cxype is NOT, if "{}" is missing!) + //std::cout << array[0].real() << std::endl; std::cout << boolTF( array[0].real() == 0 ) << std::endl; + EXPECT_TRUE_sv( array[0].real() == 0 ); + EXPECT_TRUE_sv( array[0].imag() == 0 ); + EXPECT_TRUE_sv( array[1].real() == 0 ); + EXPECT_TRUE_sv( array[1].imag() == 0 ); + } + + //-------------------------------------------------------------------------- + + // Scalar complex references + { + using namespace mgOnGpu; + // Refs to f1, f2 + fptype f1 = 1; + fptype f2 = 2; + cxtype_ref r12( f1, f2 ); // copy refs + //cxtype_ref r12a( r12 ); //deleted + cxtype_ref r12a( cxtype_ref( f1, f2 ) ); // copy refs + //cxtype_ref r12b = r12; // deleted + cxtype_ref r12b = cxtype_ref( f1, f2 ); // copy refs + EXPECT_TRUE( cxtype( r12 ).real() == 1 ); + EXPECT_TRUE( cxtype( r12 ).imag() == 2 ); + EXPECT_TRUE( cxtype( r12a ).real() == 1 ); + EXPECT_TRUE( cxtype( r12a ).imag() == 2 ); + EXPECT_TRUE( cxtype( r12b ).real() == 1 ); + EXPECT_TRUE( cxtype( r12b ).imag() == 2 ); + // Refs to f1c, f2c + fptype f1c = 0; + fptype f2c = 0; + cxtype_ref r12c( f1c, f2c ); + EXPECT_TRUE( cxtype( r12c ).real() == 0 ); + EXPECT_TRUE( cxtype( r12c ).imag() == 0 ); + //r12c = r12; // deleted + r12c = cxtype( r12 ); // copy values + EXPECT_TRUE( cxtype( r12c ).real() == 1 ); + EXPECT_TRUE( cxtype( r12c ).imag() == 2 ); + // Update f1, f2 + f1 = 10; + f2 = 20; + EXPECT_TRUE( cxtype( r12 ).real() == 10 ); + EXPECT_TRUE( cxtype( r12 ).imag() == 20 ); + EXPECT_TRUE( cxtype( r12a ).real() == 10 ); + EXPECT_TRUE( cxtype( r12a ).imag() == 20 ); + EXPECT_TRUE( cxtype( r12b ).real() == 10 ); + EXPECT_TRUE( cxtype( r12b ).imag() == 20 ); + EXPECT_TRUE( cxtype( r12c ).real() == 1 ); // points to f1c, not to f1 + EXPECT_TRUE( cxtype( r12c ).imag() == 2 ); // points to f2c, not to f2 + } + + // Vector complex references + { + using namespace mgOnGpu; + // Refs to f1, f2 + fptype_sv f1 = fptype_sv{ 0 } + 1; + fptype_sv f2 = fptype_sv{ 0 } + 2; + cxtype_sv_ref r12( f1, f2 ); // copy refs + //cxtype_sv_ref r12a( r12 ); //deleted + cxtype_sv_ref r12a( cxtype_sv_ref( f1, f2 ) ); // copy refs + //cxtype_sv_ref r12b = r12; // deleted + cxtype_sv_ref r12b = cxtype_sv_ref( f1, f2 ); // copy refs + EXPECT_TRUE_sv( cxtype_sv( r12 ).real() == 1 ); + EXPECT_TRUE_sv( cxtype_sv( r12 ).imag() == 2 ); + EXPECT_TRUE_sv( cxtype_sv( r12a ).real() == 1 ); + EXPECT_TRUE_sv( cxtype_sv( r12a ).imag() == 2 ); + EXPECT_TRUE_sv( cxtype_sv( r12b ).real() == 1 ); + EXPECT_TRUE_sv( cxtype_sv( r12b ).imag() == 2 ); + // Refs to f1c, f2c + fptype_sv f1c = fptype_sv{ 0 }; + fptype_sv f2c = fptype_sv{ 0 }; + cxtype_sv_ref r12c( f1c, f2c ); + EXPECT_TRUE_sv( cxtype_sv( r12c ).real() == 0 ); + EXPECT_TRUE_sv( cxtype_sv( r12c ).imag() == 0 ); + //r12c = r12; // deleted + r12c = cxtype_sv( r12 ); // copy values + EXPECT_TRUE_sv( cxtype_sv( r12c ).real() == 1 ); + EXPECT_TRUE_sv( cxtype_sv( r12c ).imag() == 2 ); + // Update f1, 
f2 + f1 = fptype_sv{ 0 } + 10; + f2 = fptype_sv{ 0 } + 20; + EXPECT_TRUE_sv( cxtype_sv( r12 ).real() == 10 ); + EXPECT_TRUE_sv( cxtype_sv( r12 ).imag() == 20 ); + EXPECT_TRUE_sv( cxtype_sv( r12a ).real() == 10 ); + EXPECT_TRUE_sv( cxtype_sv( r12a ).imag() == 20 ); + EXPECT_TRUE_sv( cxtype_sv( r12b ).real() == 10 ); + EXPECT_TRUE_sv( cxtype_sv( r12b ).imag() == 20 ); + EXPECT_TRUE_sv( cxtype_sv( r12c ).real() == 1 ); // points to f1c, not to f1 + EXPECT_TRUE_sv( cxtype_sv( r12c ).imag() == 2 ); // points to f2c, not to f2 + } + + //-------------------------------------------------------------------------- +} diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/testxxx.cc b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/testxxx.cc new file mode 100644 index 0000000000..849678acca --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/testxxx.cc @@ -0,0 +1,323 @@ +#include "mgOnGpuConfig.h" + +#include "CPPProcess.h" +#include "HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h" +#include "MemoryAccessMomenta.h" +#include "MemoryAccessWavefunctions.h" +#include "MemoryBuffers.h" +#include "epoch_process_id.h" + +#include + +#include +#include +#include +#include +#include +#include +#ifdef __CUDACC__ +#define TESTID( s ) s##_GPU_XXX +#else +#define TESTID( s ) s##_CPU_XXX +#endif + +#define XTESTID( s ) TESTID( s ) + +TEST( XTESTID( MG_EPOCH_PROCESS_ID ), testxxx ) +{ + constexpr bool dumpEvents = false; // dump the expected output of the test? + constexpr bool testEvents = !dumpEvents; // run the test? + constexpr fptype toleranceXXXs = std::is_same::value ? 1.E-15 : 1.E-5; + // Constant parameters + constexpr int neppM = MemoryAccessMomenta::neppM; // AOSOA layout + using mgOnGpu::neppV; + using mgOnGpu::np4; + using mgOnGpu::npar; + const int nevt = 16; // 12 independent tests plus 4 duplicates (need a multiple of 8 for floats or for '512z') + assert( nevt % neppM == 0 ); // nevt must be a multiple of neppM + assert( nevt % neppV == 0 ); // nevt must be a multiple of neppV + // Fill in the input momenta +#ifdef __CUDACC__ + mg5amcGpu::PinnedHostBufferMomenta hstMomenta( nevt ); // AOSOA[npagM][npar=4][np4=4][neppM] +#else + mg5amcCpu::HostBufferMomenta hstMomenta( nevt ); // AOSOA[npagM][npar=4][np4=4][neppM] +#endif /* clang-format off */ + const fptype par0[np4 * nevt] = // AOS[nevt][np4] + { + 500, 0, 0, 500, // #0 (m=0 pT=0 E=pz>0) + 500, 0, 0, -500, // #1 (m=0 pT=0 -E=pz<0) + 500, 300, 400, 0, // #2 (m=0 pT>0 pz=0) + 500, 180, 240, 400, // #3 (m=0 pT>0 pz>0) + 500, 180, 240, -400, // #4 (m=0 pT>0 pz<0) + 500, 0, 0, 0, // #5 (m=50>0 pT=0 pz=0) + 500, 0, 0, 300, // #6 (m=40>0 pT=0 pz>0) + 500, 0, 0, -300, // #7 (m=40>0 pT=0 pz<0) + 500, 180, 240, 0, // #8 (m=40>0 pT>0 pz=0) + 500, -240, -180, 0, // #9 (m=40>0 pT>0 pz=0) + 500, 180, 192, 144, // #10 (m=40>0 pT>0 pz>0) + 500, 180, 192, -144, // #11 (m=40>0 pT>0 pz<0) + 500, 0, 0, 500, // DUPLICATE #12 == #0 (m=0 pT=0 E=pz>0) + 500, 0, 0, -500, // DUPLICATE #13 == #1 (m=0 pT=0 -E=pz<0) + 500, 300, 400, 0, // DUPLICATE #14 == #2 (m=0 pT>0 pz=0) + 500, 180, 240, 400 // DUPLICATE #15 == #3 (m=0 pT>0 pz>0) + }; /* clang-format on */ + // Array initialization: zero-out as "{0}" (C and C++) or as "{}" (C++ only) + // See https://en.cppreference.com/w/c/language/array_initialization#Notes + fptype mass0[nevt] = {}; + bool ispzgt0[nevt] = {}; + bool ispzlt0[nevt] = {}; + bool isptgt0[nevt] = {}; + for( int ievt = 0; ievt < nevt; ievt++ ) + { + const fptype p0 = par0[ievt * np4 + 0]; + const fptype p1 = par0[ievt * np4 + 1]; + const fptype p2 = 
par0[ievt * np4 + 2]; + const fptype p3 = par0[ievt * np4 + 3]; + mass0[ievt] = sqrt( p0 * p0 - p1 * p1 - p2 * p2 - p3 * p3 ); + ispzgt0[ievt] = ( p3 > 0 ); + ispzlt0[ievt] = ( p3 < 0 ); + isptgt0[ievt] = ( p1 != 0 ) || ( p2 != 0 ); + } + const int ipar0 = 0; // use only particle0 for this test + for( int ievt = 0; ievt < nevt; ievt++ ) + { + for( int ip4 = 0; ip4 < np4; ip4++ ) + { + MemoryAccessMomenta::ieventAccessIp4Ipar( hstMomenta.data(), ievt, ip4, ipar0 ) = par0[ievt * np4 + ip4]; // AOS to AOSOA + } + } + // Expected output wavefunctions + std::vector> expwfs; +#include "testxxx_cc_ref.txt" // expwfs.push_back( {...} ); + std::string dumpFileName = "testxxx_cc_ref.txt.new"; + // Compute the output wavefunctions + // Dump new reference file if requested + using mgOnGpu::nw6; // dimensions of each wavefunction (HELAS KEK 91-11): e.g. 6 for e+ e- -> mu+ mu- (fermions and vectors) + int itest = 0; // index on the expected output vector + std::ofstream dumpFile; + if( dumpEvents ) dumpFile.open( dumpFileName, std::ios::trunc ); + auto dumpwf6 = [&]( std::ostream& out, const cxtype_sv wf[6], const char* xxx, int ievt, int nsp, fptype mass ) + { + out << std::setprecision( 15 ) << std::scientific; + out << " expwfs.push_back( {"; + out << " // ---------" << std::endl; + for( int iw6 = 0; iw6 < nw6; iw6++ ) + { +#ifdef MGONGPU_CPPSIMD + const int ieppV = ievt % neppV; // #event in the current event vector in this iteration +#ifdef MGONGPU_HAS_CPPCXTYPEV_BRK + out << std::setw( 26 ) << cxreal( wf[iw6][ieppV] ) << ", "; + out << std::setw( 22 ) << cximag( wf[iw6][ieppV] ); +#else + out << std::setw( 26 ) << wf[iw6].real()[ieppV] << ", "; + out << std::setw( 22 ) << wf[iw6].imag()[ieppV]; +#endif +#else + out << std::setw( 26 ) << wf[iw6].real(); + out << ", " << std::setw( 22 ) << wf[iw6].imag(); +#endif + if( iw6 < nw6 - 1 ) + out << ", "; + else + out << " } );"; + out << " // itest=" << itest << ": " << xxx << "#" << ievt; + out << " nsp=" << nsp << " mass=" << (int)mass << std::endl; + } + out << std::defaultfloat; + }; + auto testwf6 = [&]( const cxtype_sv wf[6], const char* xxx, int ievt, int nsp, fptype mass ) + { + if( dumpEvents ) dumpwf6( dumpFile, wf, xxx, ievt, nsp, mass ); + if( testEvents ) + { + std::array& expwf = expwfs[itest]; + //std::cout << "Testing " << std::setw(3) << itest << ": " << xxx << " #" << ievt << std::endl; + ////for ( int iw6 = 0; iw6( outwfI ); // proof of concept for using fptype* in the interface + fptype* fp_outwfO = reinterpret_cast( outwfO ); // proof of concept for using fptype* in the interface + fptype* fp_outwf = reinterpret_cast( outwf ); // proof of concept for using fptype* in the interface + fptype* fp_outwf3 = reinterpret_cast( outwf3 ); // proof of concept for using fptype* in the interface + const int nhel = 1; + for( auto nsp: { -1, +1 } ) // antifermion/fermion (or initial/final for scalar and vector) + { + for( int ievt = 0; ievt < nevt; ievt++ ) + { +#ifdef __CUDACC__ + using namespace mg5amcGpu; +#else + using namespace mg5amcCpu; +#endif + if( false ) + { + std::cout << std::endl; + for( int ip4 = 0; ip4 < np4; ip4++ ) std::cout << par0[ievt * np4 + ip4] << ", "; + std::cout << std::endl; + } + const int ipagV = ievt / neppV; // #event vector in this iteration + const fptype* ievt0Momenta = MemoryAccessMomenta::ieventAccessRecordConst( hstMomenta.data(), ipagV * neppV ); + // Test ixxxxx - NO ASSUMPTIONS + { + const fptype fmass = mass0[ievt]; + ixxxxx( ievt0Momenta, fmass, nhel, nsp, fp_outwfI, ipar0 ); + testwf6( outwfI, "ixxxxx", 
ievt, nsp, fmass ); + ixxxxx( ievt0Momenta, -fmass, nhel, nsp, fp_outwfI, ipar0 ); + testwf6( outwfI, "ixxxxx", ievt, nsp, -fmass ); + } + // Test ipzxxx - ASSUMPTIONS: (FMASS == 0) and (PX == PY == 0 and E == +PZ > 0) + if( mass0[ievt] == 0 && !isptgt0[ievt] && ispzgt0[ievt] ) + { + ipzxxx( ievt0Momenta, nhel, nsp, fp_outwf, ipar0 ); + testwf6two( outwf, outwfI, "ipzxxx", ievt ); + testwf6( outwf, "ipzxxx", ievt, nsp, 0 ); + } + // Test imzxxx - ASSUMPTIONS: (FMASS == 0) and (PX == PY == 0 and E == -PZ > 0) + if( mass0[ievt] == 0 && !isptgt0[ievt] && ispzlt0[ievt] ) + { + imzxxx( ievt0Momenta, nhel, nsp, fp_outwf, ipar0 ); + testwf6two( outwf, outwfI, "imzxxx", ievt ); + testwf6( outwf, "imzxxx", ievt, nsp, 0 ); + } + // Test ixzxxx - ASSUMPTIONS: (FMASS == 0) and (PT > 0) + if( mass0[ievt] == 0 && isptgt0[ievt] ) + { + ixzxxx( ievt0Momenta, nhel, nsp, fp_outwf, ipar0 ); + testwf6two( outwf, outwfI, "ixzxxx", ievt ); + testwf6( outwf, "ixzxxx", ievt, nsp, 0 ); + } + // Test vxxxxx - NO ASSUMPTIONS + { + const fptype vmass = mass0[ievt]; + vxxxxx( ievt0Momenta, vmass, nhel, nsp, fp_outwf, ipar0 ); + testwf6( outwf, "vxxxxx", ievt, nsp, vmass ); + vxxxxx( ievt0Momenta, -vmass, nhel, nsp, fp_outwf, ipar0 ); + testwf6( outwf, "vxxxxx", ievt, nsp, -vmass ); + } + // Test sxxxxx - NO ASSUMPTIONS + { + const fptype smass = mass0[ievt]; + sxxxxx( ievt0Momenta, nsp, fp_outwf3, ipar0 ); // no mass, no helicity (was "smass>0") + testwf6( outwf3, "sxxxxx", ievt, nsp, smass ); + sxxxxx( ievt0Momenta, nsp, fp_outwf3, ipar0 ); // no mass, no helicity (was "smass<0") + testwf6( outwf3, "sxxxxx", ievt, nsp, -smass ); + } + // Test oxxxxx - NO ASSUMPTIONS + { + const fptype fmass = mass0[ievt]; + oxxxxx( ievt0Momenta, fmass, nhel, nsp, fp_outwfO, ipar0 ); + testwf6( outwfO, "oxxxxx", ievt, nsp, fmass ); + oxxxxx( ievt0Momenta, -fmass, nhel, nsp, fp_outwfO, ipar0 ); + testwf6( outwfO, "oxxxxx", ievt, nsp, -fmass ); + } + // Test opzxxx - ASSUMPTIONS: (FMASS == 0) and (PX == PY == 0 and E == +PZ > 0) + if( mass0[ievt] == 0 && !isptgt0[ievt] && ispzgt0[ievt] ) + { + opzxxx( ievt0Momenta, nhel, nsp, fp_outwf, ipar0 ); + testwf6two( outwf, outwfO, "opzxxx", ievt ); + testwf6( outwf, "opzxxx", ievt, nsp, 0 ); + } + // Test omzxxx - ASSUMPTIONS: (FMASS == 0) and (PX == PY == 0 and E == -PZ > 0) + if( mass0[ievt] == 0 && !isptgt0[ievt] && ispzlt0[ievt] ) + { + omzxxx( ievt0Momenta, nhel, nsp, fp_outwf, ipar0 ); + testwf6two( outwf, outwfO, "omzxxx", ievt ); + testwf6( outwf, "omzxxx", ievt, nsp, 0 ); + } + // Test oxzxxx - ASSUMPTIONS: (FMASS == 0) and (PT > 0) + if( mass0[ievt] == 0 && isptgt0[ievt] ) + { + oxzxxx( ievt0Momenta, nhel, nsp, reinterpret_cast( outwf ), ipar0 ); + testwf6two( outwf, outwfO, "oxzxxx", ievt ); + testwf6( outwf, "oxzxxx", ievt, nsp, 0 ); + } + } + } + if( dumpEvents ) + { + dumpFile.close(); + std::cout << "INFO: New reference data dumped to file '" << dumpFileName << "'" << std::endl; + } +} + +//========================================================================== diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/testxxx_cc_ref.txt b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/testxxx_cc_ref.txt new file mode 100644 index 0000000000..8bc0384a68 --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/testxxx_cc_ref.txt @@ -0,0 +1,2044 @@ + expwfs.push_back( { // --------- + 5.000000000000000e+02, 5.000000000000000e+02, // itest=0: ixxxxx#0 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=0: ixxxxx#0 nsp=-1 mass=0 + 0.000000000000000e+00, 
-0.000000000000000e+00, // itest=0: ixxxxx#0 nsp=-1 mass=0 + -3.162277660168379e+01, 0.000000000000000e+00, // itest=0: ixxxxx#0 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=0: ixxxxx#0 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=0: ixxxxx#0 nsp=-1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 5.000000000000000e+02, // itest=1: ixxxxx#0 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=1: ixxxxx#0 nsp=-1 mass=0 + 0.000000000000000e+00, -0.000000000000000e+00, // itest=1: ixxxxx#0 nsp=-1 mass=0 + -3.162277660168379e+01, 0.000000000000000e+00, // itest=1: ixxxxx#0 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=1: ixxxxx#0 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=1: ixxxxx#0 nsp=-1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 5.000000000000000e+02, // itest=2: ipzxxx#0 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=2: ipzxxx#0 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=2: ipzxxx#0 nsp=-1 mass=0 + -3.162277660168379e+01, 0.000000000000000e+00, // itest=2: ipzxxx#0 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=2: ipzxxx#0 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=2: ipzxxx#0 nsp=-1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -5.000000000000000e+02, // itest=3: vxxxxx#0 nsp=-1 mass=0 + -0.000000000000000e+00, -0.000000000000000e+00, // itest=3: vxxxxx#0 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=3: vxxxxx#0 nsp=-1 mass=0 + -7.071067811865476e-01, 0.000000000000000e+00, // itest=3: vxxxxx#0 nsp=-1 mass=0 + 0.000000000000000e+00, -7.071067811865476e-01, // itest=3: vxxxxx#0 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=3: vxxxxx#0 nsp=-1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -5.000000000000000e+02, // itest=4: vxxxxx#0 nsp=-1 mass=0 + -0.000000000000000e+00, -0.000000000000000e+00, // itest=4: vxxxxx#0 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=4: vxxxxx#0 nsp=-1 mass=0 + -7.071067811865476e-01, 0.000000000000000e+00, // itest=4: vxxxxx#0 nsp=-1 mass=0 + 0.000000000000000e+00, -7.071067811865476e-01, // itest=4: vxxxxx#0 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=4: vxxxxx#0 nsp=-1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -5.000000000000000e+02, // itest=5: sxxxxx#0 nsp=-1 mass=0 + -0.000000000000000e+00, -0.000000000000000e+00, // itest=5: sxxxxx#0 nsp=-1 mass=0 + 1.000000000000000e+00, 0.000000000000000e+00, // itest=5: sxxxxx#0 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=5: sxxxxx#0 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=5: sxxxxx#0 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=5: sxxxxx#0 nsp=-1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -5.000000000000000e+02, // itest=6: sxxxxx#0 nsp=-1 mass=0 + -0.000000000000000e+00, -0.000000000000000e+00, // itest=6: sxxxxx#0 nsp=-1 mass=0 + 1.000000000000000e+00, 0.000000000000000e+00, // itest=6: sxxxxx#0 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=6: sxxxxx#0 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=6: sxxxxx#0 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=6: sxxxxx#0 nsp=-1 
mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -5.000000000000000e+02, // itest=7: oxxxxx#0 nsp=-1 mass=0 + -0.000000000000000e+00, -0.000000000000000e+00, // itest=7: oxxxxx#0 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=7: oxxxxx#0 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=7: oxxxxx#0 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=7: oxxxxx#0 nsp=-1 mass=0 + -3.162277660168379e+01, 0.000000000000000e+00 } ); // itest=7: oxxxxx#0 nsp=-1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -5.000000000000000e+02, // itest=8: oxxxxx#0 nsp=-1 mass=0 + -0.000000000000000e+00, -0.000000000000000e+00, // itest=8: oxxxxx#0 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=8: oxxxxx#0 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=8: oxxxxx#0 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=8: oxxxxx#0 nsp=-1 mass=0 + -3.162277660168379e+01, 0.000000000000000e+00 } ); // itest=8: oxxxxx#0 nsp=-1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -5.000000000000000e+02, // itest=9: opzxxx#0 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=9: opzxxx#0 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=9: opzxxx#0 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=9: opzxxx#0 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=9: opzxxx#0 nsp=-1 mass=0 + -3.162277660168379e+01, 0.000000000000000e+00 } ); // itest=9: opzxxx#0 nsp=-1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, -5.000000000000000e+02, // itest=10: ixxxxx#1 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=10: ixxxxx#1 nsp=-1 mass=0 + -3.162277660168379e+01, 0.000000000000000e+00, // itest=10: ixxxxx#1 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=10: ixxxxx#1 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=10: ixxxxx#1 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=10: ixxxxx#1 nsp=-1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, -5.000000000000000e+02, // itest=11: ixxxxx#1 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=11: ixxxxx#1 nsp=-1 mass=0 + -3.162277660168379e+01, -0.000000000000000e+00, // itest=11: ixxxxx#1 nsp=-1 mass=0 + -0.000000000000000e+00, 0.000000000000000e+00, // itest=11: ixxxxx#1 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=11: ixxxxx#1 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=11: ixxxxx#1 nsp=-1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, -5.000000000000000e+02, // itest=12: imzxxx#1 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=12: imzxxx#1 nsp=-1 mass=0 + -3.162277660168379e+01, 0.000000000000000e+00, // itest=12: imzxxx#1 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=12: imzxxx#1 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=12: imzxxx#1 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=12: imzxxx#1 nsp=-1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, 5.000000000000000e+02, // itest=13: vxxxxx#1 nsp=-1 mass=0 + -0.000000000000000e+00, -0.000000000000000e+00, // itest=13: vxxxxx#1 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=13: vxxxxx#1 nsp=-1 
mass=0 + -7.071067811865476e-01, 0.000000000000000e+00, // itest=13: vxxxxx#1 nsp=-1 mass=0 + 0.000000000000000e+00, 7.071067811865476e-01, // itest=13: vxxxxx#1 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=13: vxxxxx#1 nsp=-1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, 5.000000000000000e+02, // itest=14: vxxxxx#1 nsp=-1 mass=0 + -0.000000000000000e+00, -0.000000000000000e+00, // itest=14: vxxxxx#1 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=14: vxxxxx#1 nsp=-1 mass=0 + -7.071067811865476e-01, 0.000000000000000e+00, // itest=14: vxxxxx#1 nsp=-1 mass=0 + 0.000000000000000e+00, 7.071067811865476e-01, // itest=14: vxxxxx#1 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=14: vxxxxx#1 nsp=-1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, 5.000000000000000e+02, // itest=15: sxxxxx#1 nsp=-1 mass=0 + -0.000000000000000e+00, -0.000000000000000e+00, // itest=15: sxxxxx#1 nsp=-1 mass=0 + 1.000000000000000e+00, 0.000000000000000e+00, // itest=15: sxxxxx#1 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=15: sxxxxx#1 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=15: sxxxxx#1 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=15: sxxxxx#1 nsp=-1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, 5.000000000000000e+02, // itest=16: sxxxxx#1 nsp=-1 mass=0 + -0.000000000000000e+00, -0.000000000000000e+00, // itest=16: sxxxxx#1 nsp=-1 mass=0 + 1.000000000000000e+00, 0.000000000000000e+00, // itest=16: sxxxxx#1 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=16: sxxxxx#1 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=16: sxxxxx#1 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=16: sxxxxx#1 nsp=-1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, 5.000000000000000e+02, // itest=17: oxxxxx#1 nsp=-1 mass=0 + -0.000000000000000e+00, -0.000000000000000e+00, // itest=17: oxxxxx#1 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=17: oxxxxx#1 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=17: oxxxxx#1 nsp=-1 mass=0 + -3.162277660168379e+01, 0.000000000000000e+00, // itest=17: oxxxxx#1 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=17: oxxxxx#1 nsp=-1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, 5.000000000000000e+02, // itest=18: oxxxxx#1 nsp=-1 mass=0 + -0.000000000000000e+00, -0.000000000000000e+00, // itest=18: oxxxxx#1 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=18: oxxxxx#1 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=18: oxxxxx#1 nsp=-1 mass=0 + -3.162277660168379e+01, -0.000000000000000e+00, // itest=18: oxxxxx#1 nsp=-1 mass=0 + -0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=18: oxxxxx#1 nsp=-1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, 5.000000000000000e+02, // itest=19: omzxxx#1 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=19: omzxxx#1 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=19: omzxxx#1 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=19: omzxxx#1 nsp=-1 mass=0 + -3.162277660168379e+01, 0.000000000000000e+00, // itest=19: omzxxx#1 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=19: omzxxx#1 nsp=-1 mass=0 + 
expwfs.push_back( { // --------- + 5.000000000000000e+02, 0.000000000000000e+00, // itest=20: ixxxxx#2 nsp=-1 mass=0 + 3.000000000000000e+02, 4.000000000000000e+02, // itest=20: ixxxxx#2 nsp=-1 mass=0 + 1.341640786499874e+01, -1.788854381999832e+01, // itest=20: ixxxxx#2 nsp=-1 mass=0 + -2.236067977499790e+01, 0.000000000000000e+00, // itest=20: ixxxxx#2 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=20: ixxxxx#2 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=20: ixxxxx#2 nsp=-1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 0.000000000000000e+00, // itest=21: ixxxxx#2 nsp=-1 mass=0 + 3.000000000000000e+02, 4.000000000000000e+02, // itest=21: ixxxxx#2 nsp=-1 mass=0 + 1.341640786499874e+01, -1.788854381999832e+01, // itest=21: ixxxxx#2 nsp=-1 mass=0 + -2.236067977499790e+01, 0.000000000000000e+00, // itest=21: ixxxxx#2 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=21: ixxxxx#2 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=21: ixxxxx#2 nsp=-1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 0.000000000000000e+00, // itest=22: ixzxxx#2 nsp=-1 mass=0 + 3.000000000000000e+02, 4.000000000000000e+02, // itest=22: ixzxxx#2 nsp=-1 mass=0 + 1.341640786499874e+01, -1.788854381999832e+01, // itest=22: ixzxxx#2 nsp=-1 mass=0 + -2.236067977499790e+01, 0.000000000000000e+00, // itest=22: ixzxxx#2 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=22: ixzxxx#2 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=22: ixzxxx#2 nsp=-1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -0.000000000000000e+00, // itest=23: vxxxxx#2 nsp=-1 mass=0 + -3.000000000000000e+02, -4.000000000000000e+02, // itest=23: vxxxxx#2 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=23: vxxxxx#2 nsp=-1 mass=0 + -0.000000000000000e+00, 5.656854249492381e-01, // itest=23: vxxxxx#2 nsp=-1 mass=0 + -0.000000000000000e+00, -4.242640687119285e-01, // itest=23: vxxxxx#2 nsp=-1 mass=0 + 7.071067811865476e-01, 0.000000000000000e+00 } ); // itest=23: vxxxxx#2 nsp=-1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -0.000000000000000e+00, // itest=24: vxxxxx#2 nsp=-1 mass=0 + -3.000000000000000e+02, -4.000000000000000e+02, // itest=24: vxxxxx#2 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=24: vxxxxx#2 nsp=-1 mass=0 + -0.000000000000000e+00, 5.656854249492381e-01, // itest=24: vxxxxx#2 nsp=-1 mass=0 + -0.000000000000000e+00, -4.242640687119285e-01, // itest=24: vxxxxx#2 nsp=-1 mass=0 + 7.071067811865476e-01, 0.000000000000000e+00 } ); // itest=24: vxxxxx#2 nsp=-1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -0.000000000000000e+00, // itest=25: sxxxxx#2 nsp=-1 mass=0 + -3.000000000000000e+02, -4.000000000000000e+02, // itest=25: sxxxxx#2 nsp=-1 mass=0 + 1.000000000000000e+00, 0.000000000000000e+00, // itest=25: sxxxxx#2 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=25: sxxxxx#2 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=25: sxxxxx#2 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=25: sxxxxx#2 nsp=-1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -0.000000000000000e+00, // itest=26: sxxxxx#2 nsp=-1 mass=0 + -3.000000000000000e+02, -4.000000000000000e+02, // itest=26: sxxxxx#2 nsp=-1 mass=0 + 1.000000000000000e+00, 0.000000000000000e+00, // itest=26: 
sxxxxx#2 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=26: sxxxxx#2 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=26: sxxxxx#2 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=26: sxxxxx#2 nsp=-1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -0.000000000000000e+00, // itest=27: oxxxxx#2 nsp=-1 mass=0 + -3.000000000000000e+02, -4.000000000000000e+02, // itest=27: oxxxxx#2 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=27: oxxxxx#2 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=27: oxxxxx#2 nsp=-1 mass=0 + 1.341640786499874e+01, 1.788854381999832e+01, // itest=27: oxxxxx#2 nsp=-1 mass=0 + -2.236067977499790e+01, 0.000000000000000e+00 } ); // itest=27: oxxxxx#2 nsp=-1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -0.000000000000000e+00, // itest=28: oxxxxx#2 nsp=-1 mass=0 + -3.000000000000000e+02, -4.000000000000000e+02, // itest=28: oxxxxx#2 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=28: oxxxxx#2 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=28: oxxxxx#2 nsp=-1 mass=0 + 1.341640786499874e+01, 1.788854381999832e+01, // itest=28: oxxxxx#2 nsp=-1 mass=0 + -2.236067977499790e+01, 0.000000000000000e+00 } ); // itest=28: oxxxxx#2 nsp=-1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -0.000000000000000e+00, // itest=29: oxzxxx#2 nsp=-1 mass=0 + -3.000000000000000e+02, -4.000000000000000e+02, // itest=29: oxzxxx#2 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=29: oxzxxx#2 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=29: oxzxxx#2 nsp=-1 mass=0 + 1.341640786499874e+01, 1.788854381999832e+01, // itest=29: oxzxxx#2 nsp=-1 mass=0 + -2.236067977499790e+01, 0.000000000000000e+00 } ); // itest=29: oxzxxx#2 nsp=-1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 4.000000000000000e+02, // itest=30: ixxxxx#3 nsp=-1 mass=0 + 1.800000000000000e+02, 2.400000000000000e+02, // itest=30: ixxxxx#3 nsp=-1 mass=0 + 6.000000000000000e+00, -8.000000000000000e+00, // itest=30: ixxxxx#3 nsp=-1 mass=0 + -3.000000000000000e+01, 0.000000000000000e+00, // itest=30: ixxxxx#3 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=30: ixxxxx#3 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=30: ixxxxx#3 nsp=-1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 4.000000000000000e+02, // itest=31: ixxxxx#3 nsp=-1 mass=0 + 1.800000000000000e+02, 2.400000000000000e+02, // itest=31: ixxxxx#3 nsp=-1 mass=0 + 6.000000000000000e+00, -8.000000000000000e+00, // itest=31: ixxxxx#3 nsp=-1 mass=0 + -3.000000000000000e+01, 0.000000000000000e+00, // itest=31: ixxxxx#3 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=31: ixxxxx#3 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=31: ixxxxx#3 nsp=-1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 4.000000000000000e+02, // itest=32: ixzxxx#3 nsp=-1 mass=0 + 1.800000000000000e+02, 2.400000000000000e+02, // itest=32: ixzxxx#3 nsp=-1 mass=0 + 6.000000000000000e+00, -8.000000000000000e+00, // itest=32: ixzxxx#3 nsp=-1 mass=0 + -3.000000000000000e+01, 0.000000000000000e+00, // itest=32: ixzxxx#3 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=32: ixzxxx#3 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=32: ixzxxx#3 nsp=-1 
mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -4.000000000000000e+02, // itest=33: vxxxxx#3 nsp=-1 mass=0 + -1.800000000000000e+02, -2.400000000000000e+02, // itest=33: vxxxxx#3 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=33: vxxxxx#3 nsp=-1 mass=0 + -3.394112549695428e-01, 5.656854249492381e-01, // itest=33: vxxxxx#3 nsp=-1 mass=0 + -4.525483399593904e-01, -4.242640687119285e-01, // itest=33: vxxxxx#3 nsp=-1 mass=0 + 4.242640687119285e-01, 0.000000000000000e+00 } ); // itest=33: vxxxxx#3 nsp=-1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -4.000000000000000e+02, // itest=34: vxxxxx#3 nsp=-1 mass=0 + -1.800000000000000e+02, -2.400000000000000e+02, // itest=34: vxxxxx#3 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=34: vxxxxx#3 nsp=-1 mass=0 + -3.394112549695428e-01, 5.656854249492381e-01, // itest=34: vxxxxx#3 nsp=-1 mass=0 + -4.525483399593904e-01, -4.242640687119285e-01, // itest=34: vxxxxx#3 nsp=-1 mass=0 + 4.242640687119285e-01, 0.000000000000000e+00 } ); // itest=34: vxxxxx#3 nsp=-1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -4.000000000000000e+02, // itest=35: sxxxxx#3 nsp=-1 mass=0 + -1.800000000000000e+02, -2.400000000000000e+02, // itest=35: sxxxxx#3 nsp=-1 mass=0 + 1.000000000000000e+00, 0.000000000000000e+00, // itest=35: sxxxxx#3 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=35: sxxxxx#3 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=35: sxxxxx#3 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=35: sxxxxx#3 nsp=-1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -4.000000000000000e+02, // itest=36: sxxxxx#3 nsp=-1 mass=0 + -1.800000000000000e+02, -2.400000000000000e+02, // itest=36: sxxxxx#3 nsp=-1 mass=0 + 1.000000000000000e+00, 0.000000000000000e+00, // itest=36: sxxxxx#3 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=36: sxxxxx#3 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=36: sxxxxx#3 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=36: sxxxxx#3 nsp=-1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -4.000000000000000e+02, // itest=37: oxxxxx#3 nsp=-1 mass=0 + -1.800000000000000e+02, -2.400000000000000e+02, // itest=37: oxxxxx#3 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=37: oxxxxx#3 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=37: oxxxxx#3 nsp=-1 mass=0 + 6.000000000000000e+00, 8.000000000000000e+00, // itest=37: oxxxxx#3 nsp=-1 mass=0 + -3.000000000000000e+01, 0.000000000000000e+00 } ); // itest=37: oxxxxx#3 nsp=-1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -4.000000000000000e+02, // itest=38: oxxxxx#3 nsp=-1 mass=0 + -1.800000000000000e+02, -2.400000000000000e+02, // itest=38: oxxxxx#3 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=38: oxxxxx#3 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=38: oxxxxx#3 nsp=-1 mass=0 + 6.000000000000000e+00, 8.000000000000000e+00, // itest=38: oxxxxx#3 nsp=-1 mass=0 + -3.000000000000000e+01, 0.000000000000000e+00 } ); // itest=38: oxxxxx#3 nsp=-1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -4.000000000000000e+02, // itest=39: oxzxxx#3 nsp=-1 mass=0 + -1.800000000000000e+02, -2.400000000000000e+02, // itest=39: oxzxxx#3 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // 
itest=39: oxzxxx#3 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=39: oxzxxx#3 nsp=-1 mass=0 + 6.000000000000000e+00, 8.000000000000000e+00, // itest=39: oxzxxx#3 nsp=-1 mass=0 + -3.000000000000000e+01, 0.000000000000000e+00 } ); // itest=39: oxzxxx#3 nsp=-1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, -4.000000000000000e+02, // itest=40: ixxxxx#4 nsp=-1 mass=0 + 1.800000000000000e+02, 2.400000000000000e+02, // itest=40: ixxxxx#4 nsp=-1 mass=0 + 1.800000000000000e+01, -2.400000000000000e+01, // itest=40: ixxxxx#4 nsp=-1 mass=0 + -1.000000000000000e+01, 0.000000000000000e+00, // itest=40: ixxxxx#4 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=40: ixxxxx#4 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=40: ixxxxx#4 nsp=-1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, -4.000000000000000e+02, // itest=41: ixxxxx#4 nsp=-1 mass=0 + 1.800000000000000e+02, 2.400000000000000e+02, // itest=41: ixxxxx#4 nsp=-1 mass=0 + 1.800000000000000e+01, -2.400000000000000e+01, // itest=41: ixxxxx#4 nsp=-1 mass=0 + -1.000000000000000e+01, 0.000000000000000e+00, // itest=41: ixxxxx#4 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=41: ixxxxx#4 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=41: ixxxxx#4 nsp=-1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, -4.000000000000000e+02, // itest=42: ixzxxx#4 nsp=-1 mass=0 + 1.800000000000000e+02, 2.400000000000000e+02, // itest=42: ixzxxx#4 nsp=-1 mass=0 + 1.800000000000000e+01, -2.400000000000000e+01, // itest=42: ixzxxx#4 nsp=-1 mass=0 + -1.000000000000000e+01, 0.000000000000000e+00, // itest=42: ixzxxx#4 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=42: ixzxxx#4 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=42: ixzxxx#4 nsp=-1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, 4.000000000000000e+02, // itest=43: vxxxxx#4 nsp=-1 mass=0 + -1.800000000000000e+02, -2.400000000000000e+02, // itest=43: vxxxxx#4 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=43: vxxxxx#4 nsp=-1 mass=0 + 3.394112549695428e-01, 5.656854249492381e-01, // itest=43: vxxxxx#4 nsp=-1 mass=0 + 4.525483399593904e-01, -4.242640687119285e-01, // itest=43: vxxxxx#4 nsp=-1 mass=0 + 4.242640687119285e-01, 0.000000000000000e+00 } ); // itest=43: vxxxxx#4 nsp=-1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, 4.000000000000000e+02, // itest=44: vxxxxx#4 nsp=-1 mass=0 + -1.800000000000000e+02, -2.400000000000000e+02, // itest=44: vxxxxx#4 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=44: vxxxxx#4 nsp=-1 mass=0 + 3.394112549695428e-01, 5.656854249492381e-01, // itest=44: vxxxxx#4 nsp=-1 mass=0 + 4.525483399593904e-01, -4.242640687119285e-01, // itest=44: vxxxxx#4 nsp=-1 mass=0 + 4.242640687119285e-01, 0.000000000000000e+00 } ); // itest=44: vxxxxx#4 nsp=-1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, 4.000000000000000e+02, // itest=45: sxxxxx#4 nsp=-1 mass=0 + -1.800000000000000e+02, -2.400000000000000e+02, // itest=45: sxxxxx#4 nsp=-1 mass=0 + 1.000000000000000e+00, 0.000000000000000e+00, // itest=45: sxxxxx#4 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=45: sxxxxx#4 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=45: sxxxxx#4 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=45: sxxxxx#4 
nsp=-1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, 4.000000000000000e+02, // itest=46: sxxxxx#4 nsp=-1 mass=0 + -1.800000000000000e+02, -2.400000000000000e+02, // itest=46: sxxxxx#4 nsp=-1 mass=0 + 1.000000000000000e+00, 0.000000000000000e+00, // itest=46: sxxxxx#4 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=46: sxxxxx#4 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=46: sxxxxx#4 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=46: sxxxxx#4 nsp=-1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, 4.000000000000000e+02, // itest=47: oxxxxx#4 nsp=-1 mass=0 + -1.800000000000000e+02, -2.400000000000000e+02, // itest=47: oxxxxx#4 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=47: oxxxxx#4 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=47: oxxxxx#4 nsp=-1 mass=0 + 1.800000000000000e+01, 2.400000000000000e+01, // itest=47: oxxxxx#4 nsp=-1 mass=0 + -1.000000000000000e+01, 0.000000000000000e+00 } ); // itest=47: oxxxxx#4 nsp=-1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, 4.000000000000000e+02, // itest=48: oxxxxx#4 nsp=-1 mass=0 + -1.800000000000000e+02, -2.400000000000000e+02, // itest=48: oxxxxx#4 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=48: oxxxxx#4 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=48: oxxxxx#4 nsp=-1 mass=0 + 1.800000000000000e+01, 2.400000000000000e+01, // itest=48: oxxxxx#4 nsp=-1 mass=0 + -1.000000000000000e+01, 0.000000000000000e+00 } ); // itest=48: oxxxxx#4 nsp=-1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, 4.000000000000000e+02, // itest=49: oxzxxx#4 nsp=-1 mass=0 + -1.800000000000000e+02, -2.400000000000000e+02, // itest=49: oxzxxx#4 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=49: oxzxxx#4 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=49: oxzxxx#4 nsp=-1 mass=0 + 1.800000000000000e+01, 2.400000000000000e+01, // itest=49: oxzxxx#4 nsp=-1 mass=0 + -1.000000000000000e+01, 0.000000000000000e+00 } ); // itest=49: oxzxxx#4 nsp=-1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 0.000000000000000e+00, // itest=50: ixxxxx#5 nsp=-1 mass=500 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=50: ixxxxx#5 nsp=-1 mass=500 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=50: ixxxxx#5 nsp=-1 mass=500 + -2.236067977499790e+01, 0.000000000000000e+00, // itest=50: ixxxxx#5 nsp=-1 mass=500 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=50: ixxxxx#5 nsp=-1 mass=500 + 2.236067977499790e+01, 0.000000000000000e+00 } ); // itest=50: ixxxxx#5 nsp=-1 mass=500 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 0.000000000000000e+00, // itest=51: ixxxxx#5 nsp=-1 mass=-500 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=51: ixxxxx#5 nsp=-1 mass=-500 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=51: ixxxxx#5 nsp=-1 mass=-500 + -2.236067977499790e+01, 0.000000000000000e+00, // itest=51: ixxxxx#5 nsp=-1 mass=-500 + -0.000000000000000e+00, 0.000000000000000e+00, // itest=51: ixxxxx#5 nsp=-1 mass=-500 + -2.236067977499790e+01, 0.000000000000000e+00 } ); // itest=51: ixxxxx#5 nsp=-1 mass=-500 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -0.000000000000000e+00, // itest=52: vxxxxx#5 nsp=-1 mass=500 + -0.000000000000000e+00, -0.000000000000000e+00, // itest=52: vxxxxx#5 nsp=-1 mass=500 + 
0.000000000000000e+00, 0.000000000000000e+00, // itest=52: vxxxxx#5 nsp=-1 mass=500 + -7.071067811865476e-01, 0.000000000000000e+00, // itest=52: vxxxxx#5 nsp=-1 mass=500 + 0.000000000000000e+00, -7.071067811865476e-01, // itest=52: vxxxxx#5 nsp=-1 mass=500 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=52: vxxxxx#5 nsp=-1 mass=500 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -0.000000000000000e+00, // itest=53: vxxxxx#5 nsp=-1 mass=-500 + -0.000000000000000e+00, -0.000000000000000e+00, // itest=53: vxxxxx#5 nsp=-1 mass=-500 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=53: vxxxxx#5 nsp=-1 mass=-500 + -7.071067811865476e-01, 0.000000000000000e+00, // itest=53: vxxxxx#5 nsp=-1 mass=-500 + 0.000000000000000e+00, -7.071067811865476e-01, // itest=53: vxxxxx#5 nsp=-1 mass=-500 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=53: vxxxxx#5 nsp=-1 mass=-500 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -0.000000000000000e+00, // itest=54: sxxxxx#5 nsp=-1 mass=500 + -0.000000000000000e+00, -0.000000000000000e+00, // itest=54: sxxxxx#5 nsp=-1 mass=500 + 1.000000000000000e+00, 0.000000000000000e+00, // itest=54: sxxxxx#5 nsp=-1 mass=500 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=54: sxxxxx#5 nsp=-1 mass=500 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=54: sxxxxx#5 nsp=-1 mass=500 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=54: sxxxxx#5 nsp=-1 mass=500 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -0.000000000000000e+00, // itest=55: sxxxxx#5 nsp=-1 mass=-500 + -0.000000000000000e+00, -0.000000000000000e+00, // itest=55: sxxxxx#5 nsp=-1 mass=-500 + 1.000000000000000e+00, 0.000000000000000e+00, // itest=55: sxxxxx#5 nsp=-1 mass=-500 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=55: sxxxxx#5 nsp=-1 mass=-500 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=55: sxxxxx#5 nsp=-1 mass=-500 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=55: sxxxxx#5 nsp=-1 mass=-500 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -0.000000000000000e+00, // itest=56: oxxxxx#5 nsp=-1 mass=500 + -0.000000000000000e+00, -0.000000000000000e+00, // itest=56: oxxxxx#5 nsp=-1 mass=500 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=56: oxxxxx#5 nsp=-1 mass=500 + 2.236067977499790e+01, 0.000000000000000e+00, // itest=56: oxxxxx#5 nsp=-1 mass=500 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=56: oxxxxx#5 nsp=-1 mass=500 + -2.236067977499790e+01, 0.000000000000000e+00 } ); // itest=56: oxxxxx#5 nsp=-1 mass=500 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -0.000000000000000e+00, // itest=57: oxxxxx#5 nsp=-1 mass=-500 + -0.000000000000000e+00, -0.000000000000000e+00, // itest=57: oxxxxx#5 nsp=-1 mass=-500 + -0.000000000000000e+00, 0.000000000000000e+00, // itest=57: oxxxxx#5 nsp=-1 mass=-500 + -2.236067977499790e+01, 0.000000000000000e+00, // itest=57: oxxxxx#5 nsp=-1 mass=-500 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=57: oxxxxx#5 nsp=-1 mass=-500 + -2.236067977499790e+01, 0.000000000000000e+00 } ); // itest=57: oxxxxx#5 nsp=-1 mass=-500 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 3.000000000000000e+02, // itest=58: ixxxxx#6 nsp=-1 mass=400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=58: ixxxxx#6 nsp=-1 mass=400 + 0.000000000000000e+00, -0.000000000000000e+00, // itest=58: ixxxxx#6 nsp=-1 mass=400 + -2.828427124746190e+01, -0.000000000000000e+00, // itest=58: ixxxxx#6 nsp=-1 mass=400 + 
-0.000000000000000e+00, 0.000000000000000e+00, // itest=58: ixxxxx#6 nsp=-1 mass=400 + 1.414213562373095e+01, 0.000000000000000e+00 } ); // itest=58: ixxxxx#6 nsp=-1 mass=400 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 3.000000000000000e+02, // itest=59: ixxxxx#6 nsp=-1 mass=-400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=59: ixxxxx#6 nsp=-1 mass=-400 + 0.000000000000000e+00, -0.000000000000000e+00, // itest=59: ixxxxx#6 nsp=-1 mass=-400 + -2.828427124746190e+01, -0.000000000000000e+00, // itest=59: ixxxxx#6 nsp=-1 mass=-400 + 0.000000000000000e+00, -0.000000000000000e+00, // itest=59: ixxxxx#6 nsp=-1 mass=-400 + -1.414213562373095e+01, -0.000000000000000e+00 } ); // itest=59: ixxxxx#6 nsp=-1 mass=-400 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -3.000000000000000e+02, // itest=60: vxxxxx#6 nsp=-1 mass=400 + -0.000000000000000e+00, -0.000000000000000e+00, // itest=60: vxxxxx#6 nsp=-1 mass=400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=60: vxxxxx#6 nsp=-1 mass=400 + -7.071067811865476e-01, 0.000000000000000e+00, // itest=60: vxxxxx#6 nsp=-1 mass=400 + 0.000000000000000e+00, -7.071067811865476e-01, // itest=60: vxxxxx#6 nsp=-1 mass=400 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=60: vxxxxx#6 nsp=-1 mass=400 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -3.000000000000000e+02, // itest=61: vxxxxx#6 nsp=-1 mass=-400 + -0.000000000000000e+00, -0.000000000000000e+00, // itest=61: vxxxxx#6 nsp=-1 mass=-400 + -0.000000000000000e+00, 0.000000000000000e+00, // itest=61: vxxxxx#6 nsp=-1 mass=-400 + -7.071067811865476e-01, 0.000000000000000e+00, // itest=61: vxxxxx#6 nsp=-1 mass=-400 + 0.000000000000000e+00, -7.071067811865476e-01, // itest=61: vxxxxx#6 nsp=-1 mass=-400 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=61: vxxxxx#6 nsp=-1 mass=-400 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -3.000000000000000e+02, // itest=62: sxxxxx#6 nsp=-1 mass=400 + -0.000000000000000e+00, -0.000000000000000e+00, // itest=62: sxxxxx#6 nsp=-1 mass=400 + 1.000000000000000e+00, 0.000000000000000e+00, // itest=62: sxxxxx#6 nsp=-1 mass=400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=62: sxxxxx#6 nsp=-1 mass=400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=62: sxxxxx#6 nsp=-1 mass=400 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=62: sxxxxx#6 nsp=-1 mass=400 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -3.000000000000000e+02, // itest=63: sxxxxx#6 nsp=-1 mass=-400 + -0.000000000000000e+00, -0.000000000000000e+00, // itest=63: sxxxxx#6 nsp=-1 mass=-400 + 1.000000000000000e+00, 0.000000000000000e+00, // itest=63: sxxxxx#6 nsp=-1 mass=-400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=63: sxxxxx#6 nsp=-1 mass=-400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=63: sxxxxx#6 nsp=-1 mass=-400 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=63: sxxxxx#6 nsp=-1 mass=-400 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -3.000000000000000e+02, // itest=64: oxxxxx#6 nsp=-1 mass=400 + -0.000000000000000e+00, -0.000000000000000e+00, // itest=64: oxxxxx#6 nsp=-1 mass=400 + -0.000000000000000e+00, -0.000000000000000e+00, // itest=64: oxxxxx#6 nsp=-1 mass=400 + 1.414213562373095e+01, 0.000000000000000e+00, // itest=64: oxxxxx#6 nsp=-1 mass=400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=64: oxxxxx#6 nsp=-1 mass=400 + -2.828427124746190e+01, -0.000000000000000e+00 } ); // itest=64: oxxxxx#6 nsp=-1 
mass=400 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -3.000000000000000e+02, // itest=65: oxxxxx#6 nsp=-1 mass=-400 + -0.000000000000000e+00, -0.000000000000000e+00, // itest=65: oxxxxx#6 nsp=-1 mass=-400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=65: oxxxxx#6 nsp=-1 mass=-400 + -1.414213562373095e+01, -0.000000000000000e+00, // itest=65: oxxxxx#6 nsp=-1 mass=-400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=65: oxxxxx#6 nsp=-1 mass=-400 + -2.828427124746190e+01, -0.000000000000000e+00 } ); // itest=65: oxxxxx#6 nsp=-1 mass=-400 + expwfs.push_back( { // --------- + 5.000000000000000e+02, -3.000000000000000e+02, // itest=66: ixxxxx#7 nsp=-1 mass=400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=66: ixxxxx#7 nsp=-1 mass=400 + -2.828427124746190e+01, -0.000000000000000e+00, // itest=66: ixxxxx#7 nsp=-1 mass=400 + -0.000000000000000e+00, -0.000000000000000e+00, // itest=66: ixxxxx#7 nsp=-1 mass=400 + 1.414213562373095e+01, 0.000000000000000e+00, // itest=66: ixxxxx#7 nsp=-1 mass=400 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=66: ixxxxx#7 nsp=-1 mass=400 + expwfs.push_back( { // --------- + 5.000000000000000e+02, -3.000000000000000e+02, // itest=67: ixxxxx#7 nsp=-1 mass=-400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=67: ixxxxx#7 nsp=-1 mass=-400 + -2.828427124746190e+01, -0.000000000000000e+00, // itest=67: ixxxxx#7 nsp=-1 mass=-400 + -0.000000000000000e+00, -0.000000000000000e+00, // itest=67: ixxxxx#7 nsp=-1 mass=-400 + -1.414213562373095e+01, -0.000000000000000e+00, // itest=67: ixxxxx#7 nsp=-1 mass=-400 + -0.000000000000000e+00, -0.000000000000000e+00 } ); // itest=67: ixxxxx#7 nsp=-1 mass=-400 + expwfs.push_back( { // --------- + -5.000000000000000e+02, 3.000000000000000e+02, // itest=68: vxxxxx#7 nsp=-1 mass=400 + -0.000000000000000e+00, -0.000000000000000e+00, // itest=68: vxxxxx#7 nsp=-1 mass=400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=68: vxxxxx#7 nsp=-1 mass=400 + -7.071067811865476e-01, 0.000000000000000e+00, // itest=68: vxxxxx#7 nsp=-1 mass=400 + 0.000000000000000e+00, 7.071067811865476e-01, // itest=68: vxxxxx#7 nsp=-1 mass=400 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=68: vxxxxx#7 nsp=-1 mass=400 + expwfs.push_back( { // --------- + -5.000000000000000e+02, 3.000000000000000e+02, // itest=69: vxxxxx#7 nsp=-1 mass=-400 + -0.000000000000000e+00, -0.000000000000000e+00, // itest=69: vxxxxx#7 nsp=-1 mass=-400 + -0.000000000000000e+00, 0.000000000000000e+00, // itest=69: vxxxxx#7 nsp=-1 mass=-400 + -7.071067811865476e-01, 0.000000000000000e+00, // itest=69: vxxxxx#7 nsp=-1 mass=-400 + 0.000000000000000e+00, 7.071067811865476e-01, // itest=69: vxxxxx#7 nsp=-1 mass=-400 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=69: vxxxxx#7 nsp=-1 mass=-400 + expwfs.push_back( { // --------- + -5.000000000000000e+02, 3.000000000000000e+02, // itest=70: sxxxxx#7 nsp=-1 mass=400 + -0.000000000000000e+00, -0.000000000000000e+00, // itest=70: sxxxxx#7 nsp=-1 mass=400 + 1.000000000000000e+00, 0.000000000000000e+00, // itest=70: sxxxxx#7 nsp=-1 mass=400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=70: sxxxxx#7 nsp=-1 mass=400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=70: sxxxxx#7 nsp=-1 mass=400 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=70: sxxxxx#7 nsp=-1 mass=400 + expwfs.push_back( { // --------- + -5.000000000000000e+02, 3.000000000000000e+02, // itest=71: sxxxxx#7 nsp=-1 mass=-400 + -0.000000000000000e+00, 
-0.000000000000000e+00, // itest=71: sxxxxx#7 nsp=-1 mass=-400 + 1.000000000000000e+00, 0.000000000000000e+00, // itest=71: sxxxxx#7 nsp=-1 mass=-400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=71: sxxxxx#7 nsp=-1 mass=-400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=71: sxxxxx#7 nsp=-1 mass=-400 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=71: sxxxxx#7 nsp=-1 mass=-400 + expwfs.push_back( { // --------- + -5.000000000000000e+02, 3.000000000000000e+02, // itest=72: oxxxxx#7 nsp=-1 mass=400 + -0.000000000000000e+00, -0.000000000000000e+00, // itest=72: oxxxxx#7 nsp=-1 mass=400 + 1.414213562373095e+01, 0.000000000000000e+00, // itest=72: oxxxxx#7 nsp=-1 mass=400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=72: oxxxxx#7 nsp=-1 mass=400 + -2.828427124746190e+01, -0.000000000000000e+00, // itest=72: oxxxxx#7 nsp=-1 mass=400 + -0.000000000000000e+00, -0.000000000000000e+00 } ); // itest=72: oxxxxx#7 nsp=-1 mass=400 + expwfs.push_back( { // --------- + -5.000000000000000e+02, 3.000000000000000e+02, // itest=73: oxxxxx#7 nsp=-1 mass=-400 + -0.000000000000000e+00, -0.000000000000000e+00, // itest=73: oxxxxx#7 nsp=-1 mass=-400 + -1.414213562373095e+01, -0.000000000000000e+00, // itest=73: oxxxxx#7 nsp=-1 mass=-400 + -0.000000000000000e+00, -0.000000000000000e+00, // itest=73: oxxxxx#7 nsp=-1 mass=-400 + -2.828427124746190e+01, -0.000000000000000e+00, // itest=73: oxxxxx#7 nsp=-1 mass=-400 + -0.000000000000000e+00, -0.000000000000000e+00 } ); // itest=73: oxxxxx#7 nsp=-1 mass=-400 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 0.000000000000000e+00, // itest=74: ixxxxx#8 nsp=-1 mass=400 + 1.800000000000000e+02, 2.400000000000000e+02, // itest=74: ixxxxx#8 nsp=-1 mass=400 + 1.200000000000000e+01, -1.600000000000000e+01, // itest=74: ixxxxx#8 nsp=-1 mass=400 + -2.000000000000000e+01, -0.000000000000000e+00, // itest=74: ixxxxx#8 nsp=-1 mass=400 + -5.999999999999999e+00, 7.999999999999999e+00, // itest=74: ixxxxx#8 nsp=-1 mass=400 + 1.000000000000000e+01, 0.000000000000000e+00 } ); // itest=74: ixxxxx#8 nsp=-1 mass=400 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 0.000000000000000e+00, // itest=75: ixxxxx#8 nsp=-1 mass=-400 + 1.800000000000000e+02, 2.400000000000000e+02, // itest=75: ixxxxx#8 nsp=-1 mass=-400 + 1.200000000000000e+01, -1.600000000000000e+01, // itest=75: ixxxxx#8 nsp=-1 mass=-400 + -2.000000000000000e+01, -0.000000000000000e+00, // itest=75: ixxxxx#8 nsp=-1 mass=-400 + 5.999999999999999e+00, -7.999999999999999e+00, // itest=75: ixxxxx#8 nsp=-1 mass=-400 + -1.000000000000000e+01, -0.000000000000000e+00 } ); // itest=75: ixxxxx#8 nsp=-1 mass=-400 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -0.000000000000000e+00, // itest=76: vxxxxx#8 nsp=-1 mass=400 + -1.800000000000000e+02, -2.400000000000000e+02, // itest=76: vxxxxx#8 nsp=-1 mass=400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=76: vxxxxx#8 nsp=-1 mass=400 + 0.000000000000000e+00, 5.656854249492381e-01, // itest=76: vxxxxx#8 nsp=-1 mass=400 + 0.000000000000000e+00, -4.242640687119285e-01, // itest=76: vxxxxx#8 nsp=-1 mass=400 + 7.071067811865476e-01, 0.000000000000000e+00 } ); // itest=76: vxxxxx#8 nsp=-1 mass=400 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -0.000000000000000e+00, // itest=77: vxxxxx#8 nsp=-1 mass=-400 + -1.800000000000000e+02, -2.400000000000000e+02, // itest=77: vxxxxx#8 nsp=-1 mass=-400 + -0.000000000000000e+00, 0.000000000000000e+00, // itest=77: vxxxxx#8 nsp=-1 mass=-400 + 
-0.000000000000000e+00, 5.656854249492381e-01, // itest=77: vxxxxx#8 nsp=-1 mass=-400 + -0.000000000000000e+00, -4.242640687119285e-01, // itest=77: vxxxxx#8 nsp=-1 mass=-400 + 7.071067811865476e-01, 0.000000000000000e+00 } ); // itest=77: vxxxxx#8 nsp=-1 mass=-400 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -0.000000000000000e+00, // itest=78: sxxxxx#8 nsp=-1 mass=400 + -1.800000000000000e+02, -2.400000000000000e+02, // itest=78: sxxxxx#8 nsp=-1 mass=400 + 1.000000000000000e+00, 0.000000000000000e+00, // itest=78: sxxxxx#8 nsp=-1 mass=400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=78: sxxxxx#8 nsp=-1 mass=400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=78: sxxxxx#8 nsp=-1 mass=400 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=78: sxxxxx#8 nsp=-1 mass=400 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -0.000000000000000e+00, // itest=79: sxxxxx#8 nsp=-1 mass=-400 + -1.800000000000000e+02, -2.400000000000000e+02, // itest=79: sxxxxx#8 nsp=-1 mass=-400 + 1.000000000000000e+00, 0.000000000000000e+00, // itest=79: sxxxxx#8 nsp=-1 mass=-400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=79: sxxxxx#8 nsp=-1 mass=-400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=79: sxxxxx#8 nsp=-1 mass=-400 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=79: sxxxxx#8 nsp=-1 mass=-400 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -0.000000000000000e+00, // itest=80: oxxxxx#8 nsp=-1 mass=400 + -1.800000000000000e+02, -2.400000000000000e+02, // itest=80: oxxxxx#8 nsp=-1 mass=400 + -5.999999999999999e+00, -7.999999999999999e+00, // itest=80: oxxxxx#8 nsp=-1 mass=400 + 1.000000000000000e+01, 0.000000000000000e+00, // itest=80: oxxxxx#8 nsp=-1 mass=400 + 1.200000000000000e+01, 1.600000000000000e+01, // itest=80: oxxxxx#8 nsp=-1 mass=400 + -2.000000000000000e+01, -0.000000000000000e+00 } ); // itest=80: oxxxxx#8 nsp=-1 mass=400 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -0.000000000000000e+00, // itest=81: oxxxxx#8 nsp=-1 mass=-400 + -1.800000000000000e+02, -2.400000000000000e+02, // itest=81: oxxxxx#8 nsp=-1 mass=-400 + 5.999999999999999e+00, 7.999999999999999e+00, // itest=81: oxxxxx#8 nsp=-1 mass=-400 + -1.000000000000000e+01, -0.000000000000000e+00, // itest=81: oxxxxx#8 nsp=-1 mass=-400 + 1.200000000000000e+01, 1.600000000000000e+01, // itest=81: oxxxxx#8 nsp=-1 mass=-400 + -2.000000000000000e+01, -0.000000000000000e+00 } ); // itest=81: oxxxxx#8 nsp=-1 mass=-400 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 0.000000000000000e+00, // itest=82: ixxxxx#9 nsp=-1 mass=400 + -2.400000000000000e+02, -1.800000000000000e+02, // itest=82: ixxxxx#9 nsp=-1 mass=400 + -1.600000000000000e+01, 1.200000000000000e+01, // itest=82: ixxxxx#9 nsp=-1 mass=400 + -2.000000000000000e+01, -0.000000000000000e+00, // itest=82: ixxxxx#9 nsp=-1 mass=400 + 7.999999999999999e+00, -5.999999999999999e+00, // itest=82: ixxxxx#9 nsp=-1 mass=400 + 1.000000000000000e+01, 0.000000000000000e+00 } ); // itest=82: ixxxxx#9 nsp=-1 mass=400 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 0.000000000000000e+00, // itest=83: ixxxxx#9 nsp=-1 mass=-400 + -2.400000000000000e+02, -1.800000000000000e+02, // itest=83: ixxxxx#9 nsp=-1 mass=-400 + -1.600000000000000e+01, 1.200000000000000e+01, // itest=83: ixxxxx#9 nsp=-1 mass=-400 + -2.000000000000000e+01, -0.000000000000000e+00, // itest=83: ixxxxx#9 nsp=-1 mass=-400 + -7.999999999999999e+00, 5.999999999999999e+00, // itest=83: ixxxxx#9 nsp=-1 
mass=-400 + -1.000000000000000e+01, -0.000000000000000e+00 } ); // itest=83: ixxxxx#9 nsp=-1 mass=-400 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -0.000000000000000e+00, // itest=84: vxxxxx#9 nsp=-1 mass=400 + 2.400000000000000e+02, 1.800000000000000e+02, // itest=84: vxxxxx#9 nsp=-1 mass=400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=84: vxxxxx#9 nsp=-1 mass=400 + 0.000000000000000e+00, -4.242640687119285e-01, // itest=84: vxxxxx#9 nsp=-1 mass=400 + 0.000000000000000e+00, 5.656854249492381e-01, // itest=84: vxxxxx#9 nsp=-1 mass=400 + 7.071067811865476e-01, 0.000000000000000e+00 } ); // itest=84: vxxxxx#9 nsp=-1 mass=400 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -0.000000000000000e+00, // itest=85: vxxxxx#9 nsp=-1 mass=-400 + 2.400000000000000e+02, 1.800000000000000e+02, // itest=85: vxxxxx#9 nsp=-1 mass=-400 + -0.000000000000000e+00, 0.000000000000000e+00, // itest=85: vxxxxx#9 nsp=-1 mass=-400 + 0.000000000000000e+00, -4.242640687119285e-01, // itest=85: vxxxxx#9 nsp=-1 mass=-400 + 0.000000000000000e+00, 5.656854249492381e-01, // itest=85: vxxxxx#9 nsp=-1 mass=-400 + 7.071067811865476e-01, 0.000000000000000e+00 } ); // itest=85: vxxxxx#9 nsp=-1 mass=-400 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -0.000000000000000e+00, // itest=86: sxxxxx#9 nsp=-1 mass=400 + 2.400000000000000e+02, 1.800000000000000e+02, // itest=86: sxxxxx#9 nsp=-1 mass=400 + 1.000000000000000e+00, 0.000000000000000e+00, // itest=86: sxxxxx#9 nsp=-1 mass=400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=86: sxxxxx#9 nsp=-1 mass=400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=86: sxxxxx#9 nsp=-1 mass=400 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=86: sxxxxx#9 nsp=-1 mass=400 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -0.000000000000000e+00, // itest=87: sxxxxx#9 nsp=-1 mass=-400 + 2.400000000000000e+02, 1.800000000000000e+02, // itest=87: sxxxxx#9 nsp=-1 mass=-400 + 1.000000000000000e+00, 0.000000000000000e+00, // itest=87: sxxxxx#9 nsp=-1 mass=-400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=87: sxxxxx#9 nsp=-1 mass=-400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=87: sxxxxx#9 nsp=-1 mass=-400 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=87: sxxxxx#9 nsp=-1 mass=-400 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -0.000000000000000e+00, // itest=88: oxxxxx#9 nsp=-1 mass=400 + 2.400000000000000e+02, 1.800000000000000e+02, // itest=88: oxxxxx#9 nsp=-1 mass=400 + 7.999999999999999e+00, 5.999999999999999e+00, // itest=88: oxxxxx#9 nsp=-1 mass=400 + 1.000000000000000e+01, 0.000000000000000e+00, // itest=88: oxxxxx#9 nsp=-1 mass=400 + -1.600000000000000e+01, -1.200000000000000e+01, // itest=88: oxxxxx#9 nsp=-1 mass=400 + -2.000000000000000e+01, -0.000000000000000e+00 } ); // itest=88: oxxxxx#9 nsp=-1 mass=400 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -0.000000000000000e+00, // itest=89: oxxxxx#9 nsp=-1 mass=-400 + 2.400000000000000e+02, 1.800000000000000e+02, // itest=89: oxxxxx#9 nsp=-1 mass=-400 + -7.999999999999999e+00, -5.999999999999999e+00, // itest=89: oxxxxx#9 nsp=-1 mass=-400 + -1.000000000000000e+01, -0.000000000000000e+00, // itest=89: oxxxxx#9 nsp=-1 mass=-400 + -1.600000000000000e+01, -1.200000000000000e+01, // itest=89: oxxxxx#9 nsp=-1 mass=-400 + -2.000000000000000e+01, -0.000000000000000e+00 } ); // itest=89: oxxxxx#9 nsp=-1 mass=-400 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 
1.440000000000000e+02, // itest=90: ixxxxx#10 nsp=-1 mass=400 + 1.800000000000000e+02, 1.920000000000000e+02, // itest=90: ixxxxx#10 nsp=-1 mass=400 + 9.863939238321439e+00, -1.052153518754287e+01, // itest=90: ixxxxx#10 nsp=-1 mass=400 + -2.433105012119288e+01, -0.000000000000000e+00, // itest=90: ixxxxx#10 nsp=-1 mass=400 + -4.931969619160719e+00, 5.260767593771432e+00, // itest=90: ixxxxx#10 nsp=-1 mass=400 + 1.216552506059644e+01, 0.000000000000000e+00 } ); // itest=90: ixxxxx#10 nsp=-1 mass=400 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 1.440000000000000e+02, // itest=91: ixxxxx#10 nsp=-1 mass=-400 + 1.800000000000000e+02, 1.920000000000000e+02, // itest=91: ixxxxx#10 nsp=-1 mass=-400 + 9.863939238321439e+00, -1.052153518754287e+01, // itest=91: ixxxxx#10 nsp=-1 mass=-400 + -2.433105012119288e+01, -0.000000000000000e+00, // itest=91: ixxxxx#10 nsp=-1 mass=-400 + 4.931969619160719e+00, -5.260767593771432e+00, // itest=91: ixxxxx#10 nsp=-1 mass=-400 + -1.216552506059644e+01, -0.000000000000000e+00 } ); // itest=91: ixxxxx#10 nsp=-1 mass=-400 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -1.440000000000000e+02, // itest=92: vxxxxx#10 nsp=-1 mass=400 + -1.800000000000000e+02, -1.920000000000000e+02, // itest=92: vxxxxx#10 nsp=-1 mass=400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=92: vxxxxx#10 nsp=-1 mass=400 + -2.321373168788980e-01, 5.158607041753289e-01, // itest=92: vxxxxx#10 nsp=-1 mass=400 + -2.476131380041579e-01, -4.836194101643708e-01, // itest=92: vxxxxx#10 nsp=-1 mass=400 + 6.203224967708328e-01, 0.000000000000000e+00 } ); // itest=92: vxxxxx#10 nsp=-1 mass=400 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -1.440000000000000e+02, // itest=93: vxxxxx#10 nsp=-1 mass=-400 + -1.800000000000000e+02, -1.920000000000000e+02, // itest=93: vxxxxx#10 nsp=-1 mass=-400 + -0.000000000000000e+00, 0.000000000000000e+00, // itest=93: vxxxxx#10 nsp=-1 mass=-400 + -2.321373168788980e-01, 5.158607041753289e-01, // itest=93: vxxxxx#10 nsp=-1 mass=-400 + -2.476131380041579e-01, -4.836194101643708e-01, // itest=93: vxxxxx#10 nsp=-1 mass=-400 + 6.203224967708328e-01, 0.000000000000000e+00 } ); // itest=93: vxxxxx#10 nsp=-1 mass=-400 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -1.440000000000000e+02, // itest=94: sxxxxx#10 nsp=-1 mass=400 + -1.800000000000000e+02, -1.920000000000000e+02, // itest=94: sxxxxx#10 nsp=-1 mass=400 + 1.000000000000000e+00, 0.000000000000000e+00, // itest=94: sxxxxx#10 nsp=-1 mass=400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=94: sxxxxx#10 nsp=-1 mass=400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=94: sxxxxx#10 nsp=-1 mass=400 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=94: sxxxxx#10 nsp=-1 mass=400 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -1.440000000000000e+02, // itest=95: sxxxxx#10 nsp=-1 mass=-400 + -1.800000000000000e+02, -1.920000000000000e+02, // itest=95: sxxxxx#10 nsp=-1 mass=-400 + 1.000000000000000e+00, 0.000000000000000e+00, // itest=95: sxxxxx#10 nsp=-1 mass=-400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=95: sxxxxx#10 nsp=-1 mass=-400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=95: sxxxxx#10 nsp=-1 mass=-400 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=95: sxxxxx#10 nsp=-1 mass=-400 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -1.440000000000000e+02, // itest=96: oxxxxx#10 nsp=-1 mass=400 + -1.800000000000000e+02, -1.920000000000000e+02, // itest=96: oxxxxx#10 
nsp=-1 mass=400 + -4.931969619160719e+00, -5.260767593771432e+00, // itest=96: oxxxxx#10 nsp=-1 mass=400 + 1.216552506059644e+01, 0.000000000000000e+00, // itest=96: oxxxxx#10 nsp=-1 mass=400 + 9.863939238321439e+00, 1.052153518754287e+01, // itest=96: oxxxxx#10 nsp=-1 mass=400 + -2.433105012119288e+01, -0.000000000000000e+00 } ); // itest=96: oxxxxx#10 nsp=-1 mass=400 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -1.440000000000000e+02, // itest=97: oxxxxx#10 nsp=-1 mass=-400 + -1.800000000000000e+02, -1.920000000000000e+02, // itest=97: oxxxxx#10 nsp=-1 mass=-400 + 4.931969619160719e+00, 5.260767593771432e+00, // itest=97: oxxxxx#10 nsp=-1 mass=-400 + -1.216552506059644e+01, -0.000000000000000e+00, // itest=97: oxxxxx#10 nsp=-1 mass=-400 + 9.863939238321439e+00, 1.052153518754287e+01, // itest=97: oxxxxx#10 nsp=-1 mass=-400 + -2.433105012119288e+01, -0.000000000000000e+00 } ); // itest=97: oxxxxx#10 nsp=-1 mass=-400 + expwfs.push_back( { // --------- + 5.000000000000000e+02, -1.440000000000000e+02, // itest=98: ixxxxx#11 nsp=-1 mass=400 + 1.800000000000000e+02, 1.920000000000000e+02, // itest=98: ixxxxx#11 nsp=-1 mass=400 + 1.664100588675688e+01, -1.775040627920733e+01, // itest=98: ixxxxx#11 nsp=-1 mass=400 + -1.442220510185596e+01, -0.000000000000000e+00, // itest=98: ixxxxx#11 nsp=-1 mass=400 + -8.320502943378436e+00, 8.875203139603666e+00, // itest=98: ixxxxx#11 nsp=-1 mass=400 + 7.211102550927978e+00, 0.000000000000000e+00 } ); // itest=98: ixxxxx#11 nsp=-1 mass=400 + expwfs.push_back( { // --------- + 5.000000000000000e+02, -1.440000000000000e+02, // itest=99: ixxxxx#11 nsp=-1 mass=-400 + 1.800000000000000e+02, 1.920000000000000e+02, // itest=99: ixxxxx#11 nsp=-1 mass=-400 + 1.664100588675688e+01, -1.775040627920733e+01, // itest=99: ixxxxx#11 nsp=-1 mass=-400 + -1.442220510185596e+01, -0.000000000000000e+00, // itest=99: ixxxxx#11 nsp=-1 mass=-400 + 8.320502943378436e+00, -8.875203139603666e+00, // itest=99: ixxxxx#11 nsp=-1 mass=-400 + -7.211102550927978e+00, -0.000000000000000e+00 } ); // itest=99: ixxxxx#11 nsp=-1 mass=-400 + expwfs.push_back( { // --------- + -5.000000000000000e+02, 1.440000000000000e+02, // itest=100: vxxxxx#11 nsp=-1 mass=400 + -1.800000000000000e+02, -1.920000000000000e+02, // itest=100: vxxxxx#11 nsp=-1 mass=400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=100: vxxxxx#11 nsp=-1 mass=400 + 2.321373168788980e-01, 5.158607041753289e-01, // itest=100: vxxxxx#11 nsp=-1 mass=400 + 2.476131380041579e-01, -4.836194101643708e-01, // itest=100: vxxxxx#11 nsp=-1 mass=400 + 6.203224967708328e-01, 0.000000000000000e+00 } ); // itest=100: vxxxxx#11 nsp=-1 mass=400 + expwfs.push_back( { // --------- + -5.000000000000000e+02, 1.440000000000000e+02, // itest=101: vxxxxx#11 nsp=-1 mass=-400 + -1.800000000000000e+02, -1.920000000000000e+02, // itest=101: vxxxxx#11 nsp=-1 mass=-400 + -0.000000000000000e+00, 0.000000000000000e+00, // itest=101: vxxxxx#11 nsp=-1 mass=-400 + 2.321373168788980e-01, 5.158607041753289e-01, // itest=101: vxxxxx#11 nsp=-1 mass=-400 + 2.476131380041579e-01, -4.836194101643708e-01, // itest=101: vxxxxx#11 nsp=-1 mass=-400 + 6.203224967708328e-01, 0.000000000000000e+00 } ); // itest=101: vxxxxx#11 nsp=-1 mass=-400 + expwfs.push_back( { // --------- + -5.000000000000000e+02, 1.440000000000000e+02, // itest=102: sxxxxx#11 nsp=-1 mass=400 + -1.800000000000000e+02, -1.920000000000000e+02, // itest=102: sxxxxx#11 nsp=-1 mass=400 + 1.000000000000000e+00, 0.000000000000000e+00, // itest=102: sxxxxx#11 nsp=-1 mass=400 + 
0.000000000000000e+00, 0.000000000000000e+00, // itest=102: sxxxxx#11 nsp=-1 mass=400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=102: sxxxxx#11 nsp=-1 mass=400 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=102: sxxxxx#11 nsp=-1 mass=400 + expwfs.push_back( { // --------- + -5.000000000000000e+02, 1.440000000000000e+02, // itest=103: sxxxxx#11 nsp=-1 mass=-400 + -1.800000000000000e+02, -1.920000000000000e+02, // itest=103: sxxxxx#11 nsp=-1 mass=-400 + 1.000000000000000e+00, 0.000000000000000e+00, // itest=103: sxxxxx#11 nsp=-1 mass=-400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=103: sxxxxx#11 nsp=-1 mass=-400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=103: sxxxxx#11 nsp=-1 mass=-400 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=103: sxxxxx#11 nsp=-1 mass=-400 + expwfs.push_back( { // --------- + -5.000000000000000e+02, 1.440000000000000e+02, // itest=104: oxxxxx#11 nsp=-1 mass=400 + -1.800000000000000e+02, -1.920000000000000e+02, // itest=104: oxxxxx#11 nsp=-1 mass=400 + -8.320502943378436e+00, -8.875203139603666e+00, // itest=104: oxxxxx#11 nsp=-1 mass=400 + 7.211102550927978e+00, 0.000000000000000e+00, // itest=104: oxxxxx#11 nsp=-1 mass=400 + 1.664100588675688e+01, 1.775040627920733e+01, // itest=104: oxxxxx#11 nsp=-1 mass=400 + -1.442220510185596e+01, -0.000000000000000e+00 } ); // itest=104: oxxxxx#11 nsp=-1 mass=400 + expwfs.push_back( { // --------- + -5.000000000000000e+02, 1.440000000000000e+02, // itest=105: oxxxxx#11 nsp=-1 mass=-400 + -1.800000000000000e+02, -1.920000000000000e+02, // itest=105: oxxxxx#11 nsp=-1 mass=-400 + 8.320502943378436e+00, 8.875203139603666e+00, // itest=105: oxxxxx#11 nsp=-1 mass=-400 + -7.211102550927978e+00, -0.000000000000000e+00, // itest=105: oxxxxx#11 nsp=-1 mass=-400 + 1.664100588675688e+01, 1.775040627920733e+01, // itest=105: oxxxxx#11 nsp=-1 mass=-400 + -1.442220510185596e+01, -0.000000000000000e+00 } ); // itest=105: oxxxxx#11 nsp=-1 mass=-400 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 5.000000000000000e+02, // itest=106: ixxxxx#12 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=106: ixxxxx#12 nsp=-1 mass=0 + 0.000000000000000e+00, -0.000000000000000e+00, // itest=106: ixxxxx#12 nsp=-1 mass=0 + -3.162277660168379e+01, 0.000000000000000e+00, // itest=106: ixxxxx#12 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=106: ixxxxx#12 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=106: ixxxxx#12 nsp=-1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 5.000000000000000e+02, // itest=107: ixxxxx#12 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=107: ixxxxx#12 nsp=-1 mass=0 + 0.000000000000000e+00, -0.000000000000000e+00, // itest=107: ixxxxx#12 nsp=-1 mass=0 + -3.162277660168379e+01, 0.000000000000000e+00, // itest=107: ixxxxx#12 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=107: ixxxxx#12 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=107: ixxxxx#12 nsp=-1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 5.000000000000000e+02, // itest=108: ipzxxx#12 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=108: ipzxxx#12 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=108: ipzxxx#12 nsp=-1 mass=0 + -3.162277660168379e+01, 0.000000000000000e+00, // itest=108: ipzxxx#12 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=108: 
ipzxxx#12 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=108: ipzxxx#12 nsp=-1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -5.000000000000000e+02, // itest=109: vxxxxx#12 nsp=-1 mass=0 + -0.000000000000000e+00, -0.000000000000000e+00, // itest=109: vxxxxx#12 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=109: vxxxxx#12 nsp=-1 mass=0 + -7.071067811865476e-01, 0.000000000000000e+00, // itest=109: vxxxxx#12 nsp=-1 mass=0 + 0.000000000000000e+00, -7.071067811865476e-01, // itest=109: vxxxxx#12 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=109: vxxxxx#12 nsp=-1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -5.000000000000000e+02, // itest=110: vxxxxx#12 nsp=-1 mass=0 + -0.000000000000000e+00, -0.000000000000000e+00, // itest=110: vxxxxx#12 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=110: vxxxxx#12 nsp=-1 mass=0 + -7.071067811865476e-01, 0.000000000000000e+00, // itest=110: vxxxxx#12 nsp=-1 mass=0 + 0.000000000000000e+00, -7.071067811865476e-01, // itest=110: vxxxxx#12 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=110: vxxxxx#12 nsp=-1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -5.000000000000000e+02, // itest=111: sxxxxx#12 nsp=-1 mass=0 + -0.000000000000000e+00, -0.000000000000000e+00, // itest=111: sxxxxx#12 nsp=-1 mass=0 + 1.000000000000000e+00, 0.000000000000000e+00, // itest=111: sxxxxx#12 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=111: sxxxxx#12 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=111: sxxxxx#12 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=111: sxxxxx#12 nsp=-1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -5.000000000000000e+02, // itest=112: sxxxxx#12 nsp=-1 mass=0 + -0.000000000000000e+00, -0.000000000000000e+00, // itest=112: sxxxxx#12 nsp=-1 mass=0 + 1.000000000000000e+00, 0.000000000000000e+00, // itest=112: sxxxxx#12 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=112: sxxxxx#12 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=112: sxxxxx#12 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=112: sxxxxx#12 nsp=-1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -5.000000000000000e+02, // itest=113: oxxxxx#12 nsp=-1 mass=0 + -0.000000000000000e+00, -0.000000000000000e+00, // itest=113: oxxxxx#12 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=113: oxxxxx#12 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=113: oxxxxx#12 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=113: oxxxxx#12 nsp=-1 mass=0 + -3.162277660168379e+01, 0.000000000000000e+00 } ); // itest=113: oxxxxx#12 nsp=-1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -5.000000000000000e+02, // itest=114: oxxxxx#12 nsp=-1 mass=0 + -0.000000000000000e+00, -0.000000000000000e+00, // itest=114: oxxxxx#12 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=114: oxxxxx#12 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=114: oxxxxx#12 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=114: oxxxxx#12 nsp=-1 mass=0 + -3.162277660168379e+01, 0.000000000000000e+00 } ); // itest=114: oxxxxx#12 nsp=-1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, 
-5.000000000000000e+02, // itest=115: opzxxx#12 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=115: opzxxx#12 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=115: opzxxx#12 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=115: opzxxx#12 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=115: opzxxx#12 nsp=-1 mass=0 + -3.162277660168379e+01, 0.000000000000000e+00 } ); // itest=115: opzxxx#12 nsp=-1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, -5.000000000000000e+02, // itest=116: ixxxxx#13 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=116: ixxxxx#13 nsp=-1 mass=0 + -3.162277660168379e+01, 0.000000000000000e+00, // itest=116: ixxxxx#13 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=116: ixxxxx#13 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=116: ixxxxx#13 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=116: ixxxxx#13 nsp=-1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, -5.000000000000000e+02, // itest=117: ixxxxx#13 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=117: ixxxxx#13 nsp=-1 mass=0 + -3.162277660168379e+01, -0.000000000000000e+00, // itest=117: ixxxxx#13 nsp=-1 mass=0 + -0.000000000000000e+00, 0.000000000000000e+00, // itest=117: ixxxxx#13 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=117: ixxxxx#13 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=117: ixxxxx#13 nsp=-1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, -5.000000000000000e+02, // itest=118: imzxxx#13 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=118: imzxxx#13 nsp=-1 mass=0 + -3.162277660168379e+01, 0.000000000000000e+00, // itest=118: imzxxx#13 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=118: imzxxx#13 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=118: imzxxx#13 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=118: imzxxx#13 nsp=-1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, 5.000000000000000e+02, // itest=119: vxxxxx#13 nsp=-1 mass=0 + -0.000000000000000e+00, -0.000000000000000e+00, // itest=119: vxxxxx#13 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=119: vxxxxx#13 nsp=-1 mass=0 + -7.071067811865476e-01, 0.000000000000000e+00, // itest=119: vxxxxx#13 nsp=-1 mass=0 + 0.000000000000000e+00, 7.071067811865476e-01, // itest=119: vxxxxx#13 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=119: vxxxxx#13 nsp=-1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, 5.000000000000000e+02, // itest=120: vxxxxx#13 nsp=-1 mass=0 + -0.000000000000000e+00, -0.000000000000000e+00, // itest=120: vxxxxx#13 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=120: vxxxxx#13 nsp=-1 mass=0 + -7.071067811865476e-01, 0.000000000000000e+00, // itest=120: vxxxxx#13 nsp=-1 mass=0 + 0.000000000000000e+00, 7.071067811865476e-01, // itest=120: vxxxxx#13 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=120: vxxxxx#13 nsp=-1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, 5.000000000000000e+02, // itest=121: sxxxxx#13 nsp=-1 mass=0 + -0.000000000000000e+00, -0.000000000000000e+00, // itest=121: sxxxxx#13 nsp=-1 mass=0 + 1.000000000000000e+00, 0.000000000000000e+00, // 
itest=121: sxxxxx#13 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=121: sxxxxx#13 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=121: sxxxxx#13 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=121: sxxxxx#13 nsp=-1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, 5.000000000000000e+02, // itest=122: sxxxxx#13 nsp=-1 mass=0 + -0.000000000000000e+00, -0.000000000000000e+00, // itest=122: sxxxxx#13 nsp=-1 mass=0 + 1.000000000000000e+00, 0.000000000000000e+00, // itest=122: sxxxxx#13 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=122: sxxxxx#13 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=122: sxxxxx#13 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=122: sxxxxx#13 nsp=-1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, 5.000000000000000e+02, // itest=123: oxxxxx#13 nsp=-1 mass=0 + -0.000000000000000e+00, -0.000000000000000e+00, // itest=123: oxxxxx#13 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=123: oxxxxx#13 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=123: oxxxxx#13 nsp=-1 mass=0 + -3.162277660168379e+01, 0.000000000000000e+00, // itest=123: oxxxxx#13 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=123: oxxxxx#13 nsp=-1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, 5.000000000000000e+02, // itest=124: oxxxxx#13 nsp=-1 mass=0 + -0.000000000000000e+00, -0.000000000000000e+00, // itest=124: oxxxxx#13 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=124: oxxxxx#13 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=124: oxxxxx#13 nsp=-1 mass=0 + -3.162277660168379e+01, -0.000000000000000e+00, // itest=124: oxxxxx#13 nsp=-1 mass=0 + -0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=124: oxxxxx#13 nsp=-1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, 5.000000000000000e+02, // itest=125: omzxxx#13 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=125: omzxxx#13 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=125: omzxxx#13 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=125: omzxxx#13 nsp=-1 mass=0 + -3.162277660168379e+01, 0.000000000000000e+00, // itest=125: omzxxx#13 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=125: omzxxx#13 nsp=-1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 0.000000000000000e+00, // itest=126: ixxxxx#14 nsp=-1 mass=0 + 3.000000000000000e+02, 4.000000000000000e+02, // itest=126: ixxxxx#14 nsp=-1 mass=0 + 1.341640786499874e+01, -1.788854381999832e+01, // itest=126: ixxxxx#14 nsp=-1 mass=0 + -2.236067977499790e+01, 0.000000000000000e+00, // itest=126: ixxxxx#14 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=126: ixxxxx#14 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=126: ixxxxx#14 nsp=-1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 0.000000000000000e+00, // itest=127: ixxxxx#14 nsp=-1 mass=0 + 3.000000000000000e+02, 4.000000000000000e+02, // itest=127: ixxxxx#14 nsp=-1 mass=0 + 1.341640786499874e+01, -1.788854381999832e+01, // itest=127: ixxxxx#14 nsp=-1 mass=0 + -2.236067977499790e+01, 0.000000000000000e+00, // itest=127: ixxxxx#14 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=127: ixxxxx#14 nsp=-1 
mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=127: ixxxxx#14 nsp=-1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 0.000000000000000e+00, // itest=128: ixzxxx#14 nsp=-1 mass=0 + 3.000000000000000e+02, 4.000000000000000e+02, // itest=128: ixzxxx#14 nsp=-1 mass=0 + 1.341640786499874e+01, -1.788854381999832e+01, // itest=128: ixzxxx#14 nsp=-1 mass=0 + -2.236067977499790e+01, 0.000000000000000e+00, // itest=128: ixzxxx#14 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=128: ixzxxx#14 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=128: ixzxxx#14 nsp=-1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -0.000000000000000e+00, // itest=129: vxxxxx#14 nsp=-1 mass=0 + -3.000000000000000e+02, -4.000000000000000e+02, // itest=129: vxxxxx#14 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=129: vxxxxx#14 nsp=-1 mass=0 + -0.000000000000000e+00, 5.656854249492381e-01, // itest=129: vxxxxx#14 nsp=-1 mass=0 + -0.000000000000000e+00, -4.242640687119285e-01, // itest=129: vxxxxx#14 nsp=-1 mass=0 + 7.071067811865476e-01, 0.000000000000000e+00 } ); // itest=129: vxxxxx#14 nsp=-1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -0.000000000000000e+00, // itest=130: vxxxxx#14 nsp=-1 mass=0 + -3.000000000000000e+02, -4.000000000000000e+02, // itest=130: vxxxxx#14 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=130: vxxxxx#14 nsp=-1 mass=0 + -0.000000000000000e+00, 5.656854249492381e-01, // itest=130: vxxxxx#14 nsp=-1 mass=0 + -0.000000000000000e+00, -4.242640687119285e-01, // itest=130: vxxxxx#14 nsp=-1 mass=0 + 7.071067811865476e-01, 0.000000000000000e+00 } ); // itest=130: vxxxxx#14 nsp=-1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -0.000000000000000e+00, // itest=131: sxxxxx#14 nsp=-1 mass=0 + -3.000000000000000e+02, -4.000000000000000e+02, // itest=131: sxxxxx#14 nsp=-1 mass=0 + 1.000000000000000e+00, 0.000000000000000e+00, // itest=131: sxxxxx#14 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=131: sxxxxx#14 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=131: sxxxxx#14 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=131: sxxxxx#14 nsp=-1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -0.000000000000000e+00, // itest=132: sxxxxx#14 nsp=-1 mass=0 + -3.000000000000000e+02, -4.000000000000000e+02, // itest=132: sxxxxx#14 nsp=-1 mass=0 + 1.000000000000000e+00, 0.000000000000000e+00, // itest=132: sxxxxx#14 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=132: sxxxxx#14 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=132: sxxxxx#14 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=132: sxxxxx#14 nsp=-1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -0.000000000000000e+00, // itest=133: oxxxxx#14 nsp=-1 mass=0 + -3.000000000000000e+02, -4.000000000000000e+02, // itest=133: oxxxxx#14 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=133: oxxxxx#14 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=133: oxxxxx#14 nsp=-1 mass=0 + 1.341640786499874e+01, 1.788854381999832e+01, // itest=133: oxxxxx#14 nsp=-1 mass=0 + -2.236067977499790e+01, 0.000000000000000e+00 } ); // itest=133: oxxxxx#14 nsp=-1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -0.000000000000000e+00, // itest=134: 
oxxxxx#14 nsp=-1 mass=0 + -3.000000000000000e+02, -4.000000000000000e+02, // itest=134: oxxxxx#14 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=134: oxxxxx#14 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=134: oxxxxx#14 nsp=-1 mass=0 + 1.341640786499874e+01, 1.788854381999832e+01, // itest=134: oxxxxx#14 nsp=-1 mass=0 + -2.236067977499790e+01, 0.000000000000000e+00 } ); // itest=134: oxxxxx#14 nsp=-1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -0.000000000000000e+00, // itest=135: oxzxxx#14 nsp=-1 mass=0 + -3.000000000000000e+02, -4.000000000000000e+02, // itest=135: oxzxxx#14 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=135: oxzxxx#14 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=135: oxzxxx#14 nsp=-1 mass=0 + 1.341640786499874e+01, 1.788854381999832e+01, // itest=135: oxzxxx#14 nsp=-1 mass=0 + -2.236067977499790e+01, 0.000000000000000e+00 } ); // itest=135: oxzxxx#14 nsp=-1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 4.000000000000000e+02, // itest=136: ixxxxx#15 nsp=-1 mass=0 + 1.800000000000000e+02, 2.400000000000000e+02, // itest=136: ixxxxx#15 nsp=-1 mass=0 + 6.000000000000000e+00, -8.000000000000000e+00, // itest=136: ixxxxx#15 nsp=-1 mass=0 + -3.000000000000000e+01, 0.000000000000000e+00, // itest=136: ixxxxx#15 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=136: ixxxxx#15 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=136: ixxxxx#15 nsp=-1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 4.000000000000000e+02, // itest=137: ixxxxx#15 nsp=-1 mass=0 + 1.800000000000000e+02, 2.400000000000000e+02, // itest=137: ixxxxx#15 nsp=-1 mass=0 + 6.000000000000000e+00, -8.000000000000000e+00, // itest=137: ixxxxx#15 nsp=-1 mass=0 + -3.000000000000000e+01, 0.000000000000000e+00, // itest=137: ixxxxx#15 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=137: ixxxxx#15 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=137: ixxxxx#15 nsp=-1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 4.000000000000000e+02, // itest=138: ixzxxx#15 nsp=-1 mass=0 + 1.800000000000000e+02, 2.400000000000000e+02, // itest=138: ixzxxx#15 nsp=-1 mass=0 + 6.000000000000000e+00, -8.000000000000000e+00, // itest=138: ixzxxx#15 nsp=-1 mass=0 + -3.000000000000000e+01, 0.000000000000000e+00, // itest=138: ixzxxx#15 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=138: ixzxxx#15 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=138: ixzxxx#15 nsp=-1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -4.000000000000000e+02, // itest=139: vxxxxx#15 nsp=-1 mass=0 + -1.800000000000000e+02, -2.400000000000000e+02, // itest=139: vxxxxx#15 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=139: vxxxxx#15 nsp=-1 mass=0 + -3.394112549695428e-01, 5.656854249492381e-01, // itest=139: vxxxxx#15 nsp=-1 mass=0 + -4.525483399593904e-01, -4.242640687119285e-01, // itest=139: vxxxxx#15 nsp=-1 mass=0 + 4.242640687119285e-01, 0.000000000000000e+00 } ); // itest=139: vxxxxx#15 nsp=-1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -4.000000000000000e+02, // itest=140: vxxxxx#15 nsp=-1 mass=0 + -1.800000000000000e+02, -2.400000000000000e+02, // itest=140: vxxxxx#15 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=140: vxxxxx#15 nsp=-1 mass=0 + 
-3.394112549695428e-01, 5.656854249492381e-01, // itest=140: vxxxxx#15 nsp=-1 mass=0 + -4.525483399593904e-01, -4.242640687119285e-01, // itest=140: vxxxxx#15 nsp=-1 mass=0 + 4.242640687119285e-01, 0.000000000000000e+00 } ); // itest=140: vxxxxx#15 nsp=-1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -4.000000000000000e+02, // itest=141: sxxxxx#15 nsp=-1 mass=0 + -1.800000000000000e+02, -2.400000000000000e+02, // itest=141: sxxxxx#15 nsp=-1 mass=0 + 1.000000000000000e+00, 0.000000000000000e+00, // itest=141: sxxxxx#15 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=141: sxxxxx#15 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=141: sxxxxx#15 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=141: sxxxxx#15 nsp=-1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -4.000000000000000e+02, // itest=142: sxxxxx#15 nsp=-1 mass=0 + -1.800000000000000e+02, -2.400000000000000e+02, // itest=142: sxxxxx#15 nsp=-1 mass=0 + 1.000000000000000e+00, 0.000000000000000e+00, // itest=142: sxxxxx#15 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=142: sxxxxx#15 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=142: sxxxxx#15 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=142: sxxxxx#15 nsp=-1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -4.000000000000000e+02, // itest=143: oxxxxx#15 nsp=-1 mass=0 + -1.800000000000000e+02, -2.400000000000000e+02, // itest=143: oxxxxx#15 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=143: oxxxxx#15 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=143: oxxxxx#15 nsp=-1 mass=0 + 6.000000000000000e+00, 8.000000000000000e+00, // itest=143: oxxxxx#15 nsp=-1 mass=0 + -3.000000000000000e+01, 0.000000000000000e+00 } ); // itest=143: oxxxxx#15 nsp=-1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -4.000000000000000e+02, // itest=144: oxxxxx#15 nsp=-1 mass=0 + -1.800000000000000e+02, -2.400000000000000e+02, // itest=144: oxxxxx#15 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=144: oxxxxx#15 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=144: oxxxxx#15 nsp=-1 mass=0 + 6.000000000000000e+00, 8.000000000000000e+00, // itest=144: oxxxxx#15 nsp=-1 mass=0 + -3.000000000000000e+01, 0.000000000000000e+00 } ); // itest=144: oxxxxx#15 nsp=-1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -4.000000000000000e+02, // itest=145: oxzxxx#15 nsp=-1 mass=0 + -1.800000000000000e+02, -2.400000000000000e+02, // itest=145: oxzxxx#15 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=145: oxzxxx#15 nsp=-1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=145: oxzxxx#15 nsp=-1 mass=0 + 6.000000000000000e+00, 8.000000000000000e+00, // itest=145: oxzxxx#15 nsp=-1 mass=0 + -3.000000000000000e+01, 0.000000000000000e+00 } ); // itest=145: oxzxxx#15 nsp=-1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -5.000000000000000e+02, // itest=146: ixxxxx#0 nsp=1 mass=0 + -0.000000000000000e+00, -0.000000000000000e+00, // itest=146: ixxxxx#0 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=146: ixxxxx#0 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=146: ixxxxx#0 nsp=1 mass=0 + 3.162277660168379e+01, 0.000000000000000e+00, // itest=146: ixxxxx#0 nsp=1 mass=0 + 0.000000000000000e+00, 
0.000000000000000e+00 } ); // itest=146: ixxxxx#0 nsp=1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -5.000000000000000e+02, // itest=147: ixxxxx#0 nsp=1 mass=0 + -0.000000000000000e+00, -0.000000000000000e+00, // itest=147: ixxxxx#0 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=147: ixxxxx#0 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=147: ixxxxx#0 nsp=1 mass=0 + 3.162277660168379e+01, 0.000000000000000e+00, // itest=147: ixxxxx#0 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=147: ixxxxx#0 nsp=1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -5.000000000000000e+02, // itest=148: ipzxxx#0 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=148: ipzxxx#0 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=148: ipzxxx#0 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=148: ipzxxx#0 nsp=1 mass=0 + 3.162277660168379e+01, 0.000000000000000e+00, // itest=148: ipzxxx#0 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=148: ipzxxx#0 nsp=1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 5.000000000000000e+02, // itest=149: vxxxxx#0 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=149: vxxxxx#0 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=149: vxxxxx#0 nsp=1 mass=0 + -7.071067811865476e-01, 0.000000000000000e+00, // itest=149: vxxxxx#0 nsp=1 mass=0 + 0.000000000000000e+00, 7.071067811865476e-01, // itest=149: vxxxxx#0 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=149: vxxxxx#0 nsp=1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 5.000000000000000e+02, // itest=150: vxxxxx#0 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=150: vxxxxx#0 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=150: vxxxxx#0 nsp=1 mass=0 + -7.071067811865476e-01, 0.000000000000000e+00, // itest=150: vxxxxx#0 nsp=1 mass=0 + 0.000000000000000e+00, 7.071067811865476e-01, // itest=150: vxxxxx#0 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=150: vxxxxx#0 nsp=1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 5.000000000000000e+02, // itest=151: sxxxxx#0 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=151: sxxxxx#0 nsp=1 mass=0 + 1.000000000000000e+00, 0.000000000000000e+00, // itest=151: sxxxxx#0 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=151: sxxxxx#0 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=151: sxxxxx#0 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=151: sxxxxx#0 nsp=1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 5.000000000000000e+02, // itest=152: sxxxxx#0 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=152: sxxxxx#0 nsp=1 mass=0 + 1.000000000000000e+00, 0.000000000000000e+00, // itest=152: sxxxxx#0 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=152: sxxxxx#0 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=152: sxxxxx#0 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=152: sxxxxx#0 nsp=1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 5.000000000000000e+02, // itest=153: oxxxxx#0 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=153: oxxxxx#0 nsp=1 mass=0 + 
3.162277660168379e+01, 0.000000000000000e+00, // itest=153: oxxxxx#0 nsp=1 mass=0 + 0.000000000000000e+00, -0.000000000000000e+00, // itest=153: oxxxxx#0 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=153: oxxxxx#0 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=153: oxxxxx#0 nsp=1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 5.000000000000000e+02, // itest=154: oxxxxx#0 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=154: oxxxxx#0 nsp=1 mass=0 + 3.162277660168379e+01, 0.000000000000000e+00, // itest=154: oxxxxx#0 nsp=1 mass=0 + 0.000000000000000e+00, -0.000000000000000e+00, // itest=154: oxxxxx#0 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=154: oxxxxx#0 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=154: oxxxxx#0 nsp=1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 5.000000000000000e+02, // itest=155: opzxxx#0 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=155: opzxxx#0 nsp=1 mass=0 + 3.162277660168379e+01, 0.000000000000000e+00, // itest=155: opzxxx#0 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=155: opzxxx#0 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=155: opzxxx#0 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=155: opzxxx#0 nsp=1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, 5.000000000000000e+02, // itest=156: ixxxxx#1 nsp=1 mass=0 + -0.000000000000000e+00, -0.000000000000000e+00, // itest=156: ixxxxx#1 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=156: ixxxxx#1 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=156: ixxxxx#1 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=156: ixxxxx#1 nsp=1 mass=0 + -3.162277660168379e+01, 0.000000000000000e+00 } ); // itest=156: ixxxxx#1 nsp=1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, 5.000000000000000e+02, // itest=157: ixxxxx#1 nsp=1 mass=0 + -0.000000000000000e+00, -0.000000000000000e+00, // itest=157: ixxxxx#1 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=157: ixxxxx#1 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=157: ixxxxx#1 nsp=1 mass=0 + -0.000000000000000e+00, 0.000000000000000e+00, // itest=157: ixxxxx#1 nsp=1 mass=0 + -3.162277660168379e+01, -0.000000000000000e+00 } ); // itest=157: ixxxxx#1 nsp=1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, 5.000000000000000e+02, // itest=158: imzxxx#1 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=158: imzxxx#1 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=158: imzxxx#1 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=158: imzxxx#1 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=158: imzxxx#1 nsp=1 mass=0 + -3.162277660168379e+01, 0.000000000000000e+00 } ); // itest=158: imzxxx#1 nsp=1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, -5.000000000000000e+02, // itest=159: vxxxxx#1 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=159: vxxxxx#1 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=159: vxxxxx#1 nsp=1 mass=0 + -7.071067811865476e-01, 0.000000000000000e+00, // itest=159: vxxxxx#1 nsp=1 mass=0 + 0.000000000000000e+00, -7.071067811865476e-01, // itest=159: vxxxxx#1 nsp=1 mass=0 + 0.000000000000000e+00, 
0.000000000000000e+00 } ); // itest=159: vxxxxx#1 nsp=1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, -5.000000000000000e+02, // itest=160: vxxxxx#1 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=160: vxxxxx#1 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=160: vxxxxx#1 nsp=1 mass=0 + -7.071067811865476e-01, 0.000000000000000e+00, // itest=160: vxxxxx#1 nsp=1 mass=0 + 0.000000000000000e+00, -7.071067811865476e-01, // itest=160: vxxxxx#1 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=160: vxxxxx#1 nsp=1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, -5.000000000000000e+02, // itest=161: sxxxxx#1 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=161: sxxxxx#1 nsp=1 mass=0 + 1.000000000000000e+00, 0.000000000000000e+00, // itest=161: sxxxxx#1 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=161: sxxxxx#1 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=161: sxxxxx#1 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=161: sxxxxx#1 nsp=1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, -5.000000000000000e+02, // itest=162: sxxxxx#1 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=162: sxxxxx#1 nsp=1 mass=0 + 1.000000000000000e+00, 0.000000000000000e+00, // itest=162: sxxxxx#1 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=162: sxxxxx#1 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=162: sxxxxx#1 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=162: sxxxxx#1 nsp=1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, -5.000000000000000e+02, // itest=163: oxxxxx#1 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=163: oxxxxx#1 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=163: oxxxxx#1 nsp=1 mass=0 + -3.162277660168379e+01, 0.000000000000000e+00, // itest=163: oxxxxx#1 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=163: oxxxxx#1 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=163: oxxxxx#1 nsp=1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, -5.000000000000000e+02, // itest=164: oxxxxx#1 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=164: oxxxxx#1 nsp=1 mass=0 + -0.000000000000000e+00, 0.000000000000000e+00, // itest=164: oxxxxx#1 nsp=1 mass=0 + -3.162277660168379e+01, -0.000000000000000e+00, // itest=164: oxxxxx#1 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=164: oxxxxx#1 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=164: oxxxxx#1 nsp=1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, -5.000000000000000e+02, // itest=165: omzxxx#1 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=165: omzxxx#1 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=165: omzxxx#1 nsp=1 mass=0 + -3.162277660168379e+01, 0.000000000000000e+00, // itest=165: omzxxx#1 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=165: omzxxx#1 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=165: omzxxx#1 nsp=1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -0.000000000000000e+00, // itest=166: ixxxxx#2 nsp=1 mass=0 + -3.000000000000000e+02, -4.000000000000000e+02, // itest=166: ixxxxx#2 nsp=1 mass=0 + 
0.000000000000000e+00, 0.000000000000000e+00, // itest=166: ixxxxx#2 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=166: ixxxxx#2 nsp=1 mass=0 + 2.236067977499790e+01, 0.000000000000000e+00, // itest=166: ixxxxx#2 nsp=1 mass=0 + 1.341640786499874e+01, 1.788854381999832e+01 } ); // itest=166: ixxxxx#2 nsp=1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -0.000000000000000e+00, // itest=167: ixxxxx#2 nsp=1 mass=0 + -3.000000000000000e+02, -4.000000000000000e+02, // itest=167: ixxxxx#2 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=167: ixxxxx#2 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=167: ixxxxx#2 nsp=1 mass=0 + 2.236067977499790e+01, 0.000000000000000e+00, // itest=167: ixxxxx#2 nsp=1 mass=0 + 1.341640786499874e+01, 1.788854381999832e+01 } ); // itest=167: ixxxxx#2 nsp=1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -0.000000000000000e+00, // itest=168: ixzxxx#2 nsp=1 mass=0 + -3.000000000000000e+02, -4.000000000000000e+02, // itest=168: ixzxxx#2 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=168: ixzxxx#2 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=168: ixzxxx#2 nsp=1 mass=0 + 2.236067977499790e+01, 0.000000000000000e+00, // itest=168: ixzxxx#2 nsp=1 mass=0 + 1.341640786499874e+01, 1.788854381999832e+01 } ); // itest=168: ixzxxx#2 nsp=1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 0.000000000000000e+00, // itest=169: vxxxxx#2 nsp=1 mass=0 + 3.000000000000000e+02, 4.000000000000000e+02, // itest=169: vxxxxx#2 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=169: vxxxxx#2 nsp=1 mass=0 + -0.000000000000000e+00, -5.656854249492381e-01, // itest=169: vxxxxx#2 nsp=1 mass=0 + -0.000000000000000e+00, 4.242640687119285e-01, // itest=169: vxxxxx#2 nsp=1 mass=0 + 7.071067811865476e-01, 0.000000000000000e+00 } ); // itest=169: vxxxxx#2 nsp=1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 0.000000000000000e+00, // itest=170: vxxxxx#2 nsp=1 mass=0 + 3.000000000000000e+02, 4.000000000000000e+02, // itest=170: vxxxxx#2 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=170: vxxxxx#2 nsp=1 mass=0 + -0.000000000000000e+00, -5.656854249492381e-01, // itest=170: vxxxxx#2 nsp=1 mass=0 + -0.000000000000000e+00, 4.242640687119285e-01, // itest=170: vxxxxx#2 nsp=1 mass=0 + 7.071067811865476e-01, 0.000000000000000e+00 } ); // itest=170: vxxxxx#2 nsp=1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 0.000000000000000e+00, // itest=171: sxxxxx#2 nsp=1 mass=0 + 3.000000000000000e+02, 4.000000000000000e+02, // itest=171: sxxxxx#2 nsp=1 mass=0 + 1.000000000000000e+00, 0.000000000000000e+00, // itest=171: sxxxxx#2 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=171: sxxxxx#2 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=171: sxxxxx#2 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=171: sxxxxx#2 nsp=1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 0.000000000000000e+00, // itest=172: sxxxxx#2 nsp=1 mass=0 + 3.000000000000000e+02, 4.000000000000000e+02, // itest=172: sxxxxx#2 nsp=1 mass=0 + 1.000000000000000e+00, 0.000000000000000e+00, // itest=172: sxxxxx#2 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=172: sxxxxx#2 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=172: sxxxxx#2 nsp=1 mass=0 + 0.000000000000000e+00, 
0.000000000000000e+00 } ); // itest=172: sxxxxx#2 nsp=1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 0.000000000000000e+00, // itest=173: oxxxxx#2 nsp=1 mass=0 + 3.000000000000000e+02, 4.000000000000000e+02, // itest=173: oxxxxx#2 nsp=1 mass=0 + 2.236067977499790e+01, 0.000000000000000e+00, // itest=173: oxxxxx#2 nsp=1 mass=0 + 1.341640786499874e+01, -1.788854381999832e+01, // itest=173: oxxxxx#2 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=173: oxxxxx#2 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=173: oxxxxx#2 nsp=1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 0.000000000000000e+00, // itest=174: oxxxxx#2 nsp=1 mass=0 + 3.000000000000000e+02, 4.000000000000000e+02, // itest=174: oxxxxx#2 nsp=1 mass=0 + 2.236067977499790e+01, 0.000000000000000e+00, // itest=174: oxxxxx#2 nsp=1 mass=0 + 1.341640786499874e+01, -1.788854381999832e+01, // itest=174: oxxxxx#2 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=174: oxxxxx#2 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=174: oxxxxx#2 nsp=1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 0.000000000000000e+00, // itest=175: oxzxxx#2 nsp=1 mass=0 + 3.000000000000000e+02, 4.000000000000000e+02, // itest=175: oxzxxx#2 nsp=1 mass=0 + 2.236067977499790e+01, 0.000000000000000e+00, // itest=175: oxzxxx#2 nsp=1 mass=0 + 1.341640786499874e+01, -1.788854381999832e+01, // itest=175: oxzxxx#2 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=175: oxzxxx#2 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=175: oxzxxx#2 nsp=1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -4.000000000000000e+02, // itest=176: ixxxxx#3 nsp=1 mass=0 + -1.800000000000000e+02, -2.400000000000000e+02, // itest=176: ixxxxx#3 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=176: ixxxxx#3 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=176: ixxxxx#3 nsp=1 mass=0 + 3.000000000000000e+01, 0.000000000000000e+00, // itest=176: ixxxxx#3 nsp=1 mass=0 + 6.000000000000000e+00, 8.000000000000000e+00 } ); // itest=176: ixxxxx#3 nsp=1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -4.000000000000000e+02, // itest=177: ixxxxx#3 nsp=1 mass=0 + -1.800000000000000e+02, -2.400000000000000e+02, // itest=177: ixxxxx#3 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=177: ixxxxx#3 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=177: ixxxxx#3 nsp=1 mass=0 + 3.000000000000000e+01, 0.000000000000000e+00, // itest=177: ixxxxx#3 nsp=1 mass=0 + 6.000000000000000e+00, 8.000000000000000e+00 } ); // itest=177: ixxxxx#3 nsp=1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -4.000000000000000e+02, // itest=178: ixzxxx#3 nsp=1 mass=0 + -1.800000000000000e+02, -2.400000000000000e+02, // itest=178: ixzxxx#3 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=178: ixzxxx#3 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=178: ixzxxx#3 nsp=1 mass=0 + 3.000000000000000e+01, 0.000000000000000e+00, // itest=178: ixzxxx#3 nsp=1 mass=0 + 6.000000000000000e+00, 8.000000000000000e+00 } ); // itest=178: ixzxxx#3 nsp=1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 4.000000000000000e+02, // itest=179: vxxxxx#3 nsp=1 mass=0 + 1.800000000000000e+02, 2.400000000000000e+02, // itest=179: vxxxxx#3 nsp=1 mass=0 + 
0.000000000000000e+00, 0.000000000000000e+00, // itest=179: vxxxxx#3 nsp=1 mass=0 + -3.394112549695428e-01, -5.656854249492381e-01, // itest=179: vxxxxx#3 nsp=1 mass=0 + -4.525483399593904e-01, 4.242640687119285e-01, // itest=179: vxxxxx#3 nsp=1 mass=0 + 4.242640687119285e-01, 0.000000000000000e+00 } ); // itest=179: vxxxxx#3 nsp=1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 4.000000000000000e+02, // itest=180: vxxxxx#3 nsp=1 mass=0 + 1.800000000000000e+02, 2.400000000000000e+02, // itest=180: vxxxxx#3 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=180: vxxxxx#3 nsp=1 mass=0 + -3.394112549695428e-01, -5.656854249492381e-01, // itest=180: vxxxxx#3 nsp=1 mass=0 + -4.525483399593904e-01, 4.242640687119285e-01, // itest=180: vxxxxx#3 nsp=1 mass=0 + 4.242640687119285e-01, 0.000000000000000e+00 } ); // itest=180: vxxxxx#3 nsp=1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 4.000000000000000e+02, // itest=181: sxxxxx#3 nsp=1 mass=0 + 1.800000000000000e+02, 2.400000000000000e+02, // itest=181: sxxxxx#3 nsp=1 mass=0 + 1.000000000000000e+00, 0.000000000000000e+00, // itest=181: sxxxxx#3 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=181: sxxxxx#3 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=181: sxxxxx#3 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=181: sxxxxx#3 nsp=1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 4.000000000000000e+02, // itest=182: sxxxxx#3 nsp=1 mass=0 + 1.800000000000000e+02, 2.400000000000000e+02, // itest=182: sxxxxx#3 nsp=1 mass=0 + 1.000000000000000e+00, 0.000000000000000e+00, // itest=182: sxxxxx#3 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=182: sxxxxx#3 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=182: sxxxxx#3 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=182: sxxxxx#3 nsp=1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 4.000000000000000e+02, // itest=183: oxxxxx#3 nsp=1 mass=0 + 1.800000000000000e+02, 2.400000000000000e+02, // itest=183: oxxxxx#3 nsp=1 mass=0 + 3.000000000000000e+01, 0.000000000000000e+00, // itest=183: oxxxxx#3 nsp=1 mass=0 + 6.000000000000000e+00, -8.000000000000000e+00, // itest=183: oxxxxx#3 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=183: oxxxxx#3 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=183: oxxxxx#3 nsp=1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 4.000000000000000e+02, // itest=184: oxxxxx#3 nsp=1 mass=0 + 1.800000000000000e+02, 2.400000000000000e+02, // itest=184: oxxxxx#3 nsp=1 mass=0 + 3.000000000000000e+01, 0.000000000000000e+00, // itest=184: oxxxxx#3 nsp=1 mass=0 + 6.000000000000000e+00, -8.000000000000000e+00, // itest=184: oxxxxx#3 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=184: oxxxxx#3 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=184: oxxxxx#3 nsp=1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 4.000000000000000e+02, // itest=185: oxzxxx#3 nsp=1 mass=0 + 1.800000000000000e+02, 2.400000000000000e+02, // itest=185: oxzxxx#3 nsp=1 mass=0 + 3.000000000000000e+01, 0.000000000000000e+00, // itest=185: oxzxxx#3 nsp=1 mass=0 + 6.000000000000000e+00, -8.000000000000000e+00, // itest=185: oxzxxx#3 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=185: oxzxxx#3 nsp=1 mass=0 + 0.000000000000000e+00, 
0.000000000000000e+00 } ); // itest=185: oxzxxx#3 nsp=1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, 4.000000000000000e+02, // itest=186: ixxxxx#4 nsp=1 mass=0 + -1.800000000000000e+02, -2.400000000000000e+02, // itest=186: ixxxxx#4 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=186: ixxxxx#4 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=186: ixxxxx#4 nsp=1 mass=0 + 1.000000000000000e+01, 0.000000000000000e+00, // itest=186: ixxxxx#4 nsp=1 mass=0 + 1.800000000000000e+01, 2.400000000000000e+01 } ); // itest=186: ixxxxx#4 nsp=1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, 4.000000000000000e+02, // itest=187: ixxxxx#4 nsp=1 mass=0 + -1.800000000000000e+02, -2.400000000000000e+02, // itest=187: ixxxxx#4 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=187: ixxxxx#4 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=187: ixxxxx#4 nsp=1 mass=0 + 1.000000000000000e+01, 0.000000000000000e+00, // itest=187: ixxxxx#4 nsp=1 mass=0 + 1.800000000000000e+01, 2.400000000000000e+01 } ); // itest=187: ixxxxx#4 nsp=1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, 4.000000000000000e+02, // itest=188: ixzxxx#4 nsp=1 mass=0 + -1.800000000000000e+02, -2.400000000000000e+02, // itest=188: ixzxxx#4 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=188: ixzxxx#4 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=188: ixzxxx#4 nsp=1 mass=0 + 1.000000000000000e+01, 0.000000000000000e+00, // itest=188: ixzxxx#4 nsp=1 mass=0 + 1.800000000000000e+01, 2.400000000000000e+01 } ); // itest=188: ixzxxx#4 nsp=1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, -4.000000000000000e+02, // itest=189: vxxxxx#4 nsp=1 mass=0 + 1.800000000000000e+02, 2.400000000000000e+02, // itest=189: vxxxxx#4 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=189: vxxxxx#4 nsp=1 mass=0 + 3.394112549695428e-01, -5.656854249492381e-01, // itest=189: vxxxxx#4 nsp=1 mass=0 + 4.525483399593904e-01, 4.242640687119285e-01, // itest=189: vxxxxx#4 nsp=1 mass=0 + 4.242640687119285e-01, 0.000000000000000e+00 } ); // itest=189: vxxxxx#4 nsp=1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, -4.000000000000000e+02, // itest=190: vxxxxx#4 nsp=1 mass=0 + 1.800000000000000e+02, 2.400000000000000e+02, // itest=190: vxxxxx#4 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=190: vxxxxx#4 nsp=1 mass=0 + 3.394112549695428e-01, -5.656854249492381e-01, // itest=190: vxxxxx#4 nsp=1 mass=0 + 4.525483399593904e-01, 4.242640687119285e-01, // itest=190: vxxxxx#4 nsp=1 mass=0 + 4.242640687119285e-01, 0.000000000000000e+00 } ); // itest=190: vxxxxx#4 nsp=1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, -4.000000000000000e+02, // itest=191: sxxxxx#4 nsp=1 mass=0 + 1.800000000000000e+02, 2.400000000000000e+02, // itest=191: sxxxxx#4 nsp=1 mass=0 + 1.000000000000000e+00, 0.000000000000000e+00, // itest=191: sxxxxx#4 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=191: sxxxxx#4 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=191: sxxxxx#4 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=191: sxxxxx#4 nsp=1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, -4.000000000000000e+02, // itest=192: sxxxxx#4 nsp=1 mass=0 + 1.800000000000000e+02, 2.400000000000000e+02, // itest=192: sxxxxx#4 nsp=1 mass=0 + 
1.000000000000000e+00, 0.000000000000000e+00, // itest=192: sxxxxx#4 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=192: sxxxxx#4 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=192: sxxxxx#4 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=192: sxxxxx#4 nsp=1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, -4.000000000000000e+02, // itest=193: oxxxxx#4 nsp=1 mass=0 + 1.800000000000000e+02, 2.400000000000000e+02, // itest=193: oxxxxx#4 nsp=1 mass=0 + 1.000000000000000e+01, 0.000000000000000e+00, // itest=193: oxxxxx#4 nsp=1 mass=0 + 1.800000000000000e+01, -2.400000000000000e+01, // itest=193: oxxxxx#4 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=193: oxxxxx#4 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=193: oxxxxx#4 nsp=1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, -4.000000000000000e+02, // itest=194: oxxxxx#4 nsp=1 mass=0 + 1.800000000000000e+02, 2.400000000000000e+02, // itest=194: oxxxxx#4 nsp=1 mass=0 + 1.000000000000000e+01, 0.000000000000000e+00, // itest=194: oxxxxx#4 nsp=1 mass=0 + 1.800000000000000e+01, -2.400000000000000e+01, // itest=194: oxxxxx#4 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=194: oxxxxx#4 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=194: oxxxxx#4 nsp=1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, -4.000000000000000e+02, // itest=195: oxzxxx#4 nsp=1 mass=0 + 1.800000000000000e+02, 2.400000000000000e+02, // itest=195: oxzxxx#4 nsp=1 mass=0 + 1.000000000000000e+01, 0.000000000000000e+00, // itest=195: oxzxxx#4 nsp=1 mass=0 + 1.800000000000000e+01, -2.400000000000000e+01, // itest=195: oxzxxx#4 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=195: oxzxxx#4 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=195: oxzxxx#4 nsp=1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -0.000000000000000e+00, // itest=196: ixxxxx#5 nsp=1 mass=500 + -0.000000000000000e+00, -0.000000000000000e+00, // itest=196: ixxxxx#5 nsp=1 mass=500 + 2.236067977499790e+01, 0.000000000000000e+00, // itest=196: ixxxxx#5 nsp=1 mass=500 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=196: ixxxxx#5 nsp=1 mass=500 + 2.236067977499790e+01, 0.000000000000000e+00, // itest=196: ixxxxx#5 nsp=1 mass=500 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=196: ixxxxx#5 nsp=1 mass=500 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -0.000000000000000e+00, // itest=197: ixxxxx#5 nsp=1 mass=-500 + -0.000000000000000e+00, -0.000000000000000e+00, // itest=197: ixxxxx#5 nsp=1 mass=-500 + -2.236067977499790e+01, 0.000000000000000e+00, // itest=197: ixxxxx#5 nsp=1 mass=-500 + -0.000000000000000e+00, 0.000000000000000e+00, // itest=197: ixxxxx#5 nsp=1 mass=-500 + 2.236067977499790e+01, 0.000000000000000e+00, // itest=197: ixxxxx#5 nsp=1 mass=-500 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=197: ixxxxx#5 nsp=1 mass=-500 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 0.000000000000000e+00, // itest=198: vxxxxx#5 nsp=1 mass=500 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=198: vxxxxx#5 nsp=1 mass=500 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=198: vxxxxx#5 nsp=1 mass=500 + -7.071067811865476e-01, 0.000000000000000e+00, // itest=198: vxxxxx#5 nsp=1 mass=500 + 0.000000000000000e+00, 7.071067811865476e-01, // itest=198: vxxxxx#5 
nsp=1 mass=500 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=198: vxxxxx#5 nsp=1 mass=500 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 0.000000000000000e+00, // itest=199: vxxxxx#5 nsp=1 mass=-500 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=199: vxxxxx#5 nsp=1 mass=-500 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=199: vxxxxx#5 nsp=1 mass=-500 + -7.071067811865476e-01, 0.000000000000000e+00, // itest=199: vxxxxx#5 nsp=1 mass=-500 + 0.000000000000000e+00, 7.071067811865476e-01, // itest=199: vxxxxx#5 nsp=1 mass=-500 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=199: vxxxxx#5 nsp=1 mass=-500 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 0.000000000000000e+00, // itest=200: sxxxxx#5 nsp=1 mass=500 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=200: sxxxxx#5 nsp=1 mass=500 + 1.000000000000000e+00, 0.000000000000000e+00, // itest=200: sxxxxx#5 nsp=1 mass=500 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=200: sxxxxx#5 nsp=1 mass=500 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=200: sxxxxx#5 nsp=1 mass=500 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=200: sxxxxx#5 nsp=1 mass=500 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 0.000000000000000e+00, // itest=201: sxxxxx#5 nsp=1 mass=-500 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=201: sxxxxx#5 nsp=1 mass=-500 + 1.000000000000000e+00, 0.000000000000000e+00, // itest=201: sxxxxx#5 nsp=1 mass=-500 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=201: sxxxxx#5 nsp=1 mass=-500 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=201: sxxxxx#5 nsp=1 mass=-500 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=201: sxxxxx#5 nsp=1 mass=-500 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 0.000000000000000e+00, // itest=202: oxxxxx#5 nsp=1 mass=500 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=202: oxxxxx#5 nsp=1 mass=500 + 2.236067977499790e+01, 0.000000000000000e+00, // itest=202: oxxxxx#5 nsp=1 mass=500 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=202: oxxxxx#5 nsp=1 mass=500 + 2.236067977499790e+01, 0.000000000000000e+00, // itest=202: oxxxxx#5 nsp=1 mass=500 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=202: oxxxxx#5 nsp=1 mass=500 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 0.000000000000000e+00, // itest=203: oxxxxx#5 nsp=1 mass=-500 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=203: oxxxxx#5 nsp=1 mass=-500 + 2.236067977499790e+01, 0.000000000000000e+00, // itest=203: oxxxxx#5 nsp=1 mass=-500 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=203: oxxxxx#5 nsp=1 mass=-500 + -2.236067977499790e+01, 0.000000000000000e+00, // itest=203: oxxxxx#5 nsp=1 mass=-500 + -0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=203: oxxxxx#5 nsp=1 mass=-500 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -3.000000000000000e+02, // itest=204: ixxxxx#6 nsp=1 mass=400 + -0.000000000000000e+00, -0.000000000000000e+00, // itest=204: ixxxxx#6 nsp=1 mass=400 + 1.414213562373095e+01, 0.000000000000000e+00, // itest=204: ixxxxx#6 nsp=1 mass=400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=204: ixxxxx#6 nsp=1 mass=400 + 2.828427124746190e+01, 0.000000000000000e+00, // itest=204: ixxxxx#6 nsp=1 mass=400 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=204: ixxxxx#6 nsp=1 mass=400 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -3.000000000000000e+02, // 
itest=205: ixxxxx#6 nsp=1 mass=-400 + -0.000000000000000e+00, -0.000000000000000e+00, // itest=205: ixxxxx#6 nsp=1 mass=-400 + -1.414213562373095e+01, -0.000000000000000e+00, // itest=205: ixxxxx#6 nsp=1 mass=-400 + -0.000000000000000e+00, -0.000000000000000e+00, // itest=205: ixxxxx#6 nsp=1 mass=-400 + 2.828427124746190e+01, 0.000000000000000e+00, // itest=205: ixxxxx#6 nsp=1 mass=-400 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=205: ixxxxx#6 nsp=1 mass=-400 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 3.000000000000000e+02, // itest=206: vxxxxx#6 nsp=1 mass=400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=206: vxxxxx#6 nsp=1 mass=400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=206: vxxxxx#6 nsp=1 mass=400 + -7.071067811865476e-01, 0.000000000000000e+00, // itest=206: vxxxxx#6 nsp=1 mass=400 + 0.000000000000000e+00, 7.071067811865476e-01, // itest=206: vxxxxx#6 nsp=1 mass=400 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=206: vxxxxx#6 nsp=1 mass=400 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 3.000000000000000e+02, // itest=207: vxxxxx#6 nsp=1 mass=-400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=207: vxxxxx#6 nsp=1 mass=-400 + -0.000000000000000e+00, 0.000000000000000e+00, // itest=207: vxxxxx#6 nsp=1 mass=-400 + -7.071067811865476e-01, 0.000000000000000e+00, // itest=207: vxxxxx#6 nsp=1 mass=-400 + 0.000000000000000e+00, 7.071067811865476e-01, // itest=207: vxxxxx#6 nsp=1 mass=-400 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=207: vxxxxx#6 nsp=1 mass=-400 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 3.000000000000000e+02, // itest=208: sxxxxx#6 nsp=1 mass=400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=208: sxxxxx#6 nsp=1 mass=400 + 1.000000000000000e+00, 0.000000000000000e+00, // itest=208: sxxxxx#6 nsp=1 mass=400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=208: sxxxxx#6 nsp=1 mass=400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=208: sxxxxx#6 nsp=1 mass=400 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=208: sxxxxx#6 nsp=1 mass=400 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 3.000000000000000e+02, // itest=209: sxxxxx#6 nsp=1 mass=-400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=209: sxxxxx#6 nsp=1 mass=-400 + 1.000000000000000e+00, 0.000000000000000e+00, // itest=209: sxxxxx#6 nsp=1 mass=-400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=209: sxxxxx#6 nsp=1 mass=-400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=209: sxxxxx#6 nsp=1 mass=-400 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=209: sxxxxx#6 nsp=1 mass=-400 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 3.000000000000000e+02, // itest=210: oxxxxx#6 nsp=1 mass=400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=210: oxxxxx#6 nsp=1 mass=400 + 2.828427124746190e+01, 0.000000000000000e+00, // itest=210: oxxxxx#6 nsp=1 mass=400 + 0.000000000000000e+00, -0.000000000000000e+00, // itest=210: oxxxxx#6 nsp=1 mass=400 + 1.414213562373095e+01, 0.000000000000000e+00, // itest=210: oxxxxx#6 nsp=1 mass=400 + 0.000000000000000e+00, -0.000000000000000e+00 } ); // itest=210: oxxxxx#6 nsp=1 mass=400 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 3.000000000000000e+02, // itest=211: oxxxxx#6 nsp=1 mass=-400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=211: oxxxxx#6 nsp=1 mass=-400 + 2.828427124746190e+01, 0.000000000000000e+00, // itest=211: oxxxxx#6 
nsp=1 mass=-400 + 0.000000000000000e+00, -0.000000000000000e+00, // itest=211: oxxxxx#6 nsp=1 mass=-400 + -1.414213562373095e+01, -0.000000000000000e+00, // itest=211: oxxxxx#6 nsp=1 mass=-400 + -0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=211: oxxxxx#6 nsp=1 mass=-400 + expwfs.push_back( { // --------- + -5.000000000000000e+02, 3.000000000000000e+02, // itest=212: ixxxxx#7 nsp=1 mass=400 + -0.000000000000000e+00, -0.000000000000000e+00, // itest=212: ixxxxx#7 nsp=1 mass=400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=212: ixxxxx#7 nsp=1 mass=400 + -1.414213562373095e+01, 0.000000000000000e+00, // itest=212: ixxxxx#7 nsp=1 mass=400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=212: ixxxxx#7 nsp=1 mass=400 + -2.828427124746190e+01, 0.000000000000000e+00 } ); // itest=212: ixxxxx#7 nsp=1 mass=400 + expwfs.push_back( { // --------- + -5.000000000000000e+02, 3.000000000000000e+02, // itest=213: ixxxxx#7 nsp=1 mass=-400 + -0.000000000000000e+00, -0.000000000000000e+00, // itest=213: ixxxxx#7 nsp=1 mass=-400 + -0.000000000000000e+00, -0.000000000000000e+00, // itest=213: ixxxxx#7 nsp=1 mass=-400 + 1.414213562373095e+01, -0.000000000000000e+00, // itest=213: ixxxxx#7 nsp=1 mass=-400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=213: ixxxxx#7 nsp=1 mass=-400 + -2.828427124746190e+01, 0.000000000000000e+00 } ); // itest=213: ixxxxx#7 nsp=1 mass=-400 + expwfs.push_back( { // --------- + 5.000000000000000e+02, -3.000000000000000e+02, // itest=214: vxxxxx#7 nsp=1 mass=400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=214: vxxxxx#7 nsp=1 mass=400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=214: vxxxxx#7 nsp=1 mass=400 + -7.071067811865476e-01, 0.000000000000000e+00, // itest=214: vxxxxx#7 nsp=1 mass=400 + 0.000000000000000e+00, -7.071067811865476e-01, // itest=214: vxxxxx#7 nsp=1 mass=400 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=214: vxxxxx#7 nsp=1 mass=400 + expwfs.push_back( { // --------- + 5.000000000000000e+02, -3.000000000000000e+02, // itest=215: vxxxxx#7 nsp=1 mass=-400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=215: vxxxxx#7 nsp=1 mass=-400 + -0.000000000000000e+00, 0.000000000000000e+00, // itest=215: vxxxxx#7 nsp=1 mass=-400 + -7.071067811865476e-01, 0.000000000000000e+00, // itest=215: vxxxxx#7 nsp=1 mass=-400 + 0.000000000000000e+00, -7.071067811865476e-01, // itest=215: vxxxxx#7 nsp=1 mass=-400 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=215: vxxxxx#7 nsp=1 mass=-400 + expwfs.push_back( { // --------- + 5.000000000000000e+02, -3.000000000000000e+02, // itest=216: sxxxxx#7 nsp=1 mass=400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=216: sxxxxx#7 nsp=1 mass=400 + 1.000000000000000e+00, 0.000000000000000e+00, // itest=216: sxxxxx#7 nsp=1 mass=400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=216: sxxxxx#7 nsp=1 mass=400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=216: sxxxxx#7 nsp=1 mass=400 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=216: sxxxxx#7 nsp=1 mass=400 + expwfs.push_back( { // --------- + 5.000000000000000e+02, -3.000000000000000e+02, // itest=217: sxxxxx#7 nsp=1 mass=-400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=217: sxxxxx#7 nsp=1 mass=-400 + 1.000000000000000e+00, 0.000000000000000e+00, // itest=217: sxxxxx#7 nsp=1 mass=-400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=217: sxxxxx#7 nsp=1 mass=-400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=217: sxxxxx#7 nsp=1 
mass=-400 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=217: sxxxxx#7 nsp=1 mass=-400 + expwfs.push_back( { // --------- + 5.000000000000000e+02, -3.000000000000000e+02, // itest=218: oxxxxx#7 nsp=1 mass=400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=218: oxxxxx#7 nsp=1 mass=400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=218: oxxxxx#7 nsp=1 mass=400 + -2.828427124746190e+01, 0.000000000000000e+00, // itest=218: oxxxxx#7 nsp=1 mass=400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=218: oxxxxx#7 nsp=1 mass=400 + -1.414213562373095e+01, 0.000000000000000e+00 } ); // itest=218: oxxxxx#7 nsp=1 mass=400 + expwfs.push_back( { // --------- + 5.000000000000000e+02, -3.000000000000000e+02, // itest=219: oxxxxx#7 nsp=1 mass=-400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=219: oxxxxx#7 nsp=1 mass=-400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=219: oxxxxx#7 nsp=1 mass=-400 + -2.828427124746190e+01, 0.000000000000000e+00, // itest=219: oxxxxx#7 nsp=1 mass=-400 + -0.000000000000000e+00, -0.000000000000000e+00, // itest=219: oxxxxx#7 nsp=1 mass=-400 + 1.414213562373095e+01, -0.000000000000000e+00 } ); // itest=219: oxxxxx#7 nsp=1 mass=-400 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -0.000000000000000e+00, // itest=220: ixxxxx#8 nsp=1 mass=400 + -1.800000000000000e+02, -2.400000000000000e+02, // itest=220: ixxxxx#8 nsp=1 mass=400 + 1.000000000000000e+01, 0.000000000000000e+00, // itest=220: ixxxxx#8 nsp=1 mass=400 + 5.999999999999999e+00, 7.999999999999999e+00, // itest=220: ixxxxx#8 nsp=1 mass=400 + 2.000000000000000e+01, 0.000000000000000e+00, // itest=220: ixxxxx#8 nsp=1 mass=400 + 1.200000000000000e+01, 1.600000000000000e+01 } ); // itest=220: ixxxxx#8 nsp=1 mass=400 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -0.000000000000000e+00, // itest=221: ixxxxx#8 nsp=1 mass=-400 + -1.800000000000000e+02, -2.400000000000000e+02, // itest=221: ixxxxx#8 nsp=1 mass=-400 + -1.000000000000000e+01, -0.000000000000000e+00, // itest=221: ixxxxx#8 nsp=1 mass=-400 + -5.999999999999999e+00, -7.999999999999999e+00, // itest=221: ixxxxx#8 nsp=1 mass=-400 + 2.000000000000000e+01, 0.000000000000000e+00, // itest=221: ixxxxx#8 nsp=1 mass=-400 + 1.200000000000000e+01, 1.600000000000000e+01 } ); // itest=221: ixxxxx#8 nsp=1 mass=-400 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 0.000000000000000e+00, // itest=222: vxxxxx#8 nsp=1 mass=400 + 1.800000000000000e+02, 2.400000000000000e+02, // itest=222: vxxxxx#8 nsp=1 mass=400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=222: vxxxxx#8 nsp=1 mass=400 + 0.000000000000000e+00, -5.656854249492381e-01, // itest=222: vxxxxx#8 nsp=1 mass=400 + 0.000000000000000e+00, 4.242640687119285e-01, // itest=222: vxxxxx#8 nsp=1 mass=400 + 7.071067811865476e-01, 0.000000000000000e+00 } ); // itest=222: vxxxxx#8 nsp=1 mass=400 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 0.000000000000000e+00, // itest=223: vxxxxx#8 nsp=1 mass=-400 + 1.800000000000000e+02, 2.400000000000000e+02, // itest=223: vxxxxx#8 nsp=1 mass=-400 + -0.000000000000000e+00, 0.000000000000000e+00, // itest=223: vxxxxx#8 nsp=1 mass=-400 + -0.000000000000000e+00, -5.656854249492381e-01, // itest=223: vxxxxx#8 nsp=1 mass=-400 + -0.000000000000000e+00, 4.242640687119285e-01, // itest=223: vxxxxx#8 nsp=1 mass=-400 + 7.071067811865476e-01, 0.000000000000000e+00 } ); // itest=223: vxxxxx#8 nsp=1 mass=-400 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 
0.000000000000000e+00, // itest=224: sxxxxx#8 nsp=1 mass=400 + 1.800000000000000e+02, 2.400000000000000e+02, // itest=224: sxxxxx#8 nsp=1 mass=400 + 1.000000000000000e+00, 0.000000000000000e+00, // itest=224: sxxxxx#8 nsp=1 mass=400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=224: sxxxxx#8 nsp=1 mass=400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=224: sxxxxx#8 nsp=1 mass=400 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=224: sxxxxx#8 nsp=1 mass=400 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 0.000000000000000e+00, // itest=225: sxxxxx#8 nsp=1 mass=-400 + 1.800000000000000e+02, 2.400000000000000e+02, // itest=225: sxxxxx#8 nsp=1 mass=-400 + 1.000000000000000e+00, 0.000000000000000e+00, // itest=225: sxxxxx#8 nsp=1 mass=-400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=225: sxxxxx#8 nsp=1 mass=-400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=225: sxxxxx#8 nsp=1 mass=-400 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=225: sxxxxx#8 nsp=1 mass=-400 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 0.000000000000000e+00, // itest=226: oxxxxx#8 nsp=1 mass=400 + 1.800000000000000e+02, 2.400000000000000e+02, // itest=226: oxxxxx#8 nsp=1 mass=400 + 2.000000000000000e+01, 0.000000000000000e+00, // itest=226: oxxxxx#8 nsp=1 mass=400 + 1.200000000000000e+01, -1.600000000000000e+01, // itest=226: oxxxxx#8 nsp=1 mass=400 + 1.000000000000000e+01, 0.000000000000000e+00, // itest=226: oxxxxx#8 nsp=1 mass=400 + 5.999999999999999e+00, -7.999999999999999e+00 } ); // itest=226: oxxxxx#8 nsp=1 mass=400 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 0.000000000000000e+00, // itest=227: oxxxxx#8 nsp=1 mass=-400 + 1.800000000000000e+02, 2.400000000000000e+02, // itest=227: oxxxxx#8 nsp=1 mass=-400 + 2.000000000000000e+01, 0.000000000000000e+00, // itest=227: oxxxxx#8 nsp=1 mass=-400 + 1.200000000000000e+01, -1.600000000000000e+01, // itest=227: oxxxxx#8 nsp=1 mass=-400 + -1.000000000000000e+01, -0.000000000000000e+00, // itest=227: oxxxxx#8 nsp=1 mass=-400 + -5.999999999999999e+00, 7.999999999999999e+00 } ); // itest=227: oxxxxx#8 nsp=1 mass=-400 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -0.000000000000000e+00, // itest=228: ixxxxx#9 nsp=1 mass=400 + 2.400000000000000e+02, 1.800000000000000e+02, // itest=228: ixxxxx#9 nsp=1 mass=400 + 1.000000000000000e+01, 0.000000000000000e+00, // itest=228: ixxxxx#9 nsp=1 mass=400 + -7.999999999999999e+00, -5.999999999999999e+00, // itest=228: ixxxxx#9 nsp=1 mass=400 + 2.000000000000000e+01, 0.000000000000000e+00, // itest=228: ixxxxx#9 nsp=1 mass=400 + -1.600000000000000e+01, -1.200000000000000e+01 } ); // itest=228: ixxxxx#9 nsp=1 mass=400 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -0.000000000000000e+00, // itest=229: ixxxxx#9 nsp=1 mass=-400 + 2.400000000000000e+02, 1.800000000000000e+02, // itest=229: ixxxxx#9 nsp=1 mass=-400 + -1.000000000000000e+01, -0.000000000000000e+00, // itest=229: ixxxxx#9 nsp=1 mass=-400 + 7.999999999999999e+00, 5.999999999999999e+00, // itest=229: ixxxxx#9 nsp=1 mass=-400 + 2.000000000000000e+01, 0.000000000000000e+00, // itest=229: ixxxxx#9 nsp=1 mass=-400 + -1.600000000000000e+01, -1.200000000000000e+01 } ); // itest=229: ixxxxx#9 nsp=1 mass=-400 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 0.000000000000000e+00, // itest=230: vxxxxx#9 nsp=1 mass=400 + -2.400000000000000e+02, -1.800000000000000e+02, // itest=230: vxxxxx#9 nsp=1 mass=400 + 0.000000000000000e+00, 
0.000000000000000e+00, // itest=230: vxxxxx#9 nsp=1 mass=400 + 0.000000000000000e+00, 4.242640687119285e-01, // itest=230: vxxxxx#9 nsp=1 mass=400 + 0.000000000000000e+00, -5.656854249492381e-01, // itest=230: vxxxxx#9 nsp=1 mass=400 + 7.071067811865476e-01, 0.000000000000000e+00 } ); // itest=230: vxxxxx#9 nsp=1 mass=400 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 0.000000000000000e+00, // itest=231: vxxxxx#9 nsp=1 mass=-400 + -2.400000000000000e+02, -1.800000000000000e+02, // itest=231: vxxxxx#9 nsp=1 mass=-400 + -0.000000000000000e+00, 0.000000000000000e+00, // itest=231: vxxxxx#9 nsp=1 mass=-400 + 0.000000000000000e+00, 4.242640687119285e-01, // itest=231: vxxxxx#9 nsp=1 mass=-400 + 0.000000000000000e+00, -5.656854249492381e-01, // itest=231: vxxxxx#9 nsp=1 mass=-400 + 7.071067811865476e-01, 0.000000000000000e+00 } ); // itest=231: vxxxxx#9 nsp=1 mass=-400 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 0.000000000000000e+00, // itest=232: sxxxxx#9 nsp=1 mass=400 + -2.400000000000000e+02, -1.800000000000000e+02, // itest=232: sxxxxx#9 nsp=1 mass=400 + 1.000000000000000e+00, 0.000000000000000e+00, // itest=232: sxxxxx#9 nsp=1 mass=400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=232: sxxxxx#9 nsp=1 mass=400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=232: sxxxxx#9 nsp=1 mass=400 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=232: sxxxxx#9 nsp=1 mass=400 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 0.000000000000000e+00, // itest=233: sxxxxx#9 nsp=1 mass=-400 + -2.400000000000000e+02, -1.800000000000000e+02, // itest=233: sxxxxx#9 nsp=1 mass=-400 + 1.000000000000000e+00, 0.000000000000000e+00, // itest=233: sxxxxx#9 nsp=1 mass=-400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=233: sxxxxx#9 nsp=1 mass=-400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=233: sxxxxx#9 nsp=1 mass=-400 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=233: sxxxxx#9 nsp=1 mass=-400 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 0.000000000000000e+00, // itest=234: oxxxxx#9 nsp=1 mass=400 + -2.400000000000000e+02, -1.800000000000000e+02, // itest=234: oxxxxx#9 nsp=1 mass=400 + 2.000000000000000e+01, 0.000000000000000e+00, // itest=234: oxxxxx#9 nsp=1 mass=400 + -1.600000000000000e+01, 1.200000000000000e+01, // itest=234: oxxxxx#9 nsp=1 mass=400 + 1.000000000000000e+01, 0.000000000000000e+00, // itest=234: oxxxxx#9 nsp=1 mass=400 + -7.999999999999999e+00, 5.999999999999999e+00 } ); // itest=234: oxxxxx#9 nsp=1 mass=400 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 0.000000000000000e+00, // itest=235: oxxxxx#9 nsp=1 mass=-400 + -2.400000000000000e+02, -1.800000000000000e+02, // itest=235: oxxxxx#9 nsp=1 mass=-400 + 2.000000000000000e+01, 0.000000000000000e+00, // itest=235: oxxxxx#9 nsp=1 mass=-400 + -1.600000000000000e+01, 1.200000000000000e+01, // itest=235: oxxxxx#9 nsp=1 mass=-400 + -1.000000000000000e+01, -0.000000000000000e+00, // itest=235: oxxxxx#9 nsp=1 mass=-400 + 7.999999999999999e+00, -5.999999999999999e+00 } ); // itest=235: oxxxxx#9 nsp=1 mass=-400 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -1.440000000000000e+02, // itest=236: ixxxxx#10 nsp=1 mass=400 + -1.800000000000000e+02, -1.920000000000000e+02, // itest=236: ixxxxx#10 nsp=1 mass=400 + 1.216552506059644e+01, 0.000000000000000e+00, // itest=236: ixxxxx#10 nsp=1 mass=400 + 4.931969619160719e+00, 5.260767593771432e+00, // itest=236: ixxxxx#10 nsp=1 mass=400 + 2.433105012119288e+01, 
0.000000000000000e+00, // itest=236: ixxxxx#10 nsp=1 mass=400 + 9.863939238321439e+00, 1.052153518754287e+01 } ); // itest=236: ixxxxx#10 nsp=1 mass=400 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -1.440000000000000e+02, // itest=237: ixxxxx#10 nsp=1 mass=-400 + -1.800000000000000e+02, -1.920000000000000e+02, // itest=237: ixxxxx#10 nsp=1 mass=-400 + -1.216552506059644e+01, -0.000000000000000e+00, // itest=237: ixxxxx#10 nsp=1 mass=-400 + -4.931969619160719e+00, -5.260767593771432e+00, // itest=237: ixxxxx#10 nsp=1 mass=-400 + 2.433105012119288e+01, 0.000000000000000e+00, // itest=237: ixxxxx#10 nsp=1 mass=-400 + 9.863939238321439e+00, 1.052153518754287e+01 } ); // itest=237: ixxxxx#10 nsp=1 mass=-400 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 1.440000000000000e+02, // itest=238: vxxxxx#10 nsp=1 mass=400 + 1.800000000000000e+02, 1.920000000000000e+02, // itest=238: vxxxxx#10 nsp=1 mass=400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=238: vxxxxx#10 nsp=1 mass=400 + -2.321373168788980e-01, -5.158607041753289e-01, // itest=238: vxxxxx#10 nsp=1 mass=400 + -2.476131380041579e-01, 4.836194101643708e-01, // itest=238: vxxxxx#10 nsp=1 mass=400 + 6.203224967708328e-01, 0.000000000000000e+00 } ); // itest=238: vxxxxx#10 nsp=1 mass=400 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 1.440000000000000e+02, // itest=239: vxxxxx#10 nsp=1 mass=-400 + 1.800000000000000e+02, 1.920000000000000e+02, // itest=239: vxxxxx#10 nsp=1 mass=-400 + -0.000000000000000e+00, 0.000000000000000e+00, // itest=239: vxxxxx#10 nsp=1 mass=-400 + -2.321373168788980e-01, -5.158607041753289e-01, // itest=239: vxxxxx#10 nsp=1 mass=-400 + -2.476131380041579e-01, 4.836194101643708e-01, // itest=239: vxxxxx#10 nsp=1 mass=-400 + 6.203224967708328e-01, 0.000000000000000e+00 } ); // itest=239: vxxxxx#10 nsp=1 mass=-400 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 1.440000000000000e+02, // itest=240: sxxxxx#10 nsp=1 mass=400 + 1.800000000000000e+02, 1.920000000000000e+02, // itest=240: sxxxxx#10 nsp=1 mass=400 + 1.000000000000000e+00, 0.000000000000000e+00, // itest=240: sxxxxx#10 nsp=1 mass=400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=240: sxxxxx#10 nsp=1 mass=400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=240: sxxxxx#10 nsp=1 mass=400 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=240: sxxxxx#10 nsp=1 mass=400 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 1.440000000000000e+02, // itest=241: sxxxxx#10 nsp=1 mass=-400 + 1.800000000000000e+02, 1.920000000000000e+02, // itest=241: sxxxxx#10 nsp=1 mass=-400 + 1.000000000000000e+00, 0.000000000000000e+00, // itest=241: sxxxxx#10 nsp=1 mass=-400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=241: sxxxxx#10 nsp=1 mass=-400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=241: sxxxxx#10 nsp=1 mass=-400 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=241: sxxxxx#10 nsp=1 mass=-400 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 1.440000000000000e+02, // itest=242: oxxxxx#10 nsp=1 mass=400 + 1.800000000000000e+02, 1.920000000000000e+02, // itest=242: oxxxxx#10 nsp=1 mass=400 + 2.433105012119288e+01, 0.000000000000000e+00, // itest=242: oxxxxx#10 nsp=1 mass=400 + 9.863939238321439e+00, -1.052153518754287e+01, // itest=242: oxxxxx#10 nsp=1 mass=400 + 1.216552506059644e+01, 0.000000000000000e+00, // itest=242: oxxxxx#10 nsp=1 mass=400 + 4.931969619160719e+00, -5.260767593771432e+00 } ); // itest=242: oxxxxx#10 nsp=1 mass=400 
+ expwfs.push_back( { // --------- + 5.000000000000000e+02, 1.440000000000000e+02, // itest=243: oxxxxx#10 nsp=1 mass=-400 + 1.800000000000000e+02, 1.920000000000000e+02, // itest=243: oxxxxx#10 nsp=1 mass=-400 + 2.433105012119288e+01, 0.000000000000000e+00, // itest=243: oxxxxx#10 nsp=1 mass=-400 + 9.863939238321439e+00, -1.052153518754287e+01, // itest=243: oxxxxx#10 nsp=1 mass=-400 + -1.216552506059644e+01, -0.000000000000000e+00, // itest=243: oxxxxx#10 nsp=1 mass=-400 + -4.931969619160719e+00, 5.260767593771432e+00 } ); // itest=243: oxxxxx#10 nsp=1 mass=-400 + expwfs.push_back( { // --------- + -5.000000000000000e+02, 1.440000000000000e+02, // itest=244: ixxxxx#11 nsp=1 mass=400 + -1.800000000000000e+02, -1.920000000000000e+02, // itest=244: ixxxxx#11 nsp=1 mass=400 + 7.211102550927978e+00, 0.000000000000000e+00, // itest=244: ixxxxx#11 nsp=1 mass=400 + 8.320502943378436e+00, 8.875203139603666e+00, // itest=244: ixxxxx#11 nsp=1 mass=400 + 1.442220510185596e+01, 0.000000000000000e+00, // itest=244: ixxxxx#11 nsp=1 mass=400 + 1.664100588675688e+01, 1.775040627920733e+01 } ); // itest=244: ixxxxx#11 nsp=1 mass=400 + expwfs.push_back( { // --------- + -5.000000000000000e+02, 1.440000000000000e+02, // itest=245: ixxxxx#11 nsp=1 mass=-400 + -1.800000000000000e+02, -1.920000000000000e+02, // itest=245: ixxxxx#11 nsp=1 mass=-400 + -7.211102550927978e+00, -0.000000000000000e+00, // itest=245: ixxxxx#11 nsp=1 mass=-400 + -8.320502943378436e+00, -8.875203139603666e+00, // itest=245: ixxxxx#11 nsp=1 mass=-400 + 1.442220510185596e+01, 0.000000000000000e+00, // itest=245: ixxxxx#11 nsp=1 mass=-400 + 1.664100588675688e+01, 1.775040627920733e+01 } ); // itest=245: ixxxxx#11 nsp=1 mass=-400 + expwfs.push_back( { // --------- + 5.000000000000000e+02, -1.440000000000000e+02, // itest=246: vxxxxx#11 nsp=1 mass=400 + 1.800000000000000e+02, 1.920000000000000e+02, // itest=246: vxxxxx#11 nsp=1 mass=400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=246: vxxxxx#11 nsp=1 mass=400 + 2.321373168788980e-01, -5.158607041753289e-01, // itest=246: vxxxxx#11 nsp=1 mass=400 + 2.476131380041579e-01, 4.836194101643708e-01, // itest=246: vxxxxx#11 nsp=1 mass=400 + 6.203224967708328e-01, 0.000000000000000e+00 } ); // itest=246: vxxxxx#11 nsp=1 mass=400 + expwfs.push_back( { // --------- + 5.000000000000000e+02, -1.440000000000000e+02, // itest=247: vxxxxx#11 nsp=1 mass=-400 + 1.800000000000000e+02, 1.920000000000000e+02, // itest=247: vxxxxx#11 nsp=1 mass=-400 + -0.000000000000000e+00, 0.000000000000000e+00, // itest=247: vxxxxx#11 nsp=1 mass=-400 + 2.321373168788980e-01, -5.158607041753289e-01, // itest=247: vxxxxx#11 nsp=1 mass=-400 + 2.476131380041579e-01, 4.836194101643708e-01, // itest=247: vxxxxx#11 nsp=1 mass=-400 + 6.203224967708328e-01, 0.000000000000000e+00 } ); // itest=247: vxxxxx#11 nsp=1 mass=-400 + expwfs.push_back( { // --------- + 5.000000000000000e+02, -1.440000000000000e+02, // itest=248: sxxxxx#11 nsp=1 mass=400 + 1.800000000000000e+02, 1.920000000000000e+02, // itest=248: sxxxxx#11 nsp=1 mass=400 + 1.000000000000000e+00, 0.000000000000000e+00, // itest=248: sxxxxx#11 nsp=1 mass=400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=248: sxxxxx#11 nsp=1 mass=400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=248: sxxxxx#11 nsp=1 mass=400 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=248: sxxxxx#11 nsp=1 mass=400 + expwfs.push_back( { // --------- + 5.000000000000000e+02, -1.440000000000000e+02, // itest=249: sxxxxx#11 nsp=1 mass=-400 + 1.800000000000000e+02, 
1.920000000000000e+02, // itest=249: sxxxxx#11 nsp=1 mass=-400 + 1.000000000000000e+00, 0.000000000000000e+00, // itest=249: sxxxxx#11 nsp=1 mass=-400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=249: sxxxxx#11 nsp=1 mass=-400 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=249: sxxxxx#11 nsp=1 mass=-400 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=249: sxxxxx#11 nsp=1 mass=-400 + expwfs.push_back( { // --------- + 5.000000000000000e+02, -1.440000000000000e+02, // itest=250: oxxxxx#11 nsp=1 mass=400 + 1.800000000000000e+02, 1.920000000000000e+02, // itest=250: oxxxxx#11 nsp=1 mass=400 + 1.442220510185596e+01, 0.000000000000000e+00, // itest=250: oxxxxx#11 nsp=1 mass=400 + 1.664100588675688e+01, -1.775040627920733e+01, // itest=250: oxxxxx#11 nsp=1 mass=400 + 7.211102550927978e+00, 0.000000000000000e+00, // itest=250: oxxxxx#11 nsp=1 mass=400 + 8.320502943378436e+00, -8.875203139603666e+00 } ); // itest=250: oxxxxx#11 nsp=1 mass=400 + expwfs.push_back( { // --------- + 5.000000000000000e+02, -1.440000000000000e+02, // itest=251: oxxxxx#11 nsp=1 mass=-400 + 1.800000000000000e+02, 1.920000000000000e+02, // itest=251: oxxxxx#11 nsp=1 mass=-400 + 1.442220510185596e+01, 0.000000000000000e+00, // itest=251: oxxxxx#11 nsp=1 mass=-400 + 1.664100588675688e+01, -1.775040627920733e+01, // itest=251: oxxxxx#11 nsp=1 mass=-400 + -7.211102550927978e+00, -0.000000000000000e+00, // itest=251: oxxxxx#11 nsp=1 mass=-400 + -8.320502943378436e+00, 8.875203139603666e+00 } ); // itest=251: oxxxxx#11 nsp=1 mass=-400 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -5.000000000000000e+02, // itest=252: ixxxxx#12 nsp=1 mass=0 + -0.000000000000000e+00, -0.000000000000000e+00, // itest=252: ixxxxx#12 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=252: ixxxxx#12 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=252: ixxxxx#12 nsp=1 mass=0 + 3.162277660168379e+01, 0.000000000000000e+00, // itest=252: ixxxxx#12 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=252: ixxxxx#12 nsp=1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -5.000000000000000e+02, // itest=253: ixxxxx#12 nsp=1 mass=0 + -0.000000000000000e+00, -0.000000000000000e+00, // itest=253: ixxxxx#12 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=253: ixxxxx#12 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=253: ixxxxx#12 nsp=1 mass=0 + 3.162277660168379e+01, 0.000000000000000e+00, // itest=253: ixxxxx#12 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=253: ixxxxx#12 nsp=1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -5.000000000000000e+02, // itest=254: ipzxxx#12 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=254: ipzxxx#12 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=254: ipzxxx#12 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=254: ipzxxx#12 nsp=1 mass=0 + 3.162277660168379e+01, 0.000000000000000e+00, // itest=254: ipzxxx#12 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=254: ipzxxx#12 nsp=1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 5.000000000000000e+02, // itest=255: vxxxxx#12 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=255: vxxxxx#12 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=255: vxxxxx#12 nsp=1 mass=0 + -7.071067811865476e-01, 0.000000000000000e+00, // 
itest=255: vxxxxx#12 nsp=1 mass=0 + 0.000000000000000e+00, 7.071067811865476e-01, // itest=255: vxxxxx#12 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=255: vxxxxx#12 nsp=1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 5.000000000000000e+02, // itest=256: vxxxxx#12 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=256: vxxxxx#12 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=256: vxxxxx#12 nsp=1 mass=0 + -7.071067811865476e-01, 0.000000000000000e+00, // itest=256: vxxxxx#12 nsp=1 mass=0 + 0.000000000000000e+00, 7.071067811865476e-01, // itest=256: vxxxxx#12 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=256: vxxxxx#12 nsp=1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 5.000000000000000e+02, // itest=257: sxxxxx#12 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=257: sxxxxx#12 nsp=1 mass=0 + 1.000000000000000e+00, 0.000000000000000e+00, // itest=257: sxxxxx#12 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=257: sxxxxx#12 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=257: sxxxxx#12 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=257: sxxxxx#12 nsp=1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 5.000000000000000e+02, // itest=258: sxxxxx#12 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=258: sxxxxx#12 nsp=1 mass=0 + 1.000000000000000e+00, 0.000000000000000e+00, // itest=258: sxxxxx#12 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=258: sxxxxx#12 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=258: sxxxxx#12 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=258: sxxxxx#12 nsp=1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 5.000000000000000e+02, // itest=259: oxxxxx#12 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=259: oxxxxx#12 nsp=1 mass=0 + 3.162277660168379e+01, 0.000000000000000e+00, // itest=259: oxxxxx#12 nsp=1 mass=0 + 0.000000000000000e+00, -0.000000000000000e+00, // itest=259: oxxxxx#12 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=259: oxxxxx#12 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=259: oxxxxx#12 nsp=1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 5.000000000000000e+02, // itest=260: oxxxxx#12 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=260: oxxxxx#12 nsp=1 mass=0 + 3.162277660168379e+01, 0.000000000000000e+00, // itest=260: oxxxxx#12 nsp=1 mass=0 + 0.000000000000000e+00, -0.000000000000000e+00, // itest=260: oxxxxx#12 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=260: oxxxxx#12 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=260: oxxxxx#12 nsp=1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 5.000000000000000e+02, // itest=261: opzxxx#12 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=261: opzxxx#12 nsp=1 mass=0 + 3.162277660168379e+01, 0.000000000000000e+00, // itest=261: opzxxx#12 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=261: opzxxx#12 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=261: opzxxx#12 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=261: opzxxx#12 nsp=1 mass=0 + expwfs.push_back( { // --------- + 
-5.000000000000000e+02, 5.000000000000000e+02, // itest=262: ixxxxx#13 nsp=1 mass=0 + -0.000000000000000e+00, -0.000000000000000e+00, // itest=262: ixxxxx#13 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=262: ixxxxx#13 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=262: ixxxxx#13 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=262: ixxxxx#13 nsp=1 mass=0 + -3.162277660168379e+01, 0.000000000000000e+00 } ); // itest=262: ixxxxx#13 nsp=1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, 5.000000000000000e+02, // itest=263: ixxxxx#13 nsp=1 mass=0 + -0.000000000000000e+00, -0.000000000000000e+00, // itest=263: ixxxxx#13 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=263: ixxxxx#13 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=263: ixxxxx#13 nsp=1 mass=0 + -0.000000000000000e+00, 0.000000000000000e+00, // itest=263: ixxxxx#13 nsp=1 mass=0 + -3.162277660168379e+01, -0.000000000000000e+00 } ); // itest=263: ixxxxx#13 nsp=1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, 5.000000000000000e+02, // itest=264: imzxxx#13 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=264: imzxxx#13 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=264: imzxxx#13 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=264: imzxxx#13 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=264: imzxxx#13 nsp=1 mass=0 + -3.162277660168379e+01, 0.000000000000000e+00 } ); // itest=264: imzxxx#13 nsp=1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, -5.000000000000000e+02, // itest=265: vxxxxx#13 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=265: vxxxxx#13 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=265: vxxxxx#13 nsp=1 mass=0 + -7.071067811865476e-01, 0.000000000000000e+00, // itest=265: vxxxxx#13 nsp=1 mass=0 + 0.000000000000000e+00, -7.071067811865476e-01, // itest=265: vxxxxx#13 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=265: vxxxxx#13 nsp=1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, -5.000000000000000e+02, // itest=266: vxxxxx#13 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=266: vxxxxx#13 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=266: vxxxxx#13 nsp=1 mass=0 + -7.071067811865476e-01, 0.000000000000000e+00, // itest=266: vxxxxx#13 nsp=1 mass=0 + 0.000000000000000e+00, -7.071067811865476e-01, // itest=266: vxxxxx#13 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=266: vxxxxx#13 nsp=1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, -5.000000000000000e+02, // itest=267: sxxxxx#13 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=267: sxxxxx#13 nsp=1 mass=0 + 1.000000000000000e+00, 0.000000000000000e+00, // itest=267: sxxxxx#13 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=267: sxxxxx#13 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=267: sxxxxx#13 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=267: sxxxxx#13 nsp=1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, -5.000000000000000e+02, // itest=268: sxxxxx#13 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=268: sxxxxx#13 nsp=1 mass=0 + 1.000000000000000e+00, 0.000000000000000e+00, // itest=268: sxxxxx#13 
nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=268: sxxxxx#13 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=268: sxxxxx#13 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=268: sxxxxx#13 nsp=1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, -5.000000000000000e+02, // itest=269: oxxxxx#13 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=269: oxxxxx#13 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=269: oxxxxx#13 nsp=1 mass=0 + -3.162277660168379e+01, 0.000000000000000e+00, // itest=269: oxxxxx#13 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=269: oxxxxx#13 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=269: oxxxxx#13 nsp=1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, -5.000000000000000e+02, // itest=270: oxxxxx#13 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=270: oxxxxx#13 nsp=1 mass=0 + -0.000000000000000e+00, 0.000000000000000e+00, // itest=270: oxxxxx#13 nsp=1 mass=0 + -3.162277660168379e+01, -0.000000000000000e+00, // itest=270: oxxxxx#13 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=270: oxxxxx#13 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=270: oxxxxx#13 nsp=1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, -5.000000000000000e+02, // itest=271: omzxxx#13 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=271: omzxxx#13 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=271: omzxxx#13 nsp=1 mass=0 + -3.162277660168379e+01, 0.000000000000000e+00, // itest=271: omzxxx#13 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=271: omzxxx#13 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=271: omzxxx#13 nsp=1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -0.000000000000000e+00, // itest=272: ixxxxx#14 nsp=1 mass=0 + -3.000000000000000e+02, -4.000000000000000e+02, // itest=272: ixxxxx#14 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=272: ixxxxx#14 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=272: ixxxxx#14 nsp=1 mass=0 + 2.236067977499790e+01, 0.000000000000000e+00, // itest=272: ixxxxx#14 nsp=1 mass=0 + 1.341640786499874e+01, 1.788854381999832e+01 } ); // itest=272: ixxxxx#14 nsp=1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -0.000000000000000e+00, // itest=273: ixxxxx#14 nsp=1 mass=0 + -3.000000000000000e+02, -4.000000000000000e+02, // itest=273: ixxxxx#14 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=273: ixxxxx#14 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=273: ixxxxx#14 nsp=1 mass=0 + 2.236067977499790e+01, 0.000000000000000e+00, // itest=273: ixxxxx#14 nsp=1 mass=0 + 1.341640786499874e+01, 1.788854381999832e+01 } ); // itest=273: ixxxxx#14 nsp=1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -0.000000000000000e+00, // itest=274: ixzxxx#14 nsp=1 mass=0 + -3.000000000000000e+02, -4.000000000000000e+02, // itest=274: ixzxxx#14 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=274: ixzxxx#14 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=274: ixzxxx#14 nsp=1 mass=0 + 2.236067977499790e+01, 0.000000000000000e+00, // itest=274: ixzxxx#14 nsp=1 mass=0 + 1.341640786499874e+01, 1.788854381999832e+01 } ); // 
itest=274: ixzxxx#14 nsp=1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 0.000000000000000e+00, // itest=275: vxxxxx#14 nsp=1 mass=0 + 3.000000000000000e+02, 4.000000000000000e+02, // itest=275: vxxxxx#14 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=275: vxxxxx#14 nsp=1 mass=0 + -0.000000000000000e+00, -5.656854249492381e-01, // itest=275: vxxxxx#14 nsp=1 mass=0 + -0.000000000000000e+00, 4.242640687119285e-01, // itest=275: vxxxxx#14 nsp=1 mass=0 + 7.071067811865476e-01, 0.000000000000000e+00 } ); // itest=275: vxxxxx#14 nsp=1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 0.000000000000000e+00, // itest=276: vxxxxx#14 nsp=1 mass=0 + 3.000000000000000e+02, 4.000000000000000e+02, // itest=276: vxxxxx#14 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=276: vxxxxx#14 nsp=1 mass=0 + -0.000000000000000e+00, -5.656854249492381e-01, // itest=276: vxxxxx#14 nsp=1 mass=0 + -0.000000000000000e+00, 4.242640687119285e-01, // itest=276: vxxxxx#14 nsp=1 mass=0 + 7.071067811865476e-01, 0.000000000000000e+00 } ); // itest=276: vxxxxx#14 nsp=1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 0.000000000000000e+00, // itest=277: sxxxxx#14 nsp=1 mass=0 + 3.000000000000000e+02, 4.000000000000000e+02, // itest=277: sxxxxx#14 nsp=1 mass=0 + 1.000000000000000e+00, 0.000000000000000e+00, // itest=277: sxxxxx#14 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=277: sxxxxx#14 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=277: sxxxxx#14 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=277: sxxxxx#14 nsp=1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 0.000000000000000e+00, // itest=278: sxxxxx#14 nsp=1 mass=0 + 3.000000000000000e+02, 4.000000000000000e+02, // itest=278: sxxxxx#14 nsp=1 mass=0 + 1.000000000000000e+00, 0.000000000000000e+00, // itest=278: sxxxxx#14 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=278: sxxxxx#14 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=278: sxxxxx#14 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=278: sxxxxx#14 nsp=1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 0.000000000000000e+00, // itest=279: oxxxxx#14 nsp=1 mass=0 + 3.000000000000000e+02, 4.000000000000000e+02, // itest=279: oxxxxx#14 nsp=1 mass=0 + 2.236067977499790e+01, 0.000000000000000e+00, // itest=279: oxxxxx#14 nsp=1 mass=0 + 1.341640786499874e+01, -1.788854381999832e+01, // itest=279: oxxxxx#14 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=279: oxxxxx#14 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=279: oxxxxx#14 nsp=1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 0.000000000000000e+00, // itest=280: oxxxxx#14 nsp=1 mass=0 + 3.000000000000000e+02, 4.000000000000000e+02, // itest=280: oxxxxx#14 nsp=1 mass=0 + 2.236067977499790e+01, 0.000000000000000e+00, // itest=280: oxxxxx#14 nsp=1 mass=0 + 1.341640786499874e+01, -1.788854381999832e+01, // itest=280: oxxxxx#14 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=280: oxxxxx#14 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=280: oxxxxx#14 nsp=1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 0.000000000000000e+00, // itest=281: oxzxxx#14 nsp=1 mass=0 + 3.000000000000000e+02, 4.000000000000000e+02, // itest=281: oxzxxx#14 nsp=1 mass=0 + 
2.236067977499790e+01, 0.000000000000000e+00, // itest=281: oxzxxx#14 nsp=1 mass=0 + 1.341640786499874e+01, -1.788854381999832e+01, // itest=281: oxzxxx#14 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=281: oxzxxx#14 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=281: oxzxxx#14 nsp=1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -4.000000000000000e+02, // itest=282: ixxxxx#15 nsp=1 mass=0 + -1.800000000000000e+02, -2.400000000000000e+02, // itest=282: ixxxxx#15 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=282: ixxxxx#15 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=282: ixxxxx#15 nsp=1 mass=0 + 3.000000000000000e+01, 0.000000000000000e+00, // itest=282: ixxxxx#15 nsp=1 mass=0 + 6.000000000000000e+00, 8.000000000000000e+00 } ); // itest=282: ixxxxx#15 nsp=1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -4.000000000000000e+02, // itest=283: ixxxxx#15 nsp=1 mass=0 + -1.800000000000000e+02, -2.400000000000000e+02, // itest=283: ixxxxx#15 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=283: ixxxxx#15 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=283: ixxxxx#15 nsp=1 mass=0 + 3.000000000000000e+01, 0.000000000000000e+00, // itest=283: ixxxxx#15 nsp=1 mass=0 + 6.000000000000000e+00, 8.000000000000000e+00 } ); // itest=283: ixxxxx#15 nsp=1 mass=0 + expwfs.push_back( { // --------- + -5.000000000000000e+02, -4.000000000000000e+02, // itest=284: ixzxxx#15 nsp=1 mass=0 + -1.800000000000000e+02, -2.400000000000000e+02, // itest=284: ixzxxx#15 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=284: ixzxxx#15 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=284: ixzxxx#15 nsp=1 mass=0 + 3.000000000000000e+01, 0.000000000000000e+00, // itest=284: ixzxxx#15 nsp=1 mass=0 + 6.000000000000000e+00, 8.000000000000000e+00 } ); // itest=284: ixzxxx#15 nsp=1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 4.000000000000000e+02, // itest=285: vxxxxx#15 nsp=1 mass=0 + 1.800000000000000e+02, 2.400000000000000e+02, // itest=285: vxxxxx#15 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=285: vxxxxx#15 nsp=1 mass=0 + -3.394112549695428e-01, -5.656854249492381e-01, // itest=285: vxxxxx#15 nsp=1 mass=0 + -4.525483399593904e-01, 4.242640687119285e-01, // itest=285: vxxxxx#15 nsp=1 mass=0 + 4.242640687119285e-01, 0.000000000000000e+00 } ); // itest=285: vxxxxx#15 nsp=1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 4.000000000000000e+02, // itest=286: vxxxxx#15 nsp=1 mass=0 + 1.800000000000000e+02, 2.400000000000000e+02, // itest=286: vxxxxx#15 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=286: vxxxxx#15 nsp=1 mass=0 + -3.394112549695428e-01, -5.656854249492381e-01, // itest=286: vxxxxx#15 nsp=1 mass=0 + -4.525483399593904e-01, 4.242640687119285e-01, // itest=286: vxxxxx#15 nsp=1 mass=0 + 4.242640687119285e-01, 0.000000000000000e+00 } ); // itest=286: vxxxxx#15 nsp=1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 4.000000000000000e+02, // itest=287: sxxxxx#15 nsp=1 mass=0 + 1.800000000000000e+02, 2.400000000000000e+02, // itest=287: sxxxxx#15 nsp=1 mass=0 + 1.000000000000000e+00, 0.000000000000000e+00, // itest=287: sxxxxx#15 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=287: sxxxxx#15 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=287: sxxxxx#15 
nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=287: sxxxxx#15 nsp=1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 4.000000000000000e+02, // itest=288: sxxxxx#15 nsp=1 mass=0 + 1.800000000000000e+02, 2.400000000000000e+02, // itest=288: sxxxxx#15 nsp=1 mass=0 + 1.000000000000000e+00, 0.000000000000000e+00, // itest=288: sxxxxx#15 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=288: sxxxxx#15 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=288: sxxxxx#15 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=288: sxxxxx#15 nsp=1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 4.000000000000000e+02, // itest=289: oxxxxx#15 nsp=1 mass=0 + 1.800000000000000e+02, 2.400000000000000e+02, // itest=289: oxxxxx#15 nsp=1 mass=0 + 3.000000000000000e+01, 0.000000000000000e+00, // itest=289: oxxxxx#15 nsp=1 mass=0 + 6.000000000000000e+00, -8.000000000000000e+00, // itest=289: oxxxxx#15 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=289: oxxxxx#15 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=289: oxxxxx#15 nsp=1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 4.000000000000000e+02, // itest=290: oxxxxx#15 nsp=1 mass=0 + 1.800000000000000e+02, 2.400000000000000e+02, // itest=290: oxxxxx#15 nsp=1 mass=0 + 3.000000000000000e+01, 0.000000000000000e+00, // itest=290: oxxxxx#15 nsp=1 mass=0 + 6.000000000000000e+00, -8.000000000000000e+00, // itest=290: oxxxxx#15 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=290: oxxxxx#15 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=290: oxxxxx#15 nsp=1 mass=0 + expwfs.push_back( { // --------- + 5.000000000000000e+02, 4.000000000000000e+02, // itest=291: oxzxxx#15 nsp=1 mass=0 + 1.800000000000000e+02, 2.400000000000000e+02, // itest=291: oxzxxx#15 nsp=1 mass=0 + 3.000000000000000e+01, 0.000000000000000e+00, // itest=291: oxzxxx#15 nsp=1 mass=0 + 6.000000000000000e+00, -8.000000000000000e+00, // itest=291: oxzxxx#15 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00, // itest=291: oxzxxx#15 nsp=1 mass=0 + 0.000000000000000e+00, 0.000000000000000e+00 } ); // itest=291: oxzxxx#15 nsp=1 mass=0 diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/timer.h b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/timer.h new file mode 100644 index 0000000000..14d7a4d892 --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/timer.h @@ -0,0 +1,67 @@ +#ifndef MGONGPUTIMER_H +#define MGONGPUTIMER_H 1 + +#include +#include + +namespace mgOnGpu +{ + + /* + high_resolution_clock + steady_clock + system_clock + + from https://www.modernescpp.com/index.php/the-three-clocks + and https://codereview.stackexchange.com/questions/196245/extremely-simple-timer-class-in-c + */ + + template + class Timer + { + public: + Timer() + : m_StartTime( T::now() ) {} + virtual ~Timer() {} + void Start(); + float GetDuration(); + void Info(); + private: + typedef typename T::time_point TTP; + TTP m_StartTime; + }; + + template + void + Timer::Start() + { + m_StartTime = T::now(); + } + + template + float + Timer::GetDuration() + { + std::chrono::duration duration = T::now() - m_StartTime; + return duration.count(); + } + + template + void + Timer::Info() + { + typedef typename T::period TPER; + typedef typename std::ratio_multiply MilliSec; + typedef typename std::ratio_multiply MicroSec; + std::cout << std::boolalpha << std::endl; + std::cout << 
"clock info: " << std::endl; + std::cout << " is steady: " << T::is_steady << std::endl; + std::cout << " precision: " << TPER::num << "/" << TPER::den << " second " << std::endl; + std::cout << std::fixed; + std::cout << " " << static_cast( MilliSec::num ) / MilliSec::den << " milliseconds " << std::endl; + std::cout << " " << static_cast( MicroSec::num ) / MicroSec::den << " microseconds " << std::endl; + std::cout << std::endl; + } + +} +#endif // MGONGPUTIMER_H diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/timermap.h b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/timermap.h new file mode 100644 index 0000000000..60d8c51021 --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/timermap.h @@ -0,0 +1,156 @@ +#ifndef MGONGPUTIMERMAP_H +#define MGONGPUTIMERMAP_H 1 + +#include +#include +#include +#include +#include + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wmissing-field-initializers" +#include "nvtx.h" +#pragma GCC diagnostic pop + +#include "timer.h" +#define TIMERTYPE std::chrono::high_resolution_clock + +namespace mgOnGpu +{ + class TimerMap + { + + public: + + TimerMap() + : m_timer(), m_active( "" ), m_partitionTimers(), m_partitionIds() {} + virtual ~TimerMap() {} + + // Start the timer for a specific partition (key must be a non-empty string) + // Stop the timer for the current partition if there is one active + float start( const std::string& key ) + { + assert( key != "" ); + // Close the previously active partition + float last = stop(); + // Switch to a new partition + m_timer.Start(); + m_active = key; + if( m_partitionTimers.find( key ) == m_partitionTimers.end() ) + { + m_partitionIds[key] = m_partitionTimers.size(); + m_partitionTimers[key] = 0; + } + // Open a new Cuda NVTX range + NVTX_PUSH( key.c_str(), m_partitionIds[key] ); + // Return last duration + return last; + } + + // Stop the timer for the current partition if there is one active + float stop() + { + // Close the previously active partition + float last = 0; + if( m_active != "" ) + { + last = m_timer.GetDuration(); + m_partitionTimers[m_active] += last; + } + m_active = ""; + // Close the current Cuda NVTX range + NVTX_POP(); + // Return last duration + return last; + } + + // Dump the overall results + void dump( std::ostream& ostr = std::cout, bool json = false ) + { + // Improve key formatting + const std::string totalKey = "TOTAL "; // "TOTAL (ANY)"? 
+ //const std::string totalBut2Key = "TOTAL (n-2)"; + const std::string total123Key = "TOTAL (123)"; + const std::string total23Key = "TOTAL (23)"; + const std::string total1Key = "TOTAL (1)"; + const std::string total2Key = "TOTAL (2)"; + const std::string total3Key = "TOTAL (3)"; + const std::string total3aKey = "TOTAL (3a)"; + size_t maxsize = 0; + for( auto ip: m_partitionTimers ) + maxsize = std::max( maxsize, ip.first.size() ); + maxsize = std::max( maxsize, totalKey.size() ); + // Compute the overall total + //size_t ipart = 0; + float total = 0; + //float totalBut2 = 0; + float total123 = 0; + float total23 = 0; + float total1 = 0; + float total2 = 0; + float total3 = 0; + float total3a = 0; + for( auto ip: m_partitionTimers ) + { + total += ip.second; + //if ( ipart != 0 && ipart+1 != m_partitionTimers.size() ) totalBut2 += ip.second; + if( ip.first[0] == '1' || ip.first[0] == '2' || ip.first[0] == '3' ) total123 += ip.second; + if( ip.first[0] == '2' || ip.first[0] == '3' ) total23 += ip.second; + if( ip.first[0] == '1' ) total1 += ip.second; + if( ip.first[0] == '2' ) total2 += ip.second; + if( ip.first[0] == '3' ) total3 += ip.second; + if( ip.first[0] == '3' && ip.first[1] == 'a' ) total3a += ip.second; + //ipart++; + } + // Dump individual partition timers and the overall total + if( json ) + { + std::string s1 = "\"", s2 = "\" : \"", s3 = " sec\","; + ostr << std::setprecision( 6 ); // set precision (default=6): affects all floats + ostr << std::fixed; // fixed format: affects all floats + for( auto ip: m_partitionTimers ) + ostr << s1 << ip.first << s2 << ip.second << s3 << std::endl; + ostr << s1 << totalKey << s2 << total << s3 << std::endl + << s1 << total123Key << s2 << total123 << s3 << std::endl + << s1 << total23Key << s2 << total23 << s3 << std::endl + << s1 << total3Key << s2 << total3 << s3 << std::endl + << s1 << total3aKey << s2 << total3a << " sec \"" << std::endl; + ostr << std::defaultfloat; // default format: affects all floats + } + else + { + // NB: 'setw' affects only the next field (of any type) + ostr << std::setprecision( 6 ); // set precision (default=6): affects all floats + ostr << std::fixed; // fixed format: affects all floats + for( auto ip: m_partitionTimers ) + ostr << std::setw( maxsize ) << ip.first << " : " + << std::setw( 12 ) << ip.second << " sec" << std::endl; + ostr << std::setw( maxsize ) << totalKey << " : " + << std::setw( 12 ) << total << " sec" << std::endl + << std::setw( maxsize ) << total123Key << " : " + << std::setw( 12 ) << total123 << " sec" << std::endl + << std::setw( maxsize ) << total23Key << " : " + << std::setw( 12 ) << total23 << " sec" << std::endl + << std::setw( maxsize ) << total1Key << " : " + << std::setw( 12 ) << total1 << " sec" << std::endl + << std::setw( maxsize ) << total2Key << " : " + << std::setw( 12 ) << total2 << " sec" << std::endl + << std::setw( maxsize ) << total3Key << " : " + << std::setw( 12 ) << total3 << " sec" << std::endl + << std::setw( maxsize ) << total3aKey << " : " + << std::setw( 12 ) << total3a << " sec" << std::endl; + ostr << std::defaultfloat; // default format: affects all floats + } + } + + private: + + Timer m_timer; + std::string m_active; + std::map m_partitionTimers; + std::map m_partitionIds; + }; + +} + +#endif // MGONGPUTIMERMAP_H diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/src/CMakeLists.txt b/epochX/cudacpp/smeft_gg_tttt.sa/src/CMakeLists.txt new file mode 100644 index 0000000000..bb6d5ee85d --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/src/CMakeLists.txt @@ -0,0 
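[Editorial aside, not part of the patch] The TimerMap above keys each wall-clock partition by a string and uses the key's first character ('1', '2', '3', plus the '3a' prefix) to build the aggregate totals printed by dump(). A minimal usage sketch follows; the partition names and the surrounding main() are illustrative only, and it assumes the sibling nvtx.h header from the same SubProcesses directory is available when compiling.

  #include <iostream>
  #include "timermap.h"

  int main()
  {
    mgOnGpu::TimerMap timermap;
    timermap.start( "1a ProcInit" ); // opens partition "1a ProcInit" (and an NVTX range)
    // ... initialisation work ...
    timermap.start( "2a SigmaKin" ); // closes "1a ProcInit", opens "2a SigmaKin"
    // ... compute work ...
    timermap.stop();                 // closes "2a SigmaKin"
    timermap.dump( std::cout );      // per-partition times plus TOTAL and the per-group totals
    //timermap.dump( std::cout, true ); // same information as JSON-style key/value lines
    return 0;
  }

Each start() call stops the currently active partition (if any) and returns its last duration, so consecutive start() calls are enough to time back-to-back phases without explicit stop() calls in between.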
+1,5 @@ +file(GLOB_RECURSE HEADERS "*.h") +add_library(mg5amc_common Parameters_sm.cc read_slha.cc ${HEADERS}) + +# some XCode specific stuff to make the executable run +set_property(TARGET mg5amc_common PROPERTY XCODE_GENERATE_SCHEME TRUE) diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/src/HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h b/epochX/cudacpp/smeft_gg_tttt.sa/src/HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h new file mode 100644 index 0000000000..9e80ae076e --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/src/HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h @@ -0,0 +1,1174 @@ +//========================================================================== +// This file has been automatically generated for CUDA/C++ standalone by +// MadGraph5_aMC@NLO v. 3.5.0_lo_vect, 2023-01-26 +// By the MadGraph5_aMC@NLO Development Team +// Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch +//========================================================================== + +#ifndef HelAmps_SMEFTsim_topU3l_MwScheme_UFO_H +#define HelAmps_SMEFTsim_topU3l_MwScheme_UFO_H 1 + +#include "mgOnGpuConfig.h" + +#include "mgOnGpuVectors.h" + +#include "Parameters_SMEFTsim_topU3l_MwScheme_UFO.h" + +//#include +//#include +//#include +//#include + +#ifdef __CUDACC__ +namespace mg5amcGpu +#else +namespace mg5amcCpu +#endif +{ + //-------------------------------------------------------------------------- + +#ifdef MGONGPU_INLINE_HELAMPS +#define INLINE inline +#define ALWAYS_INLINE __attribute__( ( always_inline ) ) +#else +#define INLINE +#define ALWAYS_INLINE +#endif + + //-------------------------------------------------------------------------- + + // Compute the output wavefunction fi[6] from the input momenta[npar*4*nevt] + template + __host__ __device__ INLINE void + ixxxxx( const fptype momenta[], // input: momenta + const fptype fmass, // input: fermion mass + const int nhel, // input: -1 or +1 (helicity of fermion) + const int nsf, // input: +1 (particle) or -1 (antiparticle) + fptype wavefunctions[], // output: wavefunctions + const int ipar // input: particle# out of npar + ) ALWAYS_INLINE; + + //-------------------------------------------------------------------------- + + // Compute the output wavefunction fi[6] from the input momenta[npar*4*nevt] + // ASSUMPTIONS: (FMASS == 0) and (PX == PY == 0 and E == +PZ > 0) + template + __host__ __device__ INLINE void + ipzxxx( const fptype momenta[], // input: momenta + //const fptype fmass, // [skip: ASSUME fermion mass==0] + const int nhel, // input: -1 or +1 (helicity of fermion) + const int nsf, // input: +1 (particle) or -1 (antiparticle) + fptype wavefunctions[], // output: wavefunctions + const int ipar // input: particle# out of npar + ) ALWAYS_INLINE; + + //-------------------------------------------------------------------------- + + // Compute the output wavefunction fi[6] from the input momenta[npar*4*nevt] + // ASSUMPTIONS: (FMASS == 0) and (PX == PY == 0 and E == -PZ > 0) + template + __host__ __device__ INLINE void + imzxxx( const fptype momenta[], // input: momenta + //const fptype fmass, // [skip: ASSUME fermion mass==0] + const int nhel, // input: -1 or +1 (helicity of fermion) + const int nsf, // input: +1 (particle) or -1 (antiparticle) + fptype wavefunctions[], // output: wavefunctions + const int ipar // input: particle# out of npar + ) ALWAYS_INLINE; + + //-------------------------------------------------------------------------- + + // Compute the output wavefunction fi[6] from the input momenta[npar*4*nevt] + // ASSUMPTIONS: (FMASS == 0) and (PT > 0) + 
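// [Editorial note, not part of the generated header] ipzxxx, imzxxx and ixzxxx
// (and the opzxxx, omzxxx, oxzxxx counterparts declared below) are fast-path
// specialisations of the general ixxxxx/oxxxxx spinors: they are only valid
// under the kinematic ASSUMPTIONS stated in the comment above each declaration
// (massless fermion, and either purely longitudinal momentum along +/-z or
// non-zero transverse momentum, respectively), and the caller is responsible
// for guaranteeing those assumptions. The INLINE / ALWAYS_INLINE tokens in
// these declarations expand to "inline" / "__attribute__( ( always_inline ) )"
// only when MGONGPU_INLINE_HELAMPS is defined (see the #ifdef block above),
// and to nothing otherwise.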
template + __host__ __device__ INLINE void + ixzxxx( const fptype momenta[], // input: momenta + //const fptype fmass, // [skip: ASSUME fermion mass==0] + const int nhel, // input: -1 or +1 (helicity of fermion) + const int nsf, // input: +1 (particle) or -1 (antiparticle) + fptype wavefunctions[], // output: wavefunctions + const int ipar // input: particle# out of npar + ) ALWAYS_INLINE; + + //-------------------------------------------------------------------------- + + // Compute the output wavefunction vc[6] from the input momenta[npar*4*nevt] + template + __host__ __device__ INLINE void + vxxxxx( const fptype momenta[], // input: momenta + const fptype vmass, // input: vector boson mass + const int nhel, // input: -1, 0 (only if vmass!=0) or +1 (helicity of vector boson) + const int nsv, // input: +1 (final) or -1 (initial) + fptype wavefunctions[], // output: wavefunctions + const int ipar // input: particle# out of npar + ) ALWAYS_INLINE; + + //-------------------------------------------------------------------------- + + // Compute the output wavefunction sc[3] from the input momenta[npar*4*nevt] + template + __host__ __device__ INLINE void + sxxxxx( const fptype momenta[], // input: momenta + //const fptype, // WARNING: input "smass" unused (missing in Fortran) - scalar boson mass + //const int, // WARNING: input "nhel" unused (missing in Fortran) - scalar has no helicity! + const int nss, // input: +1 (final) or -1 (initial) + fptype wavefunctions[], // output: wavefunctions + const int ipar // input: particle# out of npar + ) ALWAYS_INLINE; + + //-------------------------------------------------------------------------- + + // Compute the output wavefunction fo[6] from the input momenta[npar*4*nevt] + template + __host__ __device__ INLINE void + oxxxxx( const fptype momenta[], // input: momenta + const fptype fmass, // input: fermion mass + const int nhel, // input: -1, 0 (only if vmass!=0) or +1 (helicity of vector boson) + const int nsf, // input: +1 (particle) or -1 (antiparticle) + fptype wavefunctions[], // output: wavefunctions + const int ipar // input: particle# out of npar + ) ALWAYS_INLINE; + + //-------------------------------------------------------------------------- + + // Compute the output wavefunction fo[6] from the input momenta[npar*4*nevt] + // ASSUMPTIONS: (FMASS == 0) and (PX == PY == 0 and E == +PZ > 0) + template + __host__ __device__ INLINE void + opzxxx( const fptype momenta[], // input: momenta + //const fptype fmass, // [skip: ASSUME fermion mass==0] + const int nhel, // input: -1 or +1 (helicity of fermion) + const int nsf, // input: +1 (particle) or -1 (antiparticle) + fptype wavefunctions[], // output: wavefunctions + const int ipar // input: particle# out of npar + ) ALWAYS_INLINE; + + //-------------------------------------------------------------------------- + + // Compute the output wavefunction fo[6] from the input momenta[npar*4*nevt] + // ASSUMPTIONS: (FMASS == 0) and (PX == PY == 0 and E == -PZ > 0) + template + __host__ __device__ INLINE void + omzxxx( const fptype momenta[], // input: momenta + //const fptype fmass, // [skip: ASSUME fermion mass==0] + const int nhel, // input: -1 or +1 (helicity of fermion) + const int nsf, // input: +1 (particle) or -1 (antiparticle) + fptype wavefunctions[], // output: wavefunctions + const int ipar // input: particle# out of npar + ) ALWAYS_INLINE; + + //-------------------------------------------------------------------------- + + // Compute the output wavefunction fo[6] from the input 
momenta[npar*4*nevt] + template + __host__ __device__ INLINE void + oxzxxx( const fptype momenta[], // input: momenta + //const fptype fmass, // [skip: ASSUME fermion mass==0] + const int nhel, // input: -1 or +1 (helicity of fermion) + const int nsf, // input: +1 (particle) or -1 (antiparticle) + fptype wavefunctions[], // output: wavefunctions + const int ipar // input: particle# out of npar + ) ALWAYS_INLINE; + + //========================================================================== + + // Compute the output wavefunction fi[6] from the input momenta[npar*4*nevt] + template + __host__ __device__ void + ixxxxx( const fptype momenta[], // input: momenta + const fptype fmass, // input: fermion mass + const int nhel, // input: -1 or +1 (helicity of fermion) + const int nsf, // input: +1 (particle) or -1 (antiparticle) + fptype wavefunctions[], // output: wavefunctions + const int ipar ) // input: particle# out of npar + { + mgDebug( 0, __FUNCTION__ ); + const fptype_sv& pvec0 = M_ACCESS::kernelAccessIp4IparConst( momenta, 0, ipar ); + const fptype_sv& pvec1 = M_ACCESS::kernelAccessIp4IparConst( momenta, 1, ipar ); + const fptype_sv& pvec2 = M_ACCESS::kernelAccessIp4IparConst( momenta, 2, ipar ); + const fptype_sv& pvec3 = M_ACCESS::kernelAccessIp4IparConst( momenta, 3, ipar ); + cxtype_sv* fi = W_ACCESS::kernelAccess( wavefunctions ); + fi[0] = cxmake( -pvec0 * (fptype)nsf, -pvec3 * (fptype)nsf ); + fi[1] = cxmake( -pvec1 * (fptype)nsf, -pvec2 * (fptype)nsf ); + const int nh = nhel * nsf; + if( fmass != 0. ) + { + const fptype_sv pp = fpmin( pvec0, fpsqrt( pvec1 * pvec1 + pvec2 * pvec2 + pvec3 * pvec3 ) ); +#ifndef MGONGPU_CPPSIMD + if( pp == 0. ) + { + // NB: Do not use "abs" for floats! It returns an integer with no build warning! Use std::abs! + fptype sqm[2] = { fpsqrt( std::abs( fmass ) ), 0. }; // possibility of negative fermion masses + //sqm[1] = ( fmass < 0. ? -abs( sqm[0] ) : abs( sqm[0] ) ); // AV: why abs here? + sqm[1] = ( fmass < 0. ? -sqm[0] : sqm[0] ); // AV: removed an abs here + const int ip = ( 1 + nh ) / 2; // NB: Fortran sqm(0:1) also has indexes 0,1 as in C++ + const int im = ( 1 - nh ) / 2; // NB: Fortran sqm(0:1) also has indexes 0,1 as in C++ + fi[2] = cxmake( ip * sqm[ip], 0 ); + fi[3] = cxmake( im * nsf * sqm[ip], 0 ); + fi[4] = cxmake( ip * nsf * sqm[im], 0 ); + fi[5] = cxmake( im * sqm[im], 0 ); + } + else + { + const fptype sf[2] = { fptype( 1 + nsf + ( 1 - nsf ) * nh ) * (fptype)0.5, + fptype( 1 + nsf - ( 1 - nsf ) * nh ) * (fptype)0.5 }; + fptype omega[2] = { fpsqrt( pvec0 + pp ), 0. }; + omega[1] = fmass / omega[0]; + const int ip = ( 1 + nh ) / 2; // NB: Fortran is (3+nh)/2 because omega(2) has indexes 1,2 and not 0,1 + const int im = ( 1 - nh ) / 2; // NB: Fortran is (3-nh)/2 because omega(2) has indexes 1,2 and not 0,1 + const fptype sfomega[2] = { sf[0] * omega[ip], sf[1] * omega[im] }; + const fptype pp3 = fpmax( pp + pvec3, 0. ); + const cxtype chi[2] = { cxmake( fpsqrt( pp3 * (fptype)0.5 / pp ), 0. ), + ( pp3 == 0. ? cxmake( -nh, 0. ) : cxmake( nh * pvec1, pvec2 ) / fpsqrt( 2. * pp * pp3 ) ) }; + fi[2] = sfomega[0] * chi[im]; + fi[3] = sfomega[0] * chi[ip]; + fi[4] = sfomega[1] * chi[im]; + fi[5] = sfomega[1] * chi[ip]; + } +#else + const int ip = ( 1 + nh ) / 2; + const int im = ( 1 - nh ) / 2; + // Branch A: pp == 0. + // NB: Do not use "abs" for floats! It returns an integer with no build warning! Use std::abs! + fptype sqm[2] = { fpsqrt( std::abs( fmass ) ), 0 }; // possibility of negative fermion masses (NB: SCALAR!) + sqm[1] = ( fmass < 0 ? 
-sqm[0] : sqm[0] ); // AV: removed an abs here (as above) + const cxtype fiA_2 = ip * sqm[ip]; // scalar cxtype: real part initialised from fptype, imag part = 0 + const cxtype fiA_3 = im * nsf * sqm[ip]; // scalar cxtype: real part initialised from fptype, imag part = 0 + const cxtype fiA_4 = ip * nsf * sqm[im]; // scalar cxtype: real part initialised from fptype, imag part = 0 + const cxtype fiA_5 = im * sqm[im]; // scalar cxtype: real part initialised from fptype, imag part = 0 + // Branch B: pp != 0. + const fptype sf[2] = { fptype( 1 + nsf + ( 1 - nsf ) * nh ) * (fptype)0.5, + fptype( 1 + nsf - ( 1 - nsf ) * nh ) * (fptype)0.5 }; + fptype_v omega[2] = { fpsqrt( pvec0 + pp ), 0 }; + omega[1] = fmass / omega[0]; + const fptype_v sfomega[2] = { sf[0] * omega[ip], sf[1] * omega[im] }; + const fptype_v pp3 = fpmax( pp + pvec3, 0 ); + const cxtype_v chi[2] = { cxmake( fpsqrt( pp3 * 0.5 / pp ), 0 ), + cxternary( ( pp3 == 0. ), + cxmake( -nh, 0 ), + cxmake( (fptype)nh * pvec1, pvec2 ) / fpsqrt( 2. * pp * pp3 ) ) }; + const cxtype_v fiB_2 = sfomega[0] * chi[im]; + const cxtype_v fiB_3 = sfomega[0] * chi[ip]; + const cxtype_v fiB_4 = sfomega[1] * chi[im]; + const cxtype_v fiB_5 = sfomega[1] * chi[ip]; + // Choose between the results from branch A and branch B + const bool_v mask = ( pp == 0. ); + fi[2] = cxternary( mask, fiA_2, fiB_2 ); + fi[3] = cxternary( mask, fiA_3, fiB_3 ); + fi[4] = cxternary( mask, fiA_4, fiB_4 ); + fi[5] = cxternary( mask, fiA_5, fiB_5 ); +#endif + } + else + { + const fptype_sv sqp0p3 = fpternary( ( pvec1 == 0. and pvec2 == 0. and pvec3 < 0. ), + fptype_sv{ 0 }, + fpsqrt( fpmax( pvec0 + pvec3, 0. ) ) * (fptype)nsf ); + const cxtype_sv chi[2] = { cxmake( sqp0p3, 0. ), cxternary( ( sqp0p3 == 0. ), cxmake( -(fptype)nhel * fpsqrt( 2. * pvec0 ), 0. ), cxmake( (fptype)nh * pvec1, pvec2 ) / sqp0p3 ) }; + if( nh == 1 ) + { + fi[2] = cxzero_sv(); + fi[3] = cxzero_sv(); + fi[4] = chi[0]; + fi[5] = chi[1]; + } + else + { + fi[2] = chi[1]; + fi[3] = chi[0]; + fi[4] = cxzero_sv(); + fi[5] = cxzero_sv(); + } + } + mgDebug( 1, __FUNCTION__ ); + return; + } + + //-------------------------------------------------------------------------- + + // Compute the output wavefunction fi[6] from the input momenta[npar*4*nevt] + // ASSUMPTIONS: (FMASS == 0) and (PX == PY == 0 and E == +PZ > 0) + template + __host__ __device__ void + ipzxxx( const fptype momenta[], // input: momenta + //const fptype fmass, // [skip: ASSUME fermion mass==0] + const int nhel, // input: -1 or +1 (helicity of fermion) + const int nsf, // input: +1 (particle) or -1 (antiparticle) + fptype wavefunctions[], // output: wavefunctions + const int ipar ) // input: particle# out of npar + { + mgDebug( 0, __FUNCTION__ ); + const fptype_sv& pvec3 = M_ACCESS::kernelAccessIp4IparConst( momenta, 3, ipar ); + cxtype_sv* fi = W_ACCESS::kernelAccess( wavefunctions ); + fi[0] = cxmake( -pvec3 * (fptype)nsf, -pvec3 * (fptype)nsf ); + fi[1] = cxzero_sv(); + const int nh = nhel * nsf; + const cxtype_sv sqp0p3 = cxmake( fpsqrt( 2. * pvec3 ) * (fptype)nsf, 0. 
); + fi[2] = fi[1]; + if( nh == 1 ) + { + fi[3] = fi[1]; + fi[4] = sqp0p3; + } + else + { + fi[3] = sqp0p3; + fi[4] = fi[1]; + } + fi[5] = fi[1]; + mgDebug( 1, __FUNCTION__ ); + return; + } + + //-------------------------------------------------------------------------- + + // Compute the output wavefunction fi[6] from the input momenta[npar*4*nevt] + // ASSUMPTIONS: (FMASS == 0) and (PX == PY == 0 and E == -PZ > 0) + template + __host__ __device__ void + imzxxx( const fptype momenta[], // input: momenta + //const fptype fmass, // [skip: ASSUME fermion mass==0] + const int nhel, // input: -1 or +1 (helicity of fermion) + const int nsf, // input: +1 (particle) or -1 (antiparticle) + fptype wavefunctions[], // output: wavefunctions + const int ipar ) // input: particle# out of npar + { + mgDebug( 0, __FUNCTION__ ); + const fptype_sv& pvec3 = M_ACCESS::kernelAccessIp4IparConst( momenta, 3, ipar ); + cxtype_sv* fi = W_ACCESS::kernelAccess( wavefunctions ); + fi[0] = cxmake( pvec3 * (fptype)nsf, -pvec3 * (fptype)nsf ); + fi[1] = cxzero_sv(); + const int nh = nhel * nsf; + const cxtype_sv chi = cxmake( -(fptype)nhel * fpsqrt( -2. * pvec3 ), 0. ); + fi[3] = cxzero_sv(); + fi[4] = cxzero_sv(); + if( nh == 1 ) + { + fi[2] = cxzero_sv(); + fi[5] = chi; + } + else + { + fi[2] = chi; + fi[5] = cxzero_sv(); + } + mgDebug( 1, __FUNCTION__ ); + return; + } + + //-------------------------------------------------------------------------- + + // Compute the output wavefunction fi[6] from the input momenta[npar*4*nevt] + // ASSUMPTIONS: (FMASS == 0) and (PT > 0) + template + __host__ __device__ void + ixzxxx( const fptype momenta[], // input: momenta + //const fptype fmass, // [skip: ASSUME fermion mass==0] + const int nhel, // input: -1 or +1 (helicity of fermion) + const int nsf, // input: +1 (particle) or -1 (antiparticle) + fptype wavefunctions[], // output: wavefunctions + const int ipar ) // input: particle# out of npar + { + mgDebug( 0, __FUNCTION__ ); + const fptype_sv& pvec0 = M_ACCESS::kernelAccessIp4IparConst( momenta, 0, ipar ); + const fptype_sv& pvec1 = M_ACCESS::kernelAccessIp4IparConst( momenta, 1, ipar ); + const fptype_sv& pvec2 = M_ACCESS::kernelAccessIp4IparConst( momenta, 2, ipar ); + const fptype_sv& pvec3 = M_ACCESS::kernelAccessIp4IparConst( momenta, 3, ipar ); + cxtype_sv* fi = W_ACCESS::kernelAccess( wavefunctions ); + //fi[0] = cxmake( -pvec0 * nsf, -pvec2 * nsf ); // AV: BUG! not the same as ixxxxx + //fi[1] = cxmake( -pvec0 * nsf, -pvec1 * nsf ); // AV: BUG! not the same as ixxxxx + fi[0] = cxmake( -pvec0 * (fptype)nsf, -pvec3 * (fptype)nsf ); // AV: BUG FIX + fi[1] = cxmake( -pvec1 * (fptype)nsf, -pvec2 * (fptype)nsf ); // AV: BUG FIX + const int nh = nhel * nsf; + //const float sqp0p3 = sqrtf( pvec0 + pvec3 ) * nsf; // AV: why force a float here? + const fptype_sv sqp0p3 = fpsqrt( pvec0 + pvec3 ) * (fptype)nsf; + const cxtype_sv chi0 = cxmake( sqp0p3, 0. 
); + const cxtype_sv chi1 = cxmake( (fptype)nh * pvec1 / sqp0p3, pvec2 / sqp0p3 ); + if( nh == 1 ) + { + fi[2] = cxzero_sv(); + fi[3] = cxzero_sv(); + fi[4] = chi0; + fi[5] = chi1; + } + else + { + fi[2] = chi1; + fi[3] = chi0; + fi[4] = cxzero_sv(); + fi[5] = cxzero_sv(); + } + mgDebug( 1, __FUNCTION__ ); + return; + } + + //-------------------------------------------------------------------------- + + // Compute the output wavefunction vc[6] from the input momenta[npar*4*nevt] + template + __host__ __device__ void + vxxxxx( const fptype momenta[], // input: momenta + const fptype vmass, // input: vector boson mass + const int nhel, // input: -1, 0 (only if vmass!=0) or +1 (helicity of vector boson) + const int nsv, // input: +1 (final) or -1 (initial) + fptype wavefunctions[], // output: wavefunctions + const int ipar ) // input: particle# out of npar + { + mgDebug( 0, __FUNCTION__ ); + const fptype_sv& pvec0 = M_ACCESS::kernelAccessIp4IparConst( momenta, 0, ipar ); + const fptype_sv& pvec1 = M_ACCESS::kernelAccessIp4IparConst( momenta, 1, ipar ); + const fptype_sv& pvec2 = M_ACCESS::kernelAccessIp4IparConst( momenta, 2, ipar ); + const fptype_sv& pvec3 = M_ACCESS::kernelAccessIp4IparConst( momenta, 3, ipar ); + cxtype_sv* vc = W_ACCESS::kernelAccess( wavefunctions ); + const fptype sqh = fpsqrt( 0.5 ); // AV this is > 0! + const fptype hel = nhel; + vc[0] = cxmake( pvec0 * (fptype)nsv, pvec3 * (fptype)nsv ); + vc[1] = cxmake( pvec1 * (fptype)nsv, pvec2 * (fptype)nsv ); + if( vmass != 0. ) + { + const int nsvahl = nsv * std::abs( hel ); + const fptype_sv pt2 = ( pvec1 * pvec1 ) + ( pvec2 * pvec2 ); + const fptype_sv pp = fpmin( pvec0, fpsqrt( pt2 + ( pvec3 * pvec3 ) ) ); + const fptype_sv pt = fpmin( pp, fpsqrt( pt2 ) ); + const fptype hel0 = 1. - std::abs( hel ); +#ifndef MGONGPU_CPPSIMD + if( pp == 0. ) + { + vc[2] = cxmake( 0., 0. ); + vc[3] = cxmake( -hel * sqh, 0. ); + vc[4] = cxmake( 0., nsvahl * sqh ); + vc[5] = cxmake( hel0, 0. ); + } + else + { + const fptype emp = pvec0 / ( vmass * pp ); + vc[2] = cxmake( hel0 * pp / vmass, 0. ); + vc[5] = cxmake( hel0 * pvec3 * emp + hel * pt / pp * sqh, 0. ); + if( pt != 0. ) + { + const fptype pzpt = pvec3 / ( pp * pt ) * sqh * hel; + vc[3] = cxmake( hel0 * pvec1 * emp - pvec1 * pzpt, -nsvahl * pvec2 / pt * sqh ); + vc[4] = cxmake( hel0 * pvec2 * emp - pvec2 * pzpt, nsvahl * pvec1 / pt * sqh ); + } + else + { + vc[3] = cxmake( -hel * sqh, 0. ); + // NB: Do not use "abs" for floats! It returns an integer with no build warning! Use std::abs! + //vc[4] = cxmake( 0., nsvahl * ( pvec3 < 0. ? -std::abs( sqh ) : std::abs( sqh ) ) ); // AV: why abs here? + vc[4] = cxmake( 0., nsvahl * ( pvec3 < 0. ? -sqh : sqh ) ); // AV: removed an abs here + } + } +#else + // Branch A: pp == 0. + const cxtype vcA_2 = cxmake( 0, 0 ); + const cxtype vcA_3 = cxmake( -hel * sqh, 0 ); + const cxtype vcA_4 = cxmake( 0, nsvahl * sqh ); + const cxtype vcA_5 = cxmake( hel0, 0 ); + // Branch B: pp != 0. + const fptype_v emp = pvec0 / ( vmass * pp ); + const cxtype_v vcB_2 = cxmake( hel0 * pp / vmass, 0 ); + const cxtype_v vcB_5 = cxmake( hel0 * pvec3 * emp + hel * pt / pp * sqh, 0 ); + // Branch B1: pp != 0. and pt != 0. + const fptype_v pzpt = pvec3 / ( pp * pt ) * sqh * hel; + const cxtype_v vcB1_3 = cxmake( hel0 * pvec1 * emp - pvec1 * pzpt, -(fptype)nsvahl * pvec2 / pt * sqh ); + const cxtype_v vcB1_4 = cxmake( hel0 * pvec2 * emp - pvec2 * pzpt, (fptype)nsvahl * pvec1 / pt * sqh ); + // Branch B2: pp != 0. and pt == 0. + const cxtype vcB2_3 = cxmake( -hel * sqh, 0. 
); + const cxtype_v vcB2_4 = cxmake( 0., (fptype)nsvahl * fpternary( ( pvec3 < 0 ), -sqh, sqh ) ); // AV: removed an abs here + // Choose between the results from branch A and branch B (and from branch B1 and branch B2) + const bool_v mask = ( pp == 0. ); + const bool_v maskB = ( pt != 0. ); + vc[2] = cxternary( mask, vcA_2, vcB_2 ); + vc[3] = cxternary( mask, vcA_3, cxternary( maskB, vcB1_3, vcB2_3 ) ); + vc[4] = cxternary( mask, vcA_4, cxternary( maskB, vcB1_4, vcB2_4 ) ); + vc[5] = cxternary( mask, vcA_5, vcB_5 ); +#endif + } + else + { + const fptype_sv& pp = pvec0; // NB: rewrite the following as in Fortran, using pp instead of pvec0 + const fptype_sv pt = fpsqrt( ( pvec1 * pvec1 ) + ( pvec2 * pvec2 ) ); + vc[2] = cxzero_sv(); + vc[5] = cxmake( hel * pt / pp * sqh, 0. ); +#ifndef MGONGPU_CPPSIMD + if( pt != 0. ) + { + const fptype pzpt = pvec3 / ( pp * pt ) * sqh * hel; + vc[3] = cxmake( -pvec1 * pzpt, -nsv * pvec2 / pt * sqh ); + vc[4] = cxmake( -pvec2 * pzpt, nsv * pvec1 / pt * sqh ); + } + else + { + vc[3] = cxmake( -hel * sqh, 0. ); + // NB: Do not use "abs" for floats! It returns an integer with no build warning! Use std::abs! + //vc[4] = cxmake( 0, nsv * ( pvec3 < 0. ? -std::abs( sqh ) : std::abs( sqh ) ) ); // AV why abs here? + vc[4] = cxmake( 0., nsv * ( pvec3 < 0. ? -sqh : sqh ) ); // AV: removed an abs here + } +#else + // Branch A: pt != 0. + const fptype_v pzpt = pvec3 / ( pp * pt ) * sqh * hel; + const cxtype_v vcA_3 = cxmake( -pvec1 * pzpt, -(fptype)nsv * pvec2 / pt * sqh ); + const cxtype_v vcA_4 = cxmake( -pvec2 * pzpt, (fptype)nsv * pvec1 / pt * sqh ); + // Branch B: pt == 0. + const cxtype vcB_3 = cxmake( -(fptype)hel * sqh, 0 ); + const cxtype_v vcB_4 = cxmake( 0, (fptype)nsv * fpternary( ( pvec3 < 0 ), -sqh, sqh ) ); // AV: removed an abs here + // Choose between the results from branch A and branch B + const bool_v mask = ( pt != 0. ); + vc[3] = cxternary( mask, vcA_3, vcB_3 ); + vc[4] = cxternary( mask, vcA_4, vcB_4 ); +#endif + } + mgDebug( 1, __FUNCTION__ ); + return; + } + + //-------------------------------------------------------------------------- + + // Compute the output wavefunction sc[3] from the input momenta[npar*4*nevt] + template + __host__ __device__ void + sxxxxx( const fptype momenta[], // input: momenta + //const fptype, // WARNING: input "smass" unused (missing in Fortran) - scalar boson mass + //const int, // WARNING: input "nhel" unused (missing in Fortran) - scalar has no helicity! 
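// [Editorial note, not part of the generated header] In the SIMD (#else) paths
// of ixxxxx and vxxxxx above (and similarly oxxxxx below), both candidate
// results are always computed ("Branch A" for pp == 0, "Branch B" otherwise)
// and then blended lane by lane with cxternary( mask, A, B ) and
// fpternary( mask, a, b ), the vector analogue of the scalar
// "( pp == 0. ? resultA : resultB )"; this keeps every lane of a
// fptype_v / cxtype_v on the same instruction path instead of branching
// per event.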
+ const int nss, // input: +1 (final) or -1 (initial) + fptype wavefunctions[], // output: wavefunctions + const int ipar ) // input: particle# out of npar + { + mgDebug( 0, __FUNCTION__ ); + const fptype_sv& pvec0 = M_ACCESS::kernelAccessIp4IparConst( momenta, 0, ipar ); + const fptype_sv& pvec1 = M_ACCESS::kernelAccessIp4IparConst( momenta, 1, ipar ); + const fptype_sv& pvec2 = M_ACCESS::kernelAccessIp4IparConst( momenta, 2, ipar ); + const fptype_sv& pvec3 = M_ACCESS::kernelAccessIp4IparConst( momenta, 3, ipar ); + cxtype_sv* sc = W_ACCESS::kernelAccess( wavefunctions ); + sc[2] = cxmake( 1 + fptype_sv{ 0 }, 0 ); + sc[0] = cxmake( pvec0 * (fptype)nss, pvec3 * (fptype)nss ); + sc[1] = cxmake( pvec1 * (fptype)nss, pvec2 * (fptype)nss ); + mgDebug( 1, __FUNCTION__ ); + return; + } + + //-------------------------------------------------------------------------- + + // Compute the output wavefunction fo[6] from the input momenta[npar*4*nevt] + template + __host__ __device__ void + oxxxxx( const fptype momenta[], // input: momenta + const fptype fmass, // input: fermion mass + const int nhel, // input: -1 or +1 (helicity of fermion) + const int nsf, // input: +1 (particle) or -1 (antiparticle) + fptype wavefunctions[], // output: wavefunctions + const int ipar ) // input: particle# out of npar + { + mgDebug( 0, __FUNCTION__ ); + const fptype_sv& pvec0 = M_ACCESS::kernelAccessIp4IparConst( momenta, 0, ipar ); + const fptype_sv& pvec1 = M_ACCESS::kernelAccessIp4IparConst( momenta, 1, ipar ); + const fptype_sv& pvec2 = M_ACCESS::kernelAccessIp4IparConst( momenta, 2, ipar ); + const fptype_sv& pvec3 = M_ACCESS::kernelAccessIp4IparConst( momenta, 3, ipar ); + cxtype_sv* fo = W_ACCESS::kernelAccess( wavefunctions ); + fo[0] = cxmake( pvec0 * (fptype)nsf, pvec3 * (fptype)nsf ); + fo[1] = cxmake( pvec1 * (fptype)nsf, pvec2 * (fptype)nsf ); + const int nh = nhel * nsf; + if( fmass != 0. ) + { + const fptype_sv pp = fpmin( pvec0, fpsqrt( ( pvec1 * pvec1 ) + ( pvec2 * pvec2 ) + ( pvec3 * pvec3 ) ) ); +#ifndef MGONGPU_CPPSIMD + if( pp == 0. ) + { + // NB: Do not use "abs" for floats! It returns an integer with no build warning! Use std::abs! + fptype sqm[2] = { fpsqrt( std::abs( fmass ) ), 0. }; // possibility of negative fermion masses + //sqm[1] = ( fmass < 0. ? -abs( sqm[0] ) : abs( sqm[0] ) ); // AV: why abs here? + sqm[1] = ( fmass < 0. ? -sqm[0] : sqm[0] ); // AV: removed an abs here + const int ip = -( ( 1 - nh ) / 2 ) * nhel; // NB: Fortran sqm(0:1) also has indexes 0,1 as in C++ + const int im = ( 1 + nh ) / 2 * nhel; // NB: Fortran sqm(0:1) also has indexes 0,1 as in C++ + fo[2] = cxmake( im * sqm[std::abs( ip )], 0 ); + fo[3] = cxmake( ip * nsf * sqm[std::abs( ip )], 0 ); + fo[4] = cxmake( im * nsf * sqm[std::abs( im )], 0 ); + fo[5] = cxmake( ip * sqm[std::abs( im )], 0 ); + } + else + { + const fptype sf[2] = { fptype( 1 + nsf + ( 1 - nsf ) * nh ) * (fptype)0.5, + fptype( 1 + nsf - ( 1 - nsf ) * nh ) * (fptype)0.5 }; + fptype omega[2] = { fpsqrt( pvec0 + pp ), 0. }; + omega[1] = fmass / omega[0]; + const int ip = ( 1 + nh ) / 2; // NB: Fortran is (3+nh)/2 because omega(2) has indexes 1,2 and not 0,1 + const int im = ( 1 - nh ) / 2; // NB: Fortran is (3-nh)/2 because omega(2) has indexes 1,2 and not 0,1 + const fptype sfomeg[2] = { sf[0] * omega[ip], sf[1] * omega[im] }; + const fptype pp3 = fpmax( pp + pvec3, 0. ); + const cxtype chi[2] = { cxmake( fpsqrt( pp3 * (fptype)0.5 / pp ), 0. ), + ( ( pp3 == 0. ) ? cxmake( -nh, 0. ) + : cxmake( nh * pvec1, -pvec2 ) / fpsqrt( 2. 
* pp * pp3 ) ) }; + fo[2] = sfomeg[1] * chi[im]; + fo[3] = sfomeg[1] * chi[ip]; + fo[4] = sfomeg[0] * chi[im]; + fo[5] = sfomeg[0] * chi[ip]; + } +#else + // Branch A: pp == 0. + // NB: Do not use "abs" for floats! It returns an integer with no build warning! Use std::abs! + fptype sqm[2] = { fpsqrt( std::abs( fmass ) ), 0 }; // possibility of negative fermion masses + sqm[1] = ( fmass < 0 ? -sqm[0] : sqm[0] ); // AV: removed an abs here (as above) + const int ipA = -( ( 1 - nh ) / 2 ) * nhel; + const int imA = ( 1 + nh ) / 2 * nhel; + const cxtype foA_2 = imA * sqm[std::abs( ipA )]; + const cxtype foA_3 = ipA * nsf * sqm[std::abs( ipA )]; + const cxtype foA_4 = imA * nsf * sqm[std::abs( imA )]; + const cxtype foA_5 = ipA * sqm[std::abs( imA )]; + // Branch B: pp != 0. + const fptype sf[2] = { fptype( 1 + nsf + ( 1 - nsf ) * nh ) * (fptype)0.5, + fptype( 1 + nsf - ( 1 - nsf ) * nh ) * (fptype)0.5 }; + fptype_v omega[2] = { fpsqrt( pvec0 + pp ), 0 }; + omega[1] = fmass / omega[0]; + const int ipB = ( 1 + nh ) / 2; + const int imB = ( 1 - nh ) / 2; + const fptype_v sfomeg[2] = { sf[0] * omega[ipB], sf[1] * omega[imB] }; + const fptype_v pp3 = fpmax( pp + pvec3, 0. ); + const cxtype_v chi[2] = { cxmake( fpsqrt( pp3 * 0.5 / pp ), 0. ), + ( cxternary( ( pp3 == 0. ), + cxmake( -nh, 0. ), + cxmake( (fptype)nh * pvec1, -pvec2 ) / fpsqrt( 2. * pp * pp3 ) ) ) }; + const cxtype_v foB_2 = sfomeg[1] * chi[imB]; + const cxtype_v foB_3 = sfomeg[1] * chi[ipB]; + const cxtype_v foB_4 = sfomeg[0] * chi[imB]; + const cxtype_v foB_5 = sfomeg[0] * chi[ipB]; + // Choose between the results from branch A and branch B + const bool_v mask = ( pp == 0. ); + fo[2] = cxternary( mask, foA_2, foB_2 ); + fo[3] = cxternary( mask, foA_3, foB_3 ); + fo[4] = cxternary( mask, foA_4, foB_4 ); + fo[5] = cxternary( mask, foA_5, foB_5 ); +#endif + } + else + { + const fptype_sv sqp0p3 = fpternary( ( pvec1 == 0. ) and ( pvec2 == 0. ) and ( pvec3 < 0. ), + 0, + fpsqrt( fpmax( pvec0 + pvec3, 0. ) ) * (fptype)nsf ); + const cxtype_sv chi[2] = { cxmake( sqp0p3, 0. ), + cxternary( ( sqp0p3 == 0. ), + cxmake( -nhel, 0. ) * fpsqrt( 2. * pvec0 ), + cxmake( (fptype)nh * pvec1, -pvec2 ) / sqp0p3 ) }; + if( nh == 1 ) + { + fo[2] = chi[0]; + fo[3] = chi[1]; + fo[4] = cxzero_sv(); + fo[5] = cxzero_sv(); + } + else + { + fo[2] = cxzero_sv(); + fo[3] = cxzero_sv(); + fo[4] = chi[1]; + fo[5] = chi[0]; + } + } + mgDebug( 1, __FUNCTION__ ); + return; + } + + //-------------------------------------------------------------------------- + + // Compute the output wavefunction fo[6] from the input momenta[npar*4*nevt] + // ASSUMPTIONS: (FMASS == 0) and (PX == PY == 0 and E == +PZ > 0) + template + __host__ __device__ void + opzxxx( const fptype momenta[], // input: momenta + //const fptype fmass, // [skip: ASSUME fermion mass==0] + const int nhel, // input: -1 or +1 (helicity of fermion) + const int nsf, // input: +1 (particle) or -1 (antiparticle) + fptype wavefunctions[], // output: wavefunctions + const int ipar ) // input: particle# out of npar + { + mgDebug( 0, __FUNCTION__ ); + const fptype_sv& pvec3 = M_ACCESS::kernelAccessIp4IparConst( momenta, 3, ipar ); + cxtype_sv* fo = W_ACCESS::kernelAccess( wavefunctions ); + fo[0] = cxmake( pvec3 * (fptype)nsf, pvec3 * (fptype)nsf ); + fo[1] = cxzero_sv(); + const int nh = nhel * nsf; + const cxtype_sv csqp0p3 = cxmake( fpsqrt( 2. * pvec3 ) * (fptype)nsf, 0. 
); + fo[3] = cxzero_sv(); + fo[4] = cxzero_sv(); + if( nh == 1 ) + { + fo[2] = csqp0p3; + fo[5] = cxzero_sv(); + } + else + { + fo[2] = cxzero_sv(); + fo[5] = csqp0p3; + } + mgDebug( 1, __FUNCTION__ ); + return; + } + + //-------------------------------------------------------------------------- + + // Compute the output wavefunction fo[6] from the input momenta[npar*4*nevt] + // ASSUMPTIONS: (FMASS == 0) and (PX == PY == 0 and E == -PZ > 0) + template + __host__ __device__ void + omzxxx( const fptype momenta[], // input: momenta + //const fptype fmass, // [skip: ASSUME fermion mass==0] + const int nhel, // input: -1 or +1 (helicity of fermion) + const int nsf, // input: +1 (particle) or -1 (antiparticle) + fptype wavefunctions[], // output: wavefunctions + const int ipar ) // input: particle# out of npar + { + mgDebug( 0, __FUNCTION__ ); + const fptype_sv& pvec3 = M_ACCESS::kernelAccessIp4IparConst( momenta, 3, ipar ); + cxtype_sv* fo = W_ACCESS::kernelAccess( wavefunctions ); + fo[0] = cxmake( -pvec3 * (fptype)nsf, pvec3 * (fptype)nsf ); // remember pvec0 == -pvec3 + fo[1] = cxzero_sv(); + const int nh = nhel * nsf; + const cxtype_sv chi1 = cxmake( -nhel, 0. ) * fpsqrt( -2. * pvec3 ); + if( nh == 1 ) + { + fo[2] = cxzero_sv(); + fo[3] = chi1; + fo[4] = cxzero_sv(); + fo[5] = cxzero_sv(); + } + else + { + fo[2] = cxzero_sv(); + fo[3] = cxzero_sv(); + fo[4] = chi1; + //fo[5] = chi1; // AV: BUG! + fo[5] = cxzero_sv(); // AV: BUG FIX + } + mgDebug( 1, __FUNCTION__ ); + return; + } + + //-------------------------------------------------------------------------- + + // Compute the output wavefunction fo[6] from the input momenta[npar*4*nevt] + // ASSUMPTIONS: (FMASS == 0) and (PT > 0) + template + __host__ __device__ void + oxzxxx( const fptype momenta[], // input: momenta + //const fptype fmass, // [skip: ASSUME fermion mass==0] + const int nhel, // input: -1 or +1 (helicity of fermion) + const int nsf, // input: +1 (particle) or -1 (antiparticle) + fptype wavefunctions[], // output: wavefunctions + const int ipar ) // input: particle# out of npar + { + mgDebug( 0, __FUNCTION__ ); + const fptype_sv& pvec0 = M_ACCESS::kernelAccessIp4IparConst( momenta, 0, ipar ); + const fptype_sv& pvec1 = M_ACCESS::kernelAccessIp4IparConst( momenta, 1, ipar ); + const fptype_sv& pvec2 = M_ACCESS::kernelAccessIp4IparConst( momenta, 2, ipar ); + const fptype_sv& pvec3 = M_ACCESS::kernelAccessIp4IparConst( momenta, 3, ipar ); + cxtype_sv* fo = W_ACCESS::kernelAccess( wavefunctions ); + fo[0] = cxmake( pvec0 * (fptype)nsf, pvec3 * (fptype)nsf ); + fo[1] = cxmake( pvec1 * (fptype)nsf, pvec2 * (fptype)nsf ); + const int nh = nhel * nsf; + //const float sqp0p3 = sqrtf( pvec0 + pvec3 ) * nsf; // AV: why force a float here? + const fptype_sv sqp0p3 = fpsqrt( pvec0 + pvec3 ) * (fptype)nsf; + const cxtype_sv chi0 = cxmake( sqp0p3, 0. 
); + const cxtype_sv chi1 = cxmake( (fptype)nh * pvec1 / sqp0p3, -pvec2 / sqp0p3 ); + if( nh == 1 ) + { + fo[2] = chi0; + fo[3] = chi1; + fo[4] = cxzero_sv(); + fo[5] = cxzero_sv(); + } + else + { + fo[2] = cxzero_sv(); + fo[3] = cxzero_sv(); + fo[4] = chi1; + fo[5] = chi0; + } + mgDebug( 1, __FUNCTION__ ); + return; + } + + //========================================================================== + + // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6] + template + __device__ INLINE void + VVV5_0( const fptype allV1[], + const fptype allV2[], + const fptype allV3[], + const fptype allCOUP[], + fptype allvertexes[] ) ALWAYS_INLINE; + + //-------------------------------------------------------------------------- + + // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] + template + __device__ INLINE void + VVV5P0_1( const fptype allV2[], + const fptype allV3[], + const fptype allCOUP[], + const fptype M1, + const fptype W1, + fptype allV1[] ) ALWAYS_INLINE; + + //-------------------------------------------------------------------------- + + // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] + template + __device__ INLINE void + FFV1_0( const fptype allF1[], + const fptype allF2[], + const fptype allV3[], + const fptype allCOUP[], + fptype allvertexes[] ) ALWAYS_INLINE; + + //-------------------------------------------------------------------------- + + // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] + template + __device__ INLINE void + FFV1_1( const fptype allF2[], + const fptype allV3[], + const fptype allCOUP[], + const fptype M1, + const fptype W1, + fptype allF1[] ) ALWAYS_INLINE; + + //-------------------------------------------------------------------------- + + // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] + template + __device__ INLINE void + FFV1_2( const fptype allF1[], + const fptype allV3[], + const fptype allCOUP[], + const fptype M2, + const fptype W2, + fptype allF2[] ) ALWAYS_INLINE; + + //-------------------------------------------------------------------------- + + // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] + template + __device__ INLINE void + FFV1P0_3( const fptype allF1[], + const fptype allF2[], + const fptype allCOUP[], + const fptype M3, + const fptype W3, + fptype allV3[] ) ALWAYS_INLINE; + + //-------------------------------------------------------------------------- + + // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] + template + __device__ INLINE void + VVVV1_0( const fptype allV1[], + const fptype allV2[], + const fptype allV3[], + const fptype allV4[], + const fptype allCOUP[], + fptype allvertexes[] ) ALWAYS_INLINE; + + //-------------------------------------------------------------------------- + + // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] + template + __device__ INLINE void + VVVV9_0( const fptype allV1[], + const fptype allV2[], + const fptype allV3[], + const fptype allV4[], + const fptype allCOUP[], + fptype allvertexes[] ) ALWAYS_INLINE; + + //-------------------------------------------------------------------------- + + // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] + template + __device__ INLINE void + VVVV10_0( const fptype allV1[], + const fptype allV2[], + const fptype 
allV3[], + const fptype allV4[], + const fptype allCOUP[], + fptype allvertexes[] ) ALWAYS_INLINE; + + //========================================================================== + + // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6] + template + __device__ void + VVV5_0( const fptype allV1[], + const fptype allV2[], + const fptype allV3[], + const fptype allCOUP[], + fptype allvertexes[] ) + { + mgDebug( 0, __FUNCTION__ ); + const cxtype_sv* V1 = W_ACCESS::kernelAccessConst( allV1 ); + const cxtype_sv* V2 = W_ACCESS::kernelAccessConst( allV2 ); + const cxtype_sv* V3 = W_ACCESS::kernelAccessConst( allV3 ); + const cxtype_sv COUP = C_ACCESS::kernelAccessConst( allCOUP ); + cxtype_sv* vertex = A_ACCESS::kernelAccess( allvertexes ); + const cxtype cI = cxmake( 0., 1. ); + const fptype_sv P1[4] = { +cxreal( V1[0] ), +cxreal( V1[1] ), +cximag( V1[1] ), +cximag( V1[0] ) }; + const fptype_sv P2[4] = { +cxreal( V2[0] ), +cxreal( V2[1] ), +cximag( V2[1] ), +cximag( V2[0] ) }; + const fptype_sv P3[4] = { +cxreal( V3[0] ), +cxreal( V3[1] ), +cximag( V3[1] ), +cximag( V3[0] ) }; + const cxtype_sv TMP0 = ( V3[2] * P1[0] - V3[3] * P1[1] - V3[4] * P1[2] - V3[5] * P1[3] ); + const cxtype_sv TMP1 = ( V2[2] * V1[2] - V2[3] * V1[3] - V2[4] * V1[4] - V2[5] * V1[5] ); + const cxtype_sv TMP2 = ( V3[2] * P2[0] - V3[3] * P2[1] - V3[4] * P2[2] - V3[5] * P2[3] ); + const cxtype_sv TMP3 = ( V3[2] * V1[2] - V3[3] * V1[3] - V3[4] * V1[4] - V3[5] * V1[5] ); + const cxtype_sv TMP4 = ( P1[0] * V2[2] - P1[1] * V2[3] - P1[2] * V2[4] - P1[3] * V2[5] ); + const cxtype_sv TMP5 = ( V2[2] * P3[0] - V2[3] * P3[1] - V2[4] * P3[2] - V2[5] * P3[3] ); + const cxtype_sv TMP6 = ( V3[2] * V2[2] - V3[3] * V2[3] - V3[4] * V2[4] - V3[5] * V2[5] ); + const cxtype_sv TMP7 = ( V1[2] * P2[0] - V1[3] * P2[1] - V1[4] * P2[2] - V1[5] * P2[3] ); + const cxtype_sv TMP8 = ( V1[2] * P3[0] - V1[3] * P3[1] - V1[4] * P3[2] - V1[5] * P3[3] ); + ( *vertex ) = COUP * ( TMP1 * ( -cI * TMP0 + cI * TMP2 ) + ( TMP3 * ( +cI * TMP4 - cI * TMP5 ) + TMP6 * ( -cI * TMP7 + cI * TMP8 ) ) ); + mgDebug( 1, __FUNCTION__ ); + return; + } + + //-------------------------------------------------------------------------- + + // Compute the output wavefunction 'V1[6]' from the input wavefunctions V2[6], V3[6] + template + __device__ void + VVV5P0_1( const fptype allV2[], + const fptype allV3[], + const fptype allCOUP[], + const fptype M1, + const fptype W1, + fptype allV1[] ) + { + mgDebug( 0, __FUNCTION__ ); + const cxtype_sv* V2 = W_ACCESS::kernelAccessConst( allV2 ); + const cxtype_sv* V3 = W_ACCESS::kernelAccessConst( allV3 ); + const cxtype_sv COUP = C_ACCESS::kernelAccessConst( allCOUP ); + cxtype_sv* V1 = W_ACCESS::kernelAccess( allV1 ); + const cxtype cI = cxmake( 0., 1. 
); + const fptype_sv P2[4] = { +cxreal( V2[0] ), +cxreal( V2[1] ), +cximag( V2[1] ), +cximag( V2[0] ) }; + const fptype_sv P3[4] = { +cxreal( V3[0] ), +cxreal( V3[1] ), +cximag( V3[1] ), +cximag( V3[0] ) }; + V1[0] = +V2[0] + V3[0]; + V1[1] = +V2[1] + V3[1]; + const fptype_sv P1[4] = { -cxreal( V1[0] ), -cxreal( V1[1] ), -cximag( V1[1] ), -cximag( V1[0] ) }; + const cxtype_sv TMP0 = ( V3[2] * P1[0] - V3[3] * P1[1] - V3[4] * P1[2] - V3[5] * P1[3] ); + const cxtype_sv TMP2 = ( V3[2] * P2[0] - V3[3] * P2[1] - V3[4] * P2[2] - V3[5] * P2[3] ); + const cxtype_sv TMP4 = ( P1[0] * V2[2] - P1[1] * V2[3] - P1[2] * V2[4] - P1[3] * V2[5] ); + const cxtype_sv TMP5 = ( V2[2] * P3[0] - V2[3] * P3[1] - V2[4] * P3[2] - V2[5] * P3[3] ); + const cxtype_sv TMP6 = ( V3[2] * V2[2] - V3[3] * V2[3] - V3[4] * V2[4] - V3[5] * V2[5] ); + const cxtype_sv denom = COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); + V1[2] = denom * ( TMP6 * ( -cI * P2[0] + cI * P3[0] ) + ( V2[2] * ( -cI * TMP0 + cI * TMP2 ) + V3[2] * ( +cI * TMP4 - cI * TMP5 ) ) ); + V1[3] = denom * ( TMP6 * ( -cI * P2[1] + cI * P3[1] ) + ( V2[3] * ( -cI * TMP0 + cI * TMP2 ) + V3[3] * ( +cI * TMP4 - cI * TMP5 ) ) ); + V1[4] = denom * ( TMP6 * ( -cI * P2[2] + cI * P3[2] ) + ( V2[4] * ( -cI * TMP0 + cI * TMP2 ) + V3[4] * ( +cI * TMP4 - cI * TMP5 ) ) ); + V1[5] = denom * ( TMP6 * ( -cI * P2[3] + cI * P3[3] ) + ( V2[5] * ( -cI * TMP0 + cI * TMP2 ) + V3[5] * ( +cI * TMP4 - cI * TMP5 ) ) ); + mgDebug( 1, __FUNCTION__ ); + return; + } + + //-------------------------------------------------------------------------- + + // Compute the output amplitude 'vertex' from the input wavefunctions F1[6], F2[6], V3[6] + template + __device__ void + FFV1_0( const fptype allF1[], + const fptype allF2[], + const fptype allV3[], + const fptype allCOUP[], + fptype allvertexes[] ) + { + mgDebug( 0, __FUNCTION__ ); + const cxtype_sv* F1 = W_ACCESS::kernelAccessConst( allF1 ); + const cxtype_sv* F2 = W_ACCESS::kernelAccessConst( allF2 ); + const cxtype_sv* V3 = W_ACCESS::kernelAccessConst( allV3 ); + const cxtype_sv COUP = C_ACCESS::kernelAccessConst( allCOUP ); + cxtype_sv* vertex = A_ACCESS::kernelAccess( allvertexes ); + const cxtype cI = cxmake( 0., 1. ); + const cxtype_sv TMP9 = ( F1[2] * ( F2[4] * ( V3[2] + V3[5] ) + F2[5] * ( V3[3] + cI * V3[4] ) ) + ( F1[3] * ( F2[4] * ( V3[3] - cI * V3[4] ) + F2[5] * ( V3[2] - V3[5] ) ) + ( F1[4] * ( F2[2] * ( V3[2] - V3[5] ) - F2[3] * ( V3[3] + cI * V3[4] ) ) + F1[5] * ( F2[2] * ( -V3[3] + cI * V3[4] ) + F2[3] * ( V3[2] + V3[5] ) ) ) ) ); + ( *vertex ) = COUP * -cI * TMP9; + mgDebug( 1, __FUNCTION__ ); + return; + } + + //-------------------------------------------------------------------------- + + // Compute the output wavefunction 'F1[6]' from the input wavefunctions F2[6], V3[6] + template + __device__ void + FFV1_1( const fptype allF2[], + const fptype allV3[], + const fptype allCOUP[], + const fptype M1, + const fptype W1, + fptype allF1[] ) + { + mgDebug( 0, __FUNCTION__ ); + const cxtype_sv* F2 = W_ACCESS::kernelAccessConst( allF2 ); + const cxtype_sv* V3 = W_ACCESS::kernelAccessConst( allV3 ); + const cxtype_sv COUP = C_ACCESS::kernelAccessConst( allCOUP ); + cxtype_sv* F1 = W_ACCESS::kernelAccess( allF1 ); + const cxtype cI = cxmake( 0., 1. ); + F1[0] = +F2[0] + V3[0]; + F1[1] = +F2[1] + V3[1]; + const fptype_sv P1[4] = { -cxreal( F1[0] ), -cxreal( F1[1] ), -cximag( F1[1] ), -cximag( F1[0] ) }; + constexpr fptype one( 1. 
); + const cxtype_sv denom = COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); + F1[2] = denom * cI * ( F2[2] * ( P1[0] * ( -V3[2] + V3[5] ) + ( P1[1] * ( V3[3] - cI * V3[4] ) + ( P1[2] * ( +cI * V3[3] + V3[4] ) + P1[3] * ( -V3[2] + V3[5] ) ) ) ) + ( F2[3] * ( P1[0] * ( V3[3] + cI * V3[4] ) + ( P1[1] * ( -one ) * ( V3[2] + V3[5] ) + ( P1[2] * ( -one ) * ( +cI * ( V3[2] + V3[5] ) ) + P1[3] * ( V3[3] + cI * V3[4] ) ) ) ) + M1 * ( F2[4] * ( V3[2] + V3[5] ) + F2[5] * ( V3[3] + cI * V3[4] ) ) ) ); + F1[3] = denom * ( -cI ) * ( F2[2] * ( P1[0] * ( -V3[3] + cI * V3[4] ) + ( P1[1] * ( V3[2] - V3[5] ) + ( P1[2] * ( -cI * V3[2] + cI * V3[5] ) + P1[3] * ( V3[3] - cI * V3[4] ) ) ) ) + ( F2[3] * ( P1[0] * ( V3[2] + V3[5] ) + ( P1[1] * ( -one ) * ( V3[3] + cI * V3[4] ) + ( P1[2] * ( +cI * V3[3] - V3[4] ) - P1[3] * ( V3[2] + V3[5] ) ) ) ) + M1 * ( F2[4] * ( -V3[3] + cI * V3[4] ) + F2[5] * ( -V3[2] + V3[5] ) ) ) ); + F1[4] = denom * ( -cI ) * ( F2[4] * ( P1[0] * ( V3[2] + V3[5] ) + ( P1[1] * ( -V3[3] + cI * V3[4] ) + ( P1[2] * ( -one ) * ( +cI * V3[3] + V3[4] ) - P1[3] * ( V3[2] + V3[5] ) ) ) ) + ( F2[5] * ( P1[0] * ( V3[3] + cI * V3[4] ) + ( P1[1] * ( -V3[2] + V3[5] ) + ( P1[2] * ( -cI * V3[2] + cI * V3[5] ) - P1[3] * ( V3[3] + cI * V3[4] ) ) ) ) + M1 * ( F2[2] * ( -V3[2] + V3[5] ) + F2[3] * ( V3[3] + cI * V3[4] ) ) ) ); + F1[5] = denom * cI * ( F2[4] * ( P1[0] * ( -V3[3] + cI * V3[4] ) + ( P1[1] * ( V3[2] + V3[5] ) + ( P1[2] * ( -one ) * ( +cI * ( V3[2] + V3[5] ) ) + P1[3] * ( -V3[3] + cI * V3[4] ) ) ) ) + ( F2[5] * ( P1[0] * ( -V3[2] + V3[5] ) + ( P1[1] * ( V3[3] + cI * V3[4] ) + ( P1[2] * ( -cI * V3[3] + V3[4] ) + P1[3] * ( -V3[2] + V3[5] ) ) ) ) + M1 * ( F2[2] * ( -V3[3] + cI * V3[4] ) + F2[3] * ( V3[2] + V3[5] ) ) ) ); + mgDebug( 1, __FUNCTION__ ); + return; + } + + //-------------------------------------------------------------------------- + + // Compute the output wavefunction 'F2[6]' from the input wavefunctions F1[6], V3[6] + template + __device__ void + FFV1_2( const fptype allF1[], + const fptype allV3[], + const fptype allCOUP[], + const fptype M2, + const fptype W2, + fptype allF2[] ) + { + mgDebug( 0, __FUNCTION__ ); + const cxtype_sv* F1 = W_ACCESS::kernelAccessConst( allF1 ); + const cxtype_sv* V3 = W_ACCESS::kernelAccessConst( allV3 ); + const cxtype_sv COUP = C_ACCESS::kernelAccessConst( allCOUP ); + cxtype_sv* F2 = W_ACCESS::kernelAccess( allF2 ); + const cxtype cI = cxmake( 0., 1. ); + F2[0] = +F1[0] + V3[0]; + F2[1] = +F1[1] + V3[1]; + const fptype_sv P2[4] = { -cxreal( F2[0] ), -cxreal( F2[1] ), -cximag( F2[1] ), -cximag( F2[0] ) }; + constexpr fptype one( 1. 
); + const cxtype_sv denom = COUP / ( ( P2[0] * P2[0] ) - ( P2[1] * P2[1] ) - ( P2[2] * P2[2] ) - ( P2[3] * P2[3] ) - M2 * ( M2 - cI * W2 ) ); + F2[2] = denom * cI * ( F1[2] * ( P2[0] * ( V3[2] + V3[5] ) + ( P2[1] * ( -one ) * ( V3[3] + cI * V3[4] ) + ( P2[2] * ( +cI * V3[3] - V3[4] ) - P2[3] * ( V3[2] + V3[5] ) ) ) ) + ( F1[3] * ( P2[0] * ( V3[3] - cI * V3[4] ) + ( P2[1] * ( -V3[2] + V3[5] ) + ( P2[2] * ( +cI * V3[2] - cI * V3[5] ) + P2[3] * ( -V3[3] + cI * V3[4] ) ) ) ) + M2 * ( F1[4] * ( V3[2] - V3[5] ) + F1[5] * ( -V3[3] + cI * V3[4] ) ) ) ); + F2[3] = denom * ( -cI ) * ( F1[2] * ( P2[0] * ( -one ) * ( V3[3] + cI * V3[4] ) + ( P2[1] * ( V3[2] + V3[5] ) + ( P2[2] * ( +cI * ( V3[2] + V3[5] ) ) - P2[3] * ( V3[3] + cI * V3[4] ) ) ) ) + ( F1[3] * ( P2[0] * ( -V3[2] + V3[5] ) + ( P2[1] * ( V3[3] - cI * V3[4] ) + ( P2[2] * ( +cI * V3[3] + V3[4] ) + P2[3] * ( -V3[2] + V3[5] ) ) ) ) + M2 * ( F1[4] * ( V3[3] + cI * V3[4] ) - F1[5] * ( V3[2] + V3[5] ) ) ) ); + F2[4] = denom * ( -cI ) * ( F1[4] * ( P2[0] * ( -V3[2] + V3[5] ) + ( P2[1] * ( V3[3] + cI * V3[4] ) + ( P2[2] * ( -cI * V3[3] + V3[4] ) + P2[3] * ( -V3[2] + V3[5] ) ) ) ) + ( F1[5] * ( P2[0] * ( V3[3] - cI * V3[4] ) + ( P2[1] * ( -one ) * ( V3[2] + V3[5] ) + ( P2[2] * ( +cI * ( V3[2] + V3[5] ) ) + P2[3] * ( V3[3] - cI * V3[4] ) ) ) ) + M2 * ( F1[2] * ( -one ) * ( V3[2] + V3[5] ) + F1[3] * ( -V3[3] + cI * V3[4] ) ) ) ); + F2[5] = denom * cI * ( F1[4] * ( P2[0] * ( -one ) * ( V3[3] + cI * V3[4] ) + ( P2[1] * ( V3[2] - V3[5] ) + ( P2[2] * ( +cI * V3[2] - cI * V3[5] ) + P2[3] * ( V3[3] + cI * V3[4] ) ) ) ) + ( F1[5] * ( P2[0] * ( V3[2] + V3[5] ) + ( P2[1] * ( -V3[3] + cI * V3[4] ) + ( P2[2] * ( -one ) * ( +cI * V3[3] + V3[4] ) - P2[3] * ( V3[2] + V3[5] ) ) ) ) + M2 * ( F1[2] * ( V3[3] + cI * V3[4] ) + F1[3] * ( V3[2] - V3[5] ) ) ) ); + mgDebug( 1, __FUNCTION__ ); + return; + } + + //-------------------------------------------------------------------------- + + // Compute the output wavefunction 'V3[6]' from the input wavefunctions F1[6], F2[6] + template + __device__ void + FFV1P0_3( const fptype allF1[], + const fptype allF2[], + const fptype allCOUP[], + const fptype M3, + const fptype W3, + fptype allV3[] ) + { + mgDebug( 0, __FUNCTION__ ); + const cxtype_sv* F1 = W_ACCESS::kernelAccessConst( allF1 ); + const cxtype_sv* F2 = W_ACCESS::kernelAccessConst( allF2 ); + const cxtype_sv COUP = C_ACCESS::kernelAccessConst( allCOUP ); + cxtype_sv* V3 = W_ACCESS::kernelAccess( allV3 ); + const cxtype cI = cxmake( 0., 1. 
); + V3[0] = +F1[0] + F2[0]; + V3[1] = +F1[1] + F2[1]; + const fptype_sv P3[4] = { -cxreal( V3[0] ), -cxreal( V3[1] ), -cximag( V3[1] ), -cximag( V3[0] ) }; + const cxtype_sv denom = COUP / ( ( P3[0] * P3[0] ) - ( P3[1] * P3[1] ) - ( P3[2] * P3[2] ) - ( P3[3] * P3[3] ) - M3 * ( M3 - cI * W3 ) ); + V3[2] = denom * ( -cI ) * ( F1[2] * F2[4] + F1[3] * F2[5] + F1[4] * F2[2] + F1[5] * F2[3] ); + V3[3] = denom * ( -cI ) * ( -F1[2] * F2[5] - F1[3] * F2[4] + F1[4] * F2[3] + F1[5] * F2[2] ); + V3[4] = denom * ( -cI ) * ( -cI * ( F1[2] * F2[5] + F1[5] * F2[2] ) + cI * ( F1[3] * F2[4] + F1[4] * F2[3] ) ); + V3[5] = denom * ( -cI ) * ( -F1[2] * F2[4] - F1[5] * F2[3] + F1[3] * F2[5] + F1[4] * F2[2] ); + mgDebug( 1, __FUNCTION__ ); + return; + } + + //-------------------------------------------------------------------------- + + // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] + template + __device__ void + VVVV1_0( const fptype allV1[], + const fptype allV2[], + const fptype allV3[], + const fptype allV4[], + const fptype allCOUP[], + fptype allvertexes[] ) + { + mgDebug( 0, __FUNCTION__ ); + const cxtype_sv* V1 = W_ACCESS::kernelAccessConst( allV1 ); + const cxtype_sv* V2 = W_ACCESS::kernelAccessConst( allV2 ); + const cxtype_sv* V3 = W_ACCESS::kernelAccessConst( allV3 ); + const cxtype_sv* V4 = W_ACCESS::kernelAccessConst( allV4 ); + const cxtype_sv COUP = C_ACCESS::kernelAccessConst( allCOUP ); + cxtype_sv* vertex = A_ACCESS::kernelAccess( allvertexes ); + const cxtype cI = cxmake( 0., 1. ); + const cxtype_sv TMP10 = ( V1[2] * V4[2] - V1[3] * V4[3] - V1[4] * V4[4] - V1[5] * V4[5] ); + const cxtype_sv TMP11 = ( V2[2] * V4[2] - V2[3] * V4[3] - V2[4] * V4[4] - V2[5] * V4[5] ); + const cxtype_sv TMP3 = ( V3[2] * V1[2] - V3[3] * V1[3] - V3[4] * V1[4] - V3[5] * V1[5] ); + const cxtype_sv TMP6 = ( V3[2] * V2[2] - V3[3] * V2[3] - V3[4] * V2[4] - V3[5] * V2[5] ); + ( *vertex ) = COUP * ( -cI * ( TMP6 * TMP10 ) + cI * ( TMP3 * TMP11 ) ); + mgDebug( 1, __FUNCTION__ ); + return; + } + + //-------------------------------------------------------------------------- + + // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] + template + __device__ void + VVVV9_0( const fptype allV1[], + const fptype allV2[], + const fptype allV3[], + const fptype allV4[], + const fptype allCOUP[], + fptype allvertexes[] ) + { + mgDebug( 0, __FUNCTION__ ); + const cxtype_sv* V1 = W_ACCESS::kernelAccessConst( allV1 ); + const cxtype_sv* V2 = W_ACCESS::kernelAccessConst( allV2 ); + const cxtype_sv* V3 = W_ACCESS::kernelAccessConst( allV3 ); + const cxtype_sv* V4 = W_ACCESS::kernelAccessConst( allV4 ); + const cxtype_sv COUP = C_ACCESS::kernelAccessConst( allCOUP ); + cxtype_sv* vertex = A_ACCESS::kernelAccess( allvertexes ); + const cxtype cI = cxmake( 0., 1. 
); + const cxtype_sv TMP1 = ( V2[2] * V1[2] - V2[3] * V1[3] - V2[4] * V1[4] - V2[5] * V1[5] ); + const cxtype_sv TMP10 = ( V1[2] * V4[2] - V1[3] * V4[3] - V1[4] * V4[4] - V1[5] * V4[5] ); + const cxtype_sv TMP12 = ( V3[2] * V4[2] - V3[3] * V4[3] - V3[4] * V4[4] - V3[5] * V4[5] ); + const cxtype_sv TMP6 = ( V3[2] * V2[2] - V3[3] * V2[3] - V3[4] * V2[4] - V3[5] * V2[5] ); + ( *vertex ) = COUP * ( -cI * ( TMP6 * TMP10 ) + cI * ( TMP1 * TMP12 ) ); + mgDebug( 1, __FUNCTION__ ); + return; + } + + //-------------------------------------------------------------------------- + + // Compute the output amplitude 'vertex' from the input wavefunctions V1[6], V2[6], V3[6], V4[6] + template + __device__ void + VVVV10_0( const fptype allV1[], + const fptype allV2[], + const fptype allV3[], + const fptype allV4[], + const fptype allCOUP[], + fptype allvertexes[] ) + { + mgDebug( 0, __FUNCTION__ ); + const cxtype_sv* V1 = W_ACCESS::kernelAccessConst( allV1 ); + const cxtype_sv* V2 = W_ACCESS::kernelAccessConst( allV2 ); + const cxtype_sv* V3 = W_ACCESS::kernelAccessConst( allV3 ); + const cxtype_sv* V4 = W_ACCESS::kernelAccessConst( allV4 ); + const cxtype_sv COUP = C_ACCESS::kernelAccessConst( allCOUP ); + cxtype_sv* vertex = A_ACCESS::kernelAccess( allvertexes ); + const cxtype cI = cxmake( 0., 1. ); + const cxtype_sv TMP1 = ( V2[2] * V1[2] - V2[3] * V1[3] - V2[4] * V1[4] - V2[5] * V1[5] ); + const cxtype_sv TMP11 = ( V2[2] * V4[2] - V2[3] * V4[3] - V2[4] * V4[4] - V2[5] * V4[5] ); + const cxtype_sv TMP12 = ( V3[2] * V4[2] - V3[3] * V4[3] - V3[4] * V4[4] - V3[5] * V4[5] ); + const cxtype_sv TMP3 = ( V3[2] * V1[2] - V3[3] * V1[3] - V3[4] * V1[4] - V3[5] * V1[5] ); + ( *vertex ) = COUP * ( -cI * ( TMP3 * TMP11 ) + cI * ( TMP1 * TMP12 ) ); + mgDebug( 1, __FUNCTION__ ); + return; + } + + //-------------------------------------------------------------------------- + +} // end namespace + +#endif // HelAmps_SMEFTsim_topU3l_MwScheme_UFO_H diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.cc b/epochX/cudacpp/smeft_gg_tttt.sa/src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.cc new file mode 100644 index 0000000000..f08a14b80a --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.cc @@ -0,0 +1,796 @@ +//========================================================================== +// This file has been automatically generated for CUDA/C++ standalone by +// MadGraph5_aMC@NLO v. 
3.5.0_lo_vect, 2023-01-26 +// By the MadGraph5_aMC@NLO Development Team +// Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch +//========================================================================== + +#include "Parameters_SMEFTsim_topU3l_MwScheme_UFO.h" + +#include +#include + +#ifndef MGONGPU_HARDCODE_PARAM + +// Initialize static instance +Parameters_SMEFTsim_topU3l_MwScheme_UFO* Parameters_SMEFTsim_topU3l_MwScheme_UFO::instance = 0; + +// Function to get static instance - only one instance per program +Parameters_SMEFTsim_topU3l_MwScheme_UFO* +Parameters_SMEFTsim_topU3l_MwScheme_UFO::getInstance() +{ + if( instance == 0 ) + instance = new Parameters_SMEFTsim_topU3l_MwScheme_UFO(); + return instance; +} + +void +Parameters_SMEFTsim_topU3l_MwScheme_UFO::setIndependentParameters( SLHAReader& slha ) +{ + zero = 0; // define "zero" + ZERO = 0; // define "zero" + //std::vector indices(2, 0); // prepare a vector for indices + mdl_WH = slha.get_block_entry( "decay", 25, 4.070000e - 03 ); + mdl_WW = slha.get_block_entry( "decay", 24, 2.085000e + 00 ); + mdl_WZ = slha.get_block_entry( "decay", 23, 2.495200e + 00 ); + mdl_WT = slha.get_block_entry( "decay", 6, 1.330000e + 00 ); + mdl_ymtau = slha.get_block_entry( "yukawa", 15, 1.777000e + 00 ); + mdl_ymm = slha.get_block_entry( "yukawa", 13, 1.056600e - 01 ); + mdl_yme = slha.get_block_entry( "yukawa", 11, 5.110000e - 04 ); + mdl_ymt = slha.get_block_entry( "yukawa", 6, 1.727600e + 02 ); + mdl_ymb = slha.get_block_entry( "yukawa", 5, 4.180000e + 00 ); + mdl_ymc = slha.get_block_entry( "yukawa", 4, 1.270000e + 00 ); + mdl_yms = slha.get_block_entry( "yukawa", 3, 9.300000e - 02 ); + mdl_ymup = slha.get_block_entry( "yukawa", 2, 2.160000e - 03 ); + mdl_ymdo = slha.get_block_entry( "yukawa", 1, 4.670000e - 03 ); + mdl_linearPropCorrections = slha.get_block_entry( "switches", 1, 0.000000e + 00 ); + //aS = slha.get_block_entry( "sminputs", 3, 1.179000e - 01 ); // now retrieved event-by-event (as G) from Fortran (running alphas #373) + mdl_Gf = slha.get_block_entry( "sminputs", 2, 1.166379e - 05 ); + mdl_MW = slha.get_block_entry( "sminputs", 1, 8.038700e + 01 ); + mdl_LambdaSMEFT = slha.get_block_entry( "smeftcutoff", 1, 1.000000e + 03 ); + mdl_cleQt3Im = slha.get_block_entry( "smeftcpv", 53, 0.000000e + 00 ); + mdl_cleQt1Im = slha.get_block_entry( "smeftcpv", 52, 0.000000e + 00 ); + mdl_cleju3Im = slha.get_block_entry( "smeftcpv", 51, 0.000000e + 00 ); + mdl_cleju1Im = slha.get_block_entry( "smeftcpv", 50, 0.000000e + 00 ); + mdl_clebQIm = slha.get_block_entry( "smeftcpv", 49, 0.000000e + 00 ); + mdl_cledjIm = slha.get_block_entry( "smeftcpv", 48, 0.000000e + 00 ); + mdl_ceBIm = slha.get_block_entry( "smeftcpv", 47, 0.000000e + 00 ); + mdl_ceWIm = slha.get_block_entry( "smeftcpv", 46, 0.000000e + 00 ); + mdl_ceHIm = slha.get_block_entry( "smeftcpv", 45, 0.000000e + 00 ); + mdl_cQtQb8Im = slha.get_block_entry( "smeftcpv", 44, 0.000000e + 00 ); + mdl_cQtQb1Im = slha.get_block_entry( "smeftcpv", 43, 0.000000e + 00 ); + mdl_cjtQd8Im = slha.get_block_entry( "smeftcpv", 42, 0.000000e + 00 ); + mdl_cjtQd1Im = slha.get_block_entry( "smeftcpv", 41, 0.000000e + 00 ); + mdl_cQujb8Im = slha.get_block_entry( "smeftcpv", 40, 0.000000e + 00 ); + mdl_cQujb1Im = slha.get_block_entry( "smeftcpv", 39, 0.000000e + 00 ); + mdl_cjuQb8Im = slha.get_block_entry( "smeftcpv", 38, 0.000000e + 00 ); + mdl_cjuQb1Im = slha.get_block_entry( "smeftcpv", 37, 0.000000e + 00 ); + mdl_cQtjd8Im = slha.get_block_entry( "smeftcpv", 36, 0.000000e + 00 ); + mdl_cQtjd1Im = 
slha.get_block_entry( "smeftcpv", 35, 0.000000e + 00 ); + mdl_cjujd81Im = slha.get_block_entry( "smeftcpv", 34, 0.000000e + 00 ); + mdl_cjujd11Im = slha.get_block_entry( "smeftcpv", 33, 0.000000e + 00 ); + mdl_cjujd8Im = slha.get_block_entry( "smeftcpv", 32, 0.000000e + 00 ); + mdl_cjujd1Im = slha.get_block_entry( "smeftcpv", 31, 0.000000e + 00 ); + mdl_cjQbd8Im = slha.get_block_entry( "smeftcpv", 30, 0.000000e + 00 ); + mdl_cjQbd1Im = slha.get_block_entry( "smeftcpv", 29, 0.000000e + 00 ); + mdl_cjQtu8Im = slha.get_block_entry( "smeftcpv", 28, 0.000000e + 00 ); + mdl_cjQtu1Im = slha.get_block_entry( "smeftcpv", 27, 0.000000e + 00 ); + mdl_cutbd8Im = slha.get_block_entry( "smeftcpv", 26, 0.000000e + 00 ); + mdl_cutbd1Im = slha.get_block_entry( "smeftcpv", 25, 0.000000e + 00 ); + mdl_cHtbIm = slha.get_block_entry( "smeftcpv", 24, 0.000000e + 00 ); + mdl_cHudIm = slha.get_block_entry( "smeftcpv", 23, 0.000000e + 00 ); + mdl_cbHIm = slha.get_block_entry( "smeftcpv", 22, 0.000000e + 00 ); + mdl_cdHIm = slha.get_block_entry( "smeftcpv", 21, 0.000000e + 00 ); + mdl_ctHIm = slha.get_block_entry( "smeftcpv", 20, 0.000000e + 00 ); + mdl_cuHIm = slha.get_block_entry( "smeftcpv", 19, 0.000000e + 00 ); + mdl_cbBIm = slha.get_block_entry( "smeftcpv", 18, 0.000000e + 00 ); + mdl_cdBIm = slha.get_block_entry( "smeftcpv", 17, 0.000000e + 00 ); + mdl_cbWIm = slha.get_block_entry( "smeftcpv", 16, 0.000000e + 00 ); + mdl_cdWIm = slha.get_block_entry( "smeftcpv", 15, 0.000000e + 00 ); + mdl_cbGIm = slha.get_block_entry( "smeftcpv", 14, 0.000000e + 00 ); + mdl_cdGIm = slha.get_block_entry( "smeftcpv", 13, 0.000000e + 00 ); + mdl_ctBIm = slha.get_block_entry( "smeftcpv", 12, 0.000000e + 00 ); + mdl_cuBIm = slha.get_block_entry( "smeftcpv", 11, 0.000000e + 00 ); + mdl_ctWIm = slha.get_block_entry( "smeftcpv", 10, 0.000000e + 00 ); + mdl_cuWIm = slha.get_block_entry( "smeftcpv", 9, 0.000000e + 00 ); + mdl_ctGIm = slha.get_block_entry( "smeftcpv", 8, 0.000000e + 00 ); + mdl_cuGIm = slha.get_block_entry( "smeftcpv", 7, 0.000000e + 00 ); + mdl_cHWBtil = slha.get_block_entry( "smeftcpv", 6, 0.000000e + 00 ); + mdl_cHBtil = slha.get_block_entry( "smeftcpv", 5, 0.000000e + 00 ); + mdl_cHWtil = slha.get_block_entry( "smeftcpv", 4, 0.000000e + 00 ); + mdl_cHGtil = slha.get_block_entry( "smeftcpv", 3, 0.000000e + 00 ); + mdl_cWtil = slha.get_block_entry( "smeftcpv", 2, 0.000000e + 00 ); + mdl_cGtil = slha.get_block_entry( "smeftcpv", 1, 0.000000e + 00 ); + mdl_cleQt3Re = slha.get_block_entry( "smeft", 129, 0.000000e + 00 ); + mdl_cleju3Re = slha.get_block_entry( "smeft", 128, 0.000000e + 00 ); + mdl_cleQt1Re = slha.get_block_entry( "smeft", 127, 0.000000e + 00 ); + mdl_cleju1Re = slha.get_block_entry( "smeft", 126, 0.000000e + 00 ); + mdl_clebQRe = slha.get_block_entry( "smeft", 125, 0.000000e + 00 ); + mdl_cledjRe = slha.get_block_entry( "smeft", 124, 0.000000e + 00 ); + mdl_cle = slha.get_block_entry( "smeft", 123, 0.000000e + 00 ); + mdl_cbl = slha.get_block_entry( "smeft", 122, 0.000000e + 00 ); + mdl_cld = slha.get_block_entry( "smeft", 121, 0.000000e + 00 ); + mdl_ctl = slha.get_block_entry( "smeft", 120, 0.000000e + 00 ); + mdl_clu = slha.get_block_entry( "smeft", 119, 0.000000e + 00 ); + mdl_cQe = slha.get_block_entry( "smeft", 118, 0.000000e + 00 ); + mdl_cje = slha.get_block_entry( "smeft", 117, 0.000000e + 00 ); + mdl_cbe = slha.get_block_entry( "smeft", 116, 0.000000e + 00 ); + mdl_ced = slha.get_block_entry( "smeft", 115, 0.000000e + 00 ); + mdl_cte = slha.get_block_entry( "smeft", 114, 0.000000e + 00 ); + 
mdl_ceu = slha.get_block_entry( "smeft", 113, 0.000000e + 00 ); + mdl_cee = slha.get_block_entry( "smeft", 112, 0.000000e + 00 ); + mdl_cQl3 = slha.get_block_entry( "smeft", 111, 0.000000e + 00 ); + mdl_cQl1 = slha.get_block_entry( "smeft", 110, 0.000000e + 00 ); + mdl_clj3 = slha.get_block_entry( "smeft", 109, 0.000000e + 00 ); + mdl_clj1 = slha.get_block_entry( "smeft", 108, 0.000000e + 00 ); + mdl_cll1 = slha.get_block_entry( "smeft", 107, 0.000000e + 00 ); + mdl_cll = slha.get_block_entry( "smeft", 106, 0.000000e + 00 ); + mdl_cHe = slha.get_block_entry( "smeft", 105, 0.000000e + 00 ); + mdl_cHl3 = slha.get_block_entry( "smeft", 104, 0.000000e + 00 ); + mdl_cHl1 = slha.get_block_entry( "smeft", 103, 0.000000e + 00 ); + mdl_ceBRe = slha.get_block_entry( "smeft", 102, 0.000000e + 00 ); + mdl_ceWRe = slha.get_block_entry( "smeft", 101, 0.000000e + 00 ); + mdl_ceHRe = slha.get_block_entry( "smeft", 100, 0.000000e + 00 ); + mdl_cQtQb8Re = slha.get_block_entry( "smeft", 99, 0.000000e + 00 ); + mdl_cQtQb1Re = slha.get_block_entry( "smeft", 98, 0.000000e + 00 ); + mdl_cjtQd8Re = slha.get_block_entry( "smeft", 97, 0.000000e + 00 ); + mdl_cjtQd1Re = slha.get_block_entry( "smeft", 96, 0.000000e + 00 ); + mdl_cQujb8Re = slha.get_block_entry( "smeft", 95, 0.000000e + 00 ); + mdl_cQujb1Re = slha.get_block_entry( "smeft", 94, 0.000000e + 00 ); + mdl_cjuQb8Re = slha.get_block_entry( "smeft", 93, 0.000000e + 00 ); + mdl_cjuQb1Re = slha.get_block_entry( "smeft", 92, 0.000000e + 00 ); + mdl_cQtjd8Re = slha.get_block_entry( "smeft", 91, 0.000000e + 00 ); + mdl_cQtjd1Re = slha.get_block_entry( "smeft", 90, 0.000000e + 00 ); + mdl_cjujd81Re = slha.get_block_entry( "smeft", 89, 0.000000e + 00 ); + mdl_cjujd11Re = slha.get_block_entry( "smeft", 88, 0.000000e + 00 ); + mdl_cjujd8Re = slha.get_block_entry( "smeft", 87, 0.000000e + 00 ); + mdl_cjujd1Re = slha.get_block_entry( "smeft", 86, 0.000000e + 00 ); + mdl_cjQbd8Re = slha.get_block_entry( "smeft", 85, 0.000000e + 00 ); + mdl_cjQbd1Re = slha.get_block_entry( "smeft", 84, 0.000000e + 00 ); + mdl_cjQtu8Re = slha.get_block_entry( "smeft", 83, 0.000000e + 00 ); + mdl_cjQtu1Re = slha.get_block_entry( "smeft", 82, 0.000000e + 00 ); + mdl_cQb8 = slha.get_block_entry( "smeft", 81, 0.000000e + 00 ); + mdl_cQb1 = slha.get_block_entry( "smeft", 80, 0.000000e + 00 ); + mdl_cbj8 = slha.get_block_entry( "smeft", 79, 0.000000e + 00 ); + mdl_cbj1 = slha.get_block_entry( "smeft", 78, 0.000000e + 00 ); + mdl_cQd8 = slha.get_block_entry( "smeft", 77, 0.000000e + 00 ); + mdl_cQd1 = slha.get_block_entry( "smeft", 76, 0.000000e + 00 ); + mdl_cjd8 = slha.get_block_entry( "smeft", 75, 0.000000e + 00 ); + mdl_cjd1 = slha.get_block_entry( "smeft", 74, 0.000000e + 00 ); + mdl_cQt8 = slha.get_block_entry( "smeft", 73, 0.000000e + 00 ); + mdl_cQt1 = slha.get_block_entry( "smeft", 72, 0.000000e + 00 ); + mdl_ctj8 = slha.get_block_entry( "smeft", 71, 0.000000e + 00 ); + mdl_ctj1 = slha.get_block_entry( "smeft", 70, 0.000000e + 00 ); + mdl_cQu8 = slha.get_block_entry( "smeft", 69, 0.000000e + 00 ); + mdl_cju8 = slha.get_block_entry( "smeft", 68, 0.000000e + 00 ); + mdl_cQu1 = slha.get_block_entry( "smeft", 67, 0.000000e + 00 ); + mdl_cju1 = slha.get_block_entry( "smeft", 66, 0.000000e + 00 ); + mdl_cutbd8Re = slha.get_block_entry( "smeft", 65, 0.000000e + 00 ); + mdl_cutbd1Re = slha.get_block_entry( "smeft", 64, 0.000000e + 00 ); + mdl_cbu8 = slha.get_block_entry( "smeft", 63, 0.000000e + 00 ); + mdl_ctd8 = slha.get_block_entry( "smeft", 62, 0.000000e + 00 ); + mdl_ctb8 = 
slha.get_block_entry( "smeft", 61, 0.000000e + 00 ); + mdl_cud8 = slha.get_block_entry( "smeft", 60, 0.000000e + 00 ); + mdl_cbu1 = slha.get_block_entry( "smeft", 59, 0.000000e + 00 ); + mdl_ctd1 = slha.get_block_entry( "smeft", 58, 0.000000e + 00 ); + mdl_ctb1 = slha.get_block_entry( "smeft", 57, 0.000000e + 00 ); + mdl_cud1 = slha.get_block_entry( "smeft", 56, 0.000000e + 00 ); + mdl_cbd8 = slha.get_block_entry( "smeft", 55, 0.000000e + 00 ); + mdl_cbd1 = slha.get_block_entry( "smeft", 54, 0.000000e + 00 ); + mdl_cbb = slha.get_block_entry( "smeft", 53, 0.000000e + 00 ); + mdl_cdd8 = slha.get_block_entry( "smeft", 52, 0.000000e + 00 ); + mdl_cdd1 = slha.get_block_entry( "smeft", 51, 0.000000e + 00 ); + mdl_ctu8 = slha.get_block_entry( "smeft", 50, 0.000000e + 00 ); + mdl_ctu1 = slha.get_block_entry( "smeft", 49, 0.000000e + 00 ); + mdl_ctt = slha.get_block_entry( "smeft", 48, 0.000000e + 00 ); + mdl_cuu8 = slha.get_block_entry( "smeft", 47, 0.000000e + 00 ); + mdl_cuu1 = slha.get_block_entry( "smeft", 46, 0.000000e + 00 ); + mdl_cQQ8 = slha.get_block_entry( "smeft", 45, 0.000000e + 00 ); + mdl_cQQ1 = slha.get_block_entry( "smeft", 44, 0.000000e + 00 ); + mdl_cQj38 = slha.get_block_entry( "smeft", 43, 0.000000e + 00 ); + mdl_cQj31 = slha.get_block_entry( "smeft", 42, 0.000000e + 00 ); + mdl_cQj18 = slha.get_block_entry( "smeft", 41, 0.000000e + 00 ); + mdl_cQj11 = slha.get_block_entry( "smeft", 40, 0.000000e + 00 ); + mdl_cjj38 = slha.get_block_entry( "smeft", 39, 0.000000e + 00 ); + mdl_cjj31 = slha.get_block_entry( "smeft", 38, 0.000000e + 00 ); + mdl_cjj18 = slha.get_block_entry( "smeft", 37, 0.000000e + 00 ); + mdl_cjj11 = slha.get_block_entry( "smeft", 36, 0.000000e + 00 ); + mdl_cHtbRe = slha.get_block_entry( "smeft", 35, 0.000000e + 00 ); + mdl_cHudRe = slha.get_block_entry( "smeft", 34, 0.000000e + 00 ); + mdl_cHbq = slha.get_block_entry( "smeft", 33, 0.000000e + 00 ); + mdl_cHd = slha.get_block_entry( "smeft", 32, 0.000000e + 00 ); + mdl_cHt = slha.get_block_entry( "smeft", 31, 0.000000e + 00 ); + mdl_cHu = slha.get_block_entry( "smeft", 30, 0.000000e + 00 ); + mdl_cHQ3 = slha.get_block_entry( "smeft", 29, 0.000000e + 00 ); + mdl_cHj3 = slha.get_block_entry( "smeft", 28, 0.000000e + 00 ); + mdl_cHQ1 = slha.get_block_entry( "smeft", 27, 0.000000e + 00 ); + mdl_cHj1 = slha.get_block_entry( "smeft", 26, 0.000000e + 00 ); + mdl_cbBRe = slha.get_block_entry( "smeft", 25, 0.000000e + 00 ); + mdl_cdBRe = slha.get_block_entry( "smeft", 24, 0.000000e + 00 ); + mdl_cbWRe = slha.get_block_entry( "smeft", 23, 0.000000e + 00 ); + mdl_cdWRe = slha.get_block_entry( "smeft", 22, 0.000000e + 00 ); + mdl_cbGRe = slha.get_block_entry( "smeft", 21, 0.000000e + 00 ); + mdl_cdGRe = slha.get_block_entry( "smeft", 20, 0.000000e + 00 ); + mdl_ctBRe = slha.get_block_entry( "smeft", 19, 0.000000e + 00 ); + mdl_cuBRe = slha.get_block_entry( "smeft", 18, 0.000000e + 00 ); + mdl_ctWRe = slha.get_block_entry( "smeft", 17, 0.000000e + 00 ); + mdl_cuWRe = slha.get_block_entry( "smeft", 16, 0.000000e + 00 ); + mdl_ctGRe = slha.get_block_entry( "smeft", 15, 0.000000e + 00 ); + mdl_cuGRe = slha.get_block_entry( "smeft", 14, 0.000000e + 00 ); + mdl_cbHRe = slha.get_block_entry( "smeft", 13, 0.000000e + 00 ); + mdl_cdHRe = slha.get_block_entry( "smeft", 12, 0.000000e + 00 ); + mdl_ctHRe = slha.get_block_entry( "smeft", 11, 0.000000e + 00 ); + mdl_cuHRe = slha.get_block_entry( "smeft", 10, 0.000000e + 00 ); + mdl_cHWB = slha.get_block_entry( "smeft", 9, 0.000000e + 00 ); + mdl_cHB = slha.get_block_entry( "smeft", 8, 
0.000000e + 00 ); + mdl_cHW = slha.get_block_entry( "smeft", 7, 0.000000e + 00 ); + mdl_cHG = slha.get_block_entry( "smeft", 6, 0.000000e + 00 ); + mdl_cHDD = slha.get_block_entry( "smeft", 5, 0.000000e + 00 ); + mdl_cHbox = slha.get_block_entry( "smeft", 4, 0.000000e + 00 ); + mdl_cH = slha.get_block_entry( "smeft", 3, 0.000000e + 00 ); + mdl_cW = slha.get_block_entry( "smeft", 2, 0.000000e + 00 ); + mdl_cG = slha.get_block_entry( "smeft", 1, 0.000000e + 00 ); + mdl_MH = slha.get_block_entry( "mass", 25, 1.250900e + 02 ); + mdl_MZ = slha.get_block_entry( "mass", 23, 9.118760e + 01 ); + mdl_MTA = slha.get_block_entry( "mass", 15, 1.777000e + 00 ); + mdl_MMU = slha.get_block_entry( "mass", 13, 1.056600e - 01 ); + mdl_Me = slha.get_block_entry( "mass", 11, 5.110000e - 04 ); + mdl_MT = slha.get_block_entry( "mass", 6, 1.727600e + 02 ); + mdl_MB = slha.get_block_entry( "mass", 5, 4.180000e + 00 ); + mdl_MC = slha.get_block_entry( "mass", 4, 1.270000e + 00 ); + mdl_MS = slha.get_block_entry( "mass", 3, 9.300000e - 02 ); + mdl_MU = slha.get_block_entry( "mass", 2, 2.160000e - 03 ); + mdl_MD = slha.get_block_entry( "mass", 1, 4.670000e - 03 ); + mdl_complexi = cxsmpl( 0., 1. ); + mdl_cuH = mdl_cuHRe + mdl_cuHIm * mdl_complexi; + mdl_ctHH = mdl_ctHRe + mdl_ctHIm * mdl_complexi; + mdl_cdH = mdl_cdHRe + mdl_cdHIm * mdl_complexi; + mdl_cbH = mdl_cbHRe + mdl_cbHIm * mdl_complexi; + mdl_cuG = mdl_cuGRe + mdl_cuGIm * mdl_complexi; + mdl_ctG = mdl_ctGRe + mdl_ctGIm * mdl_complexi; + mdl_cuW = mdl_cuWRe + mdl_cuWIm * mdl_complexi; + mdl_ctW = mdl_ctWRe + mdl_ctWIm * mdl_complexi; + mdl_cuB = mdl_cuBRe + mdl_cuBIm * mdl_complexi; + mdl_ctB = mdl_ctBRe + mdl_ctBIm * mdl_complexi; + mdl_cdG = mdl_cdGRe + mdl_cdGIm * mdl_complexi; + mdl_cbG = mdl_cbGRe + mdl_cbGIm * mdl_complexi; + mdl_cdW = mdl_cdWRe + mdl_cdWIm * mdl_complexi; + mdl_cbW = mdl_cbWRe + mdl_cbWIm * mdl_complexi; + mdl_cdB = mdl_cdBRe + mdl_cdBIm * mdl_complexi; + mdl_cbBB = mdl_cbBRe + mdl_cbBIm * mdl_complexi; + mdl_cHud = mdl_cHudRe + mdl_cHudIm * mdl_complexi; + mdl_cHtb = mdl_cHtbRe + mdl_cHtbIm * mdl_complexi; + mdl_cutbd1 = mdl_cutbd1Re + mdl_cutbd1Im * mdl_complexi; + mdl_cutbd8 = mdl_cutbd8Re + mdl_cutbd8Im * mdl_complexi; + mdl_cjQtu1 = mdl_cjQtu1Re + mdl_cjQtu1Im * mdl_complexi; + mdl_cjQtu8 = mdl_cjQtu8Re + mdl_cjQtu8Im * mdl_complexi; + mdl_cjQbd1 = mdl_cjQbd1Re + mdl_cjQbd1Im * mdl_complexi; + mdl_cjQbd8 = mdl_cjQbd8Re + mdl_cjQbd8Im * mdl_complexi; + mdl_cjujd1 = mdl_cjujd1Re + mdl_cjujd1Im * mdl_complexi; + mdl_cjujd8 = mdl_cjujd8Re + mdl_cjujd8Im * mdl_complexi; + mdl_cjujd11 = mdl_cjujd11Re + mdl_cjujd11Im * mdl_complexi; + mdl_cjujd81 = mdl_cjujd81Re + mdl_cjujd81Im * mdl_complexi; + mdl_cQtjd1 = mdl_cQtjd1Re + mdl_cQtjd1Im * mdl_complexi; + mdl_cQtjd8 = mdl_cQtjd8Re + mdl_cQtjd8Im * mdl_complexi; + mdl_cjuQb1 = mdl_cjuQb1Re + mdl_cjuQb1Im * mdl_complexi; + mdl_cjuQb8 = mdl_cjuQb8Re + mdl_cjuQb8Im * mdl_complexi; + mdl_cQujb1 = mdl_cQujb1Re + mdl_cQujb1Im * mdl_complexi; + mdl_cQujb8 = mdl_cQujb8Re + mdl_cQujb8Im * mdl_complexi; + mdl_cjtQd1 = mdl_cjtQd1Re + mdl_cjtQd1Im * mdl_complexi; + mdl_cjtQd8 = mdl_cjtQd8Re + mdl_cjtQd8Im * mdl_complexi; + mdl_cQtQb1 = mdl_cQtQb1Re + mdl_cQtQb1Im * mdl_complexi; + mdl_cQtQb8 = mdl_cQtQb8Re + mdl_cQtQb8Im * mdl_complexi; + mdl_ceH = mdl_ceHRe + mdl_ceHIm * mdl_complexi; + mdl_ceW = mdl_ceWRe + mdl_ceWIm * mdl_complexi; + mdl_ceB = mdl_ceBRe + mdl_ceBIm * mdl_complexi; + mdl_cledj = mdl_cledjRe + mdl_cledjIm * mdl_complexi; + mdl_clebQ = mdl_clebQRe + mdl_clebQIm * mdl_complexi; + 
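// The remaining complex Wilson coefficients are assembled in the same way, c = cRe + cIm * mdl_complexi,
// i.e. c = Re(c) + i Im(c). The derived quantities computed just below implement the MW input scheme
// (Gf, MW, MZ as electroweak inputs), as can be read off the assignments that follow:
//   sth2   = 1 - MW^2 / MZ^2                           (sine squared of the weak mixing angle)
//   aEW    = sqrt(2) * Gf * MW^2 * ( 1 - MW^2/MZ^2 ) / pi
//   vevhat = 1 / ( 2^(1/4) * sqrt(Gf) )                (Higgs vev from Gf)
//   ee     = 2 * sqrt( pi * aEW )                      (electromagnetic coupling)
//   y_f    = sqrt(2) * ym_f / vevhat                   (Yukawa couplings from the input masses)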
mdl_cleju1 = mdl_cleju1Re + mdl_cleju1Im * mdl_complexi; + mdl_cleju3 = mdl_cleju3Re + mdl_cleju3Im * mdl_complexi; + mdl_cleQt1 = mdl_cleQt1Re + mdl_cleQt1Im * mdl_complexi; + mdl_cleQt3 = mdl_cleQt3Re + mdl_cleQt3Im * mdl_complexi; + mdl_MWsm = mdl_MW; + mdl_MW__exp__2 = ( ( mdl_MW ) * ( mdl_MW ) ); + mdl_MZ__exp__2 = ( ( mdl_MZ ) * ( mdl_MZ ) ); + mdl_sqrt__2 = sqrt( 2. ); + mdl_nb__2__exp__0_25 = pow( 2., 0.25 ); + mdl_MH__exp__2 = ( ( mdl_MH ) * ( mdl_MH ) ); + mdl_sth2 = 1. - mdl_MW__exp__2 / mdl_MZ__exp__2; + mdl_nb__10__exp___m_40 = pow( 10., -40. ); + mdl_propCorr = ABS( mdl_linearPropCorrections ) / ( ABS( mdl_linearPropCorrections ) + mdl_nb__10__exp___m_40 ); + mdl_MZ1 = mdl_MZ; + mdl_MH1 = mdl_MH; + mdl_MT1 = mdl_MT; + mdl_WZ1 = mdl_WZ; + mdl_WW1 = mdl_WW; + mdl_WH1 = mdl_WH; + mdl_WT1 = mdl_WT; + mdl_cth = sqrt( 1. - mdl_sth2 ); + mdl_MW1 = mdl_MWsm; + mdl_sqrt__sth2 = sqrt( mdl_sth2 ); + mdl_sth = mdl_sqrt__sth2; + mdl_LambdaSMEFT__exp__2 = ( ( mdl_LambdaSMEFT ) * ( mdl_LambdaSMEFT ) ); + mdl_conjg__cbH = conj( mdl_cbH ); + mdl_conjg__ctHH = conj( mdl_ctHH ); + mdl_MT__exp__2 = ( ( mdl_MT ) * ( mdl_MT ) ); + mdl_MH__exp__6 = pow( mdl_MH, 6. ); + mdl_MWsm__exp__6 = pow( mdl_MWsm, 6. ); + mdl_MH__exp__4 = ( ( mdl_MH ) * ( mdl_MH ) * ( mdl_MH ) * ( mdl_MH ) ); + mdl_MWsm__exp__4 = ( ( mdl_MWsm ) * ( mdl_MWsm ) * ( mdl_MWsm ) * ( mdl_MWsm ) ); + mdl_MWsm__exp__2 = ( ( mdl_MWsm ) * ( mdl_MWsm ) ); + mdl_MZ__exp__4 = ( ( mdl_MZ ) * ( mdl_MZ ) * ( mdl_MZ ) * ( mdl_MZ ) ); + mdl_MZ__exp__6 = pow( mdl_MZ, 6. ); + mdl_cth__exp__2 = ( ( mdl_cth ) * ( mdl_cth ) ); + mdl_sth__exp__2 = ( ( mdl_sth ) * ( mdl_sth ) ); + mdl_MB__exp__2 = ( ( mdl_MB ) * ( mdl_MB ) ); + mdl_MZ__exp__3 = ( ( mdl_MZ ) * ( mdl_MZ ) * ( mdl_MZ ) ); + mdl_sth__exp__4 = ( ( mdl_sth ) * ( mdl_sth ) * ( mdl_sth ) * ( mdl_sth ) ); + mdl_sth__exp__6 = pow( mdl_sth, 6. ); + mdl_sth__exp__3 = ( ( mdl_sth ) * ( mdl_sth ) * ( mdl_sth ) ); + mdl_sth__exp__5 = pow( mdl_sth, 5. ); + mdl_propCorr__exp__2 = ( ( mdl_propCorr ) * ( mdl_propCorr ) ); + mdl_propCorr__exp__3 = ( ( mdl_propCorr ) * ( mdl_propCorr ) * ( mdl_propCorr ) ); + mdl_propCorr__exp__4 = ( ( mdl_propCorr ) * ( mdl_propCorr ) * ( mdl_propCorr ) * ( mdl_propCorr ) ); + mdl_cth__exp__3 = ( ( mdl_cth ) * ( mdl_cth ) * ( mdl_cth ) ); + mdl_aEW = ( mdl_Gf * mdl_MW__exp__2 * ( 1. - mdl_MW__exp__2 / mdl_MZ__exp__2 ) * mdl_sqrt__2 ) / M_PI; + mdl_sqrt__Gf = sqrt( mdl_Gf ); + mdl_vevhat = 1. / ( mdl_nb__2__exp__0_25 * mdl_sqrt__Gf ); + mdl_lam = ( mdl_Gf * mdl_MH__exp__2 ) / mdl_sqrt__2; + mdl_sqrt__aEW = sqrt( mdl_aEW ); + mdl_ee = 2. * mdl_sqrt__aEW * sqrt( M_PI ); + mdl_yb = ( mdl_ymb * mdl_sqrt__2 ) / mdl_vevhat; + mdl_yc = ( mdl_ymc * mdl_sqrt__2 ) / mdl_vevhat; + mdl_ydo = ( mdl_ymdo * mdl_sqrt__2 ) / mdl_vevhat; + mdl_ye = ( mdl_yme * mdl_sqrt__2 ) / mdl_vevhat; + mdl_ym = ( mdl_ymm * mdl_sqrt__2 ) / mdl_vevhat; + mdl_ys = ( mdl_yms * mdl_sqrt__2 ) / mdl_vevhat; + mdl_yt = ( mdl_ymt * mdl_sqrt__2 ) / mdl_vevhat; + mdl_ytau = ( mdl_ymtau * mdl_sqrt__2 ) / mdl_vevhat; + mdl_yup = ( mdl_ymup * mdl_sqrt__2 ) / mdl_vevhat; + mdl_vevhat__exp__2 = ( ( mdl_vevhat ) * ( mdl_vevhat ) ); + mdl_dGf = ( ( 2. * mdl_cHl3 - mdl_cll1 ) * mdl_vevhat__exp__2 ) / mdl_LambdaSMEFT__exp__2; + mdl_dkH = ( ( mdl_cHbox - mdl_cHDD / 4. ) * mdl_vevhat__exp__2 ) / mdl_LambdaSMEFT__exp__2; + mdl_vevT = ( 1. + mdl_dGf / 2. ) * mdl_vevhat; + mdl_g1 = mdl_ee / mdl_cth; + mdl_gw = mdl_ee / mdl_sth; + mdl_yb0 = ( 1. - mdl_dGf / 2. ) * mdl_yb + ( mdl_vevhat__exp__2 * mdl_conjg__cbH ) / ( 2. 
* mdl_LambdaSMEFT__exp__2 ); + mdl_yt0 = ( 1. - mdl_dGf / 2. ) * mdl_yt + ( mdl_vevhat__exp__2 * mdl_conjg__ctHH ) / ( 2. * mdl_LambdaSMEFT__exp__2 ); + mdl_ee__exp__2 = ( ( mdl_ee ) * ( mdl_ee ) ); + mdl_gHaa = ( mdl_ee__exp__2 * ( -1.75 + ( 4. * ( 0.3333333333333333 + ( 7. * mdl_MH__exp__2 ) / ( 360. * mdl_MT__exp__2 ) ) ) / 3. - ( 29. * mdl_MH__exp__6 ) / ( 16800. * mdl_MWsm__exp__6 ) - ( 19. * mdl_MH__exp__4 ) / ( 1680. * mdl_MWsm__exp__4 ) - ( 11. * mdl_MH__exp__2 ) / ( 120. * mdl_MWsm__exp__2 ) ) ) / ( 8. * ( ( M_PI ) * ( M_PI ) ) ); + mdl_gHza = ( mdl_ee__exp__2 * ( ( ( 0.4583333333333333 + ( 29. * mdl_MH__exp__6 ) / ( 100800. * mdl_MWsm__exp__6 ) + ( 19. * mdl_MH__exp__4 ) / ( 10080. * mdl_MWsm__exp__4 ) + ( 11. * mdl_MH__exp__2 ) / ( 720. * mdl_MWsm__exp__2 ) + ( mdl_MH__exp__4 * mdl_MZ__exp__2 ) / ( 2100. * mdl_MWsm__exp__6 ) + ( mdl_MH__exp__2 * mdl_MZ__exp__2 ) / ( 280. * mdl_MWsm__exp__4 ) + ( 7. * mdl_MZ__exp__2 ) / ( 180. * mdl_MWsm__exp__2 ) + ( 67. * mdl_MH__exp__2 * mdl_MZ__exp__4 ) / ( 100800. * mdl_MWsm__exp__6 ) + ( 53. * mdl_MZ__exp__4 ) / ( 10080. * mdl_MWsm__exp__4 ) + ( 43. * mdl_MZ__exp__6 ) / ( 50400. * mdl_MWsm__exp__6 ) - ( 31. * mdl_cth__exp__2 ) / ( 24. * mdl_sth__exp__2 ) - ( 29. * mdl_cth__exp__2 * mdl_MH__exp__6 ) / ( 20160. * mdl_MWsm__exp__6 * mdl_sth__exp__2 ) - ( 19. * mdl_cth__exp__2 * mdl_MH__exp__4 ) / ( 2016. * mdl_MWsm__exp__4 * mdl_sth__exp__2 ) - ( 11. * mdl_cth__exp__2 * mdl_MH__exp__2 ) / ( 144. * mdl_MWsm__exp__2 * mdl_sth__exp__2 ) - ( mdl_cth__exp__2 * mdl_MH__exp__4 * mdl_MZ__exp__2 ) / ( 560. * mdl_MWsm__exp__6 * mdl_sth__exp__2 ) - ( 31. * mdl_cth__exp__2 * mdl_MH__exp__2 * mdl_MZ__exp__2 ) / ( 2520. * mdl_MWsm__exp__4 * mdl_sth__exp__2 ) - ( mdl_cth__exp__2 * mdl_MZ__exp__2 ) / ( 9. * mdl_MWsm__exp__2 * mdl_sth__exp__2 ) - ( 43. * mdl_cth__exp__2 * mdl_MH__exp__2 * mdl_MZ__exp__4 ) / ( 20160. * mdl_MWsm__exp__6 * mdl_sth__exp__2 ) - ( 17. * mdl_cth__exp__2 * mdl_MZ__exp__4 ) / ( 1120. * mdl_MWsm__exp__4 * mdl_sth__exp__2 ) - ( 5. * mdl_cth__exp__2 * mdl_MZ__exp__6 ) / ( 2016. * mdl_MWsm__exp__6 * mdl_sth__exp__2 ) ) * mdl_sth ) / mdl_cth + ( ( 0.3333333333333333 + ( 7. * mdl_MH__exp__2 ) / ( 360. * mdl_MT__exp__2 ) + ( 11. * mdl_MZ__exp__2 ) / ( 360. * mdl_MT__exp__2 ) ) * ( 0.5 - ( 4. * mdl_sth__exp__2 ) / 3. ) ) / ( mdl_cth * mdl_sth ) ) ) / ( 4. * ( ( M_PI ) * ( M_PI ) ) ); + mdl_dMZ2 = ( ( mdl_cHDD / 2. + 2. * mdl_cHWB * mdl_cth * mdl_sth ) * mdl_vevhat__exp__2 ) / mdl_LambdaSMEFT__exp__2; + mdl_dMH2 = 2. * mdl_dkH - ( 3. * mdl_cH * mdl_vevhat__exp__2 ) / ( 2. * mdl_lam * mdl_LambdaSMEFT__exp__2 ); + mdl_dgw = -mdl_dGf / 2.; + mdl_barlam = ( 1. - mdl_dGf - mdl_dMH2 ) * mdl_lam; + mdl_dWT = 2. * mdl_WT * ( mdl_dgw + ( mdl_vevhat * ( mdl_ee * ( 3. * mdl_cHtbRe * mdl_MB * mdl_MT * mdl_MWsm__exp__2 + mdl_cHQ3 * ( ( ( mdl_MB__exp__2 - mdl_MT__exp__2 ) * ( mdl_MB__exp__2 - mdl_MT__exp__2 ) ) + ( mdl_MB__exp__2 + mdl_MT__exp__2 ) * mdl_MWsm__exp__2 - 2. * mdl_MWsm__exp__4 ) ) * mdl_vevhat + 6. * mdl_MWsm__exp__2 * ( mdl_ctWRe * mdl_MT * ( mdl_MB__exp__2 - mdl_MT__exp__2 + mdl_MWsm__exp__2 ) + mdl_cbWRe * mdl_MB * ( -mdl_MB__exp__2 + mdl_MT__exp__2 + mdl_MWsm__exp__2 ) ) * mdl_sth * mdl_sqrt__2 ) ) / ( mdl_ee * mdl_LambdaSMEFT__exp__2 * ( ( ( mdl_MB__exp__2 - mdl_MT__exp__2 ) * ( mdl_MB__exp__2 - mdl_MT__exp__2 ) ) + ( mdl_MB__exp__2 + mdl_MT__exp__2 ) * mdl_MWsm__exp__2 - 2. * mdl_MWsm__exp__4 ) ) ); + mdl_dWW = ( 2. * mdl_dgw + ( 2. * ( 2. * mdl_cHj3 + mdl_cHl3 ) * mdl_vevhat__exp__2 ) / ( 3. 
* mdl_LambdaSMEFT__exp__2 ) ) * mdl_WW; + mdl_gwsh = ( mdl_ee * ( 1. + mdl_dgw - ( mdl_cHW * mdl_vevhat__exp__2 ) / mdl_LambdaSMEFT__exp__2 ) ) / mdl_sth; + mdl_vev = ( 1. - ( 3. * mdl_cH * mdl_vevhat__exp__2 ) / ( 8. * mdl_lam * mdl_LambdaSMEFT__exp__2 ) ) * mdl_vevT; + mdl_dg1 = ( -mdl_dGf - mdl_dMZ2 / mdl_sth__exp__2 ) / 2.; + mdl_dWHc = mdl_yc / ( mdl_yc + mdl_nb__10__exp___m_40 ) * ( -0.02884 * mdl_dGf + ( ( 0.05768 * mdl_cHbox - 0.01442 * mdl_cHDD - 0.05768 * mdl_cuHRe ) * mdl_vevhat__exp__2 ) / mdl_LambdaSMEFT__exp__2 ); + mdl_dWHb = mdl_yb / ( mdl_yb + mdl_nb__10__exp___m_40 ) * ( mdl_vevhat__exp__2 * ( -1.1618 * mdl_cbHRe ) / ( mdl_LambdaSMEFT__exp__2 * ( mdl_yb + mdl_nb__10__exp___m_40 ) ) - 0.5809 * mdl_dGf + ( mdl_vevhat__exp__2 * ( 1.1618 * mdl_cHbox - 0.29045 * mdl_cHDD ) ) / ( mdl_LambdaSMEFT__exp__2 ) ); + mdl_dWHta = mdl_ytau / ( mdl_ytau + mdl_nb__10__exp___m_40 ) * ( -0.06256 * mdl_dGf + mdl_vevhat__exp__2 * ( -0.12512 * mdl_ceHRe + 0.12512 * mdl_cHbox - 0.03128 * mdl_cHDD ) / ( mdl_LambdaSMEFT__exp__2 ) ); + mdl_dWZ = mdl_WZ * ( -1. + ( 36. * mdl_cth * mdl_MB * mdl_MZ__exp__2 * mdl_sth * ( mdl_cbWRe * mdl_cth + mdl_cbBRe * mdl_sth ) * ( -3. + 4. * mdl_sth__exp__2 ) * mdl_vevhat * mdl_sqrt__2 * sqrt( -4. * mdl_MB__exp__2 + mdl_MZ__exp__2 ) + mdl_ee * mdl_LambdaSMEFT__exp__2 * ( 2. * mdl_MZ__exp__3 * ( 27. + 54. * mdl_dgw - 54. * ( 1. + mdl_dg1 + mdl_dgw ) * mdl_sth__exp__2 + 76. * ( 1. + 4. * mdl_dg1 - 2. * mdl_dgw ) * mdl_sth__exp__4 + 152. * ( -mdl_dg1 + mdl_dgw ) * mdl_sth__exp__6 ) + mdl_MZ__exp__2 * ( 9. + 18. * mdl_dgw - 6. * ( 2. + mdl_dg1 + 3. * mdl_dgw ) * mdl_sth__exp__2 + 8. * ( 1. + 4. * mdl_dg1 - 2. * mdl_dgw ) * mdl_sth__exp__4 + 16. * ( -mdl_dg1 + mdl_dgw ) * mdl_sth__exp__6 ) * sqrt( -4. * mdl_MB__exp__2 + mdl_MZ__exp__2 ) + mdl_MB__exp__2 * ( -9. - 18. * mdl_dgw - 6. * ( 4. + 11. * mdl_dg1 - 3. * mdl_dgw ) * mdl_sth__exp__2 + 16. * ( 1. + 4. * mdl_dg1 - 2. * mdl_dgw ) * mdl_sth__exp__4 + 32. * ( -mdl_dg1 + mdl_dgw ) * mdl_sth__exp__6 ) * sqrt( -4. * mdl_MB__exp__2 + mdl_MZ__exp__2 ) ) + 2. * mdl_ee * mdl_vevhat__exp__2 * ( 36. * mdl_cHj3 * mdl_MZ__exp__3 + 18. * mdl_cHl3 * mdl_MZ__exp__3 + 9. * ( 3. * mdl_cHbq - mdl_cHQ1 - mdl_cHQ3 ) * mdl_MB__exp__2 * sqrt( -4. * mdl_MB__exp__2 + mdl_MZ__exp__2 ) + 9. * mdl_cHQ1 * mdl_MZ__exp__2 * sqrt( -4. * mdl_MB__exp__2 + mdl_MZ__exp__2 ) + 9. * mdl_cHQ3 * mdl_MZ__exp__2 * sqrt( -4. * mdl_MB__exp__2 + mdl_MZ__exp__2 ) + 3. * mdl_cHWB * mdl_cth * ( -7. * mdl_MB__exp__2 + mdl_MZ__exp__2 ) * mdl_sth * sqrt( -4. * mdl_MB__exp__2 + mdl_MZ__exp__2 ) + 8. * mdl_cHWB * mdl_cth * mdl_sth__exp__3 * ( 2. * mdl_MB__exp__2 * sqrt( -4. * mdl_MB__exp__2 + mdl_MZ__exp__2 ) + mdl_MZ__exp__2 * ( 19. * mdl_MZ + sqrt( -4. * mdl_MB__exp__2 + mdl_MZ__exp__2 ) ) ) - 8. * mdl_cHWB * mdl_cth * mdl_sth__exp__5 * ( 2. * mdl_MB__exp__2 * sqrt( -4. * mdl_MB__exp__2 + mdl_MZ__exp__2 ) + mdl_MZ__exp__2 * ( 19. * mdl_MZ + sqrt( -4. * mdl_MB__exp__2 + mdl_MZ__exp__2 ) ) ) - 6. * mdl_sth__exp__2 * ( 2. * ( mdl_cHbq + mdl_cHQ1 + mdl_cHQ3 ) * mdl_MB__exp__2 * sqrt( -4. * mdl_MB__exp__2 + mdl_MZ__exp__2 ) + mdl_MZ__exp__2 * ( ( 2. * mdl_cHd + 3. * mdl_cHe - 2. * mdl_cHj1 + 3. * ( 2. * mdl_cHj3 + mdl_cHl1 + mdl_cHl3 ) - 4. * mdl_cHu ) * mdl_MZ + ( mdl_cHbq + mdl_cHQ1 + mdl_cHQ3 ) * sqrt( -4. * mdl_MB__exp__2 + mdl_MZ__exp__2 ) ) ) ) ) / ( mdl_ee * mdl_LambdaSMEFT__exp__2 * ( 2. * mdl_MZ__exp__3 * ( 27. - 54. * mdl_sth__exp__2 + 76. * mdl_sth__exp__4 ) + mdl_MZ__exp__2 * ( 9. - 12. * mdl_sth__exp__2 + 8. * mdl_sth__exp__4 ) * sqrt( -4. 
* mdl_MB__exp__2 + mdl_MZ__exp__2 ) + mdl_MB__exp__2 * ( -9. - 24. * mdl_sth__exp__2 + 16. * mdl_sth__exp__4 ) * sqrt( -4. * mdl_MB__exp__2 + mdl_MZ__exp__2 ) ) ) ); + mdl_g1sh = ( mdl_ee * ( 1. + mdl_dg1 - ( mdl_cHB * mdl_vevhat__exp__2 ) / mdl_LambdaSMEFT__exp__2 ) ) / mdl_cth; + mdl_ee__exp__3 = ( ( mdl_ee ) * ( mdl_ee ) * ( mdl_ee ) ); + mdl_vevhat__exp__3 = ( ( mdl_vevhat ) * ( mdl_vevhat ) * ( mdl_vevhat ) ); +} + +void +Parameters_SMEFTsim_topU3l_MwScheme_UFO::setIndependentCouplings() +{ + // (none) +} + +/* +void +Parameters_SMEFTsim_topU3l_MwScheme_UFO::setDependentParameters() // now computed event-by-event (running alphas #373) +{ + mdl_sqrt__aS = sqrt( aS ); + G = 2. * mdl_sqrt__aS * sqrt( M_PI ); + mdl_gHgg2 = ( -7. * aS ) / ( 720. * M_PI ); + mdl_gHgg4 = aS / ( 360. * M_PI ); + mdl_gHgg5 = aS / ( 20. * M_PI ); + mdl_G__exp__2 = ( ( G ) * ( G ) ); + mdl_gHgg1 = mdl_G__exp__2 / ( 48. * ( ( M_PI ) * ( M_PI ) ) ); + mdl_gHgg3 = ( aS * G ) / ( 60. * M_PI ); + mdl_G__exp__3 = ( ( G ) * ( G ) * ( G ) ); + mdl_dWH = mdl_WH * ( -0.24161 * mdl_dGf + 0.96644 * mdl_dgw + 0.4832199999999999 * mdl_dkH - 0.11186509426655467 * mdl_dWW + ( 0.36410378449238195 * mdl_cHj3 * mdl_vevhat__exp__2 ) / mdl_LambdaSMEFT__exp__2 + ( 0.17608307708657747 * mdl_cHl3 * mdl_vevhat__exp__2 ) / mdl_LambdaSMEFT__exp__2 + ( 0.1636 * mdl_cHG * mdl_MT__exp__2 * mdl_vevhat__exp__2 ) / ( mdl_LambdaSMEFT__exp__2 * ( -0.5 * mdl_gHgg2 * mdl_MH__exp__2 + mdl_gHgg1 * mdl_MT__exp__2 ) ) + ( mdl_cHW * ( -0.35937785117066967 * mdl_gHaa * mdl_gHza + 0.006164 * mdl_cth * mdl_gHaa * mdl_sth + 0.00454 * mdl_gHza * mdl_sth__exp__2 ) * mdl_vevhat__exp__2 ) / ( mdl_gHaa * mdl_gHza * mdl_LambdaSMEFT__exp__2 ) + ( mdl_cHWB * ( -0.00454 * mdl_cth * mdl_gHza * mdl_sth + mdl_gHaa * ( -0.0030819999999999997 + 0.006163999999999999 * mdl_sth__exp__2 ) ) * mdl_vevhat__exp__2 ) / ( mdl_gHaa * mdl_gHza * mdl_LambdaSMEFT__exp__2 ) + ( mdl_cHB * ( -0.006163999999999999 * mdl_cth * mdl_gHaa * mdl_sth - 0.00454 * mdl_gHza * ( -1. 
+ mdl_sth__exp__2 ) ) * mdl_vevhat__exp__2 ) / ( mdl_gHaa * mdl_gHza * mdl_LambdaSMEFT__exp__2 ) + mdl_dWHc + mdl_dWHb + mdl_dWHta ); +} + +void +Parameters_SMEFTsim_topU3l_MwScheme_UFO::setDependentCouplings() // now computed event-by-event (running alphas #373) +{ + GC_6 = -( mdl_complexi * G ); + GC_7 = G; + GC_8 = mdl_complexi * mdl_G__exp__2; +} +*/ + +#endif + +// Routines for printing out parameters +void +Parameters_SMEFTsim_topU3l_MwScheme_UFO::printIndependentParameters() +{ + std::cout << "SMEFTsim_topU3l_MwScheme_UFO model parameters independent of event kinematics:" << std::endl; + std::cout << "(Warning: aS in the runcard is ignored because event-by-event Gs are hardcoded or retrieved from Fortran)" << std::endl; + std::cout << std::setw( 20 ) << "mdl_WH = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_WH << std::endl; + std::cout << std::setw( 20 ) << "mdl_WW = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_WW << std::endl; + std::cout << std::setw( 20 ) << "mdl_WZ = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_WZ << std::endl; + std::cout << std::setw( 20 ) << "mdl_WT = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_WT << std::endl; + std::cout << std::setw( 20 ) << "mdl_ymtau = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_ymtau << std::endl; + std::cout << std::setw( 20 ) << "mdl_ymm = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_ymm << std::endl; + std::cout << std::setw( 20 ) << "mdl_yme = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_yme << std::endl; + std::cout << std::setw( 20 ) << "mdl_ymt = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_ymt << std::endl; + std::cout << std::setw( 20 ) << "mdl_ymb = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_ymb << std::endl; + std::cout << std::setw( 20 ) << "mdl_ymc = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_ymc << std::endl; + std::cout << std::setw( 20 ) << "mdl_yms = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_yms << std::endl; + std::cout << std::setw( 20 ) << "mdl_ymup = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_ymup << std::endl; + std::cout << std::setw( 20 ) << "mdl_ymdo = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_ymdo << std::endl; + std::cout << std::setw( 20 ) << "mdl_linearPropCorrections = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_linearPropCorrections << std::endl; + //std::cout << std::setw( 20 ) << "aS = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << aS << std::endl; // now retrieved event-by-event (as G) from Fortran (running alphas #373) + std::cout << std::setw( 20 ) << "mdl_Gf = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_Gf << std::endl; + std::cout << std::setw( 20 ) << "mdl_MW = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_MW << std::endl; + std::cout << std::setw( 20 ) << "mdl_LambdaSMEFT = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_LambdaSMEFT << std::endl; + std::cout << std::setw( 20 ) << "mdl_cleQt3Im = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cleQt3Im << std::endl; + std::cout << std::setw( 20 ) << "mdl_cleQt1Im = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << 
mdl_cleQt1Im << std::endl; + std::cout << std::setw( 20 ) << "mdl_cleju3Im = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cleju3Im << std::endl; + std::cout << std::setw( 20 ) << "mdl_cleju1Im = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cleju1Im << std::endl; + std::cout << std::setw( 20 ) << "mdl_clebQIm = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_clebQIm << std::endl; + std::cout << std::setw( 20 ) << "mdl_cledjIm = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cledjIm << std::endl; + std::cout << std::setw( 20 ) << "mdl_ceBIm = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_ceBIm << std::endl; + std::cout << std::setw( 20 ) << "mdl_ceWIm = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_ceWIm << std::endl; + std::cout << std::setw( 20 ) << "mdl_ceHIm = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_ceHIm << std::endl; + std::cout << std::setw( 20 ) << "mdl_cQtQb8Im = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cQtQb8Im << std::endl; + std::cout << std::setw( 20 ) << "mdl_cQtQb1Im = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cQtQb1Im << std::endl; + std::cout << std::setw( 20 ) << "mdl_cjtQd8Im = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cjtQd8Im << std::endl; + std::cout << std::setw( 20 ) << "mdl_cjtQd1Im = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cjtQd1Im << std::endl; + std::cout << std::setw( 20 ) << "mdl_cQujb8Im = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cQujb8Im << std::endl; + std::cout << std::setw( 20 ) << "mdl_cQujb1Im = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cQujb1Im << std::endl; + std::cout << std::setw( 20 ) << "mdl_cjuQb8Im = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cjuQb8Im << std::endl; + std::cout << std::setw( 20 ) << "mdl_cjuQb1Im = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cjuQb1Im << std::endl; + std::cout << std::setw( 20 ) << "mdl_cQtjd8Im = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cQtjd8Im << std::endl; + std::cout << std::setw( 20 ) << "mdl_cQtjd1Im = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cQtjd1Im << std::endl; + std::cout << std::setw( 20 ) << "mdl_cjujd81Im = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cjujd81Im << std::endl; + std::cout << std::setw( 20 ) << "mdl_cjujd11Im = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cjujd11Im << std::endl; + std::cout << std::setw( 20 ) << "mdl_cjujd8Im = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cjujd8Im << std::endl; + std::cout << std::setw( 20 ) << "mdl_cjujd1Im = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cjujd1Im << std::endl; + std::cout << std::setw( 20 ) << "mdl_cjQbd8Im = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cjQbd8Im << std::endl; + std::cout << std::setw( 20 ) << "mdl_cjQbd1Im = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cjQbd1Im << std::endl; + std::cout << std::setw( 20 ) << "mdl_cjQtu8Im = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cjQtu8Im << std::endl; + std::cout << std::setw( 20 ) << "mdl_cjQtu1Im = " << 
std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cjQtu1Im << std::endl; + std::cout << std::setw( 20 ) << "mdl_cutbd8Im = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cutbd8Im << std::endl; + std::cout << std::setw( 20 ) << "mdl_cutbd1Im = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cutbd1Im << std::endl; + std::cout << std::setw( 20 ) << "mdl_cHtbIm = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cHtbIm << std::endl; + std::cout << std::setw( 20 ) << "mdl_cHudIm = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cHudIm << std::endl; + std::cout << std::setw( 20 ) << "mdl_cbHIm = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cbHIm << std::endl; + std::cout << std::setw( 20 ) << "mdl_cdHIm = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cdHIm << std::endl; + std::cout << std::setw( 20 ) << "mdl_ctHIm = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_ctHIm << std::endl; + std::cout << std::setw( 20 ) << "mdl_cuHIm = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cuHIm << std::endl; + std::cout << std::setw( 20 ) << "mdl_cbBIm = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cbBIm << std::endl; + std::cout << std::setw( 20 ) << "mdl_cdBIm = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cdBIm << std::endl; + std::cout << std::setw( 20 ) << "mdl_cbWIm = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cbWIm << std::endl; + std::cout << std::setw( 20 ) << "mdl_cdWIm = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cdWIm << std::endl; + std::cout << std::setw( 20 ) << "mdl_cbGIm = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cbGIm << std::endl; + std::cout << std::setw( 20 ) << "mdl_cdGIm = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cdGIm << std::endl; + std::cout << std::setw( 20 ) << "mdl_ctBIm = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_ctBIm << std::endl; + std::cout << std::setw( 20 ) << "mdl_cuBIm = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cuBIm << std::endl; + std::cout << std::setw( 20 ) << "mdl_ctWIm = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_ctWIm << std::endl; + std::cout << std::setw( 20 ) << "mdl_cuWIm = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cuWIm << std::endl; + std::cout << std::setw( 20 ) << "mdl_ctGIm = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_ctGIm << std::endl; + std::cout << std::setw( 20 ) << "mdl_cuGIm = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cuGIm << std::endl; + std::cout << std::setw( 20 ) << "mdl_cHWBtil = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cHWBtil << std::endl; + std::cout << std::setw( 20 ) << "mdl_cHBtil = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cHBtil << std::endl; + std::cout << std::setw( 20 ) << "mdl_cHWtil = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cHWtil << std::endl; + std::cout << std::setw( 20 ) << "mdl_cHGtil = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cHGtil << std::endl; + std::cout << std::setw( 20 ) << "mdl_cWtil = " << std::setiosflags( std::ios::scientific ) << 
std::setw( 10 ) << mdl_cWtil << std::endl; + std::cout << std::setw( 20 ) << "mdl_cGtil = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cGtil << std::endl; + std::cout << std::setw( 20 ) << "mdl_cleQt3Re = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cleQt3Re << std::endl; + std::cout << std::setw( 20 ) << "mdl_cleju3Re = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cleju3Re << std::endl; + std::cout << std::setw( 20 ) << "mdl_cleQt1Re = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cleQt1Re << std::endl; + std::cout << std::setw( 20 ) << "mdl_cleju1Re = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cleju1Re << std::endl; + std::cout << std::setw( 20 ) << "mdl_clebQRe = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_clebQRe << std::endl; + std::cout << std::setw( 20 ) << "mdl_cledjRe = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cledjRe << std::endl; + std::cout << std::setw( 20 ) << "mdl_cle = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cle << std::endl; + std::cout << std::setw( 20 ) << "mdl_cbl = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cbl << std::endl; + std::cout << std::setw( 20 ) << "mdl_cld = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cld << std::endl; + std::cout << std::setw( 20 ) << "mdl_ctl = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_ctl << std::endl; + std::cout << std::setw( 20 ) << "mdl_clu = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_clu << std::endl; + std::cout << std::setw( 20 ) << "mdl_cQe = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cQe << std::endl; + std::cout << std::setw( 20 ) << "mdl_cje = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cje << std::endl; + std::cout << std::setw( 20 ) << "mdl_cbe = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cbe << std::endl; + std::cout << std::setw( 20 ) << "mdl_ced = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_ced << std::endl; + std::cout << std::setw( 20 ) << "mdl_cte = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cte << std::endl; + std::cout << std::setw( 20 ) << "mdl_ceu = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_ceu << std::endl; + std::cout << std::setw( 20 ) << "mdl_cee = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cee << std::endl; + std::cout << std::setw( 20 ) << "mdl_cQl3 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cQl3 << std::endl; + std::cout << std::setw( 20 ) << "mdl_cQl1 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cQl1 << std::endl; + std::cout << std::setw( 20 ) << "mdl_clj3 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_clj3 << std::endl; + std::cout << std::setw( 20 ) << "mdl_clj1 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_clj1 << std::endl; + std::cout << std::setw( 20 ) << "mdl_cll1 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cll1 << std::endl; + std::cout << std::setw( 20 ) << "mdl_cll = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cll << std::endl; + std::cout << std::setw( 20 ) << "mdl_cHe = " << 
std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cHe << std::endl; + std::cout << std::setw( 20 ) << "mdl_cHl3 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cHl3 << std::endl; + std::cout << std::setw( 20 ) << "mdl_cHl1 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cHl1 << std::endl; + std::cout << std::setw( 20 ) << "mdl_ceBRe = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_ceBRe << std::endl; + std::cout << std::setw( 20 ) << "mdl_ceWRe = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_ceWRe << std::endl; + std::cout << std::setw( 20 ) << "mdl_ceHRe = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_ceHRe << std::endl; + std::cout << std::setw( 20 ) << "mdl_cQtQb8Re = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cQtQb8Re << std::endl; + std::cout << std::setw( 20 ) << "mdl_cQtQb1Re = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cQtQb1Re << std::endl; + std::cout << std::setw( 20 ) << "mdl_cjtQd8Re = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cjtQd8Re << std::endl; + std::cout << std::setw( 20 ) << "mdl_cjtQd1Re = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cjtQd1Re << std::endl; + std::cout << std::setw( 20 ) << "mdl_cQujb8Re = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cQujb8Re << std::endl; + std::cout << std::setw( 20 ) << "mdl_cQujb1Re = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cQujb1Re << std::endl; + std::cout << std::setw( 20 ) << "mdl_cjuQb8Re = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cjuQb8Re << std::endl; + std::cout << std::setw( 20 ) << "mdl_cjuQb1Re = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cjuQb1Re << std::endl; + std::cout << std::setw( 20 ) << "mdl_cQtjd8Re = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cQtjd8Re << std::endl; + std::cout << std::setw( 20 ) << "mdl_cQtjd1Re = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cQtjd1Re << std::endl; + std::cout << std::setw( 20 ) << "mdl_cjujd81Re = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cjujd81Re << std::endl; + std::cout << std::setw( 20 ) << "mdl_cjujd11Re = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cjujd11Re << std::endl; + std::cout << std::setw( 20 ) << "mdl_cjujd8Re = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cjujd8Re << std::endl; + std::cout << std::setw( 20 ) << "mdl_cjujd1Re = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cjujd1Re << std::endl; + std::cout << std::setw( 20 ) << "mdl_cjQbd8Re = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cjQbd8Re << std::endl; + std::cout << std::setw( 20 ) << "mdl_cjQbd1Re = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cjQbd1Re << std::endl; + std::cout << std::setw( 20 ) << "mdl_cjQtu8Re = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cjQtu8Re << std::endl; + std::cout << std::setw( 20 ) << "mdl_cjQtu1Re = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cjQtu1Re << std::endl; + std::cout << std::setw( 20 ) << "mdl_cQb8 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cQb8 << std::endl; + std::cout << std::setw( 
20 ) << "mdl_cQb1 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cQb1 << std::endl; + std::cout << std::setw( 20 ) << "mdl_cbj8 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cbj8 << std::endl; + std::cout << std::setw( 20 ) << "mdl_cbj1 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cbj1 << std::endl; + std::cout << std::setw( 20 ) << "mdl_cQd8 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cQd8 << std::endl; + std::cout << std::setw( 20 ) << "mdl_cQd1 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cQd1 << std::endl; + std::cout << std::setw( 20 ) << "mdl_cjd8 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cjd8 << std::endl; + std::cout << std::setw( 20 ) << "mdl_cjd1 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cjd1 << std::endl; + std::cout << std::setw( 20 ) << "mdl_cQt8 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cQt8 << std::endl; + std::cout << std::setw( 20 ) << "mdl_cQt1 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cQt1 << std::endl; + std::cout << std::setw( 20 ) << "mdl_ctj8 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_ctj8 << std::endl; + std::cout << std::setw( 20 ) << "mdl_ctj1 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_ctj1 << std::endl; + std::cout << std::setw( 20 ) << "mdl_cQu8 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cQu8 << std::endl; + std::cout << std::setw( 20 ) << "mdl_cju8 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cju8 << std::endl; + std::cout << std::setw( 20 ) << "mdl_cQu1 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cQu1 << std::endl; + std::cout << std::setw( 20 ) << "mdl_cju1 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cju1 << std::endl; + std::cout << std::setw( 20 ) << "mdl_cutbd8Re = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cutbd8Re << std::endl; + std::cout << std::setw( 20 ) << "mdl_cutbd1Re = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cutbd1Re << std::endl; + std::cout << std::setw( 20 ) << "mdl_cbu8 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cbu8 << std::endl; + std::cout << std::setw( 20 ) << "mdl_ctd8 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_ctd8 << std::endl; + std::cout << std::setw( 20 ) << "mdl_ctb8 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_ctb8 << std::endl; + std::cout << std::setw( 20 ) << "mdl_cud8 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cud8 << std::endl; + std::cout << std::setw( 20 ) << "mdl_cbu1 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cbu1 << std::endl; + std::cout << std::setw( 20 ) << "mdl_ctd1 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_ctd1 << std::endl; + std::cout << std::setw( 20 ) << "mdl_ctb1 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_ctb1 << std::endl; + std::cout << std::setw( 20 ) << "mdl_cud1 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cud1 << std::endl; + std::cout << std::setw( 20 ) << "mdl_cbd8 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cbd8 << std::endl; + 
std::cout << std::setw( 20 ) << "mdl_cbd1 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cbd1 << std::endl; + std::cout << std::setw( 20 ) << "mdl_cbb = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cbb << std::endl; + std::cout << std::setw( 20 ) << "mdl_cdd8 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cdd8 << std::endl; + std::cout << std::setw( 20 ) << "mdl_cdd1 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cdd1 << std::endl; + std::cout << std::setw( 20 ) << "mdl_ctu8 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_ctu8 << std::endl; + std::cout << std::setw( 20 ) << "mdl_ctu1 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_ctu1 << std::endl; + std::cout << std::setw( 20 ) << "mdl_ctt = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_ctt << std::endl; + std::cout << std::setw( 20 ) << "mdl_cuu8 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cuu8 << std::endl; + std::cout << std::setw( 20 ) << "mdl_cuu1 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cuu1 << std::endl; + std::cout << std::setw( 20 ) << "mdl_cQQ8 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cQQ8 << std::endl; + std::cout << std::setw( 20 ) << "mdl_cQQ1 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cQQ1 << std::endl; + std::cout << std::setw( 20 ) << "mdl_cQj38 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cQj38 << std::endl; + std::cout << std::setw( 20 ) << "mdl_cQj31 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cQj31 << std::endl; + std::cout << std::setw( 20 ) << "mdl_cQj18 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cQj18 << std::endl; + std::cout << std::setw( 20 ) << "mdl_cQj11 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cQj11 << std::endl; + std::cout << std::setw( 20 ) << "mdl_cjj38 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cjj38 << std::endl; + std::cout << std::setw( 20 ) << "mdl_cjj31 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cjj31 << std::endl; + std::cout << std::setw( 20 ) << "mdl_cjj18 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cjj18 << std::endl; + std::cout << std::setw( 20 ) << "mdl_cjj11 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cjj11 << std::endl; + std::cout << std::setw( 20 ) << "mdl_cHtbRe = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cHtbRe << std::endl; + std::cout << std::setw( 20 ) << "mdl_cHudRe = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cHudRe << std::endl; + std::cout << std::setw( 20 ) << "mdl_cHbq = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cHbq << std::endl; + std::cout << std::setw( 20 ) << "mdl_cHd = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cHd << std::endl; + std::cout << std::setw( 20 ) << "mdl_cHt = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cHt << std::endl; + std::cout << std::setw( 20 ) << "mdl_cHu = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cHu << std::endl; + std::cout << std::setw( 20 ) << "mdl_cHQ3 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << 
mdl_cHQ3 << std::endl; + std::cout << std::setw( 20 ) << "mdl_cHj3 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cHj3 << std::endl; + std::cout << std::setw( 20 ) << "mdl_cHQ1 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cHQ1 << std::endl; + std::cout << std::setw( 20 ) << "mdl_cHj1 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cHj1 << std::endl; + std::cout << std::setw( 20 ) << "mdl_cbBRe = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cbBRe << std::endl; + std::cout << std::setw( 20 ) << "mdl_cdBRe = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cdBRe << std::endl; + std::cout << std::setw( 20 ) << "mdl_cbWRe = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cbWRe << std::endl; + std::cout << std::setw( 20 ) << "mdl_cdWRe = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cdWRe << std::endl; + std::cout << std::setw( 20 ) << "mdl_cbGRe = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cbGRe << std::endl; + std::cout << std::setw( 20 ) << "mdl_cdGRe = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cdGRe << std::endl; + std::cout << std::setw( 20 ) << "mdl_ctBRe = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_ctBRe << std::endl; + std::cout << std::setw( 20 ) << "mdl_cuBRe = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cuBRe << std::endl; + std::cout << std::setw( 20 ) << "mdl_ctWRe = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_ctWRe << std::endl; + std::cout << std::setw( 20 ) << "mdl_cuWRe = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cuWRe << std::endl; + std::cout << std::setw( 20 ) << "mdl_ctGRe = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_ctGRe << std::endl; + std::cout << std::setw( 20 ) << "mdl_cuGRe = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cuGRe << std::endl; + std::cout << std::setw( 20 ) << "mdl_cbHRe = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cbHRe << std::endl; + std::cout << std::setw( 20 ) << "mdl_cdHRe = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cdHRe << std::endl; + std::cout << std::setw( 20 ) << "mdl_ctHRe = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_ctHRe << std::endl; + std::cout << std::setw( 20 ) << "mdl_cuHRe = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cuHRe << std::endl; + std::cout << std::setw( 20 ) << "mdl_cHWB = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cHWB << std::endl; + std::cout << std::setw( 20 ) << "mdl_cHB = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cHB << std::endl; + std::cout << std::setw( 20 ) << "mdl_cHW = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cHW << std::endl; + std::cout << std::setw( 20 ) << "mdl_cHG = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cHG << std::endl; + std::cout << std::setw( 20 ) << "mdl_cHDD = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cHDD << std::endl; + std::cout << std::setw( 20 ) << "mdl_cHbox = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cHbox << std::endl; + std::cout << std::setw( 20 ) << "mdl_cH = " << std::setiosflags( 
std::ios::scientific ) << std::setw( 10 ) << mdl_cH << std::endl; + std::cout << std::setw( 20 ) << "mdl_cW = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cW << std::endl; + std::cout << std::setw( 20 ) << "mdl_cG = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cG << std::endl; + std::cout << std::setw( 20 ) << "mdl_MH = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_MH << std::endl; + std::cout << std::setw( 20 ) << "mdl_MZ = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_MZ << std::endl; + std::cout << std::setw( 20 ) << "mdl_MTA = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_MTA << std::endl; + std::cout << std::setw( 20 ) << "mdl_MMU = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_MMU << std::endl; + std::cout << std::setw( 20 ) << "mdl_Me = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_Me << std::endl; + std::cout << std::setw( 20 ) << "mdl_MT = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_MT << std::endl; + std::cout << std::setw( 20 ) << "mdl_MB = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_MB << std::endl; + std::cout << std::setw( 20 ) << "mdl_MC = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_MC << std::endl; + std::cout << std::setw( 20 ) << "mdl_MS = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_MS << std::endl; + std::cout << std::setw( 20 ) << "mdl_MU = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_MU << std::endl; + std::cout << std::setw( 20 ) << "mdl_MD = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_MD << std::endl; + std::cout << std::setw( 20 ) << "mdl_complexi = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_complexi << std::endl; + std::cout << std::setw( 20 ) << "mdl_cuH = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cuH << std::endl; + std::cout << std::setw( 20 ) << "mdl_ctHH = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_ctHH << std::endl; + std::cout << std::setw( 20 ) << "mdl_cdH = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cdH << std::endl; + std::cout << std::setw( 20 ) << "mdl_cbH = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cbH << std::endl; + std::cout << std::setw( 20 ) << "mdl_cuG = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cuG << std::endl; + std::cout << std::setw( 20 ) << "mdl_ctG = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_ctG << std::endl; + std::cout << std::setw( 20 ) << "mdl_cuW = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cuW << std::endl; + std::cout << std::setw( 20 ) << "mdl_ctW = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_ctW << std::endl; + std::cout << std::setw( 20 ) << "mdl_cuB = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cuB << std::endl; + std::cout << std::setw( 20 ) << "mdl_ctB = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_ctB << std::endl; + std::cout << std::setw( 20 ) << "mdl_cdG = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cdG << std::endl; + std::cout << std::setw( 20 ) << "mdl_cbG = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cbG << 
std::endl; + std::cout << std::setw( 20 ) << "mdl_cdW = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cdW << std::endl; + std::cout << std::setw( 20 ) << "mdl_cbW = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cbW << std::endl; + std::cout << std::setw( 20 ) << "mdl_cdB = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cdB << std::endl; + std::cout << std::setw( 20 ) << "mdl_cbBB = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cbBB << std::endl; + std::cout << std::setw( 20 ) << "mdl_cHud = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cHud << std::endl; + std::cout << std::setw( 20 ) << "mdl_cHtb = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cHtb << std::endl; + std::cout << std::setw( 20 ) << "mdl_cutbd1 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cutbd1 << std::endl; + std::cout << std::setw( 20 ) << "mdl_cutbd8 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cutbd8 << std::endl; + std::cout << std::setw( 20 ) << "mdl_cjQtu1 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cjQtu1 << std::endl; + std::cout << std::setw( 20 ) << "mdl_cjQtu8 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cjQtu8 << std::endl; + std::cout << std::setw( 20 ) << "mdl_cjQbd1 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cjQbd1 << std::endl; + std::cout << std::setw( 20 ) << "mdl_cjQbd8 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cjQbd8 << std::endl; + std::cout << std::setw( 20 ) << "mdl_cjujd1 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cjujd1 << std::endl; + std::cout << std::setw( 20 ) << "mdl_cjujd8 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cjujd8 << std::endl; + std::cout << std::setw( 20 ) << "mdl_cjujd11 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cjujd11 << std::endl; + std::cout << std::setw( 20 ) << "mdl_cjujd81 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cjujd81 << std::endl; + std::cout << std::setw( 20 ) << "mdl_cQtjd1 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cQtjd1 << std::endl; + std::cout << std::setw( 20 ) << "mdl_cQtjd8 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cQtjd8 << std::endl; + std::cout << std::setw( 20 ) << "mdl_cjuQb1 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cjuQb1 << std::endl; + std::cout << std::setw( 20 ) << "mdl_cjuQb8 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cjuQb8 << std::endl; + std::cout << std::setw( 20 ) << "mdl_cQujb1 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cQujb1 << std::endl; + std::cout << std::setw( 20 ) << "mdl_cQujb8 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cQujb8 << std::endl; + std::cout << std::setw( 20 ) << "mdl_cjtQd1 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cjtQd1 << std::endl; + std::cout << std::setw( 20 ) << "mdl_cjtQd8 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cjtQd8 << std::endl; + std::cout << std::setw( 20 ) << "mdl_cQtQb1 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cQtQb1 << std::endl; + std::cout << std::setw( 20 ) << 
"mdl_cQtQb8 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cQtQb8 << std::endl; + std::cout << std::setw( 20 ) << "mdl_ceH = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_ceH << std::endl; + std::cout << std::setw( 20 ) << "mdl_ceW = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_ceW << std::endl; + std::cout << std::setw( 20 ) << "mdl_ceB = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_ceB << std::endl; + std::cout << std::setw( 20 ) << "mdl_cledj = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cledj << std::endl; + std::cout << std::setw( 20 ) << "mdl_clebQ = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_clebQ << std::endl; + std::cout << std::setw( 20 ) << "mdl_cleju1 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cleju1 << std::endl; + std::cout << std::setw( 20 ) << "mdl_cleju3 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cleju3 << std::endl; + std::cout << std::setw( 20 ) << "mdl_cleQt1 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cleQt1 << std::endl; + std::cout << std::setw( 20 ) << "mdl_cleQt3 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cleQt3 << std::endl; + std::cout << std::setw( 20 ) << "mdl_MWsm = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_MWsm << std::endl; + std::cout << std::setw( 20 ) << "mdl_MW__exp__2 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_MW__exp__2 << std::endl; + std::cout << std::setw( 20 ) << "mdl_MZ__exp__2 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_MZ__exp__2 << std::endl; + std::cout << std::setw( 20 ) << "mdl_sqrt__2 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_sqrt__2 << std::endl; + std::cout << std::setw( 20 ) << "mdl_nb__2__exp__0_25 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_nb__2__exp__0_25 << std::endl; + std::cout << std::setw( 20 ) << "mdl_MH__exp__2 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_MH__exp__2 << std::endl; + std::cout << std::setw( 20 ) << "mdl_sth2 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_sth2 << std::endl; + std::cout << std::setw( 20 ) << "mdl_nb__10__exp___m_40 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_nb__10__exp___m_40 << std::endl; + std::cout << std::setw( 20 ) << "mdl_propCorr = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_propCorr << std::endl; + std::cout << std::setw( 20 ) << "mdl_MZ1 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_MZ1 << std::endl; + std::cout << std::setw( 20 ) << "mdl_MH1 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_MH1 << std::endl; + std::cout << std::setw( 20 ) << "mdl_MT1 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_MT1 << std::endl; + std::cout << std::setw( 20 ) << "mdl_WZ1 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_WZ1 << std::endl; + std::cout << std::setw( 20 ) << "mdl_WW1 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_WW1 << std::endl; + std::cout << std::setw( 20 ) << "mdl_WH1 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_WH1 << std::endl; + std::cout << std::setw( 20 ) << "mdl_WT1 = " << 
std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_WT1 << std::endl; + std::cout << std::setw( 20 ) << "mdl_cth = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cth << std::endl; + std::cout << std::setw( 20 ) << "mdl_MW1 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_MW1 << std::endl; + std::cout << std::setw( 20 ) << "mdl_sqrt__sth2 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_sqrt__sth2 << std::endl; + std::cout << std::setw( 20 ) << "mdl_sth = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_sth << std::endl; + std::cout << std::setw( 20 ) << "mdl_LambdaSMEFT__exp__2 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_LambdaSMEFT__exp__2 << std::endl; + std::cout << std::setw( 20 ) << "mdl_conjg__cbH = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_conjg__cbH << std::endl; + std::cout << std::setw( 20 ) << "mdl_conjg__ctHH = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_conjg__ctHH << std::endl; + std::cout << std::setw( 20 ) << "mdl_MT__exp__2 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_MT__exp__2 << std::endl; + std::cout << std::setw( 20 ) << "mdl_MH__exp__6 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_MH__exp__6 << std::endl; + std::cout << std::setw( 20 ) << "mdl_MWsm__exp__6 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_MWsm__exp__6 << std::endl; + std::cout << std::setw( 20 ) << "mdl_MH__exp__4 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_MH__exp__4 << std::endl; + std::cout << std::setw( 20 ) << "mdl_MWsm__exp__4 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_MWsm__exp__4 << std::endl; + std::cout << std::setw( 20 ) << "mdl_MWsm__exp__2 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_MWsm__exp__2 << std::endl; + std::cout << std::setw( 20 ) << "mdl_MZ__exp__4 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_MZ__exp__4 << std::endl; + std::cout << std::setw( 20 ) << "mdl_MZ__exp__6 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_MZ__exp__6 << std::endl; + std::cout << std::setw( 20 ) << "mdl_cth__exp__2 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cth__exp__2 << std::endl; + std::cout << std::setw( 20 ) << "mdl_sth__exp__2 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_sth__exp__2 << std::endl; + std::cout << std::setw( 20 ) << "mdl_MB__exp__2 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_MB__exp__2 << std::endl; + std::cout << std::setw( 20 ) << "mdl_MZ__exp__3 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_MZ__exp__3 << std::endl; + std::cout << std::setw( 20 ) << "mdl_sth__exp__4 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_sth__exp__4 << std::endl; + std::cout << std::setw( 20 ) << "mdl_sth__exp__6 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_sth__exp__6 << std::endl; + std::cout << std::setw( 20 ) << "mdl_sth__exp__3 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_sth__exp__3 << std::endl; + std::cout << std::setw( 20 ) << "mdl_sth__exp__5 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_sth__exp__5 << std::endl; + std::cout << std::setw( 20 ) << 
"mdl_propCorr__exp__2 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_propCorr__exp__2 << std::endl; + std::cout << std::setw( 20 ) << "mdl_propCorr__exp__3 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_propCorr__exp__3 << std::endl; + std::cout << std::setw( 20 ) << "mdl_propCorr__exp__4 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_propCorr__exp__4 << std::endl; + std::cout << std::setw( 20 ) << "mdl_cth__exp__3 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_cth__exp__3 << std::endl; + std::cout << std::setw( 20 ) << "mdl_aEW = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_aEW << std::endl; + std::cout << std::setw( 20 ) << "mdl_sqrt__Gf = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_sqrt__Gf << std::endl; + std::cout << std::setw( 20 ) << "mdl_vevhat = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_vevhat << std::endl; + std::cout << std::setw( 20 ) << "mdl_lam = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_lam << std::endl; + std::cout << std::setw( 20 ) << "mdl_sqrt__aEW = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_sqrt__aEW << std::endl; + std::cout << std::setw( 20 ) << "mdl_ee = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_ee << std::endl; + std::cout << std::setw( 20 ) << "mdl_yb = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_yb << std::endl; + std::cout << std::setw( 20 ) << "mdl_yc = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_yc << std::endl; + std::cout << std::setw( 20 ) << "mdl_ydo = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_ydo << std::endl; + std::cout << std::setw( 20 ) << "mdl_ye = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_ye << std::endl; + std::cout << std::setw( 20 ) << "mdl_ym = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_ym << std::endl; + std::cout << std::setw( 20 ) << "mdl_ys = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_ys << std::endl; + std::cout << std::setw( 20 ) << "mdl_yt = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_yt << std::endl; + std::cout << std::setw( 20 ) << "mdl_ytau = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_ytau << std::endl; + std::cout << std::setw( 20 ) << "mdl_yup = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_yup << std::endl; + std::cout << std::setw( 20 ) << "mdl_vevhat__exp__2 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_vevhat__exp__2 << std::endl; + std::cout << std::setw( 20 ) << "mdl_dGf = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_dGf << std::endl; + std::cout << std::setw( 20 ) << "mdl_dkH = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_dkH << std::endl; + std::cout << std::setw( 20 ) << "mdl_vevT = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_vevT << std::endl; + std::cout << std::setw( 20 ) << "mdl_g1 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_g1 << std::endl; + std::cout << std::setw( 20 ) << "mdl_gw = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_gw << std::endl; + std::cout << std::setw( 20 ) << "mdl_yb0 = " << std::setiosflags( 
std::ios::scientific ) << std::setw( 10 ) << mdl_yb0 << std::endl; + std::cout << std::setw( 20 ) << "mdl_yt0 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_yt0 << std::endl; + std::cout << std::setw( 20 ) << "mdl_ee__exp__2 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_ee__exp__2 << std::endl; + std::cout << std::setw( 20 ) << "mdl_gHaa = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_gHaa << std::endl; + std::cout << std::setw( 20 ) << "mdl_gHza = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_gHza << std::endl; + std::cout << std::setw( 20 ) << "mdl_dMZ2 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_dMZ2 << std::endl; + std::cout << std::setw( 20 ) << "mdl_dMH2 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_dMH2 << std::endl; + std::cout << std::setw( 20 ) << "mdl_dgw = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_dgw << std::endl; + std::cout << std::setw( 20 ) << "mdl_barlam = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_barlam << std::endl; + std::cout << std::setw( 20 ) << "mdl_dWT = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_dWT << std::endl; + std::cout << std::setw( 20 ) << "mdl_dWW = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_dWW << std::endl; + std::cout << std::setw( 20 ) << "mdl_gwsh = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_gwsh << std::endl; + std::cout << std::setw( 20 ) << "mdl_vev = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_vev << std::endl; + std::cout << std::setw( 20 ) << "mdl_dg1 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_dg1 << std::endl; + std::cout << std::setw( 20 ) << "mdl_dWHc = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_dWHc << std::endl; + std::cout << std::setw( 20 ) << "mdl_dWHb = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_dWHb << std::endl; + std::cout << std::setw( 20 ) << "mdl_dWHta = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_dWHta << std::endl; + std::cout << std::setw( 20 ) << "mdl_dWZ = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_dWZ << std::endl; + std::cout << std::setw( 20 ) << "mdl_g1sh = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_g1sh << std::endl; + std::cout << std::setw( 20 ) << "mdl_ee__exp__3 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_ee__exp__3 << std::endl; + std::cout << std::setw( 20 ) << "mdl_vevhat__exp__3 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_vevhat__exp__3 << std::endl; +} + +void +Parameters_SMEFTsim_topU3l_MwScheme_UFO::printIndependentCouplings() +{ + std::cout << "SMEFTsim_topU3l_MwScheme_UFO model couplings independent of event kinematics:" << std::endl; + // (none) +} + +/* +void +Parameters_SMEFTsim_topU3l_MwScheme_UFO::printDependentParameters() // now computed event-by-event (running alphas #373) +{ + std::cout << "SMEFTsim_topU3l_MwScheme_UFO model parameters dependent on event kinematics:" << std::endl; + std::cout << std::setw( 20 ) << "mdl_sqrt__aS = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_sqrt__aS << std::endl; + std::cout << std::setw( 20 ) << "G = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << G << 
std::endl; + std::cout << std::setw( 20 ) << "mdl_gHgg2 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_gHgg2 << std::endl; + std::cout << std::setw( 20 ) << "mdl_gHgg4 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_gHgg4 << std::endl; + std::cout << std::setw( 20 ) << "mdl_gHgg5 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_gHgg5 << std::endl; + std::cout << std::setw( 20 ) << "mdl_G__exp__2 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_G__exp__2 << std::endl; + std::cout << std::setw( 20 ) << "mdl_gHgg1 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_gHgg1 << std::endl; + std::cout << std::setw( 20 ) << "mdl_gHgg3 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_gHgg3 << std::endl; + std::cout << std::setw( 20 ) << "mdl_G__exp__3 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_G__exp__3 << std::endl; + std::cout << std::setw( 20 ) << "mdl_dWH = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << mdl_dWH << std::endl; +} + +void +Parameters_SMEFTsim_topU3l_MwScheme_UFO::printDependentCouplings() // now computed event-by-event (running alphas #373) +{ + std::cout << "SMEFTsim_topU3l_MwScheme_UFO model couplings dependent on event kinematics:" << std::endl; + std::cout << std::setw( 20 ) << "GC_6 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << GC_6 << std::endl; + std::cout << std::setw( 20 ) << "GC_7 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << GC_7 << std::endl; + std::cout << std::setw( 20 ) << "GC_8 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << GC_8 << std::endl; +} +*/ diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.h b/epochX/cudacpp/smeft_gg_tttt.sa/src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.h new file mode 100644 index 0000000000..fd5e4ee1f4 --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/src/Parameters_SMEFTsim_topU3l_MwScheme_UFO.h @@ -0,0 +1,643 @@ +//========================================================================== +// This file has been automatically generated for CUDA/C++ standalone by +// MadGraph5_aMC@NLO v. 3.5.0_lo_vect, 2023-01-26 +// By the MadGraph5_aMC@NLO Development Team +// Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch +//========================================================================== + +#ifndef Parameters_SMEFTsim_topU3l_MwScheme_UFO_H +#define Parameters_SMEFTsim_topU3l_MwScheme_UFO_H + +#include "mgOnGpuConfig.h" + +#include "mgOnGpuCxtypes.h" +#include "mgOnGpuVectors.h" + +//========================================================================== + +#ifndef MGONGPU_HARDCODE_PARAM // this is only supported in SM processes (e.g. 
not in EFT models) for the moment (#439) +#error This non-SM physics process only supports MGONGPU_HARDCODE_PARAM builds (#439): please run "make HRDCOD=1" + +#include "read_slha.h" + +class Parameters_SMEFTsim_topU3l_MwScheme_UFO +{ +public: + + static Parameters_SMEFTsim_topU3l_MwScheme_UFO* getInstance(); + + // Define "zero" + double zero, ZERO; + + // Model parameters independent of aS + //double aS; // now retrieved event-by-event (as G) from Fortran (running alphas #373) + double mdl_WH, mdl_WW, mdl_WZ, mdl_WT, mdl_ymtau, mdl_ymm, mdl_yme, mdl_ymt, mdl_ymb, mdl_ymc, mdl_yms, mdl_ymup, mdl_ymdo, mdl_linearPropCorrections, mdl_Gf, mdl_MW, mdl_LambdaSMEFT, mdl_cleQt3Im, mdl_cleQt1Im, mdl_cleju3Im, mdl_cleju1Im, mdl_clebQIm, mdl_cledjIm, mdl_ceBIm, mdl_ceWIm, mdl_ceHIm, mdl_cQtQb8Im, mdl_cQtQb1Im, mdl_cjtQd8Im, mdl_cjtQd1Im, mdl_cQujb8Im, mdl_cQujb1Im, mdl_cjuQb8Im, mdl_cjuQb1Im, mdl_cQtjd8Im, mdl_cQtjd1Im, mdl_cjujd81Im, mdl_cjujd11Im, mdl_cjujd8Im, mdl_cjujd1Im, mdl_cjQbd8Im, mdl_cjQbd1Im, mdl_cjQtu8Im, mdl_cjQtu1Im, mdl_cutbd8Im, mdl_cutbd1Im, mdl_cHtbIm, mdl_cHudIm, mdl_cbHIm, mdl_cdHIm, mdl_ctHIm, mdl_cuHIm, mdl_cbBIm, mdl_cdBIm, mdl_cbWIm, mdl_cdWIm, mdl_cbGIm, mdl_cdGIm, mdl_ctBIm, mdl_cuBIm, mdl_ctWIm, mdl_cuWIm, mdl_ctGIm, mdl_cuGIm, mdl_cHWBtil, mdl_cHBtil, mdl_cHWtil, mdl_cHGtil, mdl_cWtil, mdl_cGtil, mdl_cleQt3Re, mdl_cleju3Re, mdl_cleQt1Re, mdl_cleju1Re, mdl_clebQRe, mdl_cledjRe, mdl_cle, mdl_cbl, mdl_cld, mdl_ctl, mdl_clu, mdl_cQe, mdl_cje, mdl_cbe, mdl_ced, mdl_cte, mdl_ceu, mdl_cee, mdl_cQl3, mdl_cQl1, mdl_clj3, mdl_clj1, mdl_cll1, mdl_cll, mdl_cHe, mdl_cHl3, mdl_cHl1, mdl_ceBRe, mdl_ceWRe, mdl_ceHRe, mdl_cQtQb8Re, mdl_cQtQb1Re, mdl_cjtQd8Re, mdl_cjtQd1Re, mdl_cQujb8Re, mdl_cQujb1Re, mdl_cjuQb8Re, mdl_cjuQb1Re, mdl_cQtjd8Re, mdl_cQtjd1Re, mdl_cjujd81Re, mdl_cjujd11Re, mdl_cjujd8Re, mdl_cjujd1Re, mdl_cjQbd8Re, mdl_cjQbd1Re, mdl_cjQtu8Re, mdl_cjQtu1Re, mdl_cQb8, mdl_cQb1, mdl_cbj8, mdl_cbj1, mdl_cQd8, mdl_cQd1, mdl_cjd8, mdl_cjd1, mdl_cQt8, mdl_cQt1, mdl_ctj8, mdl_ctj1, mdl_cQu8, mdl_cju8, mdl_cQu1, mdl_cju1, mdl_cutbd8Re, mdl_cutbd1Re, mdl_cbu8, mdl_ctd8, mdl_ctb8, mdl_cud8, mdl_cbu1, mdl_ctd1, mdl_ctb1, mdl_cud1, mdl_cbd8, mdl_cbd1, mdl_cbb, mdl_cdd8, mdl_cdd1, mdl_ctu8, mdl_ctu1, mdl_ctt, mdl_cuu8, mdl_cuu1, mdl_cQQ8, mdl_cQQ1, mdl_cQj38, mdl_cQj31, mdl_cQj18, mdl_cQj11, mdl_cjj38, mdl_cjj31, mdl_cjj18, mdl_cjj11, mdl_cHtbRe, mdl_cHudRe, mdl_cHbq, mdl_cHd, mdl_cHt, mdl_cHu, mdl_cHQ3, mdl_cHj3, mdl_cHQ1, mdl_cHj1, mdl_cbBRe, mdl_cdBRe, mdl_cbWRe, mdl_cdWRe, mdl_cbGRe, mdl_cdGRe, mdl_ctBRe, mdl_cuBRe, mdl_ctWRe, mdl_cuWRe, mdl_ctGRe, mdl_cuGRe, mdl_cbHRe, mdl_cdHRe, mdl_ctHRe, mdl_cuHRe, mdl_cHWB, mdl_cHB, mdl_cHW, mdl_cHG, mdl_cHDD, mdl_cHbox, mdl_cH, mdl_cW, mdl_cG, mdl_MH, mdl_MZ, mdl_MTA, mdl_MMU, mdl_Me, mdl_MT, mdl_MB, mdl_MC, mdl_MS, mdl_MU, mdl_MD, mdl_MWsm, mdl_MW__exp__2, mdl_MZ__exp__2, mdl_sqrt__2, mdl_nb__2__exp__0_25, mdl_MH__exp__2, mdl_sth2, mdl_nb__10__exp___m_40, mdl_propCorr, mdl_MZ1, mdl_MH1, mdl_MT1, mdl_WZ1, mdl_WW1, mdl_WH1, mdl_WT1, mdl_cth, mdl_MW1, mdl_sqrt__sth2, mdl_sth, mdl_LambdaSMEFT__exp__2, mdl_MT__exp__2, mdl_MH__exp__6, mdl_MWsm__exp__6, mdl_MH__exp__4, mdl_MWsm__exp__4, mdl_MWsm__exp__2, mdl_MZ__exp__4, mdl_MZ__exp__6, mdl_cth__exp__2, mdl_sth__exp__2, mdl_MB__exp__2, mdl_MZ__exp__3, mdl_sth__exp__4, mdl_sth__exp__6, mdl_sth__exp__3, mdl_sth__exp__5, mdl_propCorr__exp__2, mdl_propCorr__exp__3, mdl_propCorr__exp__4, mdl_cth__exp__3, mdl_aEW, mdl_sqrt__Gf, mdl_vevhat, mdl_lam, mdl_sqrt__aEW, mdl_ee, mdl_yb, mdl_yc, mdl_ydo, mdl_ye, 
mdl_ym, mdl_ys, mdl_yt, mdl_ytau, mdl_yup, mdl_vevhat__exp__2, mdl_dGf, mdl_dkH, mdl_vevT, mdl_g1, mdl_gw, mdl_ee__exp__2, mdl_gHaa, mdl_gHza, mdl_dMZ2, mdl_dMH2, mdl_dgw, mdl_barlam, mdl_dWT, mdl_dWW, mdl_gwsh, mdl_vev, mdl_dg1, mdl_dWHc, mdl_dWHb, mdl_dWHta, mdl_dWZ, mdl_g1sh, mdl_ee__exp__3, mdl_vevhat__exp__3; + cxsmpl<double> mdl_complexi, mdl_cuH, mdl_ctHH, mdl_cdH, mdl_cbH, mdl_cuG, mdl_ctG, mdl_cuW, mdl_ctW, mdl_cuB, mdl_ctB, mdl_cdG, mdl_cbG, mdl_cdW, mdl_cbW, mdl_cdB, mdl_cbBB, mdl_cHud, mdl_cHtb, mdl_cutbd1, mdl_cutbd8, mdl_cjQtu1, mdl_cjQtu8, mdl_cjQbd1, mdl_cjQbd8, mdl_cjujd1, mdl_cjujd8, mdl_cjujd11, mdl_cjujd81, mdl_cQtjd1, mdl_cQtjd8, mdl_cjuQb1, mdl_cjuQb8, mdl_cQujb1, mdl_cQujb8, mdl_cjtQd1, mdl_cjtQd8, mdl_cQtQb1, mdl_cQtQb8, mdl_ceH, mdl_ceW, mdl_ceB, mdl_cledj, mdl_clebQ, mdl_cleju1, mdl_cleju3, mdl_cleQt1, mdl_cleQt3, mdl_conjg__cbH, mdl_conjg__ctHH, mdl_yb0, mdl_yt0; + + // Model couplings independent of aS + // (none) + + // Model parameters dependent on aS + //double mdl_sqrt__aS, G, mdl_gHgg2, mdl_gHgg4, mdl_gHgg5, mdl_G__exp__2, mdl_gHgg1, mdl_gHgg3, mdl_dWH; // now computed event-by-event (running alphas #373) + //cxsmpl<double> mdl_G__exp__3; // now computed event-by-event (running alphas #373) + + // Model couplings dependent on aS + //cxsmpl<double> GC_6, GC_7, GC_8; // now computed event-by-event (running alphas #373) + + // Set parameters that are unchanged during the run + void setIndependentParameters( SLHAReader& slha ); + + // Set couplings that are unchanged during the run + void setIndependentCouplings(); + + // Set parameters that are changed event by event + //void setDependentParameters(); // now computed event-by-event (running alphas #373) + + // Set couplings that are changed event by event + //void setDependentCouplings(); // now computed event-by-event (running alphas #373) + + // Print parameters that are unchanged during the run + void printIndependentParameters(); + + // Print couplings that are unchanged during the run + void printIndependentCouplings(); + + // Print parameters that are changed event by event + //void printDependentParameters(); // now computed event-by-event (running alphas #373) + + // Print couplings that are changed event by event + //void printDependentCouplings(); // now computed event-by-event (running alphas #373) + +private: + + static Parameters_SMEFTsim_topU3l_MwScheme_UFO* instance; +}; + +#else + +#include <cassert> +#include <limits> + +// Hardcoded constexpr physics parameters +namespace Parameters_SMEFTsim_topU3l_MwScheme_UFO // keep the same name rather than HardcodedParameters_SMEFTsim_topU3l_MwScheme_UFO for simplicity +{ + // Constexpr implementation of sqrt (see https://stackoverflow.com/a/34134071) + double constexpr sqrtNewtonRaphson( double x, double curr, double prev ) + { + return curr == prev ? curr : sqrtNewtonRaphson( x, 0.5 * ( curr + x / curr ), curr ); + } + double constexpr constexpr_sqrt( double x ) + { + return x >= 0 // && x < std::numeric_limits<double>::infinity() // avoid -Wtautological-constant-compare warning in fast math + ? sqrtNewtonRaphson( x, x, 0 ) + : std::numeric_limits<double>::quiet_NaN(); + } + + // Constexpr implementation of floor (see https://stackoverflow.com/a/66146159) + constexpr int constexpr_floor( double d ) + { + const int i = static_cast<int>( d ); + return d < i ?
i - 1 : i; + } + + // Constexpr implementation of pow + constexpr double constexpr_pow( double base, double exp ) + { + // NB(1): this implementation of constexpr_pow requires exponent >= 0 + assert( exp >= 0 ); // NB would fail at compile time with "error: call to non-‘constexpr’ function ‘void __assert_fail'" + // NB(2): this implementation of constexpr_pow requires an integer exponent + const int iexp = constexpr_floor( exp ); + assert( static_cast<double>( iexp ) == exp ); // NB would fail at compile time with "error: call to non-‘constexpr’ function ‘void __assert_fail'" + // Iterative implementation of pow if exp is a non negative integer + return iexp == 0 ? 1 : base * constexpr_pow( base, iexp - 1 ); + } + + // Model parameters independent of aS + constexpr double zero = 0; + constexpr double ZERO = 0; + constexpr double mdl_WH = 4.070000e-03; + constexpr double mdl_WW = 2.085000e+00; + constexpr double mdl_WZ = 2.495200e+00; + constexpr double mdl_WT = 1.330000e+00; + constexpr double mdl_ymtau = 1.777000e+00; + constexpr double mdl_ymm = 1.056600e-01; + constexpr double mdl_yme = 5.110000e-04; + constexpr double mdl_ymt = 1.727600e+02; + constexpr double mdl_ymb = 4.180000e+00; + constexpr double mdl_ymc = 1.270000e+00; + constexpr double mdl_yms = 9.300000e-02; + constexpr double mdl_ymup = 2.160000e-03; + constexpr double mdl_ymdo = 4.670000e-03; + constexpr double mdl_linearPropCorrections = 0.000000e+00; + //constexpr double aS = 1.179000e-01; // now retrieved event-by-event (as G) from Fortran (running alphas #373) + constexpr double mdl_Gf = 1.166379e-05; + constexpr double mdl_MW = 8.038700e+01; + constexpr double mdl_LambdaSMEFT = 1.000000e+03; + constexpr double mdl_cleQt3Im = 0.000000e+00; + constexpr double mdl_cleQt1Im = 0.000000e+00; + constexpr double mdl_cleju3Im = 0.000000e+00; + constexpr double mdl_cleju1Im = 0.000000e+00; + constexpr double mdl_clebQIm = 0.000000e+00; + constexpr double mdl_cledjIm = 0.000000e+00; + constexpr double mdl_ceBIm = 0.000000e+00; + constexpr double mdl_ceWIm = 0.000000e+00; + constexpr double mdl_ceHIm = 0.000000e+00; + constexpr double mdl_cQtQb8Im = 0.000000e+00; + constexpr double mdl_cQtQb1Im = 0.000000e+00; + constexpr double mdl_cjtQd8Im = 0.000000e+00; + constexpr double mdl_cjtQd1Im = 0.000000e+00; + constexpr double mdl_cQujb8Im = 0.000000e+00; + constexpr double mdl_cQujb1Im = 0.000000e+00; + constexpr double mdl_cjuQb8Im = 0.000000e+00; + constexpr double mdl_cjuQb1Im = 0.000000e+00; + constexpr double mdl_cQtjd8Im = 0.000000e+00; + constexpr double mdl_cQtjd1Im = 0.000000e+00; + constexpr double mdl_cjujd81Im = 0.000000e+00; + constexpr double mdl_cjujd11Im = 0.000000e+00; + constexpr double mdl_cjujd8Im = 0.000000e+00; + constexpr double mdl_cjujd1Im = 0.000000e+00; + constexpr double mdl_cjQbd8Im = 0.000000e+00; + constexpr double mdl_cjQbd1Im = 0.000000e+00; + constexpr double mdl_cjQtu8Im = 0.000000e+00; + constexpr double mdl_cjQtu1Im = 0.000000e+00; + constexpr double mdl_cutbd8Im = 0.000000e+00; + constexpr double mdl_cutbd1Im = 0.000000e+00; + constexpr double mdl_cHtbIm = 0.000000e+00; + constexpr double mdl_cHudIm = 0.000000e+00; + constexpr double mdl_cbHIm = 0.000000e+00; + constexpr double mdl_cdHIm = 0.000000e+00; + constexpr double mdl_ctHIm = 0.000000e+00; + constexpr double mdl_cuHIm = 0.000000e+00; + constexpr double mdl_cbBIm = 0.000000e+00; + constexpr double mdl_cdBIm = 0.000000e+00; + constexpr
double mdl_cbWIm = 0.000000e+00; + constexpr double mdl_cdWIm = 0.000000e+00; + constexpr double mdl_cbGIm = 0.000000e+00; + constexpr double mdl_cdGIm = 0.000000e+00; + constexpr double mdl_ctBIm = 0.000000e+00; + constexpr double mdl_cuBIm = 0.000000e+00; + constexpr double mdl_ctWIm = 0.000000e+00; + constexpr double mdl_cuWIm = 0.000000e+00; + constexpr double mdl_ctGIm = 0.000000e+00; + constexpr double mdl_cuGIm = 0.000000e+00; + constexpr double mdl_cHWBtil = 0.000000e+00; + constexpr double mdl_cHBtil = 0.000000e+00; + constexpr double mdl_cHWtil = 0.000000e+00; + constexpr double mdl_cHGtil = 0.000000e+00; + constexpr double mdl_cWtil = 0.000000e+00; + constexpr double mdl_cGtil = 0.000000e+00; + constexpr double mdl_cleQt3Re = 0.000000e+00; + constexpr double mdl_cleju3Re = 0.000000e+00; + constexpr double mdl_cleQt1Re = 0.000000e+00; + constexpr double mdl_cleju1Re = 0.000000e+00; + constexpr double mdl_clebQRe = 0.000000e+00; + constexpr double mdl_cledjRe = 0.000000e+00; + constexpr double mdl_cle = 0.000000e+00; + constexpr double mdl_cbl = 0.000000e+00; + constexpr double mdl_cld = 0.000000e+00; + constexpr double mdl_ctl = 0.000000e+00; + constexpr double mdl_clu = 0.000000e+00; + constexpr double mdl_cQe = 0.000000e+00; + constexpr double mdl_cje = 0.000000e+00; + constexpr double mdl_cbe = 0.000000e+00; + constexpr double mdl_ced = 0.000000e+00; + constexpr double mdl_cte = 0.000000e+00; + constexpr double mdl_ceu = 0.000000e+00; + constexpr double mdl_cee = 0.000000e+00; + constexpr double mdl_cQl3 = 0.000000e+00; + constexpr double mdl_cQl1 = 0.000000e+00; + constexpr double mdl_clj3 = 0.000000e+00; + constexpr double mdl_clj1 = 0.000000e+00; + constexpr double mdl_cll1 = 0.000000e+00; + constexpr double mdl_cll = 0.000000e+00; + constexpr double mdl_cHe = 0.000000e+00; + constexpr double mdl_cHl3 = 0.000000e+00; + constexpr double mdl_cHl1 = 0.000000e+00; + constexpr double mdl_ceBRe = 0.000000e+00; + constexpr double mdl_ceWRe = 0.000000e+00; + constexpr double mdl_ceHRe = 0.000000e+00; + constexpr double mdl_cQtQb8Re = 0.000000e+00; + constexpr double mdl_cQtQb1Re = 0.000000e+00; + constexpr double mdl_cjtQd8Re = 0.000000e+00; + constexpr double mdl_cjtQd1Re = 0.000000e+00; + constexpr double mdl_cQujb8Re = 0.000000e+00; + constexpr double mdl_cQujb1Re = 0.000000e+00; + constexpr double mdl_cjuQb8Re = 0.000000e+00; + constexpr double mdl_cjuQb1Re = 0.000000e+00; + constexpr double mdl_cQtjd8Re = 0.000000e+00; + constexpr double mdl_cQtjd1Re = 0.000000e+00; + constexpr double mdl_cjujd81Re = 0.000000e+00; + constexpr double mdl_cjujd11Re = 0.000000e+00; + constexpr double mdl_cjujd8Re = 0.000000e+00; + constexpr double mdl_cjujd1Re = 0.000000e+00; + constexpr double mdl_cjQbd8Re = 0.000000e+00; + constexpr double mdl_cjQbd1Re = 0.000000e+00; + constexpr double mdl_cjQtu8Re = 0.000000e+00; + constexpr double mdl_cjQtu1Re = 0.000000e+00; + constexpr double mdl_cQb8 = 0.000000e+00; + constexpr double mdl_cQb1 = 0.000000e+00; + constexpr double mdl_cbj8 = 0.000000e+00; + constexpr double mdl_cbj1 = 0.000000e+00; + constexpr double mdl_cQd8 = 0.000000e+00; + constexpr double mdl_cQd1 = 0.000000e+00; + constexpr double mdl_cjd8 = 0.000000e+00; + constexpr double mdl_cjd1 = 0.000000e+00; + constexpr double mdl_cQt8 = 0.000000e+00; + constexpr double mdl_cQt1 = 0.000000e+00; + constexpr double mdl_ctj8 = 0.000000e+00; +
constexpr double mdl_ctj1 = 0.000000e+00; + constexpr double mdl_cQu8 = 0.000000e+00; + constexpr double mdl_cju8 = 0.000000e+00; + constexpr double mdl_cQu1 = 0.000000e+00; + constexpr double mdl_cju1 = 0.000000e+00; + constexpr double mdl_cutbd8Re = 0.000000e+00; + constexpr double mdl_cutbd1Re = 0.000000e+00; + constexpr double mdl_cbu8 = 0.000000e+00; + constexpr double mdl_ctd8 = 0.000000e+00; + constexpr double mdl_ctb8 = 0.000000e+00; + constexpr double mdl_cud8 = 0.000000e+00; + constexpr double mdl_cbu1 = 0.000000e+00; + constexpr double mdl_ctd1 = 0.000000e+00; + constexpr double mdl_ctb1 = 0.000000e+00; + constexpr double mdl_cud1 = 0.000000e+00; + constexpr double mdl_cbd8 = 0.000000e+00; + constexpr double mdl_cbd1 = 0.000000e+00; + constexpr double mdl_cbb = 0.000000e+00; + constexpr double mdl_cdd8 = 0.000000e+00; + constexpr double mdl_cdd1 = 0.000000e+00; + constexpr double mdl_ctu8 = 0.000000e+00; + constexpr double mdl_ctu1 = 0.000000e+00; + constexpr double mdl_ctt = 0.000000e+00; + constexpr double mdl_cuu8 = 0.000000e+00; + constexpr double mdl_cuu1 = 0.000000e+00; + constexpr double mdl_cQQ8 = 0.000000e+00; + constexpr double mdl_cQQ1 = 0.000000e+00; + constexpr double mdl_cQj38 = 0.000000e+00; + constexpr double mdl_cQj31 = 0.000000e+00; + constexpr double mdl_cQj18 = 0.000000e+00; + constexpr double mdl_cQj11 = 0.000000e+00; + constexpr double mdl_cjj38 = 0.000000e+00; + constexpr double mdl_cjj31 = 0.000000e+00; + constexpr double mdl_cjj18 = 0.000000e+00; + constexpr double mdl_cjj11 = 0.000000e+00; + constexpr double mdl_cHtbRe = 0.000000e+00; + constexpr double mdl_cHudRe = 0.000000e+00; + constexpr double mdl_cHbq = 0.000000e+00; + constexpr double mdl_cHd = 0.000000e+00; + constexpr double mdl_cHt = 0.000000e+00; + constexpr double mdl_cHu = 0.000000e+00; + constexpr double mdl_cHQ3 = 0.000000e+00; + constexpr double mdl_cHj3 = 0.000000e+00; + constexpr double mdl_cHQ1 = 0.000000e+00; + constexpr double mdl_cHj1 = 0.000000e+00; + constexpr double mdl_cbBRe = 0.000000e+00; + constexpr double mdl_cdBRe = 0.000000e+00; + constexpr double mdl_cbWRe = 0.000000e+00; + constexpr double mdl_cdWRe = 0.000000e+00; + constexpr double mdl_cbGRe = 0.000000e+00; + constexpr double mdl_cdGRe = 0.000000e+00; + constexpr double mdl_ctBRe = 0.000000e+00; + constexpr double mdl_cuBRe = 0.000000e+00; + constexpr double mdl_ctWRe = 0.000000e+00; + constexpr double mdl_cuWRe = 0.000000e+00; + constexpr double mdl_ctGRe = 0.000000e+00; + constexpr double mdl_cuGRe = 0.000000e+00; + constexpr double mdl_cbHRe = 0.000000e+00; + constexpr double mdl_cdHRe = 0.000000e+00; + constexpr double mdl_ctHRe = 0.000000e+00; + constexpr double mdl_cuHRe = 0.000000e+00; + constexpr double mdl_cHWB = 0.000000e+00; + constexpr double mdl_cHB = 0.000000e+00; + constexpr double mdl_cHW = 0.000000e+00; + constexpr double mdl_cHG = 0.000000e+00; + constexpr double mdl_cHDD = 0.000000e+00; + constexpr double mdl_cHbox = 0.000000e+00; + constexpr double mdl_cH = 0.000000e+00; + constexpr double mdl_cW = 0.000000e+00; + constexpr double mdl_cG = 0.000000e+00; + constexpr double mdl_MH = 1.250900e+02; + constexpr double mdl_MZ = 9.118760e+01; + constexpr double mdl_MTA = 1.777000e+00; + constexpr double mdl_MMU = 1.056600e-01; + constexpr double mdl_Me = 5.110000e-04; + constexpr double mdl_MT = 1.727600e+02; + constexpr double mdl_MB = 4.180000e+00; +
constexpr double mdl_MC = 1.270000e+00; + constexpr double mdl_MS = 9.300000e-02; + constexpr double mdl_MU = 2.160000e-03; + constexpr double mdl_MD = 4.670000e-03; + constexpr cxsmpl<double> mdl_complexi = cxsmpl<double>( 0., 1. ); + constexpr cxsmpl<double> mdl_cuH = mdl_cuHRe + mdl_cuHIm * mdl_complexi; + constexpr cxsmpl<double> mdl_ctHH = mdl_ctHRe + mdl_ctHIm * mdl_complexi; + constexpr cxsmpl<double> mdl_cdH = mdl_cdHRe + mdl_cdHIm * mdl_complexi; + constexpr cxsmpl<double> mdl_cbH = mdl_cbHRe + mdl_cbHIm * mdl_complexi; + constexpr cxsmpl<double> mdl_cuG = mdl_cuGRe + mdl_cuGIm * mdl_complexi; + constexpr cxsmpl<double> mdl_ctG = mdl_ctGRe + mdl_ctGIm * mdl_complexi; + constexpr cxsmpl<double> mdl_cuW = mdl_cuWRe + mdl_cuWIm * mdl_complexi; + constexpr cxsmpl<double> mdl_ctW = mdl_ctWRe + mdl_ctWIm * mdl_complexi; + constexpr cxsmpl<double> mdl_cuB = mdl_cuBRe + mdl_cuBIm * mdl_complexi; + constexpr cxsmpl<double> mdl_ctB = mdl_ctBRe + mdl_ctBIm * mdl_complexi; + constexpr cxsmpl<double> mdl_cdG = mdl_cdGRe + mdl_cdGIm * mdl_complexi; + constexpr cxsmpl<double> mdl_cbG = mdl_cbGRe + mdl_cbGIm * mdl_complexi; + constexpr cxsmpl<double> mdl_cdW = mdl_cdWRe + mdl_cdWIm * mdl_complexi; + constexpr cxsmpl<double> mdl_cbW = mdl_cbWRe + mdl_cbWIm * mdl_complexi; + constexpr cxsmpl<double> mdl_cdB = mdl_cdBRe + mdl_cdBIm * mdl_complexi; + constexpr cxsmpl<double> mdl_cbBB = mdl_cbBRe + mdl_cbBIm * mdl_complexi; + constexpr cxsmpl<double> mdl_cHud = mdl_cHudRe + mdl_cHudIm * mdl_complexi; + constexpr cxsmpl<double> mdl_cHtb = mdl_cHtbRe + mdl_cHtbIm * mdl_complexi; + constexpr cxsmpl<double> mdl_cutbd1 = mdl_cutbd1Re + mdl_cutbd1Im * mdl_complexi; + constexpr cxsmpl<double> mdl_cutbd8 = mdl_cutbd8Re + mdl_cutbd8Im * mdl_complexi; + constexpr cxsmpl<double> mdl_cjQtu1 = mdl_cjQtu1Re + mdl_cjQtu1Im * mdl_complexi; + constexpr cxsmpl<double> mdl_cjQtu8 = mdl_cjQtu8Re + mdl_cjQtu8Im * mdl_complexi; + constexpr cxsmpl<double> mdl_cjQbd1 = mdl_cjQbd1Re + mdl_cjQbd1Im * mdl_complexi; + constexpr cxsmpl<double> mdl_cjQbd8 = mdl_cjQbd8Re + mdl_cjQbd8Im * mdl_complexi; + constexpr cxsmpl<double> mdl_cjujd1 = mdl_cjujd1Re + mdl_cjujd1Im * mdl_complexi; + constexpr cxsmpl<double> mdl_cjujd8 = mdl_cjujd8Re + mdl_cjujd8Im * mdl_complexi; + constexpr cxsmpl<double> mdl_cjujd11 = mdl_cjujd11Re + mdl_cjujd11Im * mdl_complexi; + constexpr cxsmpl<double> mdl_cjujd81 = mdl_cjujd81Re + mdl_cjujd81Im * mdl_complexi; + constexpr cxsmpl<double> mdl_cQtjd1 = mdl_cQtjd1Re + mdl_cQtjd1Im * mdl_complexi; + constexpr cxsmpl<double> mdl_cQtjd8 = mdl_cQtjd8Re + mdl_cQtjd8Im * mdl_complexi; + constexpr cxsmpl<double> mdl_cjuQb1 = mdl_cjuQb1Re + mdl_cjuQb1Im * mdl_complexi; + constexpr cxsmpl<double> mdl_cjuQb8 = mdl_cjuQb8Re + mdl_cjuQb8Im * mdl_complexi; + constexpr cxsmpl<double> mdl_cQujb1 = mdl_cQujb1Re + mdl_cQujb1Im * mdl_complexi; + constexpr cxsmpl<double> mdl_cQujb8 = mdl_cQujb8Re + mdl_cQujb8Im * mdl_complexi; + constexpr cxsmpl<double> mdl_cjtQd1 = mdl_cjtQd1Re + mdl_cjtQd1Im * mdl_complexi; + constexpr cxsmpl<double> mdl_cjtQd8 = mdl_cjtQd8Re + mdl_cjtQd8Im * mdl_complexi; + constexpr cxsmpl<double> mdl_cQtQb1 = mdl_cQtQb1Re + mdl_cQtQb1Im * mdl_complexi; + constexpr cxsmpl<double> mdl_cQtQb8 = mdl_cQtQb8Re + mdl_cQtQb8Im * mdl_complexi; + constexpr cxsmpl<double> mdl_ceH = mdl_ceHRe + mdl_ceHIm * mdl_complexi; + constexpr cxsmpl<double> mdl_ceW = mdl_ceWRe + mdl_ceWIm * mdl_complexi; + constexpr cxsmpl<double> mdl_ceB = mdl_ceBRe + mdl_ceBIm * mdl_complexi; + constexpr cxsmpl<double> mdl_cledj = mdl_cledjRe + mdl_cledjIm * mdl_complexi; + constexpr cxsmpl<double> mdl_clebQ = mdl_clebQRe + mdl_clebQIm * mdl_complexi; + constexpr cxsmpl<double> mdl_cleju1 = mdl_cleju1Re + mdl_cleju1Im * mdl_complexi; + constexpr cxsmpl<double> mdl_cleju3 = mdl_cleju3Re + mdl_cleju3Im * mdl_complexi; + constexpr cxsmpl<double> mdl_cleQt1 = mdl_cleQt1Re + mdl_cleQt1Im * mdl_complexi; + constexpr
cxsmpl<double> mdl_cleQt3 = mdl_cleQt3Re + mdl_cleQt3Im * mdl_complexi; + constexpr double mdl_MWsm = mdl_MW; + constexpr double mdl_MW__exp__2 = ( ( mdl_MW ) * ( mdl_MW ) ); + constexpr double mdl_MZ__exp__2 = ( ( mdl_MZ ) * ( mdl_MZ ) ); + constexpr double mdl_sqrt__2 = constexpr_sqrt( 2. ); + constexpr double mdl_nb__2__exp__0_25 = constexpr_pow( 2., 0.25 ); + constexpr double mdl_MH__exp__2 = ( ( mdl_MH ) * ( mdl_MH ) ); + constexpr double mdl_sth2 = 1. - mdl_MW__exp__2 / mdl_MZ__exp__2; + constexpr double mdl_nb__10__exp___m_40 = constexpr_pow( 10., -40. ); + constexpr double mdl_propCorr = ABS( mdl_linearPropCorrections ) / ( ABS( mdl_linearPropCorrections ) + mdl_nb__10__exp___m_40 ); + constexpr double mdl_MZ1 = mdl_MZ; + constexpr double mdl_MH1 = mdl_MH; + constexpr double mdl_MT1 = mdl_MT; + constexpr double mdl_WZ1 = mdl_WZ; + constexpr double mdl_WW1 = mdl_WW; + constexpr double mdl_WH1 = mdl_WH; + constexpr double mdl_WT1 = mdl_WT; + constexpr double mdl_cth = constexpr_sqrt( 1. - mdl_sth2 ); + constexpr double mdl_MW1 = mdl_MWsm; + constexpr double mdl_sqrt__sth2 = constexpr_sqrt( mdl_sth2 ); + constexpr double mdl_sth = mdl_sqrt__sth2; + constexpr double mdl_LambdaSMEFT__exp__2 = ( ( mdl_LambdaSMEFT ) * ( mdl_LambdaSMEFT ) ); + constexpr cxsmpl<double> mdl_conjg__cbH = conj( mdl_cbH ); + constexpr cxsmpl<double> mdl_conjg__ctHH = conj( mdl_ctHH ); + constexpr double mdl_MT__exp__2 = ( ( mdl_MT ) * ( mdl_MT ) ); + constexpr double mdl_MH__exp__6 = constexpr_pow( mdl_MH, 6. ); + constexpr double mdl_MWsm__exp__6 = constexpr_pow( mdl_MWsm, 6. ); + constexpr double mdl_MH__exp__4 = ( ( mdl_MH ) * ( mdl_MH ) * ( mdl_MH ) * ( mdl_MH ) ); + constexpr double mdl_MWsm__exp__4 = ( ( mdl_MWsm ) * ( mdl_MWsm ) * ( mdl_MWsm ) * ( mdl_MWsm ) ); + constexpr double mdl_MWsm__exp__2 = ( ( mdl_MWsm ) * ( mdl_MWsm ) ); + constexpr double mdl_MZ__exp__4 = ( ( mdl_MZ ) * ( mdl_MZ ) * ( mdl_MZ ) * ( mdl_MZ ) ); + constexpr double mdl_MZ__exp__6 = constexpr_pow( mdl_MZ, 6. ); + constexpr double mdl_cth__exp__2 = ( ( mdl_cth ) * ( mdl_cth ) ); + constexpr double mdl_sth__exp__2 = ( ( mdl_sth ) * ( mdl_sth ) ); + constexpr double mdl_MB__exp__2 = ( ( mdl_MB ) * ( mdl_MB ) ); + constexpr double mdl_MZ__exp__3 = ( ( mdl_MZ ) * ( mdl_MZ ) * ( mdl_MZ ) ); + constexpr double mdl_sth__exp__4 = ( ( mdl_sth ) * ( mdl_sth ) * ( mdl_sth ) * ( mdl_sth ) ); + constexpr double mdl_sth__exp__6 = constexpr_pow( mdl_sth, 6. ); + constexpr double mdl_sth__exp__3 = ( ( mdl_sth ) * ( mdl_sth ) * ( mdl_sth ) ); + constexpr double mdl_sth__exp__5 = constexpr_pow( mdl_sth, 5. ); + constexpr double mdl_propCorr__exp__2 = ( ( mdl_propCorr ) * ( mdl_propCorr ) ); + constexpr double mdl_propCorr__exp__3 = ( ( mdl_propCorr ) * ( mdl_propCorr ) * ( mdl_propCorr ) ); + constexpr double mdl_propCorr__exp__4 = ( ( mdl_propCorr ) * ( mdl_propCorr ) * ( mdl_propCorr ) * ( mdl_propCorr ) ); + constexpr double mdl_cth__exp__3 = ( ( mdl_cth ) * ( mdl_cth ) * ( mdl_cth ) ); + constexpr double mdl_aEW = ( mdl_Gf * mdl_MW__exp__2 * ( 1. - mdl_MW__exp__2 / mdl_MZ__exp__2 ) * mdl_sqrt__2 ) / M_PI; + constexpr double mdl_sqrt__Gf = constexpr_sqrt( mdl_Gf ); + constexpr double mdl_vevhat = 1. / ( mdl_nb__2__exp__0_25 * mdl_sqrt__Gf ); + constexpr double mdl_lam = ( mdl_Gf * mdl_MH__exp__2 ) / mdl_sqrt__2; + constexpr double mdl_sqrt__aEW = constexpr_sqrt( mdl_aEW ); + constexpr double mdl_ee = 2.
* mdl_sqrt__aEW * constexpr_sqrt( M_PI ); + constexpr double mdl_yb = ( mdl_ymb * mdl_sqrt__2 ) / mdl_vevhat; + constexpr double mdl_yc = ( mdl_ymc * mdl_sqrt__2 ) / mdl_vevhat; + constexpr double mdl_ydo = ( mdl_ymdo * mdl_sqrt__2 ) / mdl_vevhat; + constexpr double mdl_ye = ( mdl_yme * mdl_sqrt__2 ) / mdl_vevhat; + constexpr double mdl_ym = ( mdl_ymm * mdl_sqrt__2 ) / mdl_vevhat; + constexpr double mdl_ys = ( mdl_yms * mdl_sqrt__2 ) / mdl_vevhat; + constexpr double mdl_yt = ( mdl_ymt * mdl_sqrt__2 ) / mdl_vevhat; + constexpr double mdl_ytau = ( mdl_ymtau * mdl_sqrt__2 ) / mdl_vevhat; + constexpr double mdl_yup = ( mdl_ymup * mdl_sqrt__2 ) / mdl_vevhat; + constexpr double mdl_vevhat__exp__2 = ( ( mdl_vevhat ) * ( mdl_vevhat ) ); + constexpr double mdl_dGf = ( ( 2. * mdl_cHl3 - mdl_cll1 ) * mdl_vevhat__exp__2 ) / mdl_LambdaSMEFT__exp__2; + constexpr double mdl_dkH = ( ( mdl_cHbox - mdl_cHDD / 4. ) * mdl_vevhat__exp__2 ) / mdl_LambdaSMEFT__exp__2; + constexpr double mdl_vevT = ( 1. + mdl_dGf / 2. ) * mdl_vevhat; + constexpr double mdl_g1 = mdl_ee / mdl_cth; + constexpr double mdl_gw = mdl_ee / mdl_sth; + constexpr cxsmpl<double> mdl_yb0 = ( 1. - mdl_dGf / 2. ) * mdl_yb + ( mdl_vevhat__exp__2 * mdl_conjg__cbH ) / ( 2. * mdl_LambdaSMEFT__exp__2 ); + constexpr cxsmpl<double> mdl_yt0 = ( 1. - mdl_dGf / 2. ) * mdl_yt + ( mdl_vevhat__exp__2 * mdl_conjg__ctHH ) / ( 2. * mdl_LambdaSMEFT__exp__2 ); + constexpr double mdl_ee__exp__2 = ( ( mdl_ee ) * ( mdl_ee ) ); + constexpr double mdl_gHaa = ( mdl_ee__exp__2 * ( -1.75 + ( 4. * ( 0.3333333333333333 + ( 7. * mdl_MH__exp__2 ) / ( 360. * mdl_MT__exp__2 ) ) ) / 3. - ( 29. * mdl_MH__exp__6 ) / ( 16800. * mdl_MWsm__exp__6 ) - ( 19. * mdl_MH__exp__4 ) / ( 1680. * mdl_MWsm__exp__4 ) - ( 11. * mdl_MH__exp__2 ) / ( 120. * mdl_MWsm__exp__2 ) ) ) / ( 8. * ( ( M_PI ) * ( M_PI ) ) ); + constexpr double mdl_gHza = ( mdl_ee__exp__2 * ( ( ( 0.4583333333333333 + ( 29. * mdl_MH__exp__6 ) / ( 100800. * mdl_MWsm__exp__6 ) + ( 19. * mdl_MH__exp__4 ) / ( 10080. * mdl_MWsm__exp__4 ) + ( 11. * mdl_MH__exp__2 ) / ( 720. * mdl_MWsm__exp__2 ) + ( mdl_MH__exp__4 * mdl_MZ__exp__2 ) / ( 2100. * mdl_MWsm__exp__6 ) + ( mdl_MH__exp__2 * mdl_MZ__exp__2 ) / ( 280. * mdl_MWsm__exp__4 ) + ( 7. * mdl_MZ__exp__2 ) / ( 180. * mdl_MWsm__exp__2 ) + ( 67. * mdl_MH__exp__2 * mdl_MZ__exp__4 ) / ( 100800. * mdl_MWsm__exp__6 ) + ( 53. * mdl_MZ__exp__4 ) / ( 10080. * mdl_MWsm__exp__4 ) + ( 43. * mdl_MZ__exp__6 ) / ( 50400. * mdl_MWsm__exp__6 ) - ( 31. * mdl_cth__exp__2 ) / ( 24. * mdl_sth__exp__2 ) - ( 29. * mdl_cth__exp__2 * mdl_MH__exp__6 ) / ( 20160. * mdl_MWsm__exp__6 * mdl_sth__exp__2 ) - ( 19. * mdl_cth__exp__2 * mdl_MH__exp__4 ) / ( 2016. * mdl_MWsm__exp__4 * mdl_sth__exp__2 ) - ( 11. * mdl_cth__exp__2 * mdl_MH__exp__2 ) / ( 144. * mdl_MWsm__exp__2 * mdl_sth__exp__2 ) - ( mdl_cth__exp__2 * mdl_MH__exp__4 * mdl_MZ__exp__2 ) / ( 560. * mdl_MWsm__exp__6 * mdl_sth__exp__2 ) - ( 31. * mdl_cth__exp__2 * mdl_MH__exp__2 * mdl_MZ__exp__2 ) / ( 2520. * mdl_MWsm__exp__4 * mdl_sth__exp__2 ) - ( mdl_cth__exp__2 * mdl_MZ__exp__2 ) / ( 9. * mdl_MWsm__exp__2 * mdl_sth__exp__2 ) - ( 43. * mdl_cth__exp__2 * mdl_MH__exp__2 * mdl_MZ__exp__4 ) / ( 20160. * mdl_MWsm__exp__6 * mdl_sth__exp__2 ) - ( 17. * mdl_cth__exp__2 * mdl_MZ__exp__4 ) / ( 1120. * mdl_MWsm__exp__4 * mdl_sth__exp__2 ) - ( 5. * mdl_cth__exp__2 * mdl_MZ__exp__6 ) / ( 2016. * mdl_MWsm__exp__6 * mdl_sth__exp__2 ) ) * mdl_sth ) / mdl_cth + ( ( 0.3333333333333333 + ( 7. * mdl_MH__exp__2 ) / ( 360. * mdl_MT__exp__2 ) + ( 11. * mdl_MZ__exp__2 ) / ( 360.
* mdl_MT__exp__2 ) ) * ( 0.5 - ( 4. * mdl_sth__exp__2 ) / 3. ) ) / ( mdl_cth * mdl_sth ) ) ) / ( 4. * ( ( M_PI ) * ( M_PI ) ) ); + constexpr double mdl_dMZ2 = ( ( mdl_cHDD / 2. + 2. * mdl_cHWB * mdl_cth * mdl_sth ) * mdl_vevhat__exp__2 ) / mdl_LambdaSMEFT__exp__2; + constexpr double mdl_dMH2 = 2. * mdl_dkH - ( 3. * mdl_cH * mdl_vevhat__exp__2 ) / ( 2. * mdl_lam * mdl_LambdaSMEFT__exp__2 ); + constexpr double mdl_dgw = -mdl_dGf / 2.; + constexpr double mdl_barlam = ( 1. - mdl_dGf - mdl_dMH2 ) * mdl_lam; + constexpr double mdl_dWT = 2. * mdl_WT * ( mdl_dgw + ( mdl_vevhat * ( mdl_ee * ( 3. * mdl_cHtbRe * mdl_MB * mdl_MT * mdl_MWsm__exp__2 + mdl_cHQ3 * ( ( ( mdl_MB__exp__2 - mdl_MT__exp__2 ) * ( mdl_MB__exp__2 - mdl_MT__exp__2 ) ) + ( mdl_MB__exp__2 + mdl_MT__exp__2 ) * mdl_MWsm__exp__2 - 2. * mdl_MWsm__exp__4 ) ) * mdl_vevhat + 6. * mdl_MWsm__exp__2 * ( mdl_ctWRe * mdl_MT * ( mdl_MB__exp__2 - mdl_MT__exp__2 + mdl_MWsm__exp__2 ) + mdl_cbWRe * mdl_MB * ( -mdl_MB__exp__2 + mdl_MT__exp__2 + mdl_MWsm__exp__2 ) ) * mdl_sth * mdl_sqrt__2 ) ) / ( mdl_ee * mdl_LambdaSMEFT__exp__2 * ( ( ( mdl_MB__exp__2 - mdl_MT__exp__2 ) * ( mdl_MB__exp__2 - mdl_MT__exp__2 ) ) + ( mdl_MB__exp__2 + mdl_MT__exp__2 ) * mdl_MWsm__exp__2 - 2. * mdl_MWsm__exp__4 ) ) ); + constexpr double mdl_dWW = ( 2. * mdl_dgw + ( 2. * ( 2. * mdl_cHj3 + mdl_cHl3 ) * mdl_vevhat__exp__2 ) / ( 3. * mdl_LambdaSMEFT__exp__2 ) ) * mdl_WW; + constexpr double mdl_gwsh = ( mdl_ee * ( 1. + mdl_dgw - ( mdl_cHW * mdl_vevhat__exp__2 ) / mdl_LambdaSMEFT__exp__2 ) ) / mdl_sth; + constexpr double mdl_vev = ( 1. - ( 3. * mdl_cH * mdl_vevhat__exp__2 ) / ( 8. * mdl_lam * mdl_LambdaSMEFT__exp__2 ) ) * mdl_vevT; + constexpr double mdl_dg1 = ( -mdl_dGf - mdl_dMZ2 / mdl_sth__exp__2 ) / 2.; + constexpr double mdl_dWHc = mdl_yc / ( mdl_yc + mdl_nb__10__exp___m_40 ) * ( -0.02884 * mdl_dGf + ( ( 0.05768 * mdl_cHbox - 0.01442 * mdl_cHDD - 0.05768 * mdl_cuHRe ) * mdl_vevhat__exp__2 ) / mdl_LambdaSMEFT__exp__2 ); + constexpr double mdl_dWHb = mdl_yb / ( mdl_yb + mdl_nb__10__exp___m_40 ) * ( mdl_vevhat__exp__2 * ( -1.1618 * mdl_cbHRe ) / ( mdl_LambdaSMEFT__exp__2 * ( mdl_yb + mdl_nb__10__exp___m_40 ) ) - 0.5809 * mdl_dGf + ( mdl_vevhat__exp__2 * ( 1.1618 * mdl_cHbox - 0.29045 * mdl_cHDD ) ) / ( mdl_LambdaSMEFT__exp__2 ) ); + constexpr double mdl_dWHta = mdl_ytau / ( mdl_ytau + mdl_nb__10__exp___m_40 ) * ( -0.06256 * mdl_dGf + mdl_vevhat__exp__2 * ( -0.12512 * mdl_ceHRe + 0.12512 * mdl_cHbox - 0.03128 * mdl_cHDD ) / ( mdl_LambdaSMEFT__exp__2 ) ); + constexpr double mdl_dWZ = mdl_WZ * ( -1. + ( 36. * mdl_cth * mdl_MB * mdl_MZ__exp__2 * mdl_sth * ( mdl_cbWRe * mdl_cth + mdl_cbBRe * mdl_sth ) * ( -3. + 4. * mdl_sth__exp__2 ) * mdl_vevhat * mdl_sqrt__2 * constexpr_sqrt( -4. * mdl_MB__exp__2 + mdl_MZ__exp__2 ) + mdl_ee * mdl_LambdaSMEFT__exp__2 * ( 2. * mdl_MZ__exp__3 * ( 27. + 54. * mdl_dgw - 54. * ( 1. + mdl_dg1 + mdl_dgw ) * mdl_sth__exp__2 + 76. * ( 1. + 4. * mdl_dg1 - 2. * mdl_dgw ) * mdl_sth__exp__4 + 152. * ( -mdl_dg1 + mdl_dgw ) * mdl_sth__exp__6 ) + mdl_MZ__exp__2 * ( 9. + 18. * mdl_dgw - 6. * ( 2. + mdl_dg1 + 3. * mdl_dgw ) * mdl_sth__exp__2 + 8. * ( 1. + 4. * mdl_dg1 - 2. * mdl_dgw ) * mdl_sth__exp__4 + 16. * ( -mdl_dg1 + mdl_dgw ) * mdl_sth__exp__6 ) * constexpr_sqrt( -4. * mdl_MB__exp__2 + mdl_MZ__exp__2 ) + mdl_MB__exp__2 * ( -9. - 18. * mdl_dgw - 6. * ( 4. + 11. * mdl_dg1 - 3. * mdl_dgw ) * mdl_sth__exp__2 + 16. * ( 1. + 4. * mdl_dg1 - 2. * mdl_dgw ) * mdl_sth__exp__4 + 32. * ( -mdl_dg1 + mdl_dgw ) * mdl_sth__exp__6 ) * constexpr_sqrt( -4. 
* mdl_MB__exp__2 + mdl_MZ__exp__2 ) ) + 2. * mdl_ee * mdl_vevhat__exp__2 * ( 36. * mdl_cHj3 * mdl_MZ__exp__3 + 18. * mdl_cHl3 * mdl_MZ__exp__3 + 9. * ( 3. * mdl_cHbq - mdl_cHQ1 - mdl_cHQ3 ) * mdl_MB__exp__2 * constexpr_sqrt( -4. * mdl_MB__exp__2 + mdl_MZ__exp__2 ) + 9. * mdl_cHQ1 * mdl_MZ__exp__2 * constexpr_sqrt( -4. * mdl_MB__exp__2 + mdl_MZ__exp__2 ) + 9. * mdl_cHQ3 * mdl_MZ__exp__2 * constexpr_sqrt( -4. * mdl_MB__exp__2 + mdl_MZ__exp__2 ) + 3. * mdl_cHWB * mdl_cth * ( -7. * mdl_MB__exp__2 + mdl_MZ__exp__2 ) * mdl_sth * constexpr_sqrt( -4. * mdl_MB__exp__2 + mdl_MZ__exp__2 ) + 8. * mdl_cHWB * mdl_cth * mdl_sth__exp__3 * ( 2. * mdl_MB__exp__2 * constexpr_sqrt( -4. * mdl_MB__exp__2 + mdl_MZ__exp__2 ) + mdl_MZ__exp__2 * ( 19. * mdl_MZ + constexpr_sqrt( -4. * mdl_MB__exp__2 + mdl_MZ__exp__2 ) ) ) - 8. * mdl_cHWB * mdl_cth * mdl_sth__exp__5 * ( 2. * mdl_MB__exp__2 * constexpr_sqrt( -4. * mdl_MB__exp__2 + mdl_MZ__exp__2 ) + mdl_MZ__exp__2 * ( 19. * mdl_MZ + constexpr_sqrt( -4. * mdl_MB__exp__2 + mdl_MZ__exp__2 ) ) ) - 6. * mdl_sth__exp__2 * ( 2. * ( mdl_cHbq + mdl_cHQ1 + mdl_cHQ3 ) * mdl_MB__exp__2 * constexpr_sqrt( -4. * mdl_MB__exp__2 + mdl_MZ__exp__2 ) + mdl_MZ__exp__2 * ( ( 2. * mdl_cHd + 3. * mdl_cHe - 2. * mdl_cHj1 + 3. * ( 2. * mdl_cHj3 + mdl_cHl1 + mdl_cHl3 ) - 4. * mdl_cHu ) * mdl_MZ + ( mdl_cHbq + mdl_cHQ1 + mdl_cHQ3 ) * constexpr_sqrt( -4. * mdl_MB__exp__2 + mdl_MZ__exp__2 ) ) ) ) ) / ( mdl_ee * mdl_LambdaSMEFT__exp__2 * ( 2. * mdl_MZ__exp__3 * ( 27. - 54. * mdl_sth__exp__2 + 76. * mdl_sth__exp__4 ) + mdl_MZ__exp__2 * ( 9. - 12. * mdl_sth__exp__2 + 8. * mdl_sth__exp__4 ) * constexpr_sqrt( -4. * mdl_MB__exp__2 + mdl_MZ__exp__2 ) + mdl_MB__exp__2 * ( -9. - 24. * mdl_sth__exp__2 + 16. * mdl_sth__exp__4 ) * constexpr_sqrt( -4. * mdl_MB__exp__2 + mdl_MZ__exp__2 ) ) ) ); + constexpr double mdl_g1sh = ( mdl_ee * ( 1. + mdl_dg1 - ( mdl_cHB * mdl_vevhat__exp__2 ) / mdl_LambdaSMEFT__exp__2 ) ) / mdl_cth; + constexpr double mdl_ee__exp__3 = ( ( mdl_ee ) * ( mdl_ee ) * ( mdl_ee ) ); + constexpr double mdl_vevhat__exp__3 = ( ( mdl_vevhat ) * ( mdl_vevhat ) * ( mdl_vevhat ) ); + + // Model couplings independent of aS + // (none) + + // Model parameters dependent on aS + //constexpr double mdl_sqrt__aS = //constexpr_sqrt( aS ); // now computed event-by-event (running alphas #373) + //constexpr double G = 2. * mdl_sqrt__aS * //constexpr_sqrt( M_PI ); // now computed event-by-event (running alphas #373) + //constexpr double mdl_gHgg2 = ( -7. * aS ) / ( 720. * M_PI ); // now computed event-by-event (running alphas #373) + //constexpr double mdl_gHgg4 = aS / ( 360. * M_PI ); // now computed event-by-event (running alphas #373) + //constexpr double mdl_gHgg5 = aS / ( 20. * M_PI ); // now computed event-by-event (running alphas #373) + //constexpr double mdl_G__exp__2 = ( ( G ) * ( G ) ); // now computed event-by-event (running alphas #373) + //constexpr double mdl_gHgg1 = mdl_G__exp__2 / ( 48. * ( ( M_PI ) * ( M_PI ) ) ); // now computed event-by-event (running alphas #373) + //constexpr double mdl_gHgg3 = ( aS * G ) / ( 60. 
* M_PI ); // now computed event-by-event (running alphas #373) + //constexpr cxsmpl mdl_G__exp__3 = ( ( G ) * ( G ) * ( G ) ); // now computed event-by-event (running alphas #373) + //constexpr double mdl_dWH = mdl_WH * ( -0.24161 * mdl_dGf + 0.96644 * mdl_dgw + 0.4832199999999999 * mdl_dkH - 0.11186509426655467 * mdl_dWW + ( 0.36410378449238195 * mdl_cHj3 * mdl_vevhat__exp__2 ) / mdl_LambdaSMEFT__exp__2 + ( 0.17608307708657747 * mdl_cHl3 * mdl_vevhat__exp__2 ) / mdl_LambdaSMEFT__exp__2 + ( 0.1636 * mdl_cHG * mdl_MT__exp__2 * mdl_vevhat__exp__2 ) / ( mdl_LambdaSMEFT__exp__2 * ( -0.5 * mdl_gHgg2 * mdl_MH__exp__2 + mdl_gHgg1 * mdl_MT__exp__2 ) ) + ( mdl_cHW * ( -0.35937785117066967 * mdl_gHaa * mdl_gHza + 0.006164 * mdl_cth * mdl_gHaa * mdl_sth + 0.00454 * mdl_gHza * mdl_sth__exp__2 ) * mdl_vevhat__exp__2 ) / ( mdl_gHaa * mdl_gHza * mdl_LambdaSMEFT__exp__2 ) + ( mdl_cHWB * ( -0.00454 * mdl_cth * mdl_gHza * mdl_sth + mdl_gHaa * ( -0.0030819999999999997 + 0.006163999999999999 * mdl_sth__exp__2 ) ) * mdl_vevhat__exp__2 ) / ( mdl_gHaa * mdl_gHza * mdl_LambdaSMEFT__exp__2 ) + ( mdl_cHB * ( -0.006163999999999999 * mdl_cth * mdl_gHaa * mdl_sth - 0.00454 * mdl_gHza * ( -1. + mdl_sth__exp__2 ) ) * mdl_vevhat__exp__2 ) / ( mdl_gHaa * mdl_gHza * mdl_LambdaSMEFT__exp__2 ) + mdl_dWHc + mdl_dWHb + mdl_dWHta ); // now computed event-by-event (running alphas #373) + + // Model couplings dependent on aS + //constexpr cxsmpl GC_6 = -( mdl_complexi * G ); // now computed event-by-event (running alphas #373) + //constexpr cxsmpl GC_7 = G; // now computed event-by-event (running alphas #373) + //constexpr cxsmpl GC_8 = mdl_complexi * mdl_G__exp__2; // now computed event-by-event (running alphas #373) + + // Print parameters that are unchanged during the run + void printIndependentParameters(); + + // Print couplings that are unchanged during the run + void printIndependentCouplings(); + + // Print parameters that are changed event by event + //void printDependentParameters(); // now computed event-by-event (running alphas #373) + + // Print couplings that are changed event by event + //void printDependentCouplings(); // now computed event-by-event (running alphas #373) +} + +#endif + +//========================================================================== + +namespace Parameters_SMEFTsim_topU3l_MwScheme_UFO_dependentCouplings +{ + constexpr size_t ndcoup = 3; // #couplings that vary event by event because they depend on the running alphas QCD + constexpr size_t idcoup_GC_6 = 0; + constexpr size_t idcoup_GC_7 = 1; + constexpr size_t idcoup_GC_8 = 2; + struct DependentCouplings_sv + { + cxtype_sv GC_6; + cxtype_sv GC_7; + cxtype_sv GC_8; + }; +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-variable" // e.g. <> +#pragma GCC diagnostic ignored "-Wunused-parameter" // e.g. <> +#ifdef __CUDACC__ +#pragma nv_diagnostic push +#pragma nv_diag_suppress 177 // e.g. <> +#endif + __host__ __device__ inline const DependentCouplings_sv computeDependentCouplings_fromG( const fptype_sv& G_sv ) + { +#ifdef MGONGPU_HARDCODE_PARAM + using namespace Parameters_SMEFTsim_topU3l_MwScheme_UFO; +#endif + // NB: hardcode cxtype cI(0,1) instead of cxtype (or hardcoded cxsmpl) mdl_complexi (which exists in Parameters_SMEFTsim_topU3l_MwScheme_UFO) because: + // (1) mdl_complexi is always (0,1); (2) mdl_complexi is undefined in device code; (3) need cxsmpl conversion to cxtype in code below + const cxtype cI( 0., 1. ); + DependentCouplings_sv out; + // Begin non-SM (e.g. 
EFT) implementation - special handling of vectors of floats (#439) +#if not( defined MGONGPU_CPPSIMD && defined MGONGPU_FPTYPE_FLOAT ) + { + const fptype_sv& G = G_sv; + // Model parameters dependent on aS + //const fptype_sv mdl_sqrt__aS = constexpr_sqrt( aS ); + //const fptype_sv G = 2. * mdl_sqrt__aS * constexpr_sqrt( M_PI ); + const fptype_sv mdl_gHgg2 = ( -7. * aS ) / ( 720. * M_PI ); + const fptype_sv mdl_gHgg4 = aS / ( 360. * M_PI ); + const fptype_sv mdl_gHgg5 = aS / ( 20. * M_PI ); + const fptype_sv mdl_G__exp__2 = ( ( G ) * ( G ) ); + const fptype_sv mdl_gHgg1 = mdl_G__exp__2 / ( 48. * ( ( M_PI ) * ( M_PI ) ) ); + const fptype_sv mdl_gHgg3 = ( aS * G ) / ( 60. * M_PI ); + constexpr cxsmpl mdl_G__exp__3 = ( ( G ) * ( G ) * ( G ) ); + const fptype_sv mdl_dWH = mdl_WH * ( -0.24161 * mdl_dGf + 0.96644 * mdl_dgw + 0.4832199999999999 * mdl_dkH - 0.11186509426655467 * mdl_dWW + ( 0.36410378449238195 * mdl_cHj3 * mdl_vevhat__exp__2 ) / mdl_LambdaSMEFT__exp__2 + ( 0.17608307708657747 * mdl_cHl3 * mdl_vevhat__exp__2 ) / mdl_LambdaSMEFT__exp__2 + ( 0.1636 * mdl_cHG * mdl_MT__exp__2 * mdl_vevhat__exp__2 ) / ( mdl_LambdaSMEFT__exp__2 * ( -0.5 * mdl_gHgg2 * mdl_MH__exp__2 + mdl_gHgg1 * mdl_MT__exp__2 ) ) + ( mdl_cHW * ( -0.35937785117066967 * mdl_gHaa * mdl_gHza + 0.006164 * mdl_cth * mdl_gHaa * mdl_sth + 0.00454 * mdl_gHza * mdl_sth__exp__2 ) * mdl_vevhat__exp__2 ) / ( mdl_gHaa * mdl_gHza * mdl_LambdaSMEFT__exp__2 ) + ( mdl_cHWB * ( -0.00454 * mdl_cth * mdl_gHza * mdl_sth + mdl_gHaa * ( -0.0030819999999999997 + 0.006163999999999999 * mdl_sth__exp__2 ) ) * mdl_vevhat__exp__2 ) / ( mdl_gHaa * mdl_gHza * mdl_LambdaSMEFT__exp__2 ) + ( mdl_cHB * ( -0.006163999999999999 * mdl_cth * mdl_gHaa * mdl_sth - 0.00454 * mdl_gHza * ( -1. + mdl_sth__exp__2 ) ) * mdl_vevhat__exp__2 ) / ( mdl_gHaa * mdl_gHza * mdl_LambdaSMEFT__exp__2 ) + mdl_dWHc + mdl_dWHb + mdl_dWHta ); + // Model couplings dependent on aS + out.GC_6 = -( cI * G ); + out.GC_7 = G; + out.GC_8 = cI * mdl_G__exp__2; + } +#else + // ** NB #439: special handling is necessary ONLY FOR VECTORS OF FLOATS (variable Gs are vector floats, fixed parameters are scalar doubles) + // Use an explicit loop to avoid <> + // Problems may come e.g. in EFTs from multiplying a vector float (related to aS-dependent G) by a scalar double (aS-independent parameters) + fptype_v GC_6r_v; + fptype_v GC_6i_v; + fptype_v GC_7r_v; + fptype_v GC_7i_v; + fptype_v GC_8r_v; + fptype_v GC_8i_v; + for( int i = 0; i < neppV; i++ ) + { + const fptype& G = G_sv[i]; + // Model parameters dependent on aS + //const fptype mdl_sqrt__aS = constexpr_sqrt( aS ); + //const fptype G = 2. * mdl_sqrt__aS * constexpr_sqrt( M_PI ); + const fptype mdl_gHgg2 = ( -7. * aS ) / ( 720. * M_PI ); + const fptype mdl_gHgg4 = aS / ( 360. * M_PI ); + const fptype mdl_gHgg5 = aS / ( 20. * M_PI ); + const fptype mdl_G__exp__2 = ( ( G ) * ( G ) ); + const fptype mdl_gHgg1 = mdl_G__exp__2 / ( 48. * ( ( M_PI ) * ( M_PI ) ) ); + const fptype mdl_gHgg3 = ( aS * G ) / ( 60. 
* M_PI ); + constexpr cxsmpl mdl_G__exp__3 = ( ( G ) * ( G ) * ( G ) ); + const fptype mdl_dWH = mdl_WH * ( -0.24161 * mdl_dGf + 0.96644 * mdl_dgw + 0.4832199999999999 * mdl_dkH - 0.11186509426655467 * mdl_dWW + ( 0.36410378449238195 * mdl_cHj3 * mdl_vevhat__exp__2 ) / mdl_LambdaSMEFT__exp__2 + ( 0.17608307708657747 * mdl_cHl3 * mdl_vevhat__exp__2 ) / mdl_LambdaSMEFT__exp__2 + ( 0.1636 * mdl_cHG * mdl_MT__exp__2 * mdl_vevhat__exp__2 ) / ( mdl_LambdaSMEFT__exp__2 * ( -0.5 * mdl_gHgg2 * mdl_MH__exp__2 + mdl_gHgg1 * mdl_MT__exp__2 ) ) + ( mdl_cHW * ( -0.35937785117066967 * mdl_gHaa * mdl_gHza + 0.006164 * mdl_cth * mdl_gHaa * mdl_sth + 0.00454 * mdl_gHza * mdl_sth__exp__2 ) * mdl_vevhat__exp__2 ) / ( mdl_gHaa * mdl_gHza * mdl_LambdaSMEFT__exp__2 ) + ( mdl_cHWB * ( -0.00454 * mdl_cth * mdl_gHza * mdl_sth + mdl_gHaa * ( -0.0030819999999999997 + 0.006163999999999999 * mdl_sth__exp__2 ) ) * mdl_vevhat__exp__2 ) / ( mdl_gHaa * mdl_gHza * mdl_LambdaSMEFT__exp__2 ) + ( mdl_cHB * ( -0.006163999999999999 * mdl_cth * mdl_gHaa * mdl_sth - 0.00454 * mdl_gHza * ( -1. + mdl_sth__exp__2 ) ) * mdl_vevhat__exp__2 ) / ( mdl_gHaa * mdl_gHza * mdl_LambdaSMEFT__exp__2 ) + mdl_dWHc + mdl_dWHb + mdl_dWHta ); + // Model couplings dependent on aS + const cxtype GC_6 = -( cI * G ); + const cxtype GC_7 = G; + const cxtype GC_8 = cI * mdl_G__exp__2; + GC_6r_v[i] = cxreal( GC_6 ); + GC_6i_v[i] = cximag( GC_6 ); + GC_7r_v[i] = cxreal( GC_7 ); + GC_7i_v[i] = cximag( GC_7 ); + GC_8r_v[i] = cxreal( GC_8 ); + GC_8i_v[i] = cximag( GC_8 ); + } + out.GC_6 = cxtype_v( GC_6r_v, GC_6i_v ); + out.GC_7 = cxtype_v( GC_7r_v, GC_7i_v ); + out.GC_8 = cxtype_v( GC_8r_v, GC_8i_v ); +#endif + // End non-SM (e.g. EFT) implementation - special handling of vectors of floats (#439) + return out; + } +#ifdef __CUDACC__ +#pragma GCC diagnostic pop +#pragma nv_diagnostic pop +#endif +} + +//========================================================================== + +namespace Parameters_SMEFTsim_topU3l_MwScheme_UFO_independentCouplings +{ + constexpr size_t nicoup = 0; // #couplings that are fixed for all events because they do not depend on the running alphas QCD + // NB: there are no aS-independent couplings in this physics process +} + +//========================================================================== + +#ifdef __CUDACC__ +namespace mg5amcGpu +#else +namespace mg5amcCpu +#endif +{ +#pragma GCC diagnostic push +#ifndef __clang__ +#pragma GCC diagnostic ignored "-Wunused-but-set-variable" // e.g. <> +#endif + // Compute the output couplings (e.g. 
gc10 and gc11) from the input gs + template + __device__ inline void + G2COUP( const fptype gs[], + fptype couplings[] ) + { + mgDebug( 0, __FUNCTION__ ); + using namespace Parameters_SMEFTsim_topU3l_MwScheme_UFO_dependentCouplings; + const fptype_sv& gs_sv = G_ACCESS::kernelAccessConst( gs ); + DependentCouplings_sv couplings_sv = computeDependentCouplings_fromG( gs_sv ); + fptype* GC_6s = C_ACCESS::idcoupAccessBuffer( couplings, idcoup_GC_6 ); + fptype* GC_7s = C_ACCESS::idcoupAccessBuffer( couplings, idcoup_GC_7 ); + fptype* GC_8s = C_ACCESS::idcoupAccessBuffer( couplings, idcoup_GC_8 ); + cxtype_sv_ref GC_6s_sv = C_ACCESS::kernelAccess( GC_6s ); + cxtype_sv_ref GC_7s_sv = C_ACCESS::kernelAccess( GC_7s ); + cxtype_sv_ref GC_8s_sv = C_ACCESS::kernelAccess( GC_8s ); + GC_6s_sv = couplings_sv.GC_6; + GC_7s_sv = couplings_sv.GC_7; + GC_8s_sv = couplings_sv.GC_8; + mgDebug( 1, __FUNCTION__ ); + return; + } +#pragma GCC diagnostic pop +} + +//========================================================================== + +#endif // Parameters_SMEFTsim_topU3l_MwScheme_UFO_H diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/src/cudacpp_src.mk b/epochX/cudacpp/smeft_gg_tttt.sa/src/cudacpp_src.mk new file mode 100644 index 0000000000..87e1ae946d --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/src/cudacpp_src.mk @@ -0,0 +1,268 @@ +#=== Determine the name of this makefile (https://ftp.gnu.org/old-gnu/Manuals/make-3.80/html_node/make_17.html) +#=== NB: assume that the same name (e.g. cudacpp.mk, Makefile...) is used in the Subprocess and src directories + +THISMK = $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)) + +#------------------------------------------------------------------------------- + +#=== Use bash in the Makefile (https://www.gnu.org/software/make/manual/html_node/Choosing-the-Shell.html) + +SHELL := /bin/bash + +#------------------------------------------------------------------------------- + +#=== Configure common compiler flags for CUDA and C++ + +INCFLAGS = -I. 
+OPTFLAGS = -O3 # this ends up in CUFLAGS too (should it?), cannot add -Ofast or -ffast-math here + +#------------------------------------------------------------------------------- + +#=== Configure the C++ compiler + +CXXFLAGS = $(OPTFLAGS) -std=c++17 $(INCFLAGS) $(USE_NVTX) -fPIC -Wall -Wshadow -Wextra +ifeq ($(shell $(CXX) --version | grep ^nvc++),) +CXXFLAGS+= -ffast-math # see issue #117 +endif +###CXXFLAGS+= -Ofast # performance is not different from --fast-math +###CXXFLAGS+= -g # FOR DEBUGGING ONLY + +# Note: AR, CXX and FC are implicitly defined if not set externally +# See https://www.gnu.org/software/make/manual/html_node/Implicit-Variables.html +###RANLIB = ranlib + +#------------------------------------------------------------------------------- + +#=== Configure ccache for CUDA and C++ builds + +# Enable ccache if USECCACHE=1 +ifeq ($(USECCACHE)$(shell echo $(CXX) | grep ccache),1) + override CXX:=ccache $(CXX) +endif +#ifeq ($(USECCACHE)$(shell echo $(AR) | grep ccache),1) +# override AR:=ccache $(AR) +#endif +#ifneq ($(NVCC),) +# ifeq ($(USECCACHE)$(shell echo $(NVCC) | grep ccache),1) +# override NVCC:=ccache $(NVCC) +# endif +#endif + +#------------------------------------------------------------------------------- + +#=== Configure PowerPC-specific compiler flags for CUDA and C++ + +# Assuming uname is available, detect if architecture is PowerPC +UNAME_P := $(shell uname -p) + +# PowerPC-specific CXX compiler flags (being reviewed) +ifeq ($(UNAME_P),ppc64le) + CXXFLAGS+= -mcpu=power9 -mtune=power9 # gains ~2-3% both for none and sse4 + # Throughput references without the extra flags below: none=1.41-1.42E6, sse4=2.15-2.19E6 + ###CXXFLAGS+= -DNO_WARN_X86_INTRINSICS # no change + ###CXXFLAGS+= -fpeel-loops # no change + ###CXXFLAGS+= -funroll-loops # gains ~1% for none, loses ~1% for sse4 + ###CXXFLAGS+= -ftree-vectorize # no change + ###CXXFLAGS+= -flto # BUILD ERROR IF THIS ADDED IN SRC?! 
+else + ###AR=gcc-ar # needed by -flto + ###RANLIB=gcc-ranlib # needed by -flto + ###CXXFLAGS+= -flto # NB: build error from src/Makefile unless gcc-ar and gcc-ranlib are used + ######CXXFLAGS+= -fno-semantic-interposition # no benefit (neither alone, nor combined with -flto) +endif + +#------------------------------------------------------------------------------- + +#=== Set the CUDA/C++ compiler flags appropriate to user-defined choices of AVX, FPTYPE, HELINL, HRDCOD, RNDGEN + +# Set the build flags appropriate to OMPFLAGS +###$(info OMPFLAGS=$(OMPFLAGS)) +CXXFLAGS += $(OMPFLAGS) + +# Set the build flags appropriate to each AVX choice (example: "make AVX=none") +# [NB MGONGPU_PVW512 is needed because "-mprefer-vector-width=256" is not exposed in a macro] +# [See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96476] +$(info AVX=$(AVX)) +ifeq ($(UNAME_P),ppc64le) + ifeq ($(AVX),sse4) + override AVXFLAGS = -D__SSE4_2__ # Power9 VSX with 128 width (VSR registers) + else ifneq ($(AVX),none) + $(error Unknown AVX='$(AVX)': only 'none' and 'sse4' are supported on PowerPC for the moment) + endif +else ifeq ($(UNAME_P),arm) + ifeq ($(AVX),sse4) + override AVXFLAGS = -D__SSE4_2__ # ARM NEON with 128 width (Q/quadword registers) + else ifneq ($(AVX),none) + $(error Unknown AVX='$(AVX)': only 'none' and 'sse4' are supported on ARM for the moment) + endif +else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 + ifeq ($(AVX),none) + override AVXFLAGS = -mno-sse3 # no SIMD + else ifeq ($(AVX),sse4) + override AVXFLAGS = -mno-avx # SSE4.2 with 128 width (xmm registers) + else ifeq ($(AVX),avx2) + override AVXFLAGS = -march=haswell # AVX2 with 256 width (ymm registers) [DEFAULT for clang] + else ifeq ($(AVX),512y) + override AVXFLAGS = -march=skylake -mprefer-vector-width=256 # AVX512 with 256 width (ymm registers) [DEFAULT for gcc] + else ifeq ($(AVX),512z) + override AVXFLAGS = -march=skylake -DMGONGPU_PVW512 # AVX512 with 512 width (zmm registers) + else + $(error Unknown AVX='$(AVX)': only 'none', 'sse4', 'avx2', '512y' and '512z' are supported) + endif +else + ifeq ($(AVX),sse4) + override AVXFLAGS = -march=nehalem # SSE4.2 with 128 width (xmm registers) + else ifeq ($(AVX),avx2) + override AVXFLAGS = -march=haswell # AVX2 with 256 width (ymm registers) [DEFAULT for clang] + else ifeq ($(AVX),512y) + override AVXFLAGS = -march=skylake-avx512 -mprefer-vector-width=256 # AVX512 with 256 width (ymm registers) [DEFAULT for gcc] + else ifeq ($(AVX),512z) + override AVXFLAGS = -march=skylake-avx512 -DMGONGPU_PVW512 # AVX512 with 512 width (zmm registers) + else ifneq ($(AVX),none) + $(error Unknown AVX='$(AVX)': only 'none', 'sse4', 'avx2', '512y' and '512z' are supported) + endif +endif +# For the moment, use AVXFLAGS everywhere: eventually, use them only in encapsulated implementations? 
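+# Illustrative mapping (not part of the generated logic): on a plain x86-64 gcc/clang build the choices above
+# translate for example into "make AVX=sse4" -> -march=nehalem, "make AVX=avx2" -> -march=haswell,
+# "make AVX=512y" -> -march=skylake-avx512 -mprefer-vector-width=256, "make AVX=512z" -> -march=skylake-avx512 -DMGONGPU_PVW512.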
+CXXFLAGS+= $(AVXFLAGS) + +# Set the build flags appropriate to each FPTYPE choice (example: "make FPTYPE=f") +###$(info FPTYPE=$(FPTYPE)) +ifeq ($(FPTYPE),d) + CXXFLAGS += -DMGONGPU_FPTYPE_DOUBLE -DMGONGPU_FPTYPE2_DOUBLE +else ifeq ($(FPTYPE),f) + CXXFLAGS += -DMGONGPU_FPTYPE_FLOAT -DMGONGPU_FPTYPE2_FLOAT +else ifeq ($(FPTYPE),m) + CXXFLAGS += -DMGONGPU_FPTYPE_DOUBLE -DMGONGPU_FPTYPE2_FLOAT +else + $(error Unknown FPTYPE='$(FPTYPE)': only 'd', 'f' and 'm' are supported) +endif + +# Set the build flags appropriate to each HELINL choice (example: "make HELINL=1") +###$(info HELINL=$(HELINL)) +ifeq ($(HELINL),1) + CXXFLAGS += -DMGONGPU_INLINE_HELAMPS +else ifneq ($(HELINL),0) + $(error Unknown HELINL='$(HELINL)': only '0' and '1' are supported) +endif + +# Set the build flags appropriate to each HRDCOD choice (example: "make HRDCOD=1") +###$(info HRDCOD=$(HRDCOD)) +ifeq ($(HRDCOD),1) + CXXFLAGS += -DMGONGPU_HARDCODE_PARAM +else ifneq ($(HRDCOD),0) + $(error Unknown HRDCOD='$(HRDCOD)': only '0' and '1' are supported) +endif + +# Set the build flags appropriate to each RNDGEN choice (example: "make RNDGEN=hasNoCurand") +###$(info RNDGEN=$(RNDGEN)) +ifeq ($(RNDGEN),hasNoCurand) + CXXFLAGS += -DMGONGPU_HAS_NO_CURAND +else ifneq ($(RNDGEN),hasCurand) + $(error Unknown RNDGEN='$(RNDGEN)': only 'hasCurand' and 'hasNoCurand' are supported) +endif + +#------------------------------------------------------------------------------- + +#=== Configure build directories and build lockfiles === + +# Build directory "short" tag (defines target and path to the optional build directory) +# (Rationale: keep directory names shorter, e.g. do not include random number generator choice) +override DIRTAG = $(AVX)_$(FPTYPE)_inl$(HELINL)_hrd$(HRDCOD) + +# Build lockfile "full" tag (defines full specification of build options that cannot be intermixed) +# (Rationale: avoid mixing of CUDA and no-CUDA environment builds with different random number generators) +override TAG = $(AVX)_$(FPTYPE)_inl$(HELINL)_hrd$(HRDCOD)_$(RNDGEN) + +# Build directory: current directory by default, or build.$(DIRTAG) if USEBUILDDIR==1 +###$(info Current directory is $(shell pwd)) +ifeq ($(USEBUILDDIR),1) + override BUILDDIR = build.$(DIRTAG) + override LIBDIRREL = ../lib/$(BUILDDIR) + ###$(info Building in BUILDDIR=$(BUILDDIR) for tag=$(TAG) (USEBUILDDIR=1 is set)) +else + override BUILDDIR = . + override LIBDIRREL = ../lib + ###$(info Building in BUILDDIR=$(BUILDDIR) for tag=$(TAG) (USEBUILDDIR is not set)) +endif +######$(info Building in BUILDDIR=$(BUILDDIR) for tag=$(TAG)) + +# Workaround for Mac #375 (I did not manage to fix rpath with @executable_path): use absolute paths for LIBDIR +# (NB: this is quite ugly because it creates the directory if it does not exist - to avoid removing src by mistake) +UNAME_S := $(shell uname -s) +ifeq ($(UNAME_S),Darwin) +override LIBDIR = $(shell mkdir -p $(LIBDIRREL); cd $(LIBDIRREL); pwd) +ifeq ($(wildcard $(LIBDIR)),) +$(error Directory LIBDIR="$(LIBDIR)" should have been created by now) +endif +else +override LIBDIR = $(LIBDIRREL) +endif + +#=============================================================================== +#=== Makefile TARGETS and build rules below +#=============================================================================== + +# NB1: there are no CUDA targets in src as we avoid RDC! +# NB2: CUDA includes for curand.h are no longer needed in the C++ code anywhere in src! 
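+# Illustrative example (not part of the generated makefile): with "make USEBUILDDIR=1 AVX=avx2 FPTYPE=f HELINL=0 HRDCOD=0 RNDGEN=hasCurand"
+# the definitions above give DIRTAG=avx2_f_inl0_hrd0 and TAG=avx2_f_inl0_hrd0_hasCurand, i.e. objects are built in
+# build.avx2_f_inl0_hrd0 and the common library is installed in ../lib/build.avx2_f_inl0_hrd0.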
+ +MG5AMC_COMMONLIB = mg5amc_common + +# First target (default goal) +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so + +# Target (and build options): debug +debug: OPTFLAGS = -g -O0 -DDEBUG2 +debug: all.$(TAG) + +# Target: tag-specific build lockfiles +override oldtagsb=`if [ -d $(BUILDDIR) ]; then find $(BUILDDIR) -maxdepth 1 -name '.build.*' ! -name '.build.$(TAG)' -exec echo $(shell pwd)/{} \; ; fi` +override oldtagsl=`if [ -d $(LIBDIR) ]; then find $(LIBDIR) -maxdepth 1 -name '.build.*' ! -name '.build.$(TAG)' -exec echo $(shell pwd)/{} \; ; fi` + +$(BUILDDIR)/.build.$(TAG): $(LIBDIR)/.build.$(TAG) + +$(LIBDIR)/.build.$(TAG): + @if [ "$(oldtagsl)" != "" ]; then echo -e "Cannot build for tag=$(TAG) as old builds exist in $(LIBDIR) for other tags:\n$(oldtagsl)\nPlease run 'make clean' first\nIf 'make clean' is not enough: run 'make clean USEBUILDDIR=1 AVX=$(AVX) FPTYPE=$(FPTYPE)' or 'make cleanall'"; exit 1; fi + @if [ "$(oldtagsb)" != "" ]; then echo -e "Cannot build for tag=$(TAG) as old builds exist in $(BUILDDIR) for other tags:\n$(oldtagsb)\nPlease run 'make clean' first\nIf 'make clean' is not enough: run 'make clean USEBUILDDIR=1 AVX=$(AVX) FPTYPE=$(FPTYPE)' or 'make cleanall'"; exit 1; fi + @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi + @touch $(LIBDIR)/.build.$(TAG) + @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi + @touch $(BUILDDIR)/.build.$(TAG) + +#------------------------------------------------------------------------------- + +# Generic target and build rules: objects from C++ compilation +$(BUILDDIR)/%.o : %.cc *.h + @if [ ! -d $(BUILDDIR) ]; then mkdir -p $(BUILDDIR); fi + $(CXX) $(CPPFLAGS) $(CXXFLAGS) -c $< -o $@ + +#------------------------------------------------------------------------------- + +cxx_objects=$(addprefix $(BUILDDIR)/, Parameters_SMEFTsim_topU3l_MwScheme_UFO.o read_slha.o) + +# Target (and build rules): common (src) library +$(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) + @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi + $(CXX) -shared -o$@ $(cxx_objects) + +#------------------------------------------------------------------------------- + +# Target: clean the builds +.PHONY: clean + +clean: +ifeq ($(USEBUILDDIR),1) + rm -rf $(LIBDIR) + rm -rf $(BUILDDIR) +else + rm -f $(LIBDIR)/.build.* $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so + rm -f $(BUILDDIR)/.build.* $(BUILDDIR)/*.o $(BUILDDIR)/*.exe +endif + +cleanall: + @echo + $(MAKE) clean -f $(THISMK) + @echo + rm -rf $(LIBDIR)/build.* + rm -rf build.* + +#------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/src/mgOnGpuConfig.h b/epochX/cudacpp/smeft_gg_tttt.sa/src/mgOnGpuConfig.h new file mode 100644 index 0000000000..e11e8ec53b --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/src/mgOnGpuConfig.h @@ -0,0 +1,234 @@ +#ifndef MGONGPUCONFIG_H +#define MGONGPUCONFIG_H 1 + +// HARDCODED AT CODE GENERATION TIME: DO NOT MODIFY (#473) +// There are two different code bases for standalone_cudacpp (without multichannel) and madevent+cudacpp (with multichannel) +#undef MGONGPU_SUPPORTS_MULTICHANNEL + +// ** NB1 Throughputs (e.g. 
6.8E8) are events/sec for "./gcheck.exe -p 65536 128 12" +// ** NB2 Baseline on b7g47n0004 fluctuates (probably depends on load on other VMs) + +// Choose if curand is supported for generating random numbers +// For C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND +#ifdef __CUDACC__ +#undef MGONGPU_HAS_NO_CURAND +#else +//#undef MGONGPU_HAS_NO_CURAND // default +////#define MGONGPU_HAS_NO_CURAND 1 +#endif + +// Choose floating point precision (for everything but color algebra #537) +// If one of these macros has been set from outside with e.g. -DMGONGPU_FPTYPE_FLOAT, nothing happens (issue #167) +#if not defined MGONGPU_FPTYPE_DOUBLE and not defined MGONGPU_FPTYPE_FLOAT +// Floating point precision (CHOOSE ONLY ONE) +#define MGONGPU_FPTYPE_DOUBLE 1 // default +//#define MGONGPU_FPTYPE_FLOAT 1 // 2x faster +#endif + +// Choose floating point precision (for color algebra alone #537) +// If one of these macros has been set from outside with e.g. -DMGONGPU_FPTYPE2_FLOAT, nothing happens (issue #167) +#if not defined MGONGPU_FPTYPE2_DOUBLE and not defined MGONGPU_FPTYPE2_FLOAT +// Floating point precision (CHOOSE ONLY ONE) +#define MGONGPU_FPTYPE2_DOUBLE 1 // default +//#define MGONGPU_FPTYPE2_FLOAT 1 // 2x faster +#endif + +// Choose whether to inline all HelAmps functions +// This optimization can gain almost a factor 4 in C++, similar to -flto (issue #229) +// By default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_INLINE_HELAMPS +//#undef MGONGPU_INLINE_HELAMPS // default +////#define MGONGPU_INLINE_HELAMPS 1 + +// Choose whether to hardcode the cIPD physics parameters rather than reading them from user cards +// This optimization can gain 20% in CUDA in eemumu (issue #39) +// By default, do not hardcode, but allow this macro to be set from outside with e.g. 
-DMGONGPU_HARDCODE_PARAM +// ** NB: The option to use hardcoded cIPD physics parameters is supported again even now when alphas is running (#373) +// ** NB: Note however that it now only refers to cIPD parameters (cIPC parameters are always accessed through global memory) +//#undef MGONGPU_HARDCODE_PARAM // default +////#define MGONGPU_HARDCODE_PARAM 1 + +// Complex type in c++: std::complex or cxsmpl (CHOOSE ONLY ONE) +#ifndef __CUDACC__ +//#define MGONGPU_CPPCXTYPE_STDCOMPLEX 1 // ~8 percent slower on float, same on double (5.1E6/double, 9.4E6/float) +#define MGONGPU_CPPCXTYPE_CXSMPL 1 // new default (5.1E6/double, 10.2E6/float) +#endif + +// Complex type in cuda: thrust or cucomplex or cxsmpl (CHOOSE ONLY ONE) +#ifdef __CUDACC__ +#define MGONGPU_CUCXTYPE_THRUST 1 // default (~1.15E9/double, ~3.2E9/float) +//#define MGONGPU_CUCXTYPE_CUCOMPLEX 1 // ~10 percent slower (1.03E9/double, ~2.8E9/float) +//#define MGONGPU_CUCXTYPE_CXSMPL 1 // ~10 percent slower (1.00E9/double, ~2.9E9/float) +#endif + +// Cuda nsight compute (ncu) debug: add dummy lines to ease SASS program flow navigation +#ifdef __CUDACC__ +#undef MGONGPU_NSIGHT_DEBUG // default +//#define MGONGPU_NSIGHT_DEBUG 1 +#endif + +// SANITY CHECKS (floating point precision for everything but color algebra #537) +#if defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE_FLOAT +#error You must CHOOSE (ONE AND) ONLY ONE of MGONGPU_FPTYPE_DOUBLE or defined MGONGPU_FPTYPE_FLOAT +#endif + +// SANITY CHECKS (floating point precision for color algebra alone #537) +#if defined MGONGPU_FPTYPE2_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT +#error You must CHOOSE (ONE AND) ONLY ONE of MGONGPU_FPTYPE2_DOUBLE or defined MGONGPU_FPTYPE2_FLOAT +#endif +#if defined MGONGPU_FPTYPE2_DOUBLE and defined MGONGPU_FPTYPE_FLOAT +#error You cannot use double precision for color algebra and single precision elsewhere +#endif + +// SANITY CHECKS (c++ complex number implementation) +#ifndef __CUDACC__ +#if defined MGONGPU_CPPCXTYPE_STDCOMPLEX and defined MGONGPU_CPPCXTYPE_CXSMPL +#error You must CHOOSE (ONE AND) ONLY ONE of MGONGPU_CPPCXTYPE_STDCOMPLEX or MGONGPU_CPPCXTYPE_CXSMPL +#endif +#endif + +// SANITY CHECKS (cuda complex number implementation) +#ifdef __CUDACC__ +#if defined MGONGPU_CUCXTYPE_THRUST and defined MGONGPU_CUCXTYPE_CUCOMPLEX and defined MGONGPU_CUCXTYPE_CXSMPL +#error You must CHOOSE (ONE AND) ONLY ONE of MGONGPU_CUCXTYPE_THRUST or MGONGPU_CUCXTYPE_CUCOMPLEX or MGONGPU_CUCXTYPE_CXSMPL +#endif +#endif + +namespace mgOnGpu +{ + + // --- Type definitions + + // Floating point type (for everything but color algebra #537): fptype +#if defined MGONGPU_FPTYPE_DOUBLE + typedef double fptype; // double precision (8 bytes, fp64) +#elif defined MGONGPU_FPTYPE_FLOAT + typedef float fptype; // single precision (4 bytes, fp32) +#endif + + // Floating point type (for color algebra alone #537): fptype2 +#if defined MGONGPU_FPTYPE2_DOUBLE + typedef double fptype2; // double precision (8 bytes, fp64) +#elif defined MGONGPU_FPTYPE2_FLOAT + typedef float fptype2; // single precision (4 bytes, fp32) +#endif + + // --- Physics process-specific constants that are best declared at compile time + + const int np4 = 4; // dimensions of 4-momenta (E,px,py,pz) + + const int npari = 2; // #particles in the initial state (incoming): e.g. 2 (e+ e-) for e+ e- -> mu+ mu- + + const int nparf = 4; // #particles in the final state (outgoing): e.g. 2 (mu+ mu-) for e+ e- -> mu+ mu- + + const int npar = npari + nparf; // #particles in total (external = initial + final): e.g. 
4 for e+ e- -> mu+ mu- + + const int ncomb = 64; // #helicity combinations: e.g. 16 for e+ e- -> mu+ mu- (2**4 = fermion spin up/down ** npar) + + const int nw6 = 6; // dimensions of each wavefunction (HELAS KEK 91-11): e.g. 6 for e+ e- -> mu+ mu- (fermions and vectors) + + const int nwf = 18; // #wavefunctions = #external (npar) + #internal: e.g. 5 for e+ e- -> mu+ mu- (1 internal is gamma or Z) + + // --- Platform-specific software implementation details + + // Maximum number of blocks per grid + // ** NB Some arrays of pointers will be allocated statically to fit all these blocks + // ** (the actual memory for each block will then be allocated dynamically only for existing blocks) + //const int nbpgMAX = 2048; + + // Maximum number of threads per block + //const int ntpbMAX = 256; // AV Apr2021: why had I set this to 256? + const int ntpbMAX = 1024; // NB: 512 is ok, but 1024 does fail with "too many resources requested for launch" + + // Alignment requirement for using reinterpret_cast with SIMD vectorized code + // (using reinterpret_cast with non aligned memory may lead to segmentation faults!) + // Only needed for C++ code but can be enforced also in NVCC builds of C++ code using CUDA>=11.2 and C++17 (#318, #319, #333) +#ifndef __CUDACC__ + constexpr int cppAlign = 64; // alignment requirement for SIMD vectorization (64-byte i.e. 512-bit) +#endif + +} + +// Expose typedefs and operators outside the namespace +using mgOnGpu::fptype; +using mgOnGpu::fptype2; + +// C++ SIMD vectorization width (this will be used to set neppV) +#ifdef __CUDACC__ // CUDA implementation has no SIMD +#undef MGONGPU_CPPSIMD +#elif defined __AVX512VL__ && defined MGONGPU_PVW512 // C++ "512z" AVX512 with 512 width (512-bit ie 64-byte): 8 (DOUBLE) or 16 (FLOAT) +#ifdef MGONGPU_FPTYPE_DOUBLE +#define MGONGPU_CPPSIMD 8 +#else +#define MGONGPU_CPPSIMD 16 +#endif +#elif defined __AVX512VL__ // C++ "512y" AVX512 with 256 width (256-bit ie 32-byte): 4 (DOUBLE) or 8 (FLOAT) [gcc DEFAULT] +#ifdef MGONGPU_FPTYPE_DOUBLE +#define MGONGPU_CPPSIMD 4 +#else +#define MGONGPU_CPPSIMD 8 +#endif +#elif defined __AVX2__ // C++ "avx2" AVX2 (256-bit ie 32-byte): 4 (DOUBLE) or 8 (FLOAT) [clang DEFAULT] +#ifdef MGONGPU_FPTYPE_DOUBLE +#define MGONGPU_CPPSIMD 4 +#else +#define MGONGPU_CPPSIMD 8 +#endif +#elif defined __SSE4_2__ // C++ "sse4" SSE4.2 (128-bit ie 16-byte): 2 (DOUBLE) or 4 (FLOAT) [Power9 and ARM default] +#ifdef MGONGPU_FPTYPE_DOUBLE +#define MGONGPU_CPPSIMD 2 +#else +#define MGONGPU_CPPSIMD 4 +#endif +#else // C++ "none" i.e. 
no SIMD +#undef MGONGPU_CPPSIMD +#endif + +// Cuda nsight compute (ncu) debug: add dummy lines to ease SASS program flow navigation +// Arguments (not used so far): text is __FUNCTION__, code is 0 (start) or 1 (end) +#if defined __CUDACC__ && defined MGONGPU_NSIGHT_DEBUG /* clang-format off */ +#define mgDebugDeclare() __shared__ float mgDebugCounter[mgOnGpu::ntpbMAX]; +#define mgDebugInitialise() { mgDebugCounter[threadIdx.x] = 0; } +#define mgDebug( code, text ) { mgDebugCounter[threadIdx.x] += 1; } +#define mgDebugFinalise() { if ( blockIdx.x == 0 && threadIdx.x == 0 ) printf( "MGDEBUG: counter=%f\n", mgDebugCounter[threadIdx.x] ); } +#else +#define mgDebugDeclare() /*noop*/ +#define mgDebugInitialise() { /*noop*/ } +#define mgDebug( code, text ) { /*noop*/ } +#define mgDebugFinalise() { /*noop*/ } +#endif /* clang-format on */ + +// Define empty CUDA declaration specifiers for C++ +#ifndef __CUDACC__ +#define __global__ +#define __host__ +#define __device__ +#endif + +// For SANITY CHECKS: check that neppR, neppM, neppV... are powers of two (https://stackoverflow.com/a/108360) +inline constexpr bool +ispoweroftwo( int n ) +{ + return ( n > 0 ) && !( n & ( n - 1 ) ); +} + +// Compiler version support (#96): require nvcc from CUDA >= 11.2, e.g. to use C++17 (see #333) +#ifdef __NVCC__ +#if( __CUDACC_VER_MAJOR__ < 11 ) || ( __CUDACC_VER_MAJOR__ == 11 && __CUDACC_VER_MINOR__ < 2 ) +#error Unsupported CUDA version: please use CUDA >= 11.2 +#endif +#endif + +// Compiler version support (#96): require clang >= 11 +#if defined __clang__ +#if( __clang_major__ < 11 ) +#error Unsupported clang version: please use clang >= 11 +#endif +// Compiler version support (#96): require gcc >= 9.3, e.g. for some OMP issues (see #269) +// [NB skip this check for the gcc toolchain below clang or icx (TEMPORARY? 
#355)] +#elif defined __GNUC__ +#if( __GNUC__ < 9 ) || ( __GNUC__ == 9 && __GNUC_MINOR__ < 3 ) +#error Unsupported gcc version: please gcc >= 9.3 +#endif +#endif + +#endif // MGONGPUCONFIG_H diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/src/mgOnGpuCxtypes.h b/epochX/cudacpp/smeft_gg_tttt.sa/src/mgOnGpuCxtypes.h new file mode 100644 index 0000000000..caff927311 --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/src/mgOnGpuCxtypes.h @@ -0,0 +1,633 @@ +#ifndef MGONGPUCXTYPES_H +#define MGONGPUCXTYPES_H 1 + +#include "mgOnGpuConfig.h" + +#include "mgOnGpuFptypes.h" + +#include + +//========================================================================== +// COMPLEX TYPES: (PLATFORM-SPECIFIC) HEADERS +//========================================================================== + +#include + +// Complex type in cuda: thrust or cucomplex or cxsmpl +#ifdef __CUDACC__ +#if defined MGONGPU_CUCXTYPE_THRUST +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wtautological-compare" // for icpx2021/clang13 (https://stackoverflow.com/a/15864661) +#include +#pragma clang diagnostic pop +#elif defined MGONGPU_CUCXTYPE_CUCOMPLEX +#include +#elif not defined MGONGPU_CUCXTYPE_CXSMPL +#error You must CHOOSE (ONE AND) ONLY ONE of MGONGPU_CUCXTYPE_THRUST or MGONGPU_CUCXTYPE_CUCOMPLEX or MGONGPU_CUCXTYPE_CXSMPL +#endif +#else +// Complex type in c++: std::complex or cxsmpl +#if defined MGONGPU_CPPCXTYPE_STDCOMPLEX +#include +#elif not defined MGONGPU_CPPCXTYPE_CXSMPL +#error You must CHOOSE (ONE AND) ONLY ONE of MGONGPU_CPPCXTYPE_STDCOMPLEX or MGONGPU_CPPCXTYPE_CXSMPL +#endif +#endif + +//========================================================================== +// COMPLEX TYPES: SIMPLE COMPLEX CLASS (cxsmpl) +//========================================================================== + +namespace mgOnGpu /* clang-format off */ +{ + // --- Type definition (simple complex type derived from cxtype_v) + template + class cxsmpl + { + public: + __host__ __device__ constexpr cxsmpl() : m_real( 0 ), m_imag( 0 ) {} + cxsmpl( const cxsmpl& ) = default; + cxsmpl( cxsmpl&& ) = default; + __host__ __device__ constexpr cxsmpl( const FP& r, const FP& i = 0 ) : m_real( r ), m_imag( i ) {} + __host__ __device__ constexpr cxsmpl( const std::complex& c ) : m_real( c.real() ), m_imag( c.imag() ) {} + cxsmpl& operator=( const cxsmpl& ) = default; + cxsmpl& operator=( cxsmpl&& ) = default; + __host__ __device__ constexpr cxsmpl& operator+=( const cxsmpl& c ) { m_real += c.real(); m_imag += c.imag(); return *this; } + __host__ __device__ constexpr cxsmpl& operator-=( const cxsmpl& c ) { m_real -= c.real(); m_imag -= c.imag(); return *this; } + __host__ __device__ constexpr const FP& real() const { return m_real; } + __host__ __device__ constexpr const FP& imag() const { return m_imag; } + //constexpr operator std::complex() const { return std::complex( m_real, m_imag ); } // cxsmpl to std::complex (float-to-float or double-to-double) + private: + FP m_real, m_imag; // RI + }; + + template + inline __host__ __device__ cxsmpl // (NB: cannot be constexpr as a constexpr function cannot have a nonliteral return type "mgOnGpu::cxsmpl") + conj( const cxsmpl& c ) + { + return cxsmpl( c.real(), -c.imag() ); + } +} /* clang-format on */ + +// Expose the cxsmpl class outside the namespace +using mgOnGpu::cxsmpl; + +// Printout to stream for user defined types +template +inline __host__ __device__ std::ostream& +operator<<( std::ostream& out, const cxsmpl& c ) +{ + out << std::complex( c.real(), c.imag() ); + return out; +} + 
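+// Hypothetical usage sketch (illustration only, not part of the generated code): cxsmpl is a minimal
+// std::complex-like value type usable in both host and device code, e.g.
+//   cxsmpl<double> a( 1., 2. );   // a = 1 + 2i
+//   cxsmpl<double> b = conj( a ); // b = 1 - 2i (free function defined above)
+//   a += b;                       // a = 2 + 0i (member operator+= defined above)
+//   std::cout << a << std::endl;  // prints "(2,0)" via the operator<< defined above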
+// Operators for cxsmpl +template +inline __host__ __device__ constexpr cxsmpl +operator+( const cxsmpl a ) +{ + return a; +} + +template +inline __host__ __device__ constexpr cxsmpl +operator-( const cxsmpl& a ) +{ + return cxsmpl( -a.real(), -a.imag() ); +} + +template +inline __host__ __device__ constexpr cxsmpl +operator+( const cxsmpl& a, const cxsmpl& b ) +{ + return cxsmpl( a.real() + b.real(), a.imag() + b.imag() ); +} + +template +inline __host__ __device__ constexpr cxsmpl +operator+( const FP& a, const cxsmpl& b ) +{ + return cxsmpl( a, 0 ) + b; +} + +template +inline __host__ __device__ constexpr cxsmpl +operator-( const cxsmpl& a, const cxsmpl& b ) +{ + return cxsmpl( a.real() - b.real(), a.imag() - b.imag() ); +} + +template +inline __host__ __device__ constexpr cxsmpl +operator-( const FP& a, const cxsmpl& b ) +{ + return cxsmpl( a, 0 ) - b; +} + +template +inline __host__ __device__ constexpr cxsmpl +operator*( const cxsmpl& a, const cxsmpl& b ) +{ + return cxsmpl( a.real() * b.real() - a.imag() * b.imag(), a.imag() * b.real() + a.real() * b.imag() ); +} + +template +inline __host__ __device__ constexpr cxsmpl +operator*( const FP& a, const cxsmpl& b ) +{ + return cxsmpl( a, 0 ) * b; +} + +inline __host__ __device__ constexpr cxsmpl +operator*( const double& a, const cxsmpl& b ) +{ + return cxsmpl( a, 0 ) * b; +} + +template +inline __host__ __device__ constexpr cxsmpl +operator/( const cxsmpl& a, const cxsmpl& b ) +{ + FP bnorm = b.real() * b.real() + b.imag() * b.imag(); + return cxsmpl( ( a.real() * b.real() + a.imag() * b.imag() ) / bnorm, + ( a.imag() * b.real() - a.real() * b.imag() ) / bnorm ); +} + +template +inline __host__ __device__ constexpr cxsmpl +operator/( const FP& a, const cxsmpl& b ) +{ + return cxsmpl( a, 0 ) / b; +} + +template +inline __host__ __device__ constexpr cxsmpl +operator+( const cxsmpl& a, const FP& b ) +{ + return a + cxsmpl( b, 0 ); +} + +template +inline __host__ __device__ constexpr cxsmpl +operator-( const cxsmpl& a, const FP& b ) +{ + return a - cxsmpl( b, 0 ); +} + +template +inline __host__ __device__ constexpr cxsmpl +operator*( const cxsmpl& a, const FP& b ) +{ + return a * cxsmpl( b, 0 ); +} + +template +inline __host__ __device__ constexpr cxsmpl +operator/( const cxsmpl& a, const FP& b ) +{ + return a / cxsmpl( b, 0 ); +} + +//========================================================================== +// COMPLEX TYPES: (PLATFORM-SPECIFIC) TYPEDEFS +//========================================================================== + +namespace mgOnGpu +{ + + // --- Type definitions (complex type: cxtype) +#ifdef __CUDACC__ // cuda +#if defined MGONGPU_CUCXTYPE_THRUST + typedef thrust::complex cxtype; +#elif defined MGONGPU_CUCXTYPE_CUCOMPLEX +#if defined MGONGPU_FPTYPE_DOUBLE + typedef cuDoubleComplex cxtype; +#elif defined MGONGPU_FPTYPE_FLOAT + typedef cuFloatComplex cxtype; +#endif +#else + typedef cxsmpl cxtype; +#endif +#else // c++ +#if defined MGONGPU_CPPCXTYPE_STDCOMPLEX + typedef std::complex cxtype; +#else + typedef cxsmpl cxtype; +#endif +#endif + + // The number of floating point types in a complex type (real, imaginary) + constexpr int nx2 = 2; + + // SANITY CHECK: memory access may be based on casts of fptype[2] to cxtype (e.g. 
for wavefunctions) + static_assert( sizeof( cxtype ) == nx2 * sizeof( fptype ), "sizeof(cxtype) is not 2*sizeof(fptype)" ); +} + +// Expose typedefs and operators outside the namespace +using mgOnGpu::cxtype; + +//========================================================================== +// COMPLEX TYPES: (PLATFORM-SPECIFIC) FUNCTIONS AND OPERATORS +//========================================================================== + +#if defined MGONGPU_CUCXTYPE_CXSMPL or defined MGONGPU_CPPCXTYPE_CXSMPL + +//------------------------------ +// CUDA or C++ - using cxsmpl +//------------------------------ + +inline __host__ __device__ cxtype +cxmake( const fptype& r, const fptype& i ) +{ + return cxtype( r, i ); // cxsmpl constructor +} + +inline __host__ __device__ fptype +cxreal( const cxtype& c ) +{ + return c.real(); // cxsmpl::real() +} + +inline __host__ __device__ fptype +cximag( const cxtype& c ) +{ + return c.imag(); // cxsmpl::imag() +} + +inline __host__ __device__ cxtype +cxconj( const cxtype& c ) +{ + return conj( c ); // conj( cxsmpl ) +} + +inline __host__ cxtype // NOT __device__ +cxmake( const std::complex& c ) // std::complex to cxsmpl (float-to-float or float-to-double) +{ + return cxmake( c.real(), c.imag() ); +} + +inline __host__ cxtype // NOT __device__ +cxmake( const std::complex& c ) // std::complex to cxsmpl (double-to-float or double-to-double) +{ + return cxmake( c.real(), c.imag() ); +} + +#endif // #if defined MGONGPU_CUCXTYPE_CXSMPL or defined MGONGPU_CPPCXTYPE_CXSMPL + +//========================================================================== + +#if defined __CUDACC__ and defined MGONGPU_CUCXTYPE_THRUST // cuda + thrust + +//------------------------------ +// CUDA - using thrust::complex +//------------------------------ + +inline __host__ __device__ cxtype +cxmake( const fptype& r, const fptype& i ) +{ + return cxtype( r, i ); // thrust::complex constructor +} + +inline __host__ __device__ fptype +cxreal( const cxtype& c ) +{ + return c.real(); // thrust::complex::real() +} + +inline __host__ __device__ fptype +cximag( const cxtype& c ) +{ + return c.imag(); // thrust::complex::imag() +} + +inline __host__ __device__ cxtype +cxconj( const cxtype& c ) +{ + return conj( c ); // conj( thrust::complex ) +} + +inline __host__ __device__ const cxtype& +cxmake( const cxtype& c ) +{ + return c; +} + +#endif // #if defined __CUDACC__ and defined MGONGPU_CUCXTYPE_THRUST + +//========================================================================== + +#if defined __CUDACC__ and defined MGONGPU_CUCXTYPE_CUCOMPLEX // cuda + cucomplex + +//------------------------------ +// CUDA - using cuComplex +//------------------------------ + +#if defined MGONGPU_FPTYPE_DOUBLE // cuda + cucomplex + double + +//+++++++++++++++++++++++++ +// cuDoubleComplex ONLY +//+++++++++++++++++++++++++ + +inline __host__ __device__ cxtype +cxmake( const fptype& r, const fptype& i ) +{ + return make_cuDoubleComplex( r, i ); +} + +inline __host__ __device__ fptype +cxreal( const cxtype& c ) +{ + return cuCreal( c ); // returns by value +} + +inline __host__ __device__ fptype +cximag( const cxtype& c ) +{ + return cuCimag( c ); // returns by value +} + +inline __host__ __device__ cxtype +operator+( const cxtype& a, const cxtype& b ) +{ + return cuCadd( a, b ); +} + +inline __host__ __device__ cxtype& +operator+=( cxtype& a, const cxtype& b ) +{ + a = cuCadd( a, b ); + return a; +} + +inline __host__ __device__ cxtype +operator-( const cxtype& a, const cxtype& b ) +{ + return cuCsub( a, b ); +} + 
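+// Hypothetical usage note (illustration only): together with the operators below, these wrappers let kernel
+// code write natural complex arithmetic on cuDoubleComplex values, e.g. "cxtype z = cxmake( 1., 2. ); z = z * z + z;"
+// instead of spelling out nested cuCmul/cuCadd calls.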
+inline __host__ __device__ cxtype& +operator-=( cxtype& a, const cxtype& b ) +{ + a = cuCsub( a, b ); + return a; +} + +inline __host__ __device__ cxtype +operator*( const cxtype& a, const cxtype& b ) +{ + return cuCmul( a, b ); +} + +inline __host__ __device__ cxtype +operator/( const cxtype& a, const cxtype& b ) +{ + return cuCdiv( a, b ); +} + +#elif defined MGONGPU_FPTYPE_FLOAT // cuda + cucomplex + float + +//+++++++++++++++++++++++++ +// cuFloatComplex ONLY +//+++++++++++++++++++++++++ + +inline __host__ __device__ cxtype +cxmake( const fptype& r, const fptype& i ) +{ + return make_cuFloatComplex( r, i ); +} + +inline __host__ __device__ fptype +cxreal( const cxtype& c ) +{ + return cuCrealf( c ); // returns by value +} + +inline __host__ __device__ fptype +cximag( const cxtype& c ) +{ + return cuCimagf( c ); // returns by value +} + +inline __host__ __device__ cxtype +operator+( const cxtype& a, const cxtype& b ) +{ + return cuCaddf( a, b ); +} + +inline __host__ __device__ cxtype& +operator+=( cxtype& a, const cxtype& b ) +{ + a = cuCaddf( a, b ); + return a; +} + +inline __host__ __device__ cxtype +operator-( const cxtype& a, const cxtype& b ) +{ + return cuCsubf( a, b ); +} + +inline __host__ __device__ cxtype& +operator-=( cxtype& a, const cxtype& b ) +{ + a = cuCsubf( a, b ); + return a; +} + +inline __host__ __device__ cxtype +operator*( const cxtype& a, const cxtype& b ) +{ + return cuCmulf( a, b ); +} + +inline __host__ __device__ cxtype +operator/( const cxtype& a, const cxtype& b ) +{ + return cuCdivf( a, b ); +} + +inline __host__ cxtype // NOT __device__ +cxmake( const std::complex& c ) // std::complex to cucomplex (cast double-to-float) +{ + return cxmake( (fptype)c.real(), (fptype)c.imag() ); +} + +#endif + +//+++++++++++++++++++++++++ +// cuDoubleComplex OR +// cuFloatComplex +//+++++++++++++++++++++++++ + +inline __host__ __device__ cxtype +operator+( const cxtype a ) +{ + return a; +} + +inline __host__ __device__ cxtype +operator-( const cxtype& a ) +{ + return cxmake( -cxreal( a ), -cximag( a ) ); +} + +inline __host__ __device__ cxtype +operator+( const fptype& a, const cxtype& b ) +{ + return cxmake( a, 0 ) + b; +} + +inline __host__ __device__ cxtype +operator-( const fptype& a, const cxtype& b ) +{ + return cxmake( a, 0 ) - b; +} + +inline __host__ __device__ cxtype +operator*( const fptype& a, const cxtype& b ) +{ + return cxmake( a, 0 ) * b; +} + +inline __host__ __device__ cxtype +operator/( const fptype& a, const cxtype& b ) +{ + return cxmake( a, 0 ) / b; +} + +inline __host__ __device__ cxtype +operator+( const cxtype& a, const fptype& b ) +{ + return a + cxmake( b, 0 ); +} + +inline __host__ __device__ cxtype +operator-( const cxtype& a, const fptype& b ) +{ + return a - cxmake( b, 0 ); +} + +inline __host__ __device__ cxtype +operator*( const cxtype& a, const fptype& b ) +{ + return a * cxmake( b, 0 ); +} + +inline __host__ __device__ cxtype +operator/( const cxtype& a, const fptype& b ) +{ + return a / cxmake( b, 0 ); +} + +inline __host__ __device__ cxtype +cxconj( const cxtype& c ) +{ + return cxmake( cxreal( c ), -cximag( c ) ); +} + +inline __host__ cxtype // NOT __device__ +cxmake( const std::complex& c ) // std::complex to cucomplex (float-to-float or double-to-double) +{ + return cxmake( c.real(), c.imag() ); +} + +#endif // #if defined __CUDACC__ and defined MGONGPU_CUCXTYPE_CUCOMPLEX + +//========================================================================== + +#if not defined __CUDACC__ and defined MGONGPU_CPPCXTYPE_STDCOMPLEX // c++ + 
stdcomplex + +//------------------------------ +// C++ - using std::complex +//------------------------------ + +inline cxtype +cxmake( const fptype& r, const fptype& i ) +{ + return cxtype( r, i ); // std::complex constructor +} + +inline fptype +cxreal( const cxtype& c ) +{ + return c.real(); // std::complex::real() +} + +inline fptype +cximag( const cxtype& c ) +{ + return c.imag(); // std::complex::imag() +} + +inline cxtype +cxconj( const cxtype& c ) +{ + return conj( c ); // conj( std::complex ) +} + +inline const cxtype& +cxmake( const cxtype& c ) // std::complex to std::complex (float-to-float or double-to-double) +{ + return c; +} + +#if defined MGONGPU_FPTYPE_FLOAT +inline cxtype +cxmake( const std::complex& c ) // std::complex to std::complex (cast double-to-float) +{ + return cxmake( (fptype)c.real(), (fptype)c.imag() ); +} +#endif + +#endif // #if not defined __CUDACC__ and defined MGONGPU_CPPCXTYPE_STDCOMPLEX + +//========================================================================== + +inline __host__ __device__ const cxtype +cxmake( const cxsmpl& c ) // cxsmpl to cxtype (float-to-float or float-to-double) +{ + return cxmake( c.real(), c.imag() ); +} + +inline __host__ __device__ const cxtype +cxmake( const cxsmpl& c ) // cxsmpl to cxtype (double-to-float or double-to-double) +{ + return cxmake( c.real(), c.imag() ); +} + +//========================================================================== +// COMPLEX TYPES: WRAPPER OVER RI FLOATING POINT PAIR (cxtype_ref) +//========================================================================== + +namespace mgOnGpu /* clang-format off */ +{ + // The cxtype_ref class (a non-const reference to two fp variables) was originally designed for cxtype_v::operator[] + // It used to be included in the code only when MGONGPU_HAS_CPPCXTYPEV_BRK (originally MGONGPU_HAS_CPPCXTYPE_REF) is defined + // It is now always included in the code because it is needed also to access an fptype wavefunction buffer as a cxtype + class cxtype_ref + { + public: + cxtype_ref() = delete; + cxtype_ref( const cxtype_ref& ) = delete; + cxtype_ref( cxtype_ref&& ) = default; // copy refs + __host__ __device__ cxtype_ref( fptype& r, fptype& i ) : m_preal( &r ), m_pimag( &i ) {} // copy refs + cxtype_ref& operator=( const cxtype_ref& ) = delete; + //__host__ __device__ cxtype_ref& operator=( cxtype_ref&& c ) {...} // REMOVED! Should copy refs or copy values? 
No longer needed in cxternary + __host__ __device__ cxtype_ref& operator=( const cxtype& c ) { *m_preal = cxreal( c ); *m_pimag = cximag( c ); return *this; } // copy values + __host__ __device__ operator cxtype() const { return cxmake( *m_preal, *m_pimag ); } + private: + fptype *m_preal, *m_pimag; // RI + }; +} /* clang-format on */ + +// Printout to stream for user defined types +inline __host__ __device__ std::ostream& +operator<<( std::ostream& out, const mgOnGpu::cxtype_ref& c ) +{ + out << (cxtype)c; + return out; +} + +//========================================================================== + +#endif // MGONGPUCXTYPES_H diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/src/mgOnGpuFptypes.h b/epochX/cudacpp/smeft_gg_tttt.sa/src/mgOnGpuFptypes.h new file mode 100644 index 0000000000..b278275f80 --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/src/mgOnGpuFptypes.h @@ -0,0 +1,87 @@ +#ifndef MGONGPUFPTYPES_H +#define MGONGPUFPTYPES_H 1 + +#include "mgOnGpuConfig.h" + +#include +#include + +//========================================================================== + +#ifdef __CUDACC__ // cuda + +//------------------------------ +// Floating point types - Cuda +//------------------------------ + +/* +inline __host__ __device__ fptype +fpmax( const fptype& a, const fptype& b ) +{ + return max( a, b ); +} + +inline __host__ __device__ fptype +fpmin( const fptype& a, const fptype& b ) +{ + return min( a, b ); +} +*/ + +inline __host__ __device__ const fptype& +fpmax( const fptype& a, const fptype& b ) +{ + return ( ( b < a ) ? a : b ); +} + +inline __host__ __device__ const fptype& +fpmin( const fptype& a, const fptype& b ) +{ + return ( ( a < b ) ? a : b ); +} + +inline __host__ __device__ fptype +fpsqrt( const fptype& f ) +{ +#if defined MGONGPU_FPTYPE_FLOAT + // See https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH__SINGLE.html + return sqrtf( f ); +#else + // See https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH__DOUBLE.html + return sqrt( f ); +#endif +} + +#endif // #ifdef __CUDACC__ + +//========================================================================== + +#ifndef __CUDACC__ + +//------------------------------ +// Floating point types - C++ +//------------------------------ + +inline const fptype& +fpmax( const fptype& a, const fptype& b ) +{ + return std::max( a, b ); +} + +inline const fptype& +fpmin( const fptype& a, const fptype& b ) +{ + return std::min( a, b ); +} + +inline fptype +fpsqrt( const fptype& f ) +{ + return std::sqrt( f ); +} + +#endif // #ifndef __CUDACC__ + +//========================================================================== + +#endif // MGONGPUFPTYPES_H diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/src/mgOnGpuVectors.h b/epochX/cudacpp/smeft_gg_tttt.sa/src/mgOnGpuVectors.h new file mode 100644 index 0000000000..0dd4c69bd4 --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/src/mgOnGpuVectors.h @@ -0,0 +1,829 @@ +#ifndef MGONGPUVECTORS_H +#define MGONGPUVECTORS_H 1 + +#include "mgOnGpuCxtypes.h" +#include "mgOnGpuFptypes.h" + +#include + +//========================================================================== + +//------------------------------ +// Vector types - C++ +//------------------------------ + +#ifdef __clang__ +// If set: return a pair of (fptype&, fptype&) by non-const reference in cxtype_v::operator[] +// This is forbidden in clang ("non-const reference cannot bind to vector element") +// See also https://stackoverflow.com/questions/26554829 +//#define MGONGPU_HAS_CPPCXTYPEV_BRK 1 // clang test (compilation 
fails also on clang 12.0, issue #182) +#undef MGONGPU_HAS_CPPCXTYPEV_BRK // clang default +#elif defined __INTEL_COMPILER +//#define MGONGPU_HAS_CPPCXTYPEV_BRK 1 // icc default? +#undef MGONGPU_HAS_CPPCXTYPEV_BRK // icc test +#else +#define MGONGPU_HAS_CPPCXTYPEV_BRK 1 // gcc default +//#undef MGONGPU_HAS_CPPCXTYPEV_BRK // gcc test (very slightly slower? issue #172) +#endif + +namespace mgOnGpu /* clang-format off */ +{ +#ifdef MGONGPU_CPPSIMD + + const int neppV = MGONGPU_CPPSIMD; + + // SANITY CHECK: cppAlign must be a multiple of neppV * sizeof(fptype) + static_assert( mgOnGpu::cppAlign % ( neppV * sizeof( fptype ) ) == 0 ); + + // SANITY CHECK: check that neppV is a power of two + static_assert( ispoweroftwo( neppV ), "neppV is not a power of 2" ); + + // --- Type definition (using vector compiler extensions: need -march=...) + // For gcc: https://gcc.gnu.org/onlinedocs/gcc/Vector-Extensions.html + // For clang: https://clang.llvm.org/docs/LanguageExtensions.html#vectors-and-extended-vectors +#ifdef __clang__ + typedef fptype fptype_v __attribute__( ( ext_vector_type( neppV ) ) ); // RRRR +#else + typedef fptype fptype_v __attribute__( ( vector_size( neppV * sizeof( fptype ) ) ) ); // RRRR +#endif + + // Mixed fptypes #537: float for color algebra and double elsewhere +#if defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT + const int neppV2 = MGONGPU_CPPSIMD * 2; + static_assert( mgOnGpu::cppAlign % ( neppV2 * sizeof( fptype2 ) ) == 0 ); + static_assert( ispoweroftwo( neppV2 ), "neppV2 is not a power of 2" ); +#ifdef __clang__ + typedef fptype2 fptype2_v __attribute__( ( ext_vector_type( neppV2 ) ) ); // RRRRRRRR +#else + typedef fptype2 fptype2_v __attribute__( ( vector_size( neppV2 * sizeof( fptype2 ) ) ) ); // RRRRRRRR +#endif +#else + typedef fptype_v fptype2_v; +#endif + + // --- Type definition (using vector compiler extensions: need -march=...) + class cxtype_v // no need for "class alignas(2*sizeof(fptype_v)) cxtype_v" + { + public: + // Array initialization: zero-out as "{0}" (C and C++) or as "{}" (C++ only) + // See https://en.cppreference.com/w/c/language/array_initialization#Notes + cxtype_v() : m_real{ 0 }, m_imag{ 0 } {} // RRRR=0000 IIII=0000 + cxtype_v( const cxtype_v& ) = default; + cxtype_v( cxtype_v&& ) = default; + cxtype_v( const fptype_v& r, const fptype_v& i ) : m_real( r ), m_imag( i ) {} + cxtype_v( const fptype_v& r ) : m_real( r ), m_imag{ 0 } {} // IIII=0000 + cxtype_v& operator=( const cxtype_v& ) = default; + cxtype_v& operator=( cxtype_v&& ) = default; + cxtype_v& operator+=( const cxtype_v& c ) { m_real += c.real(); m_imag += c.imag(); return *this; } + cxtype_v& operator-=( const cxtype_v& c ) { m_real -= c.real(); m_imag -= c.imag(); return *this; } +#ifdef MGONGPU_HAS_CPPCXTYPEV_BRK + // NB: THIS IS THE FUNDAMENTAL DIFFERENCE BETWEEN MGONGPU_HAS_CPPCXTYPEV_BRK DEFINED AND NOT DEFINED + // NB: the alternative "clang" implementation is simpler: it simply does not have any bracket operator[] + // NB: ** do NOT implement operator[] to return a value: it does not fail the build (why?) and gives unexpected results! ** + cxtype_ref operator[]( size_t i ) const { return cxtype_ref( m_real[i], m_imag[i] ); } +#endif + const fptype_v& real() const { return m_real; } + const fptype_v& imag() const { return m_imag; } + private: + fptype_v m_real, m_imag; // RRRRIIII + }; + + // --- Type definition (using vector compiler extensions: need -march=...) 
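  //--------------------------------------------------------------------------
  // [Editor's note] cxtype_v above stores one SIMD vector of real parts and
  // one of imaginary parts; on gcc its operator[] returns a cxtype_ref proxy,
  // so scalar-looking code can read and write a single complex "lane" of the
  // vector. The standalone sketch below illustrates the same proxy idea with a
  // plain vector_size extension type. It assumes a gcc-style compiler and the
  // demo_* names are hypothetical (not part of this codebase); clang rejects
  // binding references to vector elements, which is exactly why
  // MGONGPU_HAS_CPPCXTYPEV_BRK is left undefined there.
#if 0 // editor's illustrative sketch only, never compiled here
#include <cstdio>
typedef double demo_v __attribute__( ( vector_size( 4 * sizeof( double ) ) ) ); // 4 doubles per SIMD vector
struct demo_cxref // minimal analogue of cxtype_ref: references into one lane of two vectors
{
  double &r, &i;
};
int main()
{
  demo_v re = { 0, 0, 0, 0 }, im = { 0, 0, 0, 0 };
  demo_cxref lane2{ re[2], im[2] }; // gcc allows binding a reference to a vector element
  lane2.r = 1.5;                    // scalar-style writes land directly in the SIMD vectors
  lane2.i = -2.5;
  printf( "re[2]=%f im[2]=%f\n", re[2], im[2] );
  return 0;
}
#endif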
+#ifdef __clang__ // https://clang.llvm.org/docs/LanguageExtensions.html#vectors-and-extended-vectors +#if defined MGONGPU_FPTYPE_DOUBLE + typedef long int bool_v __attribute__( ( ext_vector_type( neppV ) ) ); // bbbb +#elif defined MGONGPU_FPTYPE_FLOAT + typedef int bool_v __attribute__( ( ext_vector_type( neppV ) ) ); // bbbb +#endif +#else // gcc +#if defined MGONGPU_FPTYPE_DOUBLE + typedef long int bool_v __attribute__( ( vector_size( neppV * sizeof( long int ) ) ) ); // bbbb +#elif defined MGONGPU_FPTYPE_FLOAT + typedef int bool_v __attribute__( ( vector_size( neppV * sizeof( int ) ) ) ); // bbbb +#endif +#endif + +#else // i.e #ifndef MGONGPU_CPPSIMD (this includes #ifdef __CUDACC__) + + const int neppV = 1; + +#endif // #ifdef MGONGPU_CPPSIMD + +} /* clang-format on */ + +//-------------------------------------------------------------------------- + +// Expose typedefs outside the namespace +using mgOnGpu::neppV; +#ifdef MGONGPU_CPPSIMD +using mgOnGpu::fptype_v; +using mgOnGpu::fptype2_v; +using mgOnGpu::cxtype_v; +using mgOnGpu::bool_v; +#endif + +//-------------------------------------------------------------------------- + +#ifndef __CUDACC__ + +// Printout to stream for user defined types + +#ifndef MGONGPU_CPPCXTYPE_CXSMPL // operator<< for cxsmpl has already been defined! +inline std::ostream& +operator<<( std::ostream& out, const cxtype& c ) +{ + out << "[" << cxreal( c ) << "," << cximag( c ) << "]"; + //out << cxreal(c) << "+i" << cximag(c); + return out; +} +#endif + +/* +#ifdef MGONGPU_CPPSIMD +inline std::ostream& +operator<<( std::ostream& out, const bool_v& v ) +{ + out << "{ " << v[0]; + for ( int i=1; i +#include +#include + +// Simplified rambo version for 2 to N (with N>=2) processes with massless particles +#ifdef __CUDACC__ +namespace mg5amcGpu +#else +namespace mg5amcCpu +#endif +{ + using mgOnGpu::np4; + using mgOnGpu::npari; + using mgOnGpu::nparf; + using mgOnGpu::npar; + + //-------------------------------------------------------------------------- + + // Fill in the momenta of the initial particles + // [NB: the output buffer includes both initial and final momenta, but only initial momenta are filled in] + template + __host__ __device__ void + ramboGetMomentaInitial( const fptype energy, // input: energy + fptype* momenta ) // output: momenta for one event or for a set of events + { + const fptype energy1 = energy / 2; + const fptype energy2 = energy / 2; + const fptype mom = energy / 2; + M_ACCESS::kernelAccessIp4Ipar( momenta, 0, 0 ) = energy1; + M_ACCESS::kernelAccessIp4Ipar( momenta, 1, 0 ) = 0; + M_ACCESS::kernelAccessIp4Ipar( momenta, 2, 0 ) = 0; + M_ACCESS::kernelAccessIp4Ipar( momenta, 3, 0 ) = mom; + M_ACCESS::kernelAccessIp4Ipar( momenta, 0, 1 ) = energy2; + M_ACCESS::kernelAccessIp4Ipar( momenta, 1, 1 ) = 0; + M_ACCESS::kernelAccessIp4Ipar( momenta, 2, 1 ) = 0; + M_ACCESS::kernelAccessIp4Ipar( momenta, 3, 1 ) = -mom; + } + + //-------------------------------------------------------------------------- + + // Fill in the momenta of the final particles using the RAMBO algorithm + // [NB: the output buffer includes both initial and final momenta, but only initial momenta are filled in] + template + __host__ __device__ void + ramboGetMomentaFinal( const fptype energy, // input: energy + const fptype* rndmom, // input: random numbers in [0,1] for one event or for a set of events + fptype* momenta, // output: momenta for one event or for a set of events + fptype* wgts ) // output: weights for one event or for a set of events + { + 
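    // [Editor's note] ramboGetMomentaInitial above fills only the two beams,
    // p1 = ( E/2, 0, 0, +E/2 ) and p2 = ( E/2, 0, 0, -E/2 ), i.e. massless
    // initial momenta back to back along z whose energies add up to the
    // requested sqrt(s) = E. The memory layout is delegated entirely to the
    // accessor class whose static kernelAccessIp4Ipar( momenta, i4, ipar )
    // must return a writable fptype reference. A sketch with a hypothetical
    // plain-AOS accessor for a single event follows (demo_* names are not
    // part of the codebase; it assumes ramboGetMomentaInitial takes a single
    // accessor template parameter, M_ACCESS, as used in its body):
#if 0 // editor's illustrative sketch only, never compiled here
    struct demo_AosAccess
    {
      // one event, np4 = 4 components per particle, laid out as momenta[ipar * np4 + i4]
      static fptype& kernelAccessIp4Ipar( fptype* momenta, const int i4, const int ipar ) { return momenta[ipar * np4 + i4]; }
    };
    fptype demo_momenta[np4 * npar] = { 0 };                         // E, px, py, pz for each of the npar particles
    ramboGetMomentaInitial<demo_AosAccess>( 13000., demo_momenta );  // beams at sqrt(s) = 13 TeV
#endif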
/**************************************************************************** + * rambo * + * ra(ndom) m(omenta) b(eautifully) o(rganized) * + * * + * a democratic multi-particle phase space generator * + * authors: s.d. ellis, r. kleiss, w.j. stirling * + * this is version 1.0 - written by r. kleiss * + * -- adjusted by hans kuijf, weights are logarithmic (1990-08-20) * + * -- adjusted by madgraph@sheffield_gpu_hackathon team (2020-07-29) * + * * + ****************************************************************************/ + + // output weight + fptype& wt = W_ACCESS::kernelAccess( wgts ); + + // AV special case nparf==1 (issue #358) + if constexpr( nparf == 1 ) + { + static bool first = true; + if( first ) + { +#ifdef __CUDACC__ + if constexpr( M_ACCESS::isOnDevice() ) // avoid + { + const int ievt0 = 0; + const int ievt = blockDim.x * blockIdx.x + threadIdx.x; // index of event (thread) in grid + if( ievt == ievt0 ) + printf( "WARNING! Rambo called with 1 final particle: random numbers will be ignored\n" ); + } + else +#endif + { + printf( "WARNING! Rambo called with 1 final particle: random numbers will be ignored\n" ); + } + first = false; + } + const int iparf = 0; + for( int i4 = 0; i4 < np4; i4++ ) + { + M_ACCESS::kernelAccessIp4Ipar( momenta, i4, iparf + npari ) = 0; + for( int ipari = 0; ipari < npari; ipari++ ) + { + M_ACCESS::kernelAccessIp4Ipar( momenta, i4, iparf + npari ) += M_ACCESS::kernelAccessIp4Ipar( momenta, i4, ipari ); + } + } + wt = 1; + return; + } + + // initialization step: factorials for the phase space weight + const fptype twopi = 8. * atan( 1. ); + const fptype po2log = log( twopi / 4. ); + fptype z[nparf]; + if constexpr( nparf > 1 ) // avoid build warning on clang (related to #358) + z[1] = po2log; + for( int kpar = 2; kpar < nparf; kpar++ ) z[kpar] = z[kpar - 1] + po2log - 2. * log( fptype( kpar - 1 ) ); + for( int kpar = 2; kpar < nparf; kpar++ ) z[kpar] = ( z[kpar] - log( fptype( kpar ) ) ); + + // generate n massless momenta in infinite phase space + fptype q[nparf][np4]; + for( int iparf = 0; iparf < nparf; iparf++ ) + { + const fptype r1 = R_ACCESS::kernelAccessIp4IparfConst( rndmom, 0, iparf ); + const fptype r2 = R_ACCESS::kernelAccessIp4IparfConst( rndmom, 1, iparf ); + const fptype r3 = R_ACCESS::kernelAccessIp4IparfConst( rndmom, 2, iparf ); + const fptype r4 = R_ACCESS::kernelAccessIp4IparfConst( rndmom, 3, iparf ); + const fptype c = 2. * r1 - 1.; + const fptype s = sqrt( 1. - c * c ); + const fptype f = twopi * r2; + q[iparf][0] = -log( r3 * r4 ); + q[iparf][3] = q[iparf][0] * c; + q[iparf][2] = q[iparf][0] * s * cos( f ); + q[iparf][1] = q[iparf][0] * s * sin( f ); + } + + // calculate the parameters of the conformal transformation + fptype r[np4]; + fptype b[np4 - 1]; + for( int i4 = 0; i4 < np4; i4++ ) r[i4] = 0.; + for( int iparf = 0; iparf < nparf; iparf++ ) + { + for( int i4 = 0; i4 < np4; i4++ ) r[i4] = r[i4] + q[iparf][i4]; + } + const fptype rmas = sqrt( pow( r[0], 2 ) - pow( r[3], 2 ) - pow( r[2], 2 ) - pow( r[1], 2 ) ); + for( int i4 = 1; i4 < np4; i4++ ) b[i4 - 1] = -r[i4] / rmas; + const fptype g = r[0] / rmas; + const fptype a = 1. / ( 1. + g ); + const fptype x0 = energy / rmas; + + // transform the q's conformally into the p's (i.e. 
the 'momenta') + for( int iparf = 0; iparf < nparf; iparf++ ) + { + fptype bq = b[0] * q[iparf][1] + b[1] * q[iparf][2] + b[2] * q[iparf][3]; + for( int i4 = 1; i4 < np4; i4++ ) + { + M_ACCESS::kernelAccessIp4Ipar( momenta, i4, iparf + npari ) = x0 * ( q[iparf][i4] + b[i4 - 1] * ( q[iparf][0] + a * bq ) ); + } + M_ACCESS::kernelAccessIp4Ipar( momenta, 0, iparf + npari ) = x0 * ( g * q[iparf][0] + bq ); + } + + // calculate weight (NB return log of weight) + wt = po2log; + if( nparf != 2 ) wt = ( 2. * nparf - 4. ) * log( energy ) + z[nparf - 1]; + +#ifndef __CUDACC__ + // issue warnings if weight is too small or too large + static int iwarn[5] = { 0, 0, 0, 0, 0 }; + if( wt < -180. ) + { + if( iwarn[0] <= 5 ) std::cout << "Too small wt, risk for underflow: " << wt << std::endl; + iwarn[0] = iwarn[0] + 1; + } + if( wt > 174. ) + { + if( iwarn[1] <= 5 ) std::cout << "Too large wt, risk for overflow: " << wt << std::endl; + iwarn[1] = iwarn[1] + 1; + } +#endif + + // return for weighted massless momenta + // nothing else to do in this event if all particles are massless (nm==0) + + return; + } + + //-------------------------------------------------------------------------- +} diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/src/read_slha.cc b/epochX/cudacpp/smeft_gg_tttt.sa/src/read_slha.cc new file mode 100644 index 0000000000..2934e3a476 --- /dev/null +++ b/epochX/cudacpp/smeft_gg_tttt.sa/src/read_slha.cc @@ -0,0 +1,184 @@ +#include "read_slha.h" + +#include +#include +#include +#include +#include + +void +SLHABlock::set_entry( std::vector indices, double value ) +{ + if( _entries.size() == 0 ) + _indices = indices.size(); + else if( indices.size() != _indices ) + throw "Wrong number of indices in set_entry"; + + _entries[indices] = value; +} + +double +SLHABlock::get_entry( std::vector indices, double def_val ) +{ + if( _entries.find( indices ) == _entries.end() ) + { + std::cout << "Warning: No such entry in " << _name << ", using default value " + << def_val << std::endl; + return def_val; + } + return _entries[indices]; +} + +void +SLHAReader::read_slha_file( std::string file_name, bool verbose ) +{ + std::ifstream param_card; + param_card.open( file_name.c_str(), std::ifstream::in ); + if( param_card.good() ) + { + if( verbose ) std::cout << "Opened slha file " << file_name << " for reading" << std::endl; + } + else + { + const char envpath[] = "MG5AMC_CARD_PATH"; + if( !getenv( envpath ) ) + { + std::cout << "ERROR! Card file '" << file_name << "' does not exist" + << " and environment variable '" << envpath << "' is not set" << std::endl; + throw "Error while opening param card"; + } + else + { + std::cout << "WARNING! Card file '" << file_name << "' does not exist:" + << " look for the file in directory $" << envpath << "='" << getenv( envpath ) << "'" << std::endl; + const std::string file_name2 = std::filesystem::path( getenv( envpath ) ) / std::filesystem::path( file_name ).filename(); + param_card.open( file_name2.c_str(), std::ifstream::in ); + if( param_card.good() ) + { + std::cout << "Opened slha file " << file_name2 << " for reading" << std::endl; + } + else + { + std::cout << "ERROR! 
Card file '" << file_name2 << "' does not exist" << std::endl;
+        throw "Error while opening param card";
+      }
+    }
+  }
+  char buf[200];
+  std::string line;
+  std::string block( "" );
+  while( param_card.good() )
+  {
+    param_card.getline( buf, 200 );
+    line = buf;
+    // Change to lowercase
+    transform( line.begin(), line.end(), line.begin(), (int ( * )( int ))tolower );
+    if( line != "" && line[0] != '#' )
+    {
+      if( block != "" )
+      {
+        // Look for double index blocks
+        double dindex1, dindex2;
+        double value;
+        std::stringstream linestr2( line );
+        if( linestr2 >> dindex1 >> dindex2 >> value &&
+            dindex1 == int( dindex1 ) and dindex2 == int( dindex2 ) )
+        {
+          std::vector<int> indices;
+          indices.push_back( int( dindex1 ) );
+          indices.push_back( int( dindex2 ) );
+          set_block_entry( block, indices, value );
+          // Done with this line, read next
+          continue;
+        }
+        std::stringstream linestr1( line );
+        // Look for single index blocks
+        if( linestr1 >> dindex1 >> value && dindex1 == int( dindex1 ) )
+        {
+          std::vector<int> indices;
+          indices.push_back( int( dindex1 ) );
+          set_block_entry( block, indices, value );
+          // Done with this line, read next
+          continue;
+        }
+      }
+      // Look for block
+      if( line.find( "block " ) != line.npos )
+      {
+        line = line.substr( 6 );
+        // Get rid of spaces between block and block name
+        while( line[0] == ' ' )
+          line = line.substr( 1 );
+        // Now find end of block name
+        size_t space_pos = line.find( ' ' );
+        if( space_pos != std::string::npos )
+          line = line.substr( 0, space_pos );
+        block = line;
+        continue;
+      }
+      // Look for decay
+      if( line.find( "decay " ) == 0 )
+      {
+        line = line.substr( 6 );
+        block = "";
+        std::stringstream linestr( line );
+        int pdg_code;
+        double value;
+        if( linestr >> pdg_code >> value )
+          set_block_entry( "decay", pdg_code, value );
+        else
+          std::cout << "Warning: Wrong format for decay block " << line << std::endl;
+        continue;
+      }
+    }
+  }
+  if( _blocks.size() == 0 )
+    throw "No information read from SLHA card";
+
+  param_card.close();
+}
+
+double
+SLHAReader::get_block_entry( std::string block_name, std::vector<int> indices, double def_val )
+{
+  if( _blocks.find( block_name ) == _blocks.end() )
+  {
+    std::cout << "No such block " << block_name << ", using default value "
+              << def_val << std::endl;
+    return def_val;
+  }
+  return _blocks[block_name].get_entry( indices );
+}
+
+double
+SLHAReader::get_block_entry( std::string block_name, int index, double def_val )
+{
+  std::vector<int> indices;
+  indices.push_back( index );
+  return get_block_entry( block_name, indices, def_val );
+}
+
+void
+SLHAReader::set_block_entry( std::string block_name, std::vector<int> indices, double value )
+{
+  if( _blocks.find( block_name ) == _blocks.end() )
+  {
+    SLHABlock block( block_name );
+    _blocks[block_name] = block;
+  }
+  _blocks[block_name].set_entry( indices, value );
+  /*
+  cout << "Set block " << block_name << " entry ";
+  for (int i=0;i < indices.size();i++)
+    cout << indices[i] << " ";
+  cout << "to " << _blocks[block_name].get_entry(indices) << endl;
+  */
+}
+
+void
+SLHAReader::set_block_entry( std::string block_name, int index, double value )
+{
+  std::vector<int> indices;
+  indices.push_back( index );
+  set_block_entry( block_name, indices, value );
+}
diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/src/read_slha.h b/epochX/cudacpp/smeft_gg_tttt.sa/src/read_slha.h
new file mode 100644
index 0000000000..feb8b43b5a
--- /dev/null
+++ b/epochX/cudacpp/smeft_gg_tttt.sa/src/read_slha.h
@@ -0,0 +1,41 @@
+#ifndef READ_SLHA_H
+#define READ_SLHA_H 1
+
+#include <iostream>
+#include <map>
+#include <string>
+#include <vector>
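// [Editor's note] The parser in read_slha.cc above lowercases every line and
// then recognizes three kinds of input: a "block <name>" line starts a new
// block, a "decay <pdg> <width>" line fills the special "decay" block, and
// inside a block a data line is accepted either as "<i> <j> <value>" or as
// "<i> <value>", where the indices are read as doubles and kept only if they
// are integral ( dindex == int( dindex ) ). For example, an illustrative card
// fragment such as
//
//   BLOCK SMINPUTS
//     1 1.279000e+02
//     3 1.180000e-01
//   BLOCK DEMOMIX
//     1 2 2.500000e-01
//   DECAY 6 1.350000e+00
//
// is stored with lowercased block names, so that the API declared below gives
// get_block_entry( "sminputs", 1 ) == 127.9,
// get_block_entry( "demomix", { 1, 2 } ) == 0.25 and
// get_block_entry( "decay", 6 ) == 1.35 (the optional def_val argument is
// returned, with a warning, if the block is missing).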
+
+class SLHABlock
+{
+public:
+  SLHABlock( std::string name = "" ) { _name = name; }
+  ~SLHABlock() {}
+  void set_entry( std::vector<int> indices, double value );
+  double get_entry( std::vector<int> indices, double def_val = 0 );
+  void set_name( std::string name ) { _name = name; }
+  std::string get_name() { return _name; }
+  unsigned int get_indices() { return _indices; }
+private:
+  std::string _name;
+  std::map<std::vector<int>, double> _entries;
+  unsigned int _indices;
+};
+
+class SLHAReader
+{
+public:
+  SLHAReader( std::string file_name = "", bool verbose = true )
+  {
+    if( file_name != "" ) read_slha_file( file_name, verbose );
+  }
+  void read_slha_file( std::string file_name, bool verbose );
+  double get_block_entry( std::string block_name, std::vector<int> indices, double def_val = 0 );
+  double get_block_entry( std::string block_name, int index, double def_val = 0 );
+  void set_block_entry( std::string block_name, std::vector<int> indices, double value );
+  void set_block_entry( std::string block_name, int index, double value );
+private:
+  std::map<std::string, SLHABlock> _blocks;
+};
+
+#endif
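//--------------------------------------------------------------------------
// [Editor's note] A minimal standalone sketch of how the SLHAReader API
// declared above could be exercised. The card content, the file name and the
// chosen block/index values are illustrative only (demo_* names are not part
// of the codebase), and the C-string exceptions thrown on errors are not
// caught here.
#include "read_slha.h"
#include <cstdio>
#include <fstream>
int main()
{
  // write a tiny SLHA-style card: one block with two single-index entries and one decay line
  {
    std::ofstream card( "demo_param_card.dat" );
    card << "BLOCK SMINPUTS\n"
         << "  1 1.279000e+02\n"
         << "  3 1.180000e-01\n"
         << "DECAY 6 1.350000e+00\n";
  }
  SLHAReader slha( "demo_param_card.dat", true ); // verbose: prints "Opened slha file ..."
  const double aEWM1 = slha.get_block_entry( "sminputs", 1 ); // 127.9
  const double aS = slha.get_block_entry( "sminputs", 3 );    // 0.118
  const double wTop = slha.get_block_entry( "decay", 6 );     // 1.35
  printf( "aEWM1=%g aS=%g widthTop=%g\n", aEWM1, aS, wTop );
  return 0;
}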