From 5b5631d76b972a74b3af2cbf3505f1d7e26a2d0d Mon Sep 17 00:00:00 2001 From: Jack Betteridge Date: Fri, 19 Apr 2024 15:03:37 +0100 Subject: [PATCH 1/3] Fix for massively parallel performance regression --- pyop2/compilation.py | 174 +++++++++++++++++++++---------------------- 1 file changed, 84 insertions(+), 90 deletions(-) diff --git a/pyop2/compilation.py b/pyop2/compilation.py index 794024a8d..0b9e16358 100644 --- a/pyop2/compilation.py +++ b/pyop2/compilation.py @@ -63,7 +63,7 @@ def _check_hashes(x, y, datatype): def set_default_compiler(compiler): - """Set the PyOP2 default compiler, globally. + """Set the PyOP2 default compiler, globally over COMM_WORLD. :arg compiler: String with name or path to compiler executable OR a subclass of the Compiler class @@ -85,66 +85,71 @@ def set_default_compiler(compiler): ) -def sniff_compiler(exe): +def sniff_compiler(exe, comm=mpi.COMM_WORLD): """Obtain the correct compiler class by calling the compiler executable. :arg exe: String with name or path to compiler executable :returns: A compiler class """ - try: - output = subprocess.run( - [exe, "--version"], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - check=True, - encoding="utf-8" - ).stdout - except (subprocess.CalledProcessError, UnicodeDecodeError): - output = "" - - # Find the name of the compiler family - if output.startswith("gcc") or output.startswith("g++"): - name = "GNU" - elif output.startswith("clang"): - name = "clang" - elif output.startswith("Apple LLVM") or output.startswith("Apple clang"): - name = "clang" - elif output.startswith("icc"): - name = "Intel" - elif "Cray" in output.split("\n")[0]: - # Cray is more awkward eg: - # Cray clang version 11.0.4 () - # gcc (GCC) 9.3.0 20200312 (Cray Inc.) - name = "Cray" - else: - name = "unknown" - - # Set the compiler instance based on the platform (and architecture) - if sys.platform.find("linux") == 0: - if name == "Intel": - compiler = LinuxIntelCompiler - elif name == "GNU": - compiler = LinuxGnuCompiler - elif name == "clang": - compiler = LinuxClangCompiler - elif name == "Cray": - compiler = LinuxCrayCompiler + # Note: + # Sniffing compiler for very large numbers of MPI ranks is expensive + compiler = None + if comm.rank == 0: + try: + output = subprocess.run( + [exe, "--version"], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + check=True, + encoding="utf-8" + ).stdout + except (subprocess.CalledProcessError, UnicodeDecodeError): + output = "" + + # Find the name of the compiler family + if output.startswith("gcc") or output.startswith("g++"): + name = "GNU" + elif output.startswith("clang"): + name = "clang" + elif output.startswith("Apple LLVM") or output.startswith("Apple clang"): + name = "clang" + elif output.startswith("icc"): + name = "Intel" + elif "Cray" in output.split("\n")[0]: + # Cray is more awkward eg: + # Cray clang version 11.0.4 () + # gcc (GCC) 9.3.0 20200312 (Cray Inc.) + name = "Cray" else: - compiler = AnonymousCompiler - elif sys.platform.find("darwin") == 0: - if name == "clang": - machine = platform.uname().machine - if machine == "arm64": - compiler = MacClangARMCompiler - elif machine == "x86_64": - compiler = MacClangCompiler - elif name == "GNU": - compiler = MacGNUCompiler + name = "unknown" + + # Set the compiler instance based on the platform (and architecture) + if sys.platform.find("linux") == 0: + if name == "Intel": + compiler = LinuxIntelCompiler + elif name == "GNU": + compiler = LinuxGnuCompiler + elif name == "clang": + compiler = LinuxClangCompiler + elif name == "Cray": + compiler = LinuxCrayCompiler + else: + compiler = AnonymousCompiler + elif sys.platform.find("darwin") == 0: + if name == "clang": + machine = platform.uname().machine + if machine == "arm64": + compiler = MacClangARMCompiler + elif machine == "x86_64": + compiler = MacClangCompiler + elif name == "GNU": + compiler = MacGNUCompiler + else: + compiler = AnonymousCompiler else: compiler = AnonymousCompiler - else: - compiler = AnonymousCompiler - return compiler + + return comm.bcast(compiler, 0) class Compiler(ABC): @@ -178,8 +183,8 @@ class Compiler(ABC): _debugflags = () def __init__(self, extra_compiler_flags=(), extra_linker_flags=(), cpp=False, comm=None): - # Get compiler version ASAP since it is used in __repr__ - self.sniff_compiler_version() + # Set compiler version ASAP since it is used in __repr__ + self.version = None self._extra_compiler_flags = tuple(extra_compiler_flags) self._extra_linker_flags = tuple(extra_linker_flags) @@ -190,6 +195,7 @@ def __init__(self, extra_compiler_flags=(), extra_linker_flags=(), cpp=False, co # Compilation communicators are reference counted on the PyOP2 comm self.pcomm = mpi.internal_comm(comm, self) self.comm = mpi.compilation_comm(self.pcomm, self) + self.sniff_compiler_version() def __repr__(self): return f"<{self._name} compiler, version {self.version or 'unknown'}>" @@ -238,23 +244,28 @@ def sniff_compiler_version(self, cpp=False): :arg cpp: If set to True will use the C++ compiler rather than the C compiler to determine the version number. """ + # Note: + # Sniffing the compiler version for very large numbers of + # MPI ranks is expensive exe = self.cxx if cpp else self.cc - self.version = None - # `-dumpversion` is not sufficient to get the whole version string (for some compilers), - # but other compilers do not implement `-dumpfullversion`! - for dumpstring in ["-dumpfullversion", "-dumpversion"]: - try: - output = subprocess.run( - [exe, dumpstring], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - check=True, - encoding="utf-8" - ).stdout - self.version = Version(output) - break - except (subprocess.CalledProcessError, UnicodeDecodeError, InvalidVersion): - continue + version = None + if mpi.COMM_WORLD.rank == 0: + # `-dumpversion` is not sufficient to get the whole version string (for some compilers), + # but other compilers do not implement `-dumpfullversion`! + for dumpstring in ["-dumpfullversion", "-dumpversion"]: + try: + output = subprocess.run( + [exe, dumpstring], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + check=True, + encoding="utf-8" + ).stdout + version = Version(output) + break + except (subprocess.CalledProcessError, UnicodeDecodeError, InvalidVersion): + continue + self.version = mpi.COMM_WORLD.bcast(version, 0) @property def bugfix_cflags(self): @@ -448,23 +459,6 @@ class LinuxGnuCompiler(Compiler): _optflags = ("-march=native", "-O3", "-ffast-math") _debugflags = ("-O0", "-g") - def sniff_compiler_version(self, cpp=False): - super(LinuxGnuCompiler, self).sniff_compiler_version() - if self.version >= Version("7.0"): - try: - # gcc-7 series only spits out patch level on dumpfullversion. - exe = self.cxx if cpp else self.cc - output = subprocess.run( - [exe, "-dumpfullversion"], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - check=True, - encoding="utf-8" - ).stdout - self.version = Version(output) - except (subprocess.CalledProcessError, UnicodeDecodeError, InvalidVersion): - pass - @property def bugfix_cflags(self): """Flags to work around bugs in compilers.""" @@ -552,7 +546,7 @@ class AnonymousCompiler(Compiler): @mpi.collective def load(jitmodule, extension, fn_name, cppargs=(), ldargs=(), - argtypes=None, restype=None, comm=None): + argtypes=None, restype=None, comm=mpi.COMM_WORLD): """Build a shared library and return a function pointer from it. :arg jitmodule: The JIT Module which can generate the code to compile, or @@ -596,7 +590,7 @@ def __init__(self, code, argtypes): exe = configuration["cxx"] or "mpicxx" else: exe = configuration["cc"] or "mpicc" - compiler = sniff_compiler(exe) + compiler = sniff_compiler(exe, comm) dll = compiler(cppargs, ldargs, cpp=cpp, comm=comm).get_so(code, extension) if isinstance(jitmodule, GlobalKernel): From 7e7ed47ca1c92879931fbb78768909ecd5083242 Mon Sep 17 00:00:00 2001 From: Jack Betteridge Date: Fri, 19 Apr 2024 15:34:14 +0100 Subject: [PATCH 2/3] Ooops wrong comm --- pyop2/compilation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyop2/compilation.py b/pyop2/compilation.py index 0b9e16358..51791ca24 100644 --- a/pyop2/compilation.py +++ b/pyop2/compilation.py @@ -249,7 +249,7 @@ def sniff_compiler_version(self, cpp=False): # MPI ranks is expensive exe = self.cxx if cpp else self.cc version = None - if mpi.COMM_WORLD.rank == 0: + if self.comm.rank == 0: # `-dumpversion` is not sufficient to get the whole version string (for some compilers), # but other compilers do not implement `-dumpfullversion`! for dumpstring in ["-dumpfullversion", "-dumpversion"]: @@ -265,7 +265,7 @@ def sniff_compiler_version(self, cpp=False): break except (subprocess.CalledProcessError, UnicodeDecodeError, InvalidVersion): continue - self.version = mpi.COMM_WORLD.bcast(version, 0) + self.version = self.comm.bcast(version, 0) @property def bugfix_cflags(self): From 794acb3a8d544e474d9cd5c66be4c30f55693924 Mon Sep 17 00:00:00 2001 From: Jack Betteridge Date: Wed, 8 May 2024 15:27:15 +0100 Subject: [PATCH 3/3] Code review --- pyop2/compilation.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pyop2/compilation.py b/pyop2/compilation.py index 51791ca24..f4a1af36a 100644 --- a/pyop2/compilation.py +++ b/pyop2/compilation.py @@ -89,12 +89,14 @@ def sniff_compiler(exe, comm=mpi.COMM_WORLD): """Obtain the correct compiler class by calling the compiler executable. :arg exe: String with name or path to compiler executable + :arg comm: Comm over which we want to determine the compiler type :returns: A compiler class """ - # Note: - # Sniffing compiler for very large numbers of MPI ranks is expensive compiler = None if comm.rank == 0: + # Note: + # Sniffing compiler for very large numbers of MPI ranks is + # expensive so we do this on one rank and broadcast try: output = subprocess.run( [exe, "--version"], @@ -546,7 +548,7 @@ class AnonymousCompiler(Compiler): @mpi.collective def load(jitmodule, extension, fn_name, cppargs=(), ldargs=(), - argtypes=None, restype=None, comm=mpi.COMM_WORLD): + argtypes=None, restype=None, comm=None): """Build a shared library and return a function pointer from it. :arg jitmodule: The JIT Module which can generate the code to compile, or