diff --git a/pyop2/base.py b/pyop2/base.py index 26c01d4cd..cdab6201e 100644 --- a/pyop2/base.py +++ b/pyop2/base.py @@ -170,8 +170,16 @@ def __init__(self, data=None, map=None, access=None, lgmaps=None, unroll_map=Fal "To set of %s doesn't match the set of %s." % (map, data)) @cached_property - def _kernel_args_(self): - return self.data._kernel_args_ + def ctypes_args(self): + return self.data.ctypes_args + + @property + def cffi_args(self): + return self.data.cffi_args + + @property + def cppyy_args(self): + return self.data.cppyy_args @cached_property def _argtypes_(self): @@ -403,7 +411,9 @@ class Set(object): _extruded = False - _kernel_args_ = () + ctypes_args = () + cffi_args = () + cppyy_args = () _argtypes_ = () @cached_property @@ -531,7 +541,9 @@ class GlobalSet(Set): """A proxy set allowing a :class:`Global` to be used in place of a :class:`Dat` where appropriate.""" - _kernel_args_ = () + ctypes_args = () + cffi_args = () + cppyy_args = () _argtypes_ = () def __init__(self, comm=None): @@ -642,8 +654,16 @@ def __init__(self, parent, layers): self._extruded = True @cached_property - def _kernel_args_(self): - return (self.layers_array.ctypes.data, ) + def ctypes_args(self): + return (self.layers_array.ctypes.data,) + + @property + def cffi_args(self): + return NotImplemented + + @property + def cppyy_args(self): + return (self._layers,) @cached_property def _argtypes_(self): @@ -723,8 +743,16 @@ def __init__(self, superset, indices): self._extruded = superset._extruded @cached_property - def _kernel_args_(self): - return self._superset._kernel_args_ + (self._indices.ctypes.data, ) + def ctypes_args(self): + return self._superset.ctypes_args + (self._indices.ctypes.data,) + + @property + def cffi_args(self): + return NotImplemented + + @property + def cppyy_args(self): + return self._superset.cppyy_args + (self._indices,) @cached_property def _argtypes_(self): @@ -782,8 +810,8 @@ def layers_array(self): class SetPartition(object): def __init__(self, set, offset, size): self.set = set - self.offset = offset - self.size = size + self.offset = int(offset) + self.size = int(size) class MixedSet(Set, ObjectCached): @@ -800,8 +828,16 @@ def __init__(self, sets): self.comm = reduce(lambda a, b: a or b, map(lambda s: s if s is None else s.comm, sets)) self._initialized = True - @cached_property - def _kernel_args_(self): + @property + def ctypes_args(self): + raise NotImplementedError + + @property + def cffi_args(self): + raise NotImplementedError + + @property + def cppyy_args(self): raise NotImplementedError @cached_property @@ -1370,9 +1406,17 @@ def __init__(self, dataset, data=None, dtype=None, name=None): self.halo_valid = True self._name = name or "dat_#x%x" % id(self) - @cached_property - def _kernel_args_(self): - return (self._data.ctypes.data, ) + @property + def ctypes_args(self): + return (self._data.ctypes.data,) + + @property + def cffi_args(self): + raise NotImplementedError + + @property + def cppyy_args(self): + return (self._data,) @cached_property def _argtypes_(self): @@ -1587,6 +1631,7 @@ def _copy_parloop(self, other, subset=None): if not hasattr(self, '_copy_kernel'): import islpy as isl import pymbolic.primitives as p + name = f"copy_{id(self)}" inames = isl.make_zero_and_vars(["i"]) domain = (inames[0].le_set(inames["i"])) & (inames["i"].lt_set(inames[0] + self.cdim)) _other = p.Variable("other") @@ -1595,9 +1640,9 @@ def _copy_parloop(self, other, subset=None): insn = loopy.Assignment(_other.index(i), _self.index(i), within_inames=frozenset(["i"])) data = [loopy.GlobalArg("self", dtype=self.dtype, shape=(self.cdim,)), loopy.GlobalArg("other", dtype=other.dtype, shape=(other.cdim,))] - knl = loopy.make_function([domain], [insn], data, name="copy") + knl = loopy.make_function([domain], [insn], data, name=name) - self._copy_kernel = _make_object('Kernel', knl, 'copy') + self._copy_kernel = _make_object('Kernel', knl, name) return _make_object('ParLoop', self._copy_kernel, subset or self.dataset.set, self(READ), other(WRITE)) @@ -1923,9 +1968,17 @@ def __init__(self, dat, index): name="view[%s](%s)" % (index, dat.name)) self._parent = dat - @cached_property - def _kernel_args_(self): - return self._parent._kernel_args_ + @property + def ctypes_args(self): + return self._parent.ctypes_args + + @property + def cffi_args(self): + raise NotImplementedError + + @property + def cppyy_args(self): + return self._parent.cppyy_args @cached_property def _argtypes_(self): @@ -2005,8 +2058,16 @@ def what(x): self.comm = self._dats[0].comm @cached_property - def _kernel_args_(self): - return tuple(itertools.chain(*(d._kernel_args_ for d in self))) + def ctypes_args(self): + return tuple(itertools.chain(*(d.ctypes_args for d in self))) + + @cached_property + def cffi_args(self): + raise NotImplementedError + + @cached_property + def cppyy_args(self): + return tuple(itertools.chain(*(d.cppyy_args for d in self))) @cached_property def _argtypes_(self): @@ -2290,8 +2351,16 @@ def __init__(self, dim, data=None, dtype=None, name=None, comm=None): self.comm = comm @cached_property - def _kernel_args_(self): - return (self._data.ctypes.data, ) + def ctypes_args(self): + return (self._data.ctypes.data,) + + @cached_property + def cffi_args(self): + raise NotImplementedError + + @cached_property + def cppyy_args(self): + return (self._data,) @cached_property def _argtypes_(self): @@ -2528,8 +2597,16 @@ def __init__(self, iterset, toset, arity, values=None, name=None, offset=None): self._cache = {} @cached_property - def _kernel_args_(self): - return (self._values.ctypes.data, ) + def ctypes_args(self): + return (self._values.ctypes.data,) + + @cached_property + def cffi_args(self): + raise NotImplementedError + + @cached_property + def cppyy_args(self): + return (self._values,) @cached_property def _argtypes_(self): @@ -2654,8 +2731,16 @@ def _cache_key(cls, maps): return maps @cached_property - def _kernel_args_(self): - return tuple(itertools.chain(*(m._kernel_args_ for m in self if m is not None))) + def ctypes_args(self): + return tuple(itertools.chain(*(m.ctypes_args for m in self if m is not None))) + + @cached_property + def cffi_args(self): + raise NotImplementedError + + @cached_property + def cppyy_args(self): + return tuple(itertools.chain(*(m.cppyy_args for m in self if m is not None))) @cached_property def _argtypes_(self): diff --git a/pyop2/compilation.py b/pyop2/compilation.py index e5a9fefdd..048b21dd9 100644 --- a/pyop2/compilation.py +++ b/pyop2/compilation.py @@ -32,14 +32,16 @@ # OF THE POSSIBILITY OF SUCH DAMAGE. +from abc import ABC, abstractmethod +import collections +import ctypes +from distutils import version +from enum import IntEnum, auto +from hashlib import md5 +from itertools import chain import os import subprocess import sys -import ctypes -import collections -from hashlib import md5 -from distutils import version - from pyop2.mpi import MPI, collective, COMM_WORLD from pyop2.mpi import dup_comm, get_compilation_comm, set_compilation_comm @@ -49,6 +51,12 @@ from pyop2.base import JITModule +class FFIBackend(IntEnum): + CTYPES = auto() + CFFI = auto() + CPPYY = auto() + + def _check_hashes(x, y, datatype): """MPI reduction op to check if code hashes differ across ranks.""" if x == y: @@ -153,9 +161,21 @@ def compilation_comm(comm): return retcomm -class Compiler(object): +class Compiler(ABC): + + compiler_versions = {} # TODO: what to do with this? + + default_cc = "mpicc" + default_cxx = "mpicxx" + + default_cppflags = None + default_ldflags = None - compiler_versions = {} + default_cflags = None + default_cxxflags = None + + default_optflags = None + default_debugflags = None """A compiler for shared libraries. @@ -173,18 +193,54 @@ class Compiler(object): :kwarg comm: Optional communicator to compile the code on (defaults to COMM_WORLD). """ - def __init__(self, cc, ld=None, cppargs=[], ldargs=[], - cpp=False, comm=None): - ccenv = 'CXX' if cpp else 'CC' + def __init__(self, extra_cppflags=None, extra_ldflags=None, cpp=False, comm=None): + self._extra_cppflags = extra_cppflags or [] + self._extra_ldflags = extra_ldflags or [] + + self._cpp = cpp + self._debug = configuration["debug"] + # Ensure that this is an internal communicator. comm = dup_comm(comm or COMM_WORLD) self.comm = compilation_comm(comm) - self._cc = os.environ.get(ccenv, cc) - self._ld = os.environ.get('LDSHARED', ld) - self._cppargs = cppargs + configuration['cflags'].split() - if configuration["use_safe_cflags"]: - self._cppargs += self.workaround_cflags - self._ldargs = ldargs + configuration['ldflags'].split() + + @property + def cc(self): + return os.environ.get("PYOP2_CC", self._cxx if self._cpp else self._cc) + + @property + def ld(self): + return os.environ.get("PYOP2_LD", None) + + @property + def cppflags(self): + try: + return os.environ["PYOP2_CPPFLAGS"] + except KeyError: + cppflags = self._cppflags + self._extra_cppflags + + if not self._debug: + cppflags += self._debugflags + else: + cppflags += self._optflags + + if self._cpp: + cppflags += self._cxxflags + else: + cppflags += self._cflags + + return cppflags + + @property + def ldflags(self): + try: + return os.environ["PYOP2_LDFLAGS"] + except KeyError: + return self._ldflags + self._extra_ldflags + + @property + def bugfix_cflags(self): + return [] @property def compiler_version(self): @@ -226,7 +282,8 @@ def workaround_cflags(self): return [] @collective - def get_so(self, jitmodule, extension): + @classmethod + def compile(cls, jitmodule, extension): """Build a shared library and load it :arg jitmodule: The JIT Module which can generate the code to compile. @@ -238,8 +295,8 @@ def get_so(self, jitmodule, extension): # Determine cache key hsh = md5(str(jitmodule.cache_key[1:]).encode()) hsh.update(self._cc.encode()) - if self._ld: - hsh.update(self._ld.encode()) + if self.ld: + hsh.update(self.ld.encode()) hsh.update("".join(self._cppargs).encode()) hsh.update("".join(self._ldargs).encode()) @@ -284,7 +341,7 @@ def get_so(self, jitmodule, extension): with open(cname, "w") as f: f.write(jitmodule.code_to_compile) # Compiler also links - if self._ld is None: + if self.ld is None: cc = [self._cc] + self._cppargs + \ ['-o', tmpname, cname] + self._ldargs debug('Compilation command: %s', ' '.join(cc)) @@ -312,7 +369,7 @@ def get_so(self, jitmodule, extension): else: cc = [self._cc] + self._cppargs + \ ['-c', '-o', oname, cname] - ld = self._ld.split() + ['-o', tmpname, oname] + self._ldargs + ld = self.ld.split() + ['-o', tmpname, oname] + self.ldargs debug('Compilation command: %s', ' '.join(cc)) debug('Link command: %s', ' '.join(ld)) with open(logfile, "w") as log: @@ -354,91 +411,48 @@ def get_so(self, jitmodule, extension): return ctypes.CDLL(soname) -class MacCompiler(Compiler): - """A compiler for building a shared library on mac systems. +class MacClangCompiler(Compiler): + """A compiler for building a shared library on mac systems.""" + + _cppflags = ["-fPIC", "-Wall", "-framework", "Accelerate"] + _ldflags = ["-dynamiclib"] - :arg cppargs: A list of arguments to pass to the C compiler - (optional). - :arg ldargs: A list of arguments to pass to the linker (optional). + _cflags = ["-std=c99"] + _cxxflags = [] - :arg cpp: Are we actually using the C++ compiler? + _optflags = ["-march=native", "-O3", "-ffast-math"] + _debugflags = ["-O0", "-g"] - :kwarg comm: Optional communicator to compile the code on (only - rank 0 compiles code) (defaults to COMM_WORLD). - """ - def __init__(self, cppargs=[], ldargs=[], cpp=False, comm=None): - opt_flags = ['-march=native', '-O3', '-ffast-math'] - if configuration['debug']: - opt_flags = ['-O0', '-g'] - cc = "mpicc" - stdargs = ["-std=c99"] - if cpp: - cc = "mpicxx" - stdargs = [] - cppargs = stdargs + ['-fPIC', '-Wall', '-framework', 'Accelerate'] + \ - opt_flags + cppargs - ldargs = ['-dynamiclib'] + ldargs - super(MacCompiler, self).__init__(cc, - cppargs=cppargs, - ldargs=ldargs, - cpp=cpp, - comm=comm) - - -class LinuxCompiler(Compiler): - """A compiler for building a shared library on linux systems. - - :arg cppargs: A list of arguments to pass to the C compiler - (optional). - :arg ldargs: A list of arguments to pass to the linker (optional). - :arg cpp: Are we actually using the C++ compiler? - :kwarg comm: Optional communicator to compile the code on (only - rank 0 compiles code) (defaults to COMM_WORLD).""" - def __init__(self, cppargs=[], ldargs=[], cpp=False, comm=None): - opt_flags = ['-march=native', '-O3', '-ffast-math'] - if configuration['debug']: - opt_flags = ['-O0', '-g'] - cc = "mpicc" - stdargs = ["-std=c99"] - if cpp: - cc = "mpicxx" - stdargs = [] - cppargs = stdargs + ['-fPIC', '-Wall'] + opt_flags + cppargs - ldargs = ['-shared'] + ldargs - - super(LinuxCompiler, self).__init__(cc, cppargs=cppargs, ldargs=ldargs, - cpp=cpp, comm=comm) +class LinuxGnuCompiler(Compiler): + """A compiler for building a shared library on Linux systems.""" + + _cppflags = ["-fPIC", "-Wall"] + _ldflags = ["-shared"] + + _cflags = ["-std=c99"] + _cxxflags = [] + + _optflags = ["-march=native", "-O3", "-ffast-math"] + _debugflags = ["-O0", "-g"] class LinuxIntelCompiler(Compiler): - """The intel compiler for building a shared library on linux systems. + """The Intel compiler for building a shared library on Linux systems.""" - :arg cppargs: A list of arguments to pass to the C compiler - (optional). - :arg ldargs: A list of arguments to pass to the linker (optional). - :arg cpp: Are we actually using the C++ compiler? - :kwarg comm: Optional communicator to compile the code on (only - rank 0 compiles code) (defaults to COMM_WORLD). - """ - def __init__(self, cppargs=[], ldargs=[], cpp=False, comm=None): - opt_flags = ['-Ofast', '-xHost'] - if configuration['debug']: - opt_flags = ['-O0', '-g'] - cc = "mpicc" - stdargs = ["-std=c99"] - if cpp: - cc = "mpicxx" - stdargs = [] - cppargs = stdargs + ['-fPIC', '-no-multibyte-chars'] + opt_flags + cppargs - ldargs = ['-shared'] + ldargs - super(LinuxIntelCompiler, self).__init__(cc, cppargs=cppargs, ldargs=ldargs, - cpp=cpp, comm=comm) + _cppflags = ["-fPIC", "-no-multibyte-chars"] + _ldflags = ["-shared"] + + _cflags = ["-std=c99"] + _cxxflags = [] + + _optflags = ["-Ofast", "-xHost"] + _debugflags = ["-O0", "-g"] @collective def load(jitmodule, extension, fn_name, cppargs=[], ldargs=[], - argtypes=None, restype=None, compiler=None, comm=None): + argtypes=None, restype=None, compiler=None, comm=None, ffi_backend=FFIBackend.CTYPES): """Build a shared library and return a function pointer from it. :arg jitmodule: The JIT Module which can generate the code to compile, or @@ -470,28 +484,60 @@ def __init__(self, code, argtypes): else: raise ValueError("Don't know how to compile code of type %r" % type(jitmodule)) - platform = sys.platform - cpp = extension == "cpp" - if not compiler: - compiler = configuration["compiler"] - if platform.find('linux') == 0: - if compiler == 'icc': - compiler = LinuxIntelCompiler(cppargs, ldargs, cpp=cpp, comm=comm) - elif compiler == 'gcc': - compiler = LinuxCompiler(cppargs, ldargs, cpp=cpp, comm=comm) + # testing + ffi_backend = FFIBackend.CPPYY + if ffi_backend == FFIBackend.CTYPES: + platform = sys.platform + cpp = extension == "cpp" + if not compiler: + compiler = configuration["compiler"] + if platform.find('linux') == 0: + if compiler == 'icc': + compiler = LinuxIntelCompiler(cppargs, ldargs, cpp=cpp, comm=comm) + elif compiler == 'gcc': + compiler = LinuxGnuCompiler(cppargs, ldargs, cpp=cpp, comm=comm) + else: + raise CompilationError("Unrecognized compiler name '%s'" % compiler) + elif platform.find('darwin') == 0: + compiler = MacClangCompiler(cppargs, ldargs, cpp=cpp, comm=comm) else: - raise CompilationError("Unrecognized compiler name '%s'" % compiler) - elif platform.find('darwin') == 0: - compiler = MacCompiler(cppargs, ldargs, cpp=cpp, comm=comm) + raise CompilationError("Don't know what compiler to use for platform '%s'" % + platform) + dll = compiler.compile(code, extension) + + fn = getattr(dll, fn_name) + fn.argtypes = code.argtypes + fn.restype = restype + return fn + elif ffi_backend == FFIBackend.CFFI: + raise NotImplementedError + elif ffi_backend == FFIBackend.CPPYY: + return _load_cppyy(code, fn_name, cppargs, ldargs) else: - raise CompilationError("Don't know what compiler to use for platform '%s'" % - platform) - dll = compiler.get_so(code, extension) - - fn = getattr(dll, fn_name) - fn.argtypes = code.argtypes - fn.restype = restype - return fn + raise AssertionError + + +def _load_cppyy(code, fn_name, cppargs, ldargs): + import cppyy + + # print(code.code_to_compile) + # exit() + + for flag in chain(cppargs, ldargs): + if flag.startswith("-I"): + cppyy.add_include_path(flag.strip("-I")) + elif flag.startswith("-L"): + cppyy.add_library_path(flag.strip("-L")) + elif flag.startswith("-l"): + cppyy.load_library(flag.strip("-l")) + + # debug + try: + cppyy.cppdef(code.code_to_compile) + except: + print(code.code_to_compile) + raise Exception + return getattr(cppyy.gbl, fn_name) def clear_cache(prompt=False): diff --git a/pyop2/configuration.py b/pyop2/configuration.py index fe5a2c4c5..4749924a7 100644 --- a/pyop2/configuration.py +++ b/pyop2/configuration.py @@ -77,11 +77,8 @@ class Configuration(dict): """ # name, env variable, type, default, write once DEFAULTS = { - "compiler": ("PYOP2_BACKEND_COMPILER", str, "gcc"), "simd_width": ("PYOP2_SIMD_WIDTH", int, 4), "debug": ("PYOP2_DEBUG", bool, False), - "cflags": ("PYOP2_CFLAGS", str, ""), - "ldflags": ("PYOP2_LDFLAGS", str, ""), "compute_kernel_flops": ("PYOP2_COMPUTE_KERNEL_FLOPS", bool, False), "use_safe_cflags": ("PYOP2_USE_SAFE_CFLAGS", bool, True), "type_check": ("PYOP2_TYPE_CHECK", bool, True), diff --git a/pyop2/petsc_base.py b/pyop2/petsc_base.py index 16ecdcefe..e170a932a 100644 --- a/pyop2/petsc_base.py +++ b/pyop2/petsc_base.py @@ -508,8 +508,21 @@ def __init__(self, parent, i, j): self.local_to_global_maps = self.handle.getLGMap() @utils.cached_property - def _kernel_args_(self): - return (self.handle.handle, ) + def ctypes_args(self): + return (self.handle.handle,) + + @utils.cached_property + def cffi_args(self): + raise NotImplementedError + + @utils.cached_property + def cppyy_args(self): + import cppyy + import cppyy.ll + for dir_ in utils.get_petsc_dir(): + cppyy.add_include_path(f"{dir_}/include") + cppyy.include("petsc.h") + return cppyy.ll.cast["Mat"](self.handle.handle) @utils.cached_property def _wrapper_cache_key_(self): @@ -607,9 +620,22 @@ def __init__(self, *args, **kwargs): local_to_global_maps = (None, None) @utils.cached_property - def _kernel_args_(self): + def ctypes_args(self): return tuple(a.handle.handle for a in self) + @utils.cached_property + def cffi_args(self): + raise NotImplementedError + + @utils.cached_property + def cppyy_args(self): + import cppyy + import cppyy.ll + for dir_ in utils.get_petsc_dir(): + cppyy.add_include_path(f"{dir_}/include") + cppyy.include("petsc.h") + return tuple(cppyy.ll.cast["Mat"](a.handle.handle) for a in self) + @collective def _init(self): if not self.dtype == PETSc.ScalarType: diff --git a/pyop2/sequential.py b/pyop2/sequential.py index 1dbab1c18..bcc9e7d0f 100644 --- a/pyop2/sequential.py +++ b/pyop2/sequential.py @@ -33,10 +33,11 @@ """OP2 sequential backend.""" -import os from copy import deepcopy as dcopy - import ctypes +import os + +import numpy as np from pyop2.datatypes import IntType, as_ctypes from pyop2 import base @@ -50,6 +51,7 @@ from pyop2.base import DatView # noqa: F401 from pyop2.base import Kernel # noqa: F401 from pyop2.base import Arg # noqa: F401 +from pyop2.compilation import FFIBackend from pyop2.petsc_base import DataSet, MixedDataSet # noqa: F401 from pyop2.petsc_base import Global, GlobalDataSet # noqa: F401 from pyop2.petsc_base import Dat, MixedDat, Mat # noqa: F401 @@ -103,6 +105,7 @@ def __init__(self, kernel, iterset, *args, **kwargs): @collective def __call__(self, *args): + # import pdb; pdb.set_trace() return self._fun(*args) @cached_property @@ -139,7 +142,8 @@ def compile(self): from pyop2.configuration import configuration - compiler = configuration["compiler"] + # compiler = configuration["compiler"] + compiler = "gcc" extension = "cpp" if self._kernel._cpp else "c" cppargs = self._cppargs cppargs += ["-I%s/include" % d for d in get_petsc_dir()] + \ @@ -174,7 +178,7 @@ def argtypes(self): for arg in self._args: maps = arg.map_tuple for map_ in maps: - for k, t in zip(map_._kernel_args_, map_._argtypes_): + for k, t in zip(map_.ctypes_args, map_._argtypes_): if k in seen: continue argtypes += (t,) @@ -184,21 +188,34 @@ def argtypes(self): class ParLoop(petsc_base.ParLoop): - def prepare_arglist(self, iterset, *args): - arglist = iterset._kernel_args_ + def prepare_arglist(self, iterset, *args, ffi_backend=FFIBackend.CTYPES): + ffi_backend = FFIBackend.CPPYY # testing + def get_args(obj): + """Return the appropriate arguments to pass into the wrapper.""" + if ffi_backend == FFIBackend.CTYPES: + return obj.ctypes_args + elif ffi_backend == FFIBackend.CFFI: + return obj.cffi_args + elif ffi_backend == FFIBackend.CPPYY: + return obj.cppyy_args + else: + raise AssertionError + + arglist = get_args(iterset) for arg in args: - arglist += arg._kernel_args_ + arglist += get_args(arg) seen = set() for arg in args: maps = arg.map_tuple for map_ in maps: if map_ is None: continue - for k in map_._kernel_args_: - if k in seen: + for k in get_args(map_): + key = k.ctypes.data if isinstance(k, np.ndarray) else k + if key in seen: continue arglist += (k,) - seen.add(k) + seen.add(key) return arglist @cached_property @@ -215,6 +232,7 @@ def _compute_event(self): def _compute(self, part, fun, *arglist): with self._compute_event: self.log_flops(part.size * self.num_flops) + # import pdb; pdb.set_trace() fun(part.offset, part.offset + part.size, *arglist)