diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8458ee663..45df5ed57 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -23,6 +23,7 @@ jobs: - name: Install system dependencies shell: bash run: | + sudo apt update sudo apt install build-essential mpich libmpich-dev \ libblas-dev liblapack-dev gfortran diff --git a/.gitignore b/.gitignore index 089f35d42..2d3b4296b 100644 --- a/.gitignore +++ b/.gitignore @@ -6,11 +6,9 @@ PyOP2.egg-info *.py[cdo] # Extension modules -computeind.c -computeind.so -sparsity.cpp sparsity.so - +sparsity.c +sparsity.cpython*.so # Docs pyop2.coffee.rst pyop2.rst diff --git a/pyop2/base.py b/pyop2/base.py index b16d18687..4d5c7bb78 100644 --- a/pyop2/base.py +++ b/pyop2/base.py @@ -169,6 +169,21 @@ def __init__(self, data=None, map=None, access=None, lgmaps=None, unroll_map=Fal raise MapValueError( "To set of %s doesn't match the set of %s." % (map, data)) + def recreate(self, data=None, map=None, access=None, lgmaps=None, unroll_map=None): + """Creates a new Dat based on the existing Dat with the changes specified. + + :param data: A data-carrying object, either :class:`Dat` or class:`Mat` + :param map: A :class:`Map` to access this :class:`Arg` or the default + if the identity map is to be used. + :param access: An access descriptor of type :class:`Access` + :param lgmaps: For :class:`Mat` objects, a tuple of 2-tuples of local to + global maps used during assembly.""" + return type(self)(data=data or self.data, + map=map or self.map, + access=access or self.access, + lgmaps=lgmaps or self.lgmaps, + unroll_map=False if unroll_map is None else unroll_map) + @cached_property def _kernel_args_(self): return self.data._kernel_args_ @@ -523,6 +538,35 @@ def layers(self): """Return None (not an :class:`ExtrudedSet`).""" return None + def _check_operands(self, other): + if type(other) is Set: + if other is not self: + raise ValueError("Uable to perform set operations between two unrelated sets: %s and %s." % (self, other)) + elif type(other) is Subset: + if self is not other._superset: + raise TypeError("Superset mismatch: self (%s) != other._superset (%s)" % (self, other._superset)) + else: + raise TypeError("Unable to perform set operations between `Set` and %s." % (type(other), )) + + def intersection(self, other): + self._check_operands(other) + return other + + def union(self, other): + self._check_operands(other) + return self + + def difference(self, other): + self._check_operands(other) + if other is self: + return Subset(self, []) + else: + return type(other)(self, np.setdiff1d(np.asarray(range(self.total_size), dtype=IntType), other._indices)) + + def symmetric_difference(self, other): + self._check_operands(other) + return self.difference(other) + class GlobalSet(Set): @@ -771,6 +815,13 @@ def indices(self): """Returns the indices pointing in the superset.""" return self._indices + @cached_property + def owned_indices(self): + """Return the indices that correspond to the owned entities of the + superset. + """ + return self.indices[self.indices < self.superset.size] + @cached_property def layers_array(self): if self._superset.constant_layers: @@ -778,6 +829,44 @@ def layers_array(self): else: return self._superset.layers_array[self.indices, ...] + def _check_operands(self, other): + if type(other) is Set: + if other is not self._superset: + raise TypeError("Superset mismatch: self._superset (%s) != other (%s)" % (self._superset, other)) + elif type(other) is Subset: + if self._superset is not other._superset: + raise TypeError("Unable to perform set operation between subsets of mismatching supersets (%s != %s)" % (self._superset, other._superset)) + else: + raise TypeError("Unable to perform set operations between `Subset` and %s." % (type(other), )) + + def intersection(self, other): + self._check_operands(other) + if other is self._superset: + return self + else: + return type(self)(self._superset, np.intersect1d(self._indices, other._indices)) + + def union(self, other): + self._check_operands(other) + if other is self._superset: + return other + else: + return type(self)(self._superset, np.union1d(self._indices, other._indices)) + + def difference(self, other): + self._check_operands(other) + if other is self._superset: + return Subset(other, []) + else: + return type(self)(self._superset, np.setdiff1d(self._indices, other._indices)) + + def symmetric_difference(self, other): + self._check_operands(other) + if other is self._superset: + return other.symmetric_difference(self) + else: + return type(self)(self._superset, np.setxor1d(self._indices, other._indices)) + class SetPartition(object): def __init__(self, set, offset, size): @@ -1548,41 +1637,18 @@ def zero(self, subset=None): """Zero the data associated with this :class:`Dat` :arg subset: A :class:`Subset` of entries to zero (optional).""" + # Update dat_version self.update_dat_version() - if hasattr(self, "_zero_parloops"): - loops = self._zero_parloops + # If there is no subset we can safely zero the halo values. + if subset is None: + self._data[:] = 0 + self.halo_valid = True + elif subset.superset != self.dataset.set: + raise MapValueError("The subset and dataset are incompatible") else: - loops = {} - self._zero_parloops = loops - - iterset = subset or self.dataset.set - - loop = loops.get(iterset, None) - - if loop is None: - try: - knl = self._zero_kernels[(self.dtype, self.cdim)] - except KeyError: - import islpy as isl - import pymbolic.primitives as p - - inames = isl.make_zero_and_vars(["i"]) - domain = (inames[0].le_set(inames["i"])) & (inames["i"].lt_set(inames[0] + self.cdim)) - x = p.Variable("dat") - i = p.Variable("i") - insn = loopy.Assignment(x.index(i), 0, within_inames=frozenset(["i"])) - data = loopy.GlobalArg("dat", dtype=self.dtype, shape=(self.cdim,)) - knl = loopy.make_function([domain], [insn], [data], name="zero", target=loopy.CTarget(), lang_version=(2018, 2)) - - knl = _make_object('Kernel', knl, 'zero') - self._zero_kernels[(self.dtype, self.cdim)] = knl - loop = _make_object('ParLoop', knl, - iterset, - self(WRITE)) - loops[iterset] = loop - loop.compute() + self.data[subset.owned_indices] = 0 @collective def copy(self, other, subset=None): @@ -1592,28 +1658,17 @@ def copy(self, other, subset=None): :arg subset: A :class:`Subset` of elements to copy (optional)""" if other is self: return - self._copy_parloop(other, subset=subset).compute() - - @collective - def _copy_parloop(self, other, subset=None): - """Create the :class:`ParLoop` implementing copy.""" - if not hasattr(self, '_copy_kernel'): - import islpy as isl - import pymbolic.primitives as p - inames = isl.make_zero_and_vars(["i"]) - domain = (inames[0].le_set(inames["i"])) & (inames["i"].lt_set(inames[0] + self.cdim)) - _other = p.Variable("other") - _self = p.Variable("self") - i = p.Variable("i") - insn = loopy.Assignment(_other.index(i), _self.index(i), within_inames=frozenset(["i"])) - data = [loopy.GlobalArg("self", dtype=self.dtype, shape=(self.cdim,)), - loopy.GlobalArg("other", dtype=other.dtype, shape=(other.cdim,))] - knl = loopy.make_function([domain], [insn], data, name="copy", target=loopy.CTarget(), lang_version=(2018, 2)) - - self._copy_kernel = _make_object('Kernel', knl, 'copy') - return _make_object('ParLoop', self._copy_kernel, - subset or self.dataset.set, - self(READ), other(WRITE)) + if subset is None: + # If the current halo is valid we can also copy these values across. + if self.halo_valid: + other._data[:] = self._data + other.halo_valid = True + else: + other.data[:] = self.data_ro + elif subset.superset != self.dataset.set: + raise MapValueError("The subset and dataset are incompatible") + else: + other.data[subset.owned_indices] = self.data_ro[subset.owned_indices] def __iter__(self): """Yield self when iterated over.""" @@ -1852,8 +1907,6 @@ def __itruediv__(self, other): """Pointwise division or scaling of fields.""" return self._iop(other, operator.itruediv) - __idiv__ = __itruediv__ # Python 2 compatibility - @collective def global_to_local_begin(self, access_mode): """Begin a halo exchange from global to ghosted representation. @@ -2649,6 +2702,41 @@ def __le__(self, o): return self == o +class PermutedMap(Map): + """Composition of a standard :class:`Map` with a constant permutation. + + :arg map_: The map to permute. + :arg permutation: The permutation of the map indices. + + Where normally staging to element data is performed as + + .. code-block:: + + local[i] = global[map[i]] + + With a :class:`PermutedMap` we instead get + + .. code-block:: + + local[i] = global[map[permutation[i]]] + + This might be useful if your local kernel wants data in a + different order to the one that the map provides, and you don't + want two global-sized data structures. + """ + def __init__(self, map_, permutation): + self.map_ = map_ + self.permutation = np.asarray(permutation, dtype=Map.dtype) + assert (np.unique(permutation) == np.arange(map_.arity, dtype=Map.dtype)).all() + + @cached_property + def _wrapper_cache_key_(self): + return super()._wrapper_cache_key_ + (tuple(self.permutation),) + + def __getattr__(self, name): + return getattr(self.map_, name) + + class MixedMap(Map, ObjectCached): r"""A container for a bag of :class:`Map`\s.""" @@ -3350,7 +3438,8 @@ class Kernel(Cached): @classmethod @validate_type(('name', str, NameTypeError)) def _cache_key(cls, code, name, opts={}, include_dirs=[], headers=[], - user_code="", ldargs=None, cpp=False, requires_zeroed_output_arguments=False): + user_code="", ldargs=None, cpp=False, requires_zeroed_output_arguments=False, + flop_count=None): # Both code and name are relevant since there might be multiple kernels # extracting different functions from the same code # Also include the PyOP2 version, since the Kernel class might change @@ -3372,7 +3461,8 @@ def _wrapper_cache_key_(self): return (self._key, ) def __init__(self, code, name, opts={}, include_dirs=[], headers=[], - user_code="", ldargs=None, cpp=False, requires_zeroed_output_arguments=False): + user_code="", ldargs=None, cpp=False, requires_zeroed_output_arguments=False, + flop_count=None): # Protect against re-initialization when retrieved from cache if self._initialized: return @@ -3388,6 +3478,7 @@ def __init__(self, code, name, opts={}, include_dirs=[], headers=[], self._code = code self._initialized = True self.requires_zeroed_output_arguments = requires_zeroed_output_arguments + self.flop_count = flop_count @property def name(self): @@ -3400,6 +3491,8 @@ def code(self): @cached_property def num_flops(self): + if self.flop_count is not None: + return self.flop_count if not configuration["compute_kernel_flops"]: return 0 if isinstance(self.code, Node): diff --git a/pyop2/codegen/builder.py b/pyop2/codegen/builder.py index 5167e20a4..50d57ca25 100644 --- a/pyop2/codegen/builder.py +++ b/pyop2/codegen/builder.py @@ -16,7 +16,7 @@ When, Zero) from pyop2.datatypes import IntType from pyop2.op2 import (ALL, INC, MAX, MIN, ON_BOTTOM, ON_INTERIOR_FACETS, - ON_TOP, READ, RW, WRITE, Subset) + ON_TOP, READ, RW, WRITE, Subset, PermutedMap) from pyop2.utils import cached_property @@ -30,7 +30,7 @@ class Map(object): __slots__ = ("values", "offset", "interior_horizontal", "variable", "unroll", "layer_bounds", - "prefetch") + "prefetch", "permutation") def __init__(self, map_, interior_horizontal, layer_bounds, values=None, offset=None, unroll=False): @@ -50,11 +50,17 @@ def __init__(self, map_, interior_horizontal, layer_bounds, offset = map_.offset shape = (None, ) + map_.shape[1:] values = Argument(shape, dtype=map_.dtype, pfx="map") + if isinstance(map_, PermutedMap): + self.permutation = NamedLiteral(map_.permutation, parent=values, suffix="permutation") + if offset is not None: + offset = offset[map_.permutation] + else: + self.permutation = None if offset is not None: if len(set(map_.offset)) == 1: offset = Literal(offset[0], casting=True) else: - offset = NamedLiteral(offset, name=values.name + "_offset") + offset = NamedLiteral(offset, parent=values, suffix="offset") self.values = values self.offset = offset @@ -76,7 +82,10 @@ def indexed(self, multiindex, layer=None): base_key = None if base_key not in self.prefetch: j = Index() - base = Indexed(self.values, (n, j)) + if self.permutation is None: + base = Indexed(self.values, (n, j)) + else: + base = Indexed(self.values, (n, Indexed(self.permutation, (j,)))) self.prefetch[base_key] = Materialise(PackInst(), base, MultiIndex(j)) base = self.prefetch[base_key] @@ -103,7 +112,10 @@ def indexed(self, multiindex, layer=None): return Indexed(self.prefetch[key], (f, i)), (f, i) else: assert f.extent == 1 or f.extent is None - base = Indexed(self.values, (n, i)) + if self.permutation is None: + base = Indexed(self.values, (n, i)) + else: + base = Indexed(self.values, (n, Indexed(self.permutation, (i,)))) return base, (f, i) def indexed_vector(self, n, shape, layer=None): diff --git a/pyop2/codegen/loopycompat.py b/pyop2/codegen/loopycompat.py index d8a3aa9ec..3eeec83cc 100644 --- a/pyop2/codegen/loopycompat.py +++ b/pyop2/codegen/loopycompat.py @@ -85,7 +85,7 @@ def _shape_1_if_empty(shape_caller, shape_callee): get_kw_pos_association) _, pos_to_kw = get_kw_pos_association(callee_knl) arg_id_to_shape = {} - for arg_id, arg in insn.arg_id_to_val().items(): + for arg_id, arg in insn.arg_id_to_arg().items(): arg_id = pos_to_kw[arg_id] arg_descr = get_arg_descriptor_for_expression(caller_knl, arg) diff --git a/pyop2/codegen/optimise.py b/pyop2/codegen/optimise.py index f7a3550e5..f0a7b58b9 100644 --- a/pyop2/codegen/optimise.py +++ b/pyop2/codegen/optimise.py @@ -1,8 +1,7 @@ from pyop2.codegen.node import traversal, reuse_if_untouched, Memoizer from functools import singledispatch from pyop2.codegen.representation import (Index, RuntimeIndex, Node, - FunctionCall, Variable, Argument, - NamedLiteral) + FunctionCall, Variable, Argument) def collect_indices(expressions): @@ -90,7 +89,7 @@ def index_merger(instructions, cache=None): @singledispatch def _rename_node(node, self): - """Replace division with multiplication + """Rename nodes :param node: root of expression :param self: function for recursive calls @@ -103,7 +102,7 @@ def _rename_node(node, self): @_rename_node.register(Index) def _rename_node_index(node, self): - name = self.replace.get(node, node.name) + name = self.renamer(node) return Index(extent=node.extent, name=name) @@ -114,38 +113,25 @@ def _rename_node_func(node, self): return FunctionCall(node.name, node.label, node.access, free_indices, *children) -@_rename_node.register(RuntimeIndex) -def _rename_node_rtindex(node, self): - children = tuple(map(self, node.children)) - name = self.replace.get(node, node.name) - return RuntimeIndex(*children, name=name) - - -@_rename_node.register(NamedLiteral) -def _rename_node_namedliteral(node, self): - name = self.replace.get(node, node.name) - return NamedLiteral(node.value, name) - - @_rename_node.register(Variable) def _rename_node_variable(node, self): - name = self.replace.get(node, node.name) + name = self.renamer(node) return Variable(name, node.shape, node.dtype) @_rename_node.register(Argument) def _rename_node_argument(node, self): - name = self.replace.get(node, node.name) + name = self.renamer(node) return Argument(node.shape, node.dtype, name=name) -def rename_nodes(instructions, replace): +def rename_nodes(instructions, renamer): """Rename the nodes in the instructions. :param instructions: Iterable of nodes. - :param replace: Dictionary matching old names to new names. + :param renamer: Function that maps nodes to new names :return: List of instructions with nodes renamed. """ mapper = Memoizer(_rename_node) - mapper.replace = replace + mapper.renamer = renamer return list(map(mapper, instructions)) diff --git a/pyop2/codegen/rep2loopy.py b/pyop2/codegen/rep2loopy.py index af703f693..2dd21310e 100644 --- a/pyop2/codegen/rep2loopy.py +++ b/pyop2/codegen/rep2loopy.py @@ -15,7 +15,6 @@ from collections import OrderedDict, defaultdict from functools import singledispatch, reduce import itertools -import re import operator from pyop2.codegen.node import traversal, Node, Memoizer, reuse_if_untouched @@ -430,24 +429,35 @@ def generate(builder, wrapper_name=None): instructions = instructions + initialiser mapper.initialisers = [tuple(merger(i) for i in inits) for inits in mapper.initialisers] + def name_generator(prefix): + yield from (f"{prefix}{i}" for i in itertools.count()) + # rename indices and nodes (so that the counters start from zero) - pattern = re.compile(r"^([a-zA-Z_]+)([0-9]+)(_offset)?$") - replacements = {} - counter = defaultdict(itertools.count) - for node in traversal(instructions): - if isinstance(node, (Index, RuntimeIndex, Variable, Argument, NamedLiteral)): - match = pattern.match(node.name) - if match is None: - continue - prefix, _, postfix = match.groups() - if postfix is None: - postfix = "" - replacements[node] = "%s%d%s" % (prefix, next(counter[(prefix, postfix)]), postfix) - - instructions = rename_nodes(instructions, replacements) - mapper.initialisers = [rename_nodes(inits, replacements) for inits in mapper.initialisers] - parameters.wrapper_arguments = rename_nodes(parameters.wrapper_arguments, replacements) - s, e = rename_nodes([mapper(e) for e in builder.layer_extents], replacements) + node_names = {} + node_namers = dict((cls, name_generator(prefix)) + for cls, prefix in [(Index, "i"), (Variable, "t")]) + + def renamer(expr): + if isinstance(expr, Argument): + if expr._name is not None: + # Some arguments have given names + return expr._name + else: + # Otherwise generate one with their given prefix. + namer = node_namers.setdefault((type(expr), expr.prefix), + name_generator(expr.prefix)) + else: + namer = node_namers[type(expr)] + try: + return node_names[expr] + except KeyError: + return node_names.setdefault(expr, next(namer)) + + instructions = rename_nodes(instructions, renamer) + mapper.initialisers = [rename_nodes(inits, renamer) + for inits in mapper.initialisers] + parameters.wrapper_arguments = rename_nodes(parameters.wrapper_arguments, renamer) + s, e = rename_nodes([mapper(e) for e in builder.layer_extents], renamer) parameters.layer_start = s.name parameters.layer_end = e.name diff --git a/pyop2/codegen/representation.py b/pyop2/codegen/representation.py index 9a4025505..58f5b18f9 100644 --- a/pyop2/codegen/representation.py +++ b/pyop2/codegen/representation.py @@ -83,8 +83,8 @@ class IndexBase(metaclass=ABCMeta): class Index(Terminal, Scalar): _count = itertools.count() - __slots__ = ("name", "extent", "merge") - __front__ = ("name", "extent", "merge") + __slots__ = ("extent", "merge", "name") + __front__ = ("extent", "merge", "name") def __init__(self, extent=None, merge=True, name=None): self.name = name or "i%d" % next(Index._count) @@ -118,11 +118,12 @@ def __init__(self, value): class RuntimeIndex(Scalar): _count = itertools.count() - __slots__ = ("name", "children") + __slots__ = ("children", "name") __back__ = ("name", ) - def __init__(self, lo, hi, constraint, name=None): - self.name = name or "r%d" % next(RuntimeIndex._count) + def __init__(self, lo, hi, constraint, name): + assert name is not None, "runtime indices need a name" + self.name = name self.children = lo, hi, constraint @cached_property @@ -173,17 +174,23 @@ def __init__(self, name): class Argument(Terminal): _count = defaultdict(partial(itertools.count)) - __slots__ = ("shape", "dtype", "name") - __front__ = ("shape", "dtype", "name") + __slots__ = ("shape", "dtype", "_name", "prefix", "_gen_name") + __front__ = ("shape", "dtype", "_name", "prefix") def __init__(self, shape, dtype, name=None, pfx=None): self.dtype = dtype self.shape = shape - if name is None: - if pfx is None: - pfx = "v" - name = "%s%d" % (pfx, next(Argument._count[pfx])) - self.name = name + self._name = name + pfx = pfx or "v" + self.prefix = pfx + self._gen_name = name or "%s%d" % (pfx, next(Argument._count[pfx])) + + def get_hash(self): + return hash((type(self),) + self._cons_args(self.children) + (self.name,)) + + @property + def name(self): + return self._name or self._gen_name class Literal(Terminal, Scalar): @@ -218,19 +225,22 @@ def dtype(self): class NamedLiteral(Terminal): - __slots__ = ("value", "name") - __front__ = ("value", "name") + __slots__ = ("value", "parent", "suffix") + __front__ = ("value", "parent", "suffix") - def __init__(self, value, name): + def __init__(self, value, parent, suffix): self.value = value - self.name = name + self.parent = parent + self.suffix = suffix def is_equal(self, other): if type(self) != type(other): return False if self.shape != other.shape: return False - if self.name != other.name: + if self.parent != other.parent: + return False + if self.suffix != other.suffix: return False return tuple(self.value.flat) == tuple(other.value.flat) @@ -245,6 +255,10 @@ def shape(self): def dtype(self): return self.value.dtype + @property + def name(self): + return f"{self.parent.name}_{self.suffix}" + class Min(Scalar): __slots__ = ("children", ) diff --git a/pyop2/compilation.py b/pyop2/compilation.py index e5a9fefdd..2d72dbb11 100644 --- a/pyop2/compilation.py +++ b/pyop2/compilation.py @@ -33,6 +33,7 @@ import os +import shutil import subprocess import sys import ctypes @@ -500,31 +501,27 @@ def clear_cache(prompt=False): :arg prompt: if ``True`` prompt before removing any files """ cachedir = configuration['cache_dir'] + if not os.path.exists(cachedir): + print("Cache directory could not be found") return - - files = [os.path.join(cachedir, f) for f in os.listdir(cachedir) - if os.path.isfile(os.path.join(cachedir, f))] - nfiles = len(files) - - if nfiles == 0: + if len(os.listdir(cachedir)) == 0: print("No cached libraries to remove") return remove = True if prompt: - - user = input("Remove %d cached libraries from %s? [Y/n]: " % (nfiles, cachedir)) + user = input(f"Remove cached libraries from {cachedir}? [Y/n]: ") while user.lower() not in ['', 'y', 'n']: print("Please answer y or n.") - user = input("Remove %d cached libraries from %s? [Y/n]: " % (nfiles, cachedir)) + user = input(f"Remove cached libraries from {cachedir}? [Y/n]: ") if user.lower() == 'n': remove = False if remove: - print("Removing %d cached libraries from %s" % (nfiles, cachedir)) - [os.remove(f) for f in files] + print(f"Removing cached libraries from {cachedir}") + shutil.rmtree(cachedir) else: print("Not removing cached libraries") diff --git a/pyop2/op2.py b/pyop2/op2.py index 3082b5556..84ac26056 100644 --- a/pyop2/op2.py +++ b/pyop2/op2.py @@ -43,7 +43,7 @@ from pyop2.sequential import READ, WRITE, RW, INC, MIN, MAX # noqa: F401 from pyop2.base import ON_BOTTOM, ON_TOP, ON_INTERIOR_FACETS, ALL # noqa: F401 from pyop2.sequential import Set, ExtrudedSet, MixedSet, Subset, DataSet, MixedDataSet # noqa: F401 -from pyop2.sequential import Map, MixedMap, Sparsity, Halo # noqa: F401 +from pyop2.sequential import Map, MixedMap, PermutedMap, Sparsity, Halo # noqa: F401 from pyop2.sequential import Global, GlobalDataSet # noqa: F401 from pyop2.sequential import Dat, MixedDat, DatView, Mat # noqa: F401 from pyop2.sequential import ParLoop as SeqParLoop @@ -59,7 +59,7 @@ 'MixedSet', 'Subset', 'DataSet', 'GlobalDataSet', 'MixedDataSet', 'Halo', 'Dat', 'MixedDat', 'Mat', 'Global', 'Map', 'MixedMap', 'Sparsity', 'par_loop', 'ParLoop', - 'DatView'] + 'DatView', 'PermutedMap'] def ParLoop(kernel, *args, **kwargs): diff --git a/pyop2/petsc_base.py b/pyop2/petsc_base.py index 592d03ab4..1cdfe9502 100644 --- a/pyop2/petsc_base.py +++ b/pyop2/petsc_base.py @@ -986,7 +986,7 @@ def mult(self, mat, x, y): # Column matrix if x.sizes[1] == 1: v.copy(y) - a = np.zeros(1) + a = np.zeros(1, dtype=ScalarType) if x.comm.rank == 0: a[0] = x.array_r else: @@ -1002,7 +1002,7 @@ def multTranspose(self, mat, x, y): # Row matrix if x.sizes[1] == 1: v.copy(y) - a = np.zeros(1) + a = np.zeros(1, dtype=ScalarType) if x.comm.rank == 0: a[0] = x.array_r else: @@ -1026,7 +1026,7 @@ def multTransposeAdd(self, mat, x, y, z): # Row matrix if x.sizes[1] == 1: v.copy(z) - a = np.zeros(1) + a = np.zeros(1, dtype=ScalarType) if x.comm.rank == 0: a[0] = x.array_r else: diff --git a/pyop2/sequential.py b/pyop2/sequential.py index 1dbab1c18..ff8189be0 100644 --- a/pyop2/sequential.py +++ b/pyop2/sequential.py @@ -45,7 +45,7 @@ from pyop2.base import par_loop # noqa: F401 from pyop2.base import READ, WRITE, RW, INC, MIN, MAX # noqa: F401 from pyop2.base import ALL -from pyop2.base import Map, MixedMap, Sparsity, Halo # noqa: F401 +from pyop2.base import Map, MixedMap, PermutedMap, Sparsity, Halo # noqa: F401 from pyop2.base import Set, ExtrudedSet, MixedSet, Subset # noqa: F401 from pyop2.base import DatView # noqa: F401 from pyop2.base import Kernel # noqa: F401 @@ -58,6 +58,7 @@ from pyop2.profiling import timed_region from pyop2.utils import cached_property, get_petsc_dir +from petsc4py import PETSc import loopy @@ -131,6 +132,7 @@ def code_to_compile(self): return preamble + "\nextern \"C\" {\n" + device_code + "\n}\n" return code.device_code() + @PETSc.Log.EventDecorator() @collective def compile(self): # If we weren't in the cache we /must/ have arguments diff --git a/pyop2/utils.py b/pyop2/utils.py index 85190e7d9..0fc59901d 100644 --- a/pyop2/utils.py +++ b/pyop2/utils.py @@ -327,8 +327,11 @@ def get_petsc_dir(): return (dir, dir + arch) except KeyError: try: - import petsc - return (petsc.get_petsc_dir(), ) + import petsc4py + config = petsc4py.get_config() + petsc_dir = config["PETSC_DIR"] + petsc_arch = config["PETSC_ARCH"] + return petsc_dir, petsc_dir + petsc_arch except ImportError: sys.exit("""Error: Could not find PETSc library. diff --git a/requirements-git.txt b/requirements-git.txt index 26bdc5abc..438205043 100644 --- a/requirements-git.txt +++ b/requirements-git.txt @@ -1,2 +1,2 @@ git+https://github.com/coneoproject/COFFEE.git#egg=coffee -git+https://github.com/firedrakeproject/loopy.git@firedrake#egg=loopy +git+https://github.com/firedrakeproject/loopy.git@main#egg=loopy diff --git a/test/unit/test_api.py b/test/unit/test_api.py index 5c3f85aa1..eee28bb35 100644 --- a/test/unit/test_api.py +++ b/test/unit/test_api.py @@ -809,7 +809,7 @@ def test_dat_float(self, dset): def test_dat_int(self, dset): "Data type for int data should be numpy.int." d = op2.Dat(dset, [1] * dset.size * dset.cdim) - assert d.dtype == np.int + assert d.dtype == np.asarray(1).dtype def test_dat_convert_int_float(self, dset): "Explicit float type should override NumPy's default choice of int." @@ -909,7 +909,7 @@ def test_mixed_dat_illegal_arg(self): def test_mixed_dat_illegal_dtype(self, set): """Constructing a MixedDat from Dats of different dtype should fail.""" with pytest.raises(exceptions.DataValueError): - op2.MixedDat((op2.Dat(set, dtype=np.int), op2.Dat(set))) + op2.MixedDat((op2.Dat(set, dtype=np.int32), op2.Dat(set, dtype=np.float64))) def test_mixed_dat_dats(self, dats): """Constructing a MixedDat from an iterable of Dats should leave them @@ -1303,22 +1303,22 @@ def test_global_dim_list(self): def test_global_float(self): "Data type for float data should be numpy.float64." g = op2.Global(1, 1.0) - assert g.dtype == np.double + assert g.dtype == np.asarray(1.0).dtype def test_global_int(self): "Data type for int data should be numpy.int." g = op2.Global(1, 1) - assert g.dtype == np.int + assert g.dtype == np.asarray(1).dtype def test_global_convert_int_float(self): "Explicit float type should override NumPy's default choice of int." - g = op2.Global(1, 1, 'double') + g = op2.Global(1, 1, dtype=np.float64) assert g.dtype == np.float64 def test_global_convert_float_int(self): "Explicit int type should override NumPy's default choice of float." - g = op2.Global(1, 1.5, 'int') - assert g.dtype == np.int + g = op2.Global(1, 1.5, dtype=np.int64) + assert g.dtype == np.int64 def test_global_illegal_dtype(self): "Illegal data type should raise DataValueError." diff --git a/test/unit/test_caching.py b/test/unit/test_caching.py index 11a6c343c..f3c68e0ef 100644 --- a/test/unit/test_caching.py +++ b/test/unit/test_caching.py @@ -34,7 +34,6 @@ import pytest import numpy -import random from pyop2 import op2, base from coffee.base import * @@ -99,15 +98,13 @@ def y(dindset): @pytest.fixture def iter2ind1(iterset, indset): - u_map = numpy.array(list(range(nelems)), dtype=numpy.uint32) - random.shuffle(u_map, _seed) + u_map = numpy.array(list(range(nelems)), dtype=numpy.uint32)[::-1] return op2.Map(iterset, indset, 1, u_map, "iter2ind1") @pytest.fixture def iter2ind2(iterset, indset): - u_map = numpy.array(list(range(nelems)) * 2, dtype=numpy.uint32) - random.shuffle(u_map, _seed) + u_map = numpy.array(list(range(nelems)) * 2, dtype=numpy.uint32)[::-1] return op2.Map(iterset, indset, 2, u_map, "iter2ind2") diff --git a/test/unit/test_callables.py b/test/unit/test_callables.py index c42e19c97..98be8ff0f 100644 --- a/test/unit/test_callables.py +++ b/test/unit/test_callables.py @@ -75,7 +75,7 @@ def test_inverse_callable(self, zero_mat, inv_mat): loopy.set_caching_enabled(False) k = loopy.make_kernel( - ["{[i,j] : 0 <= i,j < 2}"], + ["{ : }"], """ B[:,:] = inverse(A[:,:]) """, @@ -99,7 +99,7 @@ def test_solve_callable(self, zero_vec, solve_mat, solve_vec): loopy.set_caching_enabled(False) k = loopy.make_kernel( - ["{[i,j] : 0 <= i,j < 2}"], + ["{ : }"], """ x[:] = solve(A[:,:], b[:]) """, diff --git a/test/unit/test_extrusion.py b/test/unit/test_extrusion.py index 8f84621db..dfae39b60 100644 --- a/test/unit/test_extrusion.py +++ b/test/unit/test_extrusion.py @@ -50,7 +50,7 @@ def compute_ind_extr(nums, map, lsize): count = 0 - ind = numpy.zeros(lsize, dtype=numpy.int) + ind = numpy.zeros(lsize, dtype=numpy.int32) len1 = len(mesh2d) for mm in range(lins): offset = 0 diff --git a/test/unit/test_indirect_loop.py b/test/unit/test_indirect_loop.py index 09347cfed..b1f4e3cbe 100644 --- a/test/unit/test_indirect_loop.py +++ b/test/unit/test_indirect_loop.py @@ -34,7 +34,6 @@ import pytest import numpy as np -import random from pyop2 import op2 from pyop2.exceptions import MapValueError @@ -81,8 +80,7 @@ def x2(indset): @pytest.fixture def mapd(): mapd = list(range(nelems)) - random.shuffle(mapd, lambda: 0.02041724) - return mapd + return mapd[::-1] @pytest.fixture diff --git a/test/unit/test_subset.py b/test/unit/test_subset.py index 156ae9b7b..a2c2f9f9b 100644 --- a/test/unit/test_subset.py +++ b/test/unit/test_subset.py @@ -57,7 +57,7 @@ class TestSubSet: def test_direct_loop(self, iterset): """Test a direct ParLoop on a subset""" - indices = np.array([i for i in range(nelems) if not i % 2], dtype=np.int) + indices = np.array([i for i in range(nelems) if not i % 2], dtype=np.int32) ss = op2.Subset(iterset, indices) d = op2.Dat(iterset ** 1, data=None, dtype=np.uint32) @@ -77,8 +77,8 @@ def test_direct_loop_empty(self, iterset): def test_direct_complementary_subsets(self, iterset): """Test direct par_loop over two complementary subsets""" - even = np.array([i for i in range(nelems) if not i % 2], dtype=np.int) - odd = np.array([i for i in range(nelems) if i % 2], dtype=np.int) + even = np.array([i for i in range(nelems) if not i % 2], dtype=np.int32) + odd = np.array([i for i in range(nelems) if i % 2], dtype=np.int32) sseven = op2.Subset(iterset, even) ssodd = op2.Subset(iterset, odd) @@ -91,8 +91,8 @@ def test_direct_complementary_subsets(self, iterset): def test_direct_complementary_subsets_with_indexing(self, iterset): """Test direct par_loop over two complementary subsets""" - even = np.arange(0, nelems, 2, dtype=np.int) - odd = np.arange(1, nelems, 2, dtype=np.int) + even = np.arange(0, nelems, 2, dtype=np.int32) + odd = np.arange(1, nelems, 2, dtype=np.int32) sseven = iterset(even) ssodd = iterset(odd) @@ -104,16 +104,16 @@ def test_direct_complementary_subsets_with_indexing(self, iterset): assert (d.data == 1).all() def test_direct_loop_sub_subset(self, iterset): - indices = np.arange(0, nelems, 2, dtype=np.int) + indices = np.arange(0, nelems, 2, dtype=np.int32) ss = op2.Subset(iterset, indices) - indices = np.arange(0, nelems//2, 2, dtype=np.int) + indices = np.arange(0, nelems//2, 2, dtype=np.int32) sss = op2.Subset(ss, indices) d = op2.Dat(iterset ** 1, data=None, dtype=np.uint32) k = op2.Kernel("static void inc(unsigned int* v) { *v += 1; }", "inc") op2.par_loop(k, sss, d(op2.RW)) - indices = np.arange(0, nelems, 4, dtype=np.int) + indices = np.arange(0, nelems, 4, dtype=np.int32) ss2 = op2.Subset(iterset, indices) d2 = op2.Dat(iterset ** 1, data=None, dtype=np.uint32) op2.par_loop(k, ss2, d2(op2.RW)) @@ -121,16 +121,16 @@ def test_direct_loop_sub_subset(self, iterset): assert (d.data == d2.data).all() def test_direct_loop_sub_subset_with_indexing(self, iterset): - indices = np.arange(0, nelems, 2, dtype=np.int) + indices = np.arange(0, nelems, 2, dtype=np.int32) ss = iterset(indices) - indices = np.arange(0, nelems//2, 2, dtype=np.int) + indices = np.arange(0, nelems//2, 2, dtype=np.int32) sss = ss(indices) d = op2.Dat(iterset ** 1, data=None, dtype=np.uint32) k = op2.Kernel("static void inc(unsigned int* v) { *v += 1; }", "inc") op2.par_loop(k, sss, d(op2.RW)) - indices = np.arange(0, nelems, 4, dtype=np.int) + indices = np.arange(0, nelems, 4, dtype=np.int32) ss2 = iterset(indices) d2 = op2.Dat(iterset ** 1, data=None, dtype=np.uint32) op2.par_loop(k, ss2, d2(op2.RW)) @@ -139,7 +139,7 @@ def test_direct_loop_sub_subset_with_indexing(self, iterset): def test_indirect_loop(self, iterset): """Test a indirect ParLoop on a subset""" - indices = np.array([i for i in range(nelems) if not i % 2], dtype=np.int) + indices = np.array([i for i in range(nelems) if not i % 2], dtype=np.int32) ss = op2.Subset(iterset, indices) indset = op2.Set(2, "indset") @@ -167,7 +167,7 @@ def test_indirect_loop_empty(self, iterset): def test_indirect_loop_with_direct_dat(self, iterset): """Test a indirect ParLoop on a subset""" - indices = np.array([i for i in range(nelems) if not i % 2], dtype=np.int) + indices = np.array([i for i in range(nelems) if not i % 2], dtype=np.int32) ss = op2.Subset(iterset, indices) indset = op2.Set(2, "indset") @@ -185,8 +185,8 @@ def test_indirect_loop_with_direct_dat(self, iterset): def test_complementary_subsets(self, iterset): """Test par_loop on two complementary subsets""" - even = np.array([i for i in range(nelems) if not i % 2], dtype=np.int) - odd = np.array([i for i in range(nelems) if i % 2], dtype=np.int) + even = np.array([i for i in range(nelems) if not i % 2], dtype=np.int32) + odd = np.array([i for i in range(nelems) if i % 2], dtype=np.int32) sseven = op2.Subset(iterset, even) ssodd = op2.Subset(iterset, odd) @@ -216,7 +216,7 @@ def test_matrix(self): ss10 = op2.Subset(iterset, [1, 0]) indset = op2.Set(4) - dat = op2.Dat(idset ** 1, data=[0, 1], dtype=np.float) + dat = op2.Dat(idset ** 1, data=[0, 1], dtype=np.float64) map = op2.Map(iterset, indset, 4, [0, 1, 2, 3, 0, 1, 2, 3]) idmap = op2.Map(iterset, idset, 1, [0, 1]) sparsity = op2.Sparsity((indset, indset), (map, map)) @@ -253,3 +253,62 @@ def test_matrix(self): assert (mat01.values == mat.values).all() assert (mat10.values == mat.values).all() + + +class TestSetOperations: + + """ + Set operation tests + """ + + def test_set_set_operations(self): + """Test standard set operations between a set and itself""" + a = op2.Set(10) + u = a.union(a) + i = a.intersection(a) + d = a.difference(a) + s = a.symmetric_difference(a) + assert u is a + assert i is a + assert d._indices.size == 0 + assert s._indices.size == 0 + + def test_set_subset_operations(self): + """Test standard set operations between a set and a subset""" + a = op2.Set(10) + b = op2.Subset(a, np.array([2, 3, 5, 7], dtype=np.int32)) + u = a.union(b) + i = a.intersection(b) + d = a.difference(b) + s = a.symmetric_difference(b) + assert u is a + assert i is b + assert (d._indices == [0, 1, 4, 6, 8, 9]).all() + assert (s._indices == d._indices).all() + + def test_subset_set_operations(self): + """Test standard set operations between a subset and a set""" + a = op2.Set(10) + b = op2.Subset(a, np.array([2, 3, 5, 7], dtype=np.int32)) + u = b.union(a) + i = b.intersection(a) + d = b.difference(a) + s = b.symmetric_difference(a) + assert u is a + assert i is b + assert d._indices.size == 0 + assert (s._indices == [0, 1, 4, 6, 8, 9]).all() + + def test_subset_subset_operations(self): + """Test standard set operations between two subsets""" + a = op2.Set(10) + b = op2.Subset(a, np.array([2, 3, 5, 7], dtype=np.int32)) + c = op2.Subset(a, np.array([2, 4, 6, 8], dtype=np.int32)) + u = b.union(c) + i = b.intersection(c) + d = b.difference(c) + s = b.symmetric_difference(c) + assert (u._indices == [2, 3, 4, 5, 6, 7, 8]).all() + assert (i._indices == [2, ]).all() + assert (d._indices == [3, 5, 7]).all() + assert (s._indices == [3, 4, 5, 6, 7, 8]).all()