From 5412f0de4ce4597c922abb69a8e1f9d0dcfb15d7 Mon Sep 17 00:00:00 2001 From: mrava87 Date: Mon, 18 Nov 2024 09:29:37 +0300 Subject: [PATCH] feat: ToCupy operator --- docs/source/api/index.rst | 2 + docs/source/gpu.rst | 109 ++++++++++++++++++++++++++--- pylops/basicoperators/__init__.py | 4 ++ pylops/basicoperators/blockdiag.py | 22 +++++- pylops/basicoperators/hstack.py | 23 +++++- pylops/basicoperators/tocupy.py | 60 ++++++++++++++++ pylops/basicoperators/vstack.py | 23 +++++- pylops/optimization/cls_basic.py | 21 ++++-- pylops/utils/backend.py | 44 ++++++++++++ 9 files changed, 284 insertions(+), 24 deletions(-) create mode 100644 pylops/basicoperators/tocupy.py diff --git a/docs/source/api/index.rst b/docs/source/api/index.rst index 59e40e51..da9beb36 100755 --- a/docs/source/api/index.rst +++ b/docs/source/api/index.rst @@ -62,6 +62,8 @@ Basic operators Real Imag Conj + ToCupy + Smoothing and derivatives ~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/docs/source/gpu.rst b/docs/source/gpu.rst index ecc87120..a409532b 100755 --- a/docs/source/gpu.rst +++ b/docs/source/gpu.rst @@ -409,11 +409,21 @@ Geophysical subsurface characterization: when ``explicit=False``. -Example -------- +Examples +-------- + +Finally, let's briefly look at some example. + +End-to-end GPU powered inverse problems +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Finally, let's briefly look at an example. First we write a code snippet using -``numpy`` arrays which PyLops will run on your CPU: +First we consider the most common scenario when both the model and data +vectors fit onto the GPU memory. We can therefore simply replace all our +``numpy`` arrays with ``cupy`` arrays and solve the inverse problem of +interest end-to-end on the GPU. + +Let's first write a code snippet using ``numpy`` arrays, which PyLops +will run on your CPU: .. code-block:: python @@ -421,11 +431,14 @@ Finally, let's briefly look at an example. First we write a code snippet using G = np.random.normal(0, 1, (ny, nx)).astype(np.float32) x = np.ones(nx, dtype=np.float32) + # Create operator Gop = MatrixMult(G, dtype='float32') - y = Gop * x + + # Create data and invert + y = Gop @ x xest = Gop / y -Now we write a code snippet using ``cupy`` arrays which PyLops will run on +Now we write a code snippet using ``cupy`` arrays, which PyLops will run on your GPU: .. code-block:: python @@ -434,8 +447,11 @@ your GPU: G = cp.random.normal(0, 1, (ny, nx)).astype(np.float32) x = cp.ones(nx, dtype=np.float32) + # Create operator Gop = MatrixMult(G, dtype='float32') - y = Gop * x + + # Create data and invert + y = Gop @ x xest = Gop / y The code is almost unchanged apart from the fact that we now use ``cupy`` arrays, @@ -450,15 +466,90 @@ your GPU/TPU: G = jnp.array(np.random.normal(0, 1, (ny, nx)).astype(np.float32)) x = jnp.ones(nx, dtype=np.float32) + # Create operator Gop = JaxOperator(MatrixMult(G, dtype='float32')) - y = Gop * x + + # Create data and invert + y = Gop @ x xest = Gop / y # Adjoint via AD xadj = Gop.rmatvecad(x, y) +Again, the code is almost unchanged apart from the fact that we now use ``jax`` arrays. + + +Mixed CPU-GPU powered inverse problems +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Let us now consider a more intricate scenario where we have acess to +a GPU-powered operator, however the model and/or data vectors are too large +to fit onto the memory. + +For the sake of clarity, we consider a problem where +the operator can be written as a :class:`pylops.basicoperators.BlockDiag` of +PyLops operators. Note how, by simply sandwitching any of the GPU-powered +operator within two :class:`pylops.basicoperators.ToCupy` operators, we are +able to tell PyLops to transfer to the GPU only the part of the model vector +required by a given operator and transfer back the output to the CPU before +forming the combine output vector (i.e., the output vector of the +:class:`pylops.basicoperators.BlockDiag`) + +.. code-block:: python + + nops, n = 5, 4 + Ms = [np.diag((i + 1) * np.ones(n, dtype=dtype)) \ + for i in range(nops)] + Ms = [M.T @ M for M in Ms] + + # Create operator + Mops = [] + for iop in range(nops): + Mop = MatrixMult(cp.asarray(Ms[iop], dtype=dtype)) + Top = ToCupy(Mop.dims, dtype=dtype) + Top1 = ToCupy(Mop.dimsd, dtype=dtype) + Mop = Top1.H @ Mop @ Top + Mops.append(Mop) + Mops = BlockDiag(Mops, forceflat=True) + + # Create data and invert + x = np.ones(n * nops, dtype=dtype) + y = Mops @ x.ravel() + xest = Mops / y + + +Finally, let us consider a problem where +the operator can be written as a :class:`pylops.basicoperators.VStack` of +PyLops operators and the model vector can be fully transferred to the GPU. +We can use again the :class:`pylops.basicoperators.ToCupy` operator, however this +time we will only use it to move the output of each operator to the CPU. +Since we are now in a special scenario, where the input of the overall +operator sits on the GPU and the output on the +CPU, we need to inform the :class:`pylops.basicoperators.VStack` operator about this. +This can be easily done using the additional ``inoutengine`` parameter. Let's +see this with an example: + +.. code-block:: python + + nops, n, m = 3, 4, 5 + Ms = [np.random.normal(0, 1, (n, m)) for _ in range(nops)] + + # Create operator + Mops = [] + for iop in range(nops): + Mop = MatrixMult(cp.asarray(Ms[iop]), dtype=dtype) + Top1 = ToCupy(Mop.dimsd, dtype=dtype) + Mop = Top1.H @ Mop + Mops.append(Mop) + Mops = VStack(Mops, inoutengine=("numpy", "cupy")) + + # Create data and invert + x = cp.ones(m, dtype=dtype) + y = Mops @ x.ravel() + xest = pylops_cgls(Mops, y, x0=cp.zeros_like(x))[0] + +**Note:**: this feature is currently not available for ``jax`` arrays. -Again, the code is almost unchanged apart from the fact that we now use ``jax`` arrays, .. note:: diff --git a/pylops/basicoperators/__init__.py b/pylops/basicoperators/__init__.py index d654e50d..5d3a3d3e 100755 --- a/pylops/basicoperators/__init__.py +++ b/pylops/basicoperators/__init__.py @@ -38,6 +38,7 @@ Gradient Gradient. FirstDirectionalDerivative First Directional derivative. SecondDirectionalDerivative Second Directional derivative. + ToCupy Convert to CuPy. """ from .functionoperator import * @@ -72,6 +73,8 @@ from .laplacian import * from .gradient import * from .directionalderivative import * +from .tocupy import * + __all__ = [ "FunctionOperator", @@ -107,4 +110,5 @@ "Gradient", "FirstDirectionalDerivative", "SecondDirectionalDerivative", + "ToCupy", ] diff --git a/pylops/basicoperators/blockdiag.py b/pylops/basicoperators/blockdiag.py index 166ae137..4d9fbc36 100644 --- a/pylops/basicoperators/blockdiag.py +++ b/pylops/basicoperators/blockdiag.py @@ -21,7 +21,7 @@ from pylops import LinearOperator from pylops.basicoperators import MatrixMult -from pylops.utils.backend import get_array_module, inplace_set +from pylops.utils.backend import get_array_module, get_module, inplace_set from pylops.utils.typing import DTypeLike, NDArray @@ -48,6 +48,12 @@ class BlockDiag(LinearOperator): .. versionadded:: 2.2.0 Force an array to be flattened after matvec and rmatvec. + inoutengine : :obj:`tuple`, optional + .. versionadded:: 2.4.0 + + Type of output vectors of `matvec` and `rmatvec. If ``None``, this is + inferred directly from the input vectors. Note that this is ignored + if ``nproc>1``. dtype : :obj:`str`, optional Type of elements in input array. @@ -113,6 +119,7 @@ def __init__( ops: Sequence[LinearOperator], nproc: int = 1, forceflat: bool = None, + inoutengine: Optional[tuple] = None, dtype: Optional[DTypeLike] = None, ) -> None: self.ops = ops @@ -149,6 +156,7 @@ def __init__( if self.nproc > 1: self.pool = mp.Pool(processes=nproc) + self.inoutengine = inoutengine dtype = _get_dtype(ops) if dtype is None else np.dtype(dtype) clinear = all([getattr(oper, "clinear", True) for oper in self.ops]) super().__init__( @@ -172,7 +180,11 @@ def nproc(self, nprocnew: int) -> None: self._nproc = nprocnew def _matvec_serial(self, x: NDArray) -> NDArray: - ncp = get_array_module(x) + ncp = ( + get_array_module(x) + if self.inoutengine is None + else get_module(self.inoutengine[0]) + ) y = ncp.zeros(self.nops, dtype=self.dtype) for iop, oper in enumerate(self.ops): y = inplace_set( @@ -183,7 +195,11 @@ def _matvec_serial(self, x: NDArray) -> NDArray: return y def _rmatvec_serial(self, x: NDArray) -> NDArray: - ncp = get_array_module(x) + ncp = ( + get_array_module(x) + if self.inoutengine is None + else get_module(self.inoutengine[1]) + ) y = ncp.zeros(self.mops, dtype=self.dtype) for iop, oper in enumerate(self.ops): y = inplace_set( diff --git a/pylops/basicoperators/hstack.py b/pylops/basicoperators/hstack.py index b71e8723..4cd450d5 100644 --- a/pylops/basicoperators/hstack.py +++ b/pylops/basicoperators/hstack.py @@ -21,7 +21,7 @@ from pylops import LinearOperator from pylops.basicoperators import MatrixMult -from pylops.utils.backend import get_array_module, inplace_add, inplace_set +from pylops.utils.backend import get_array_module, get_module, inplace_add, inplace_set from pylops.utils.typing import NDArray @@ -48,6 +48,12 @@ class HStack(LinearOperator): .. versionadded:: 2.2.0 Force an array to be flattened after matvec. + inoutengine : :obj:`tuple`, optional + .. versionadded:: 2.4.0 + + Type of output vectors of `matvec` and `rmatvec. If ``None``, this is + inferred directly from the input vectors. Note that this is ignored + if ``nproc>1``. dtype : :obj:`str`, optional Type of elements in input array. @@ -112,6 +118,7 @@ def __init__( ops: Sequence[LinearOperator], nproc: int = 1, forceflat: bool = None, + inoutengine: Optional[tuple] = None, dtype: Optional[str] = None, ) -> None: self.ops = ops @@ -139,6 +146,8 @@ def __init__( self.pool = None if self.nproc > 1: self.pool = mp.Pool(processes=nproc) + + self.inoutengine = inoutengine dtype = _get_dtype(self.ops) if dtype is None else np.dtype(dtype) clinear = all([getattr(oper, "clinear", True) for oper in self.ops]) super().__init__( @@ -162,7 +171,11 @@ def nproc(self, nprocnew: int): self._nproc = nprocnew def _matvec_serial(self, x: NDArray) -> NDArray: - ncp = get_array_module(x) + ncp = ( + get_array_module(x) + if self.inoutengine is None + else get_module(self.inoutengine[0]) + ) y = ncp.zeros(self.nops, dtype=self.dtype) for iop, oper in enumerate(self.ops): y = inplace_add( @@ -173,7 +186,11 @@ def _matvec_serial(self, x: NDArray) -> NDArray: return y def _rmatvec_serial(self, x: NDArray) -> NDArray: - ncp = get_array_module(x) + ncp = ( + get_array_module(x) + if self.inoutengine is None + else get_module(self.inoutengine[1]) + ) y = ncp.zeros(self.mops, dtype=self.dtype) for iop, oper in enumerate(self.ops): y = inplace_set( diff --git a/pylops/basicoperators/tocupy.py b/pylops/basicoperators/tocupy.py new file mode 100644 index 00000000..8a3a7c6b --- /dev/null +++ b/pylops/basicoperators/tocupy.py @@ -0,0 +1,60 @@ +__all__ = ["ToCupy"] + +from typing import Union + +import numpy as np + +from pylops import LinearOperator +from pylops.utils._internal import _value_or_sized_to_tuple +from pylops.utils.backend import to_cupy, to_numpy +from pylops.utils.typing import DTypeLike, InputDimsLike, NDArray + + +class ToCupy(LinearOperator): + r"""Convert to CuPy. + + Convert an input array to CuPy in forward mode, + and convert back to NumPy in adjoint mode. + + Parameters + ---------- + dims : :obj:`list` or :obj:`int` + Number of samples for each dimension + dtype : :obj:`str`, optional + Type of elements in input array. + name : :obj:`str`, optional + Name of operator (to be used by :func:`pylops.utils.describe.describe`) + + Attributes + ---------- + shape : :obj:`tuple` + Operator shape + explicit : :obj:`bool` + Operator contains a matrix that can be solved explicitly + (``True``) or not (``False``) + + Notes + ----- + The ToCupy operator is a special operator that does not perform + any transformation on the input arrays other than converting + them from NumPy to CuPy. This operator can be used when one + is interested to create a chain of operators where only one + (or some of them) act on CuPy arrays, whilst other operate + on NumPy arrays. + + """ + + def __init__( + self, + dims: Union[int, InputDimsLike], + dtype: DTypeLike = "float64", + name: str = "C", + ) -> None: + dims = _value_or_sized_to_tuple(dims) + super().__init__(dtype=np.dtype(dtype), dims=dims, dimsd=dims, name=name) + + def _matvec(self, x: NDArray) -> NDArray: + return to_cupy(x) + + def _rmatvec(self, x: NDArray) -> NDArray: + return to_numpy(x) diff --git a/pylops/basicoperators/vstack.py b/pylops/basicoperators/vstack.py index 0d66642e..55341411 100644 --- a/pylops/basicoperators/vstack.py +++ b/pylops/basicoperators/vstack.py @@ -21,7 +21,7 @@ from pylops import LinearOperator from pylops.basicoperators import MatrixMult -from pylops.utils.backend import get_array_module, inplace_add, inplace_set +from pylops.utils.backend import get_array_module, get_module, inplace_add, inplace_set from pylops.utils.typing import DTypeLike, NDArray @@ -48,6 +48,12 @@ class VStack(LinearOperator): .. versionadded:: 2.2.0 Force an array to be flattened after rmatvec. + inoutengine : :obj:`tuple`, optional + .. versionadded:: 2.4.0 + + Type of output vectors of `matvec` and `rmatvec. If ``None``, this is + inferred directly from the input vectors. Note that this is ignored + if ``nproc>1``. dtype : :obj:`str`, optional Type of elements in input array. @@ -112,6 +118,7 @@ def __init__( ops: Sequence[LinearOperator], nproc: int = 1, forceflat: bool = None, + inoutengine: Optional[tuple] = None, dtype: Optional[DTypeLike] = None, ) -> None: self.ops = ops @@ -139,6 +146,8 @@ def __init__( self.pool = None if self.nproc > 1: self.pool = mp.Pool(processes=nproc) + + self.inoutengine = inoutengine dtype = _get_dtype(self.ops) if dtype is None else np.dtype(dtype) clinear = all([getattr(oper, "clinear", True) for oper in self.ops]) super().__init__( @@ -162,7 +171,11 @@ def nproc(self, nprocnew: int): self._nproc = nprocnew def _matvec_serial(self, x: NDArray) -> NDArray: - ncp = get_array_module(x) + ncp = ( + get_array_module(x) + if self.inoutengine is None + else get_module(self.inoutengine[0]) + ) y = ncp.zeros(self.nops, dtype=self.dtype) for iop, oper in enumerate(self.ops): y = inplace_set( @@ -171,7 +184,11 @@ def _matvec_serial(self, x: NDArray) -> NDArray: return y def _rmatvec_serial(self, x: NDArray) -> NDArray: - ncp = get_array_module(x) + ncp = ( + get_array_module(x) + if self.inoutengine is None + else get_module(self.inoutengine[1]) + ) y = ncp.zeros(self.mops, dtype=self.dtype) for iop, oper in enumerate(self.ops): y = inplace_add( diff --git a/pylops/optimization/cls_basic.py b/pylops/optimization/cls_basic.py index 2e848fcf..b7c98e0a 100644 --- a/pylops/optimization/cls_basic.py +++ b/pylops/optimization/cls_basic.py @@ -10,7 +10,12 @@ import numpy as np from pylops.optimization.basesolver import Solver -from pylops.utils.backend import get_array_module, to_numpy +from pylops.utils.backend import ( + get_array_module, + to_cupy_conditional, + to_numpy, + to_numpy_conditional, +) from pylops.utils.typing import NDArray if TYPE_CHECKING: @@ -131,10 +136,10 @@ def step(self, x: NDArray, show: bool = False) -> NDArray: Updated model vector """ - Opc = self.Op.matvec(self.c) + Opc = self.Op.matvec(to_cupy_conditional(x, self.c)) cOpc = self.ncp.abs(self.c.dot(Opc.conj())) a = self.kold / cOpc - x += a * self.c + x += to_cupy_conditional(x, a) * to_cupy_conditional(x, self.c) self.r -= a * Opc k = self.ncp.abs(self.r.dot(self.r.conj())) b = k / self.kold @@ -386,7 +391,7 @@ def step(self, x: NDArray, show: bool = False) -> NDArray: self.q.dot(self.q.conj()) + self.damp * self.c.dot(self.c.conj()) ) x = x + a * self.c - self.s = self.s - a * self.q + self.s = self.s - to_numpy_conditional(self.q, a) * self.q r = self.Op.rmatvec(self.s) - self.damp * x k = self.ncp.abs(r.dot(r.conj())) b = k / self.kold @@ -773,7 +778,9 @@ def step(self, x: NDArray, show: bool = False) -> NDArray: # next beta, u, alfa, v. These satisfy the relations # beta*u = Op*v - alfa*u, # alfa*v = Op'*u - beta*v' - self.u = self.Op.matvec(self.v) - self.alfa * self.u + self.u = ( + self.Op.matvec(self.v) - to_numpy_conditional(self.u, self.alfa) * self.u + ) self.beta = self.ncp.linalg.norm(self.u) if self.beta > 0: self.u = self.u / self.beta @@ -812,7 +819,9 @@ def step(self, x: NDArray, show: bool = False) -> NDArray: self.w = self.v + self.t2 * self.w self.ddnorm = self.ddnorm + self.ncp.linalg.norm(self.dk) ** 2 if self.calc_var: - self.var = self.var + self.ncp.dot(self.dk, self.dk) + self.var = self.var + to_numpy_conditional( + self.var, self.ncp.dot(self.dk, self.dk) + ) # use a plane rotation on the right to eliminate the # super-diagonal element (theta) of the upper-bidiagonal matrix. diff --git a/pylops/utils/backend.py b/pylops/utils/backend.py index 9b7d3b2d..31836265 100644 --- a/pylops/utils/backend.py +++ b/pylops/utils/backend.py @@ -17,8 +17,10 @@ "get_sp_fft", "get_complex_dtype", "get_real_dtype", + "to_cupy", "to_numpy", "to_cupy_conditional", + "to_numpy_conditional", "inplace_set", "inplace_add", "inplace_multiply", @@ -484,6 +486,26 @@ def get_real_dtype(dtype: DTypeLike) -> DTypeLike: return np.real(np.ones(1, dtype)).dtype +def to_cupy(x: NDArray) -> NDArray: + """Convert x to cupy array + + Parameters + ---------- + x : :obj:`numpy.ndarray` or :obj:`cupy.ndarray` + Array to evaluate + + Returns + ------- + x : :obj:`numpy.ndarray` + Converted array + + """ + if deps.cupy_enabled: + if cp.get_array_module(x) == np: + x = cp.asarray(x) + return x + + def to_numpy(x: NDArray) -> NDArray: """Convert x to numpy array @@ -527,6 +549,28 @@ def to_cupy_conditional(x: npt.ArrayLike, y: npt.ArrayLike) -> NDArray: return y +def to_numpy_conditional(x: npt.ArrayLike, y: npt.ArrayLike) -> NDArray: + """Convert y to numpy array conditional to x being a numpy array + + Parameters + ---------- + x : :obj:`numpy.ndarray` or :obj:`cupy.ndarray` + Array to evaluate + y : :obj:`numpy.ndarray` + Array to convert + + Returns + ------- + y : :obj:`cupy.ndarray` + Converted array + + """ + if deps.cupy_enabled: + if cp.get_array_module(x) == np and cp.get_array_module(y) == cp: + y = cp.asnumpy(y) + return y + + def inplace_set(x: npt.ArrayLike, y: npt.ArrayLike, idx: list) -> NDArray: """Perform inplace set based on input