From a4593a3eb9051683d22b1d1aea2727c048d78a11 Mon Sep 17 00:00:00 2001
From: mrava87
Date: Fri, 23 Aug 2024 10:30:05 -0500
Subject: [PATCH 1/7] feat: added AndersonProximalGradient

---
 docs/source/api/index.rst           |   1 +
 pyproximal/optimization/__init__.py |   2 +
 pyproximal/optimization/primal.py   | 202 +++++++++++++++++++++++++++-
 pyproximal/proximal/Simplex.py      |   2 +-
 tutorials/twist.py                  |  34 +++--
 5 files changed, 229 insertions(+), 12 deletions(-)

diff --git a/docs/source/api/index.rst b/docs/source/api/index.rst
index 4000054..492124c 100755
--- a/docs/source/api/index.rst
+++ b/docs/source/api/index.rst
@@ -157,6 +157,7 @@ Primal
    AcceleratedProximalGradient
    ADMM
    ADMML2
+   AndersonProximalGradient
    GeneralizedProximalGradient
    HQS
    LinearizedADMM
diff --git a/pyproximal/optimization/__init__.py b/pyproximal/optimization/__init__.py
index 6dd04b7..f3beba8 100644
--- a/pyproximal/optimization/__init__.py
+++ b/pyproximal/optimization/__init__.py
@@ -11,6 +11,8 @@
     ProximalPoint                  Proximal point algorithm (or proximal min.)
     ProximalGradient               Proximal gradient algorithm
     AcceleratedProximalGradient    Accelerated Proximal gradient algorithm
+    AndersonProximalGradient       Proximal gradient algorithm with Anderson acceleration
+    GeneralizedProximalGradient    Generalized Proximal gradient algorithm
     HQS                            Half Quadratic Splitting
     ADMM                           Alternating Direction Method of Multipliers
     ADMML2                         ADMM with L2 misfit term
diff --git a/pyproximal/optimization/primal.py b/pyproximal/optimization/primal.py
index 1bee7e0..2a163b8 100644
--- a/pyproximal/optimization/primal.py
+++ b/pyproximal/optimization/primal.py
@@ -156,7 +156,7 @@ def ProximalGradient(proxf, proxg, x0, epsg=1.,
         Proximal operator of g function
     x0 : :obj:`numpy.ndarray`
         Initial vector
-    epsg : :obj:`float` or :obj:`np.ndarray`, optional
+    epsg : :obj:`float` or :obj:`numpy.ndarray`, optional
         Scaling factor of g function
     tau : :obj:`float` or :obj:`numpy.ndarray`, optional
         Positive scalar weight, which should satisfy the following condition
@@ -195,7 +195,7 @@ def ProximalGradient(proxf, proxg, x0, epsg=1.,
 
     Notes
     -----
-    The Proximal point algorithm can be expressed by the following recursion:
+    The Proximal gradient algorithm can be expressed by the following recursion:
 
     .. math::
 
@@ -359,6 +359,202 @@ def AcceleratedProximalGradient(proxf, proxg, x0, tau=None, beta=0.5,
                               callback=callback, show=show)
 
 
+def AndersonProximalGradient(proxf, proxg, x0, epsg=1.,
+                             tau=None, niter=10,
+                             nhistory=10, epsr=1e-10,
+                             safeguard=False, tol=None,
+                             callback=None, show=False):
+    r"""Proximal gradient with Anderson acceleration
+
+    Solves the following minimization problem using the Proximal
+    gradient algorithm with Anderson acceleration:
+
+    .. math::
+
+        \mathbf{x} = \argmin_\mathbf{x} f(\mathbf{x}) + \epsilon g(\mathbf{x})
+
+    where :math:`f(\mathbf{x})` is a smooth convex function with a uniquely
+    defined gradient and :math:`g(\mathbf{x})` is any convex function that
+    has a known proximal operator.
+
+    Parameters
+    ----------
+    proxf : :obj:`pyproximal.ProxOperator`
+        Proximal operator of f function (must have ``grad`` implemented)
+    proxg : :obj:`pyproximal.ProxOperator`
+        Proximal operator of g function
+    x0 : :obj:`numpy.ndarray`
+        Initial vector
+    epsg : :obj:`float` or :obj:`numpy.ndarray`, optional
+        Scaling factor of g function
+    tau : :obj:`float` or :obj:`numpy.ndarray`, optional
+        Positive scalar weight, which should satisfy the following condition
+        to guarantee convergence: :math:`\tau \in (0, 1/L]` where ``L`` is
+        the Lipschitz constant of :math:`\nabla f`. Unlike
+        :func:`pyproximal.optimization.primal.ProximalGradient`, no backtracking
+        is performed when ``tau=None``, so ``tau`` should always be provided.
+        Finally, note that :math:`\tau` can be chosen to be a vector
+        when dealing with problems with multiple right-hand-sides
+    niter : :obj:`int`, optional
+        Number of iterations of iterative scheme
+    nhistory : :obj:`int`, optional
+        Number of previous iterates to be kept in memory (used to compute
+        the scaling factors :math:`\alpha_k`)
+    epsr : :obj:`float`, optional
+        Scaling factor of the regularization added to
+        :math:`\mathbf{R}^{k,T} \mathbf{R}^{k}` before inversion
+    safeguard : :obj:`bool`, optional
+        Apply safeguarding strategy to the update (``True``) or not (``False``)
+    tol : :obj:`float`, optional
+        Tolerance on change of objective function (used as stopping criterion). If
+        ``tol=None``, run until ``niter`` is reached
+    callback : :obj:`callable`, optional
+        Function with signature (``callback(x)``) to call after each iteration
+        where ``x`` is the current model vector
+    show : :obj:`bool`, optional
+        Display iterations log
+
+    Returns
+    -------
+    x : :obj:`numpy.ndarray`
+        Inverted model
+
+    Notes
+    -----
+    The Proximal gradient algorithm with Anderson acceleration can be expressed by the
+    following recursion [1]_:
+
+    .. math::
+
+        m_k = \min(m, k)\\
+        \mathbf{g}^{k} = \mathbf{x}^{k} - \tau^k \nabla f(\mathbf{x}^k)\\
+        \mathbf{r}^{k} = \mathbf{g}^{k} - \mathbf{y}^{k}\\
+        \mathbf{G}^{k} = [\mathbf{g}^{k},..., \mathbf{g}^{k-m_k}]\\
+        \mathbf{R}^{k} = [\mathbf{r}^{k},..., \mathbf{r}^{k-m_k}]\\
+        \alpha_k = \frac{(\mathbf{R}^{k,T} \mathbf{R}^{k})^{-1} \mathbf{1}}
+        {\mathbf{1}^T (\mathbf{R}^{k,T} \mathbf{R}^{k})^{-1} \mathbf{1}}\\
+        \mathbf{y}^{k+1} = \mathbf{G}^{k} \alpha_k\\
+        \mathbf{x}^{k+1} = \prox_{\tau^{k+1} g}(\mathbf{y}^{k+1})
+
+    where :math:`m` equals ``nhistory``, :math:`k=1,2,...,n_{iter}`, :math:`\mathbf{y}^{0}=\mathbf{x}^{0}`,
+    :math:`\mathbf{y}^{1}=\mathbf{x}^{0} - \tau^0 \nabla f(\mathbf{x}^0)`,
+    :math:`\mathbf{x}^{1}=\prox_{\tau^0 g}(\mathbf{y}^{1})`, and
+    :math:`\mathbf{g}^{0}=\mathbf{y}^{1}`.
+
+    Refer to [1]_ for the guarded version of the algorithm (when ``safeguard=True``).
+
+    .. [1] Mai, V., and Johansson, M. "Anderson Acceleration of Proximal Gradient
+       Methods", 2020.
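+
+    A minimal usage sketch (for illustration only; the operators and the
+    step-size heuristic below are borrowed from the ``tutorials/twist.py``
+    example updated later in this patch):
+
+    .. code-block:: python
+
+        import numpy as np
+        import pylops
+        import pyproximal
+
+        A = np.random.randn(30, 50)
+        Aop = pylops.MatrixMult(A)
+        y = Aop @ np.random.rand(50)
+
+        l2 = pyproximal.proximal.L2(Op=Aop, b=y)  # smooth f (has grad)
+        l1 = pyproximal.proximal.L1()             # non-smooth g
+        tau = 0.95 / np.abs((Aop.H * Aop).eigs(1)[0])
+
+        xinv = pyproximal.optimization.primal.AndersonProximalGradient(
+            l2, l1, x0=np.zeros(50), tau=tau, epsg=5e-3,
+            niter=100, nhistory=5)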
+ + """ + # check if epgs is a vector + if np.asarray(epsg).size == 1.: + epsg = epsg * np.ones(niter) + epsg_print = str(epsg[0]) + else: + epsg_print = 'Multi' + + if show: + tstart = time.time() + print('Proximal Gradient with Anderson Acceleration \n' + '---------------------------------------------------------\n' + 'Proximal operator (f): %s\n' + 'Proximal operator (g): %s\n' + 'tau = %s\t\tepsg = %s\tniter = %d\n' + 'nhist = %d\tepsr = %s\n' + 'guard = %s\ttol = %s\n' % (type(proxf), type(proxg), + str(tau), epsg_print, niter, + nhistory, str(epsr), + str(safeguard), str(tol))) + head = ' Itn x[0] f g J=f+eps*g tau' + print(head) + + # initialize model + y = x0 - tau * proxf.grad(x0) + x = proxg.prox(y, epsg[0] * tau) + g = y.copy() + r = g - x0 + R, G = [g, ], [r, ] + pf = proxf(x) + pfg = np.inf + tolbreak = False + + # iterate + for iiter in range(niter): + + # update fix point + g = x - tau * proxf.grad(x) + r = g - y + + # update history vectors + R.insert(0, r) + G.insert(0, g) + if iiter >= nhistory - 1: + R.pop(-1) + G.pop(-1) + + # solve for alpha coefficients + Rstack = np.vstack(R) + Rinv = np.linalg.pinv(Rstack @ Rstack.T + epsr * np.linalg.norm(Rstack) ** 2) + ones = np.ones(min(nhistory, iiter + 2)) + Rinvones = Rinv @ ones + alpha = Rinvones / (ones[None] @ Rinvones) + + if not safeguard: + # update auxiliary variable + y = np.vstack(G).T @ alpha + + # update main variable + x = proxg.prox(y, epsg[iiter] * tau) + + else: + # update auxiliary variable + ytest = np.vstack(G).T @ alpha + + # update main variable + xtest = proxg.prox(ytest, epsg[iiter] * tau) + + # check if function is decreased, otherwise do basic PG step + pfold, pf = pf, proxf(xtest) + if pf <= pfold - tau * np.linalg.norm(proxf.grad(x)) ** 2 / 2: + y = ytest + x = xtest + else: + x = proxg.prox(g, epsg[iiter] * tau) + y = g + + # run callback + if callback is not None: + callback(x) + + # tolerance check: break iterations if overall + # objective does not decrease below tolerance + if tol is not None: + pfgold = pfg + pf, pg = proxf(x), proxg(x) + pfg = pf + np.sum(epsg[iiter] * pg) + if np.abs(1.0 - pfg / pfgold) < tol: + tolbreak = True + + # show iteration logger + if show: + if iiter < 10 or niter - iiter < 10 or iiter % (niter // 10) == 0: + if tol is None: + pf, pg = proxf(x), proxg(x) + pfg = pf + np.sum(epsg[iiter] * pg) + msg = '%6g %12.5e %10.3e %10.3e %10.3e %10.3e' % \ + (iiter + 1, np.real(to_numpy(x[0])) if x.ndim == 1 else np.real(to_numpy(x[0, 0])), + pf, pg, + pfg, + tau) + print(msg) + + # break if tolerance condition is met + if tolbreak: + break + + if show: + print('\nTotal time (s) = %.2f' % (time.time() - tstart)) + print('---------------------------------------------------------\n') + return x + + def GeneralizedProximalGradient(proxfs, proxgs, x0, tau, epsg=1., weights=None, eta=1., niter=10, @@ -390,7 +586,7 @@ def GeneralizedProximalGradient(proxfs, proxgs, x0, tau, Positive scalar weight, which should satisfy the following condition to guarantees convergence: :math:`\tau \in (0, 1/L]` where ``L`` is the Lipschitz constant of :math:`\sum_{i=1}^n \nabla f_i`. - epsg : :obj:`float` or :obj:`np.ndarray`, optional + epsg : :obj:`float` or :obj:`numpy.ndarray`, optional Scaling factor(s) of ``g`` function(s) weights : :obj:`float`, optional Weighting factors of ``g`` functions. Must sum to 1. 
diff --git a/pyproximal/proximal/Simplex.py b/pyproximal/proximal/Simplex.py
index c3d10a7..8850264 100644
--- a/pyproximal/proximal/Simplex.py
+++ b/pyproximal/proximal/Simplex.py
@@ -204,7 +204,7 @@ def Simplex(n, radius, dims=None, axis=-1, maxiter=100,
     Notes
     -----
     As the Simplex is an indicator function, the proximal operator corresponds
-    to its orthogonal projection (see :class:`pyprox.projection.SimplexProj`
+    to its orthogonal projection (see :class:`pyproximal.projection.SimplexProj`
     for details).
 
     Note that ``tau`` does not have effect for this proximal operator, any
diff --git a/tutorials/twist.py b/tutorials/twist.py
index 0d75c59..a6ce8a0 100644
--- a/tutorials/twist.py
+++ b/tutorials/twist.py
@@ -1,12 +1,13 @@
 r"""
-IHT, ISTA, FISTA, and TWIST for Compressive sensing
-===================================================
+IHT, ISTA, FISTA, AA-ISTA, and TWIST for Compressive sensing
+============================================================
 
-In this example we want to compare four popular solvers in compressive
-sensing problem, namely IHT, ISTA, FISTA, and TwIST. The first three can
+In this example we want to compare five popular solvers for a compressive
+sensing problem, namely IHT, ISTA, FISTA, AA-ISTA, and TwIST. The first three can
 be implemented using the same solver, namely the
 :py:class:`pyproximal.optimization.primal.ProximalGradient`, whilst the latter
-is implemented in :py:class:`pyproximal.optimization.primal.TwIST`.
+two are implemented using the :py:class:`pyproximal.optimization.primal.AndersonProximalGradient` and
+:py:class:`pyproximal.optimization.primal.TwIST` solvers, respectively.
 
 The IHT solver tries to solve the following unconstrained problem with a
 L0Ball regularization term:
@@ -48,7 +49,6 @@ def callback(x, pf, pg, eps, cost):
 A = np.random.randn(N, M)
 A = A / np.linalg.norm(A, axis=0)
 Aop = pylops.MatrixMult(A)
-Aop.explicit = False  # temporary solution whilst PyLops gets updated
 
 x = np.random.rand(M)
 x[x < 0.9] = 0
@@ -88,6 +88,17 @@ def callback(x, pf, pg, eps, cost):
                                  callback=lambda x: callback(x, l2, l1, eps, costf))
 niterf = len(costf)
 
+# Anderson accelerated ISTA
+l1 = pyproximal.proximal.L1()
+l2 = pyproximal.proximal.L2(Op=Aop, b=y)
+costa = []
+x_ander = \
+    pyproximal.optimization.primal.AndersonProximalGradient(l2, l1, tau=tau, x0=np.zeros(M),
+                                                            epsg=eps, niter=maxit,
+                                                            nhistory=5, show=False,
+                                                            callback=lambda x: callback(x, l2, l1, eps, costa))
+nitera = len(costa)
+
 # TWIST (Note that since the smallest eigenvalue is zero, we arbitrarily
 # choose a small value for the solver to converge stably)
 l1 = pyproximal.proximal.L1(sigma=eps)
@@ -111,6 +122,9 @@ def callback(x, pf, pg, eps, cost):
 m, s, b = ax.stem(x_fista, linefmt='--g', basefmt='--g',
                   markerfmt='go', label='FISTA')
 plt.setp(m, markersize=7)
+m, s, b = ax.stem(x_ander, linefmt='--m', basefmt='--m',
+                  markerfmt='mo', label='AA-ISTA')
+plt.setp(m, markersize=7)
 m, s, b = ax.stem(x_twist, linefmt='--b', basefmt='--b',
                   markerfmt='bo', label='TWIST')
 plt.setp(m, markersize=7)
@@ -118,9 +132,13 @@ def callback(x, pf, pg, eps, cost):
 ax.legend()
 plt.tight_layout()
 
+###############################################################################
+# Finally, let's compare the convergence behaviour of the different algorithms
+
 fig, ax = plt.subplots(1, 1, figsize=(8, 3))
 ax.semilogy(costi, 'r', lw=2, label=r'$x_{ISTA} (niter=%d)$' % niteri)
 ax.semilogy(costf, 'g', lw=2, label=r'$x_{FISTA} (niter=%d)$' % niterf)
+ax.semilogy(costa, 'm', lw=2, label=r'$x_{AA-ISTA} (niter=%d)$' % nitera)
 ax.semilogy(costt, 'b', lw=2, label=r'$x_{TWIST} (niter=%d)$' % maxit)
 ax.set_title('Cost function', size=15, fontweight='bold')
 ax.set_xlabel('Iteration')
@@ -134,6 +152,6 @@ def callback(x, pf, pg, eps, cost):
 # cost function since this is a constrained problem. This is however greatly influenced
 # by the fact that we assume exact knowledge of the number of non-zero coefficients.
 # When this information is not available, IHT may become suboptimal. In this case the
-# FISTA solver should always be preferred (over ISTA) and TwIST represents an
-# alternative worth checking.
+# FISTA or AA-ISTA solvers should always be preferred (over ISTA) and TwIST represents
+# an alternative worth checking.

From f17bb1301b9920644d9c1b68ad38ac4f4dfa77f4 Mon Sep 17 00:00:00 2001
From: mrava87
Date: Fri, 23 Aug 2024 10:43:03 -0500
Subject: [PATCH 2/7] feat: added fungrad to Nonlinear

---
 pyproximal/proximal/Nonlinear.py | 12 ++++++++++++
 pytests/test_grads.py            |  1 +
 2 files changed, 13 insertions(+)

diff --git a/pyproximal/proximal/Nonlinear.py b/pyproximal/proximal/Nonlinear.py
index 4bf318c..4564732 100644
--- a/pyproximal/proximal/Nonlinear.py
+++ b/pyproximal/proximal/Nonlinear.py
@@ -14,6 +14,8 @@ class Nonlinear(ProxOperator):
 
     - ``fun``: a method evaluating the generic function :math:`f`
     - ``grad``: a method evaluating the gradient of the generic function
       :math:`f`
+    - ``fungrad``: a method evaluating both the generic function :math:`f`
+      and its gradient
     - ``optimize``: a method that solves the optimization problem associated
       with the proximal operator of :math:`f`. Note that the ``gradprox``
       method must be used (instead of ``grad``) as this will
@@ -58,6 +60,12 @@ class Nonlinear(ProxOperator):
     def _gradprox(self, x, tau):
         return self.grad(x) + 1. / tau * (x - self.y)
 
+    def _fungradprox(self, x, tau):
+        f, g = self.fungrad(x)
+        f = f + 1. / (2 * tau) * ((x - self.y) ** 2).sum()
+        g = g + 1. / tau * (x - self.y)
+        return f, g
+
     def fun(self, x):
         raise NotImplementedError('The method fun has not been implemented.'
                                   'Refer to the documentation for details on '
                                   'how to subclass this operator.')
     def grad(self, x):
         raise NotImplementedError('The method grad has not been implemented.'
                                   'Refer to the documentation for details on '
                                   'how to subclass this operator.')
+    def fungrad(self, x):
+        raise NotImplementedError('The method fungrad has not been implemented.'
+                                  'Refer to the documentation for details on '
+                                  'how to subclass this operator.')
     def optimize(self):
         raise NotImplementedError('The method optimize has not been implemented.'
                                   'Refer to the documentation for details on '
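
A hypothetical subclass sketch (illustration only, not part of the patch:
``QuadraticNL`` is a made-up name, and both the ``Nonlinear(x0)`` constructor
call and setting ``y`` by hand are assumptions based on the methods shown above):

    import numpy as np
    from pyproximal.proximal import Nonlinear

    class QuadraticNL(Nonlinear):
        # f(x) = 0.5 ||x||_2^2
        def fun(self, x):
            return 0.5 * np.sum(x ** 2)

        def grad(self, x):
            return x

        def fungrad(self, x):
            # one call returning both the value and the gradient
            return self.fun(x), self.grad(x)

    op = QuadraticNL(np.zeros(4))
    op.y = np.ones(4)  # assumption: prox pivot normally set inside prox()
    f, g = op._fungradprox(np.zeros(4), tau=1.)  # f = 2.0, g = -1 * ones(4)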
diff --git a/pytests/test_grads.py b/pytests/test_grads.py
index 88b8fd4..b121593 100644
--- a/pytests/test_grads.py
+++ b/pytests/test_grads.py
@@ -52,6 +52,7 @@ def test_l2(par):
                   raiseerror=True, atol=1e-3,
                   verb=False)
 
+
 @pytest.mark.parametrize("par", [(par1), (par2), (par1j), (par2j)])
 def test_lowrank(par):
     """LowRankFactorizedMatrix gradient

From 179064aaa4f92599eee5651f5b4468d3a022fbfd Mon Sep 17 00:00:00 2001
From: mrava87
Date: Fri, 23 Aug 2024 10:58:21 -0500
Subject: [PATCH 3/7] doc: increased iterations in twist tutorial

---
 tutorials/twist.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/tutorials/twist.py b/tutorials/twist.py
index a6ce8a0..c83414a 100644
--- a/tutorials/twist.py
+++ b/tutorials/twist.py
@@ -59,7 +59,7 @@ def callback(x, pf, pg, eps, cost):
 L = np.abs((Aop.H * Aop).eigs(1)[0])
 tau = 0.95 / L
 eps = 5e-3
-maxit = 100
+maxit = 300
 
 # IHT
 l0 = pyproximal.proximal.L0Ball(3)
@@ -99,6 +99,7 @@ def callback(x, pf, pg, eps, cost):
                                                             callback=lambda x: callback(x, l2, l1, eps, costa))
 nitera = len(costa)
 
+
 # TWIST (Note that since the smallest eigenvalue is zero, we arbitrarily
 # choose a small value for the solver to converge stably)
 l1 = pyproximal.proximal.L1(sigma=eps)
@@ -135,11 +136,11 @@ def callback(x, pf, pg, eps, cost):
 ###############################################################################
 # Finally, let's compare the convergence behaviour of the different algorithms
 
-fig, ax = plt.subplots(1, 1, figsize=(8, 3))
-ax.semilogy(costi, 'r', lw=2, label=r'$x_{ISTA} (niter=%d)$' % niteri)
-ax.semilogy(costf, 'g', lw=2, label=r'$x_{FISTA} (niter=%d)$' % niterf)
-ax.semilogy(costa, 'm', lw=2, label=r'$x_{AA-ISTA} (niter=%d)$' % nitera)
-ax.semilogy(costt, 'b', lw=2, label=r'$x_{TWIST} (niter=%d)$' % maxit)
+fig, ax = plt.subplots(1, 1, figsize=(8, 4))
+ax.loglog(costi, 'r', lw=2, label=r'$x_{ISTA} (niter=%d)$' % niteri)
+ax.loglog(costf, 'g', lw=2, label=r'$x_{FISTA} (niter=%d)$' % niterf)
+ax.loglog(costa, 'm', lw=2, label=r'$x_{AA-ISTA} (niter=%d)$' % nitera)
+ax.loglog(costt, 'b', lw=2, label=r'$x_{TWIST} (niter=%d)$' % maxit)
 ax.set_title('Cost function', size=15, fontweight='bold')
 ax.set_xlabel('Iteration')
 ax.legend()
@@ -154,4 +155,3 @@ def callback(x, pf, pg, eps, cost):
 # When this information is not available, IHT may become suboptimal. In this case the
 # FISTA or AA-ISTA solvers should always be preferred (over ISTA) and TwIST represents
 # an alternative worth checking.
-

From 9acab7e5982713edc35bf64d0b60011ea517e103 Mon Sep 17 00:00:00 2001
From: Matteo Ravasi
Date: Fri, 13 Sep 2024 22:00:53 +0300
Subject: [PATCH 4/7] Numpy v2 support (#188)

* build: remove requirement to use numpy<2.0.0
* build: remove python 3.8 from GA
* build: temporarily remove mac from GA

---
 .github/workflows/build.yaml |  8 ++++----
 .readthedocs.yaml            |  2 +-
 .travis.yml                  | 22 ----------------------
 azure-pipelines.yml          |  6 +++---
 environment-dev-arm.yml      |  4 ++--
 environment-dev.yml          |  4 ++--
 environment.yml              |  4 ++--
 pyproject.toml               |  4 ++--
 requirements-dev.txt         |  4 ++--
 requirements-doc.txt         | 20 ++++++++++++++++++++
 10 files changed, 38 insertions(+), 40 deletions(-)
 delete mode 100644 .travis.yml
 create mode 100644 requirements-doc.txt

diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index 8a6de9d..624bcdc 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -6,18 +6,18 @@ jobs:
   build:
     strategy:
       matrix:
-        platform: [ ubuntu-latest, macos-latest ]
-        python-version: ["3.8", "3.9", "3.10", "3.11"]
+        platform: [ ubuntu-latest ]  # temporarily removed macos-latest because of different behavior with numpy v2
+        python-version: ["3.9", "3.10", "3.11"]
 
     runs-on: ${{ matrix.platform }}
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
       - name: Get history and tags for SCM versioning to work
        run: |
          git fetch --prune --unshallow
          git fetch --depth=1 origin +refs/tags/*:refs/tags/*
      - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
diff --git a/.readthedocs.yaml b/.readthedocs.yaml
index 7edf0cb..364e88d 100644
--- a/.readthedocs.yaml
+++ b/.readthedocs.yaml
@@ -18,6 +18,6 @@ sphinx:
 # Declare the Python requirements required to build your docs
 python:
   install:
-    - requirements: requirements-dev.txt
+    - requirements: requirements-doc.txt
     - method: pip
       path: .
\ No newline at end of file
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index cb1179a..0000000
--- a/.travis.yml
+++ /dev/null
@@ -1,22 +0,0 @@
-language: python
-python:
-- '3.8'
-os:
-- linux
-script:
-- pip3 install --upgrade setuptools
-- pip3 install -r requirements-dev.txt
-- pip3 install coverage codacy-coverage
-- pip3 install .
-- python3 setup.py test
-deploy:
-  provider: pypi
-  user: __token__
-  distributions: sdist bdist_wheel
-  skip_existing: true
-  skip_upload_docs: true
-  skip_cleanup: true
-  on:
-    tags: true
-  password:
-    secure: Er4noQo2sMKhtvtheKvvKKOfj8uNEhpjhBRf54y0S5e3HNK1U38nMZuKT7GXRwCDjPXzWRF6oZdlAyi1SRB0hPGtT4Ucm31k6qAEzi9Ph43T9BNIMIuIcEIwY3X2D9g5ySHyfEYLgGNAYzxttiKIhMPOk7vafUMsFUSh5ldZbY/ykSpcB8DiLKw+Z6+AV9pM5YTFn1Djn4pfC88G7tzFySw+b8BL9d8/hLAWmw70Kczh20l86zIOFV/CaW6ph9irssx9nrxz7W3Kb6YFl/QaOh34mEC0ZKoiz6LMdNAGX0RI0iCwtPHdUlPi2qSXXHLvLgqPXKjoqc2bTI19n39EBoSnIveyTYP1wUj3jRLG5pqeFr/Bo2Ti8By8Hye8Iqqdx2PT5wR8bWLiy+M1FMkeJjMT/ZvUudy00gg+7J/xRutBWhRmk2bZt6aCBG0NwpAVoN9UqssoXcYwFmRcOpleGNHo6Wi/0Rg59oAlN0PI+SCWMOcW1veKbsOgSi6nXisffgnZEsFWfEVZB0sLGMqFtBLAmY56PPKbEKqJXDZZ2MRcHDiZB0YfczewCeKdlPiRIQlpLDPu/xaUCiEAnvFZS1EKkpZ9F8/gpJey3e4UPUL+PPmAyMNA/4yBM5xrSjKVl09wuyzqvyTktBGDjaMHvZw0kwnysrmsdP5p30Rf3y8=
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 5e7d229..cdd0fa1 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -25,7 +25,7 @@ jobs:
 #   steps:
 #   - task: UsePythonVersion@0
 #     inputs:
-#       versionSpec: '3.8'
+#       versionSpec: '3.9'
 #       architecture: 'x64'
 #
 #   - script: |
@@ -54,7 +54,7 @@ jobs:
   steps:
   - task: UsePythonVersion@0
     inputs:
-      versionSpec: '3.8'
+      versionSpec: '3.9'
       architecture: 'x64'
 
   - script: |
@@ -83,7 +83,7 @@ jobs:
   steps:
   - task: UsePythonVersion@0
     inputs:
-      versionSpec: '3.8'
+      versionSpec: '3.9'
       architecture: 'x64'
 
   - script: |
diff --git a/environment-dev-arm.yml b/environment-dev-arm.yml
index 6e2f743..9402b0f 100644
--- a/environment-dev-arm.yml
+++ b/environment-dev-arm.yml
@@ -5,8 +5,8 @@ channels:
   - numba
 dependencies:
   - python>=3.8.12
-  - numpy>=1.15.0, <2.0.0
-  - scipy>=1.8.0
+  - numpy>=1.21.0
+  - scipy>=1.11.0
   - pylops>=2.0.0
   - scikit-image
   - matplotlib
diff --git a/environment-dev.yml b/environment-dev.yml
index 3855772..8095e9c 100644
--- a/environment-dev.yml
+++ b/environment-dev.yml
@@ -5,8 +5,8 @@ channels:
   - numba
 dependencies:
   - python>=3.8.12
-  - numpy>=1.15.0, <2.0.0
-  - scipy>=1.8.0
+  - numpy>=1.21.0
+  - scipy>=1.11.0
   - pylops>=2.0.0
   - scikit-image
  - matplotlib
diff --git a/environment.yml b/environment.yml
index 4ca4350..8f14a35 100644
--- a/environment.yml
+++ b/environment.yml
@@ -3,6 +3,6 @@ channels:
   - defaults
 dependencies:
   - python>=3.8.12
-  - numpy>=1.15.0, <2.0.0
-  - scipy>=1.8.0
+  - numpy>=1.21.0
+  - scipy>=1.11.0
   - pylops>=2.0.0
diff --git a/pyproject.toml b/pyproject.toml
index 0b61eea..2d49437 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -30,8 +30,8 @@ classifiers = [
     "Topic :: Scientific/Engineering :: Mathematics",
 ]
 dependencies = [
-    "numpy >= 1.15.0, <2.0.0",
-    "scipy >= 1.8.0",
+    "numpy >= 1.21.0",
+    "scipy >= 1.11.0",
     "pylops >= 2.0.0",
 ]
 dynamic = ["version"]
diff --git a/requirements-dev.txt b/requirements-dev.txt
index ad0b3f5..b762f5c 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -1,5 +1,5 @@
-numpy>=1.15.0, <2.0.0
-scipy>=1.8.0
+numpy>=1.21.0
+scipy>=1.11.0
 pylops>=2.0.0
 numba
 scikit-image
diff --git a/requirements-doc.txt b/requirements-doc.txt
new file mode 100644
index 0000000..b762f5c
--- /dev/null
+++ b/requirements-doc.txt
@@ -0,0 +1,20 @@
+numpy>=1.21.0
+scipy>=1.11.0
+pylops>=2.0.0
+numba
+scikit-image
+matplotlib
+ipython
+bm4d<4.2.4  # temporary as gclib problem arises in readthedocs
+bm3d<4.0.2  # temporary as gclib problem arises in readthedocs
+pytest
+pytest-runner
+setuptools_scm
+docutils<0.18
+Sphinx
+sphinx-gallery
+pydata-sphinx-theme
+numpydoc
+nbsphinx
+image
+sphinxemoji
\ No newline at end of file
From 660bce700167c0fa2475f5375aec0c817292699f Mon Sep 17 00:00:00 2001
From: mrava87
Date: Tue, 10 Dec 2024 16:39:22 +0300
Subject: [PATCH 5/7] doc: small change in definition of Huber prox

---
 pyproximal/proximal/Huber.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyproximal/proximal/Huber.py b/pyproximal/proximal/Huber.py
index 21ba6cb..c0b6ec6 100644
--- a/pyproximal/proximal/Huber.py
+++ b/pyproximal/proximal/Huber.py
@@ -32,8 +32,8 @@ class Huber(ProxOperator):
 
     .. math::
 
-        \prox^*_{\tau H_\alpha(\cdot)}(\mathbf{x}) =
-        \left( 1 - \frac{\tau}{\max\{\|\mathbf{x}\|_2, \tau\} + \alpha} \right) \mathbf{x}
+        \prox_{\tau H_\alpha(\cdot)}(\mathbf{x}) =
+        \left( 1 - \frac{\tau}{\max\{\|\mathbf{x}\|_2, \tau + \alpha\}} \right) \mathbf{x}
 
     """
     def __init__(self, alpha):

From 38d865090bc3eeb3f32cb9cfca648384a0bac578 Mon Sep 17 00:00:00 2001
From: mrava87
Date: Wed, 11 Dec 2024 09:42:59 +0300
Subject: [PATCH 6/7] feat: added HuberCircular

---
 docs/source/api/index.rst         |  1 +
 pyproximal/optimization/primal.py |  2 +-
 pyproximal/proximal/Huber.py      | 79 ++++++++++++++++++++++++++++---
 pyproximal/proximal/__init__.py   |  3 ++-
 pytests/test_norms.py             | 20 +++++++-
 5 files changed, 95 insertions(+), 10 deletions(-)

diff --git a/docs/source/api/index.rst b/docs/source/api/index.rst
index 492124c..4ca4c77 100755
--- a/docs/source/api/index.rst
+++ b/docs/source/api/index.rst
@@ -66,6 +66,7 @@ Convex
    EuclideanBall
    Hankel
    Huber
+   HuberCircular
    Intersection
    L0
    L0Ball
diff --git a/pyproximal/optimization/primal.py b/pyproximal/optimization/primal.py
index 2a163b8..c2c6dcb 100644
--- a/pyproximal/optimization/primal.py
+++ b/pyproximal/optimization/primal.py
@@ -345,7 +345,7 @@ def AcceleratedProximalGradient(proxf, proxg, x0, tau=None, beta=0.5,
     r"""Accelerated Proximal gradient
 
     This is a thin wrapper around :func:`pyproximal.optimization.primal.ProximalGradient` with
-    ``vandenberghe`` or ``fista``acceleration. See :func:`pyproximal.optimization.primal.ProximalGradient`
+    ``vandenberghe`` or ``fista`` acceleration. See :func:`pyproximal.optimization.primal.ProximalGradient`
     for details.
 
     """
diff --git a/pyproximal/proximal/Huber.py b/pyproximal/proximal/Huber.py
index c0b6ec6..cc32aad 100644
--- a/pyproximal/proximal/Huber.py
+++ b/pyproximal/proximal/Huber.py
@@ -1,14 +1,78 @@
 import numpy as np
 
 from pyproximal.ProxOperator import _check_tau
-from pyproximal.projection import BoxProj
 from pyproximal import ProxOperator
+from pyproximal.proximal import L2, L1
 
 
 class Huber(ProxOperator):
     r"""Huber norm proximal operator.
 
-    Proximal operator of the Huber norm defined as:
+    Proximal operator of the Huber norm defined as
+    :math:`H_\alpha(\mathbf{x}) = \sum_i H_\alpha(x_i)` where:
+
+    .. math::
+
+        H_\alpha(x_i) =
+        \begin{cases}
+        \frac{|x_i|^2}{2 \alpha}, & |x_i| \leq \alpha \\
+        |x_i| - \frac{\alpha}{2}, & |x_i| > \alpha
+        \end{cases}
+
+    which behaves like an :math:`\ell_2` norm for :math:`|x_i| \leq \alpha` and an
+    :math:`\ell_1` norm for :math:`|x_i| > \alpha`.
+
+    Parameters
+    ----------
+    alpha : :obj:`float`
+        Huber parameter
+
+    Notes
+    -----
+    The Huber proximal operator is defined as:
+
+    .. math::
+
+        \prox_{\tau H_\alpha(\cdot)}(\mathbf{x}) =
+        \begin{cases}
+        \prox_{\frac{\tau}{2 \alpha} |x_i|^2}(x_i), & |x_i| \leq \alpha \\
+        \prox_{\tau |x_i|}(x_i), & |x_i| > \alpha
+        \end{cases}
+
+    """
+    def __init__(self, alpha):
+        super().__init__(None, False)
+        self.alpha = alpha
+        self.l2 = L2(sigma=1. / self.alpha)
+        self.l1 = L1()
+
+    def __call__(self, x):
+        h = np.zeros_like(x)
+        xabs = np.abs(x)
+        mask = xabs > self.alpha
+        h[~mask] = xabs[~mask] ** 2 / (2. * self.alpha)
+        h[mask] = xabs[mask] - self.alpha / 2.
+        return np.sum(h)
+
+    @_check_tau
+    def prox(self, x, tau):
+        y = np.zeros_like(x)
+        xabs = np.abs(x)
+        mask = xabs > self.alpha
+        y[~mask] = self.l2.prox(x[~mask], tau)
+        y[mask] = self.l1.prox(x[mask], tau)
+        # alternative from https://math.stackexchange.com/questions/1650411/
+        # proximal-operator-of-the-huber-loss-function... currently commented
+        # as it does not provide the same result
+        # y = (1. - tau / np.maximum(np.abs(x), tau + self.alpha)) * x
+        return y
+
+
+class HuberCircular(ProxOperator):
+    r"""Circular Huber norm proximal operator.
+
+    Proximal operator of the Circular Huber norm defined as:
 
     .. math::
 
         H_\alpha(\mathbf{x}) =
         \begin{cases}
         \frac{\|\mathbf{x}\|_2^2}{2 \alpha}, & \|\mathbf{x}\|_2 \leq \alpha \\
         \|\mathbf{x}\|_2 - \frac{\alpha}{2}, & \|\mathbf{x}\|_2 > \alpha \\
         \end{cases}
 
-    which behaves like a :math:`\ell_2` norm for :math:`|x_i| \leq \alpha` and a
-    :math:`\ell_1` norm for :math:`|x_i| < \alpha`.
+    which behaves like an :math:`\ell_2` norm for :math:`\|\mathbf{x}\|_2 \leq \alpha` and an
+    :math:`\ell_1` norm for :math:`\|\mathbf{x}\|_2 > \alpha`.
 
     Parameters
     ----------
     alpha : :obj:`float`
         Huber parameter
 
     Notes
     -----
-    The Huber proximal operator is defined as:
+    The Circular Huber proximal operator is defined as [1]_:
 
     .. math::
 
         \prox_{\tau H_\alpha(\cdot)}(\mathbf{x}) =
         \left( 1 - \frac{\tau}{\max\{\|\mathbf{x}\|_2, \tau + \alpha\}} \right) \mathbf{x}
 
+    .. [1] O'Donoghue, B., Stathopoulos, G., and Boyd, S. "A Splitting Method
+       for Optimal Control", IEEE Transactions on Control Systems Technology, 2013.
+
     """
     def __init__(self, alpha):
         super().__init__(None, False)
@@ -51,4 +118,4 @@ def __call__(self, x):
 
     @_check_tau
     def prox(self, x, tau):
         x = (1. - tau / max(np.linalg.norm(x), tau + self.alpha)) * x
-        return x
\ No newline at end of file
+        return x
diff --git a/pyproximal/proximal/__init__.py b/pyproximal/proximal/__init__.py
index 26ef53f..22d803b 100644
--- a/pyproximal/proximal/__init__.py
+++ b/pyproximal/proximal/__init__.py
@@ -22,6 +22,7 @@
     L21                  L2,1 Norm
     L21_plus_L1          L2,1 + L1 mixed-norm
     Huber                Huber Norm
+    HuberCircular        Circular Huber Norm
     TV                   Total Variation Norm
     RelaxedMumfordShah   Relaxed Mumford Shah Norm
     Nuclear              Nuclear Norm
@@ -69,7 +70,7 @@
 __all__ = ['Box', 'Simplex', 'Intersection', 'AffineSet', 'Quadratic',
            'Euclidean', 'EuclideanBall', 'L0', 'L0Ball', 'L01Ball',
            'L1', 'L1Ball', 'L2',
-           'L2Convolve', 'L21', 'L21_plus_L1', 'Huber', 'TV', 'RelaxedMumfordShah',
+           'L2Convolve', 'L21', 'L21_plus_L1', 'Huber', 'HuberCircular', 'TV', 'RelaxedMumfordShah',
            'Nuclear', 'NuclearBall', 'Orthogonal', 'VStack', 'Nonlinear', 'SCAD',
            'Log', 'Log1', 'ETP', 'Geman', 'QuadraticEnvelopeCard',
            'SingularValuePenalty', 'QuadraticEnvelopeCardIndicator',
           'QuadraticEnvelopeRankL2',
diff --git a/pytests/test_norms.py b/pytests/test_norms.py
index 104fe1f..4f0725b 100644
--- a/pytests/test_norms.py
+++ b/pytests/test_norms.py
@@ -5,8 +5,8 @@
 from pylops.basicoperators import Identity, Diagonal, MatrixMult, FirstDerivative
 
 from pyproximal.utils import moreau
-from pyproximal.proximal import Box, Euclidean, L2, L1, L21, L21_plus_L1, \
-    Huber, Nuclear, RelaxedMumfordShah, TV
+from pyproximal.proximal import Euclidean, L2, L1, L21, L21_plus_L1, \
+    Huber, HuberCircular, Nuclear, RelaxedMumfordShah, TV
 
 par1 = {'nx': 10, 'sigma': 1., 'dtype': 'float32'}  # even float32
 par2 = {'nx': 11, 'sigma': 2., 'dtype': 'float64'}  # odd float64
@@ -182,8 +182,24 @@ def test_Huber(par):
     """
     hub = Huber(alpha=par['sigma'])
 
+    # norm
+    x = np.random.uniform(0., 0.1 * par['sigma'], par['nx']).astype(par['dtype'])
+    assert hub(x) == np.sum(x ** 2) / (2 * par['sigma'])
+
+    # prox / dualprox
+    tau = 2.
+    assert moreau(hub, x, tau)
+
+
+@pytest.mark.parametrize("par", [(par1), (par2)])
+def test_HuberCircular(par):
+    """Circular Huber norm and proximal/dual proximal
+    """
+    hub = HuberCircular(alpha=par['sigma'])
+
     # norm
     x = np.random.uniform(0., 0.1, par['nx']).astype(par['dtype'])
+    x = (0.1 * par['sigma']) * x / np.linalg.norm(x)  # to ensure that it is smaller than sigma
     assert hub(x) == np.linalg.norm(x) ** 2 / (2 * par['sigma'])
 
     # prox / dualprox

From f69c50030001e2cfaa037429da1600cc05b7eebc Mon Sep 17 00:00:00 2001
From: mrava87
Date: Wed, 11 Dec 2024 10:03:45 +0300
Subject: [PATCH 7/7] doc: prepare for release v0.10.0

---
 CHANGELOG.md              | 21 +++++++++++++++------
 docs/source/changelog.rst | 11 +++++++++++
 2 files changed, 26 insertions(+), 6 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 327925b..406545f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,14 +1,23 @@
 Changelog
 =========
 
+# 0.10.0
+
+* Added ``pyproximal.optimization.primal.AndersonProximalGradient`` solver
+* Added ``pyproximal.proximal.HuberCircular`` operator
+* Added `fungrad` method to ``pyproximal.proximal.Nonlinear``
+* Modified ``pyproximal.proximal.Huber`` operator, which was previously
+  erroneously implemented using the definition of the Circular Huber norm
+
+
 # 0.9.0
 
-* Added :py:class:`pyproximal.optimization.palm.iPALM` solver
-* Added :py:func:`pyproximal.optimization.palm._backtracking` method to be used when `gammaf=None` and/or `gammag=None`
-* Added :py:func:`pyproximal.utils.gradtest.gradtest_proximal` and :py:func:`pyproximal.utils.gradtest.gradtest_bilinear` methods
-* Added `tol` to :py:class:`pyproximal.optimization.primal.ProximalPoint` and
-  :py:class:`pyproximal.optimization.primal.ProximalGradient` solvers
-* Modified :py:class:`pyproximal.ProxOperator.precomposition` to allow `b` being also a vector
+* Added ``pyproximal.optimization.palm.iPALM`` solver
+* Added ``pyproximal.optimization.palm._backtracking`` method to be used when `gammaf=None` and/or `gammag=None`
+* Added ``pyproximal.utils.gradtest.gradtest_proximal`` and ``pyproximal.utils.gradtest.gradtest_bilinear`` methods
+* Added `tol` to ``pyproximal.optimization.primal.ProximalPoint`` and
+  ``pyproximal.optimization.primal.ProximalGradient`` solvers
+* Modified ``pyproximal.ProxOperator.precomposition`` to allow `b` to also be a vector
 
 # 0.8.0
diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst
index a0fc72b..e878bc4 100644
--- a/docs/source/changelog.rst
+++ b/docs/source/changelog.rst
@@ -3,6 +3,17 @@ Changelog
 =========
 
+Version 0.10.0
+--------------
+*Released on: 11/12/2024*
+
+* Added :py:func:`pyproximal.optimization.primal.AndersonProximalGradient` solver
+* Added :py:class:`pyproximal.proximal.HuberCircular` operator
+* Added `fungrad` method to :py:class:`pyproximal.proximal.Nonlinear`
+* Modified :py:class:`pyproximal.proximal.Huber` operator, which was previously
+  erroneously implemented using the definition of the Circular Huber norm
+
+
 Version 0.9.0
 --------------
 *Released on: 16/08/2024*
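
A quick numerical sketch of the behavioural change recorded in the last
changelog entry (arbitrary example values; they follow directly from the
definitions introduced in PATCH 6):

    import numpy as np
    from pyproximal.proximal import Huber, HuberCircular

    x = np.array([0.1, 3.0])
    alpha = 1.0

    # elementwise Huber: sum of per-component penalties
    # 0.1 ** 2 / (2 * 1.0) + (3.0 - 1.0 / 2) = 2.505
    print(Huber(alpha)(x))

    # circular Huber: a single penalty on the Euclidean norm of x
    # np.sqrt(0.1 ** 2 + 3.0 ** 2) - 1.0 / 2 ~= 2.5017
    print(HuberCircular(alpha)(x))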