From 4bbca383813bdb03f64395c2d18b486227388b88 Mon Sep 17 00:00:00 2001
From: Jesse Livezey
Date: Thu, 1 Apr 2021 18:55:56 -0700
Subject: [PATCH 1/7] index masks

---
 pyuoi/linear_model/logistic.py | 34 ++++++++++++++++++++++++++++++----
 1 file changed, 30 insertions(+), 4 deletions(-)

diff --git a/pyuoi/linear_model/logistic.py b/pyuoi/linear_model/logistic.py
index 483b43b..12bb8fd 100644
--- a/pyuoi/linear_model/logistic.py
+++ b/pyuoi/linear_model/logistic.py
@@ -541,9 +541,10 @@ def _logistic_regression_path(X, y, Cs=48, fit_intercept=True,
     _, n_features = X.shape
 
     classes = np.unique(y)
+    n_classes = len(classes)
 
     if multi_class == 'auto':
-        if len(classes) > 2:
+        if n_classes > 2:
             multi_class = 'multinomial'
         else:
             multi_class = 'ovr'
@@ -618,7 +619,23 @@ def _logistic_regression_path(X, y, Cs=48, fit_intercept=True,
             w0 = w0.ravel()
 
             def func(x, *args):
-                return _multinomial_loss_grad(x, *args)[0:2]
+                X, Y, _, mask, _ = args
+                if mask is not None:
+                    x0 = np.zeros_like(w0)
+                    n_classes = Y.shape[1]
+                    n_samples, n_features = X.shape
+                    fit_intercept = (w0.size == n_classes * (n_features + 1))
+                    if fit_intercept:
+                        idxs = np.arange(n_features * n_classes, n_classes * (n_features + 1))
+                    else:
+                        idxs = np.array([], dtype=int)
+                    idxs = np.concatenate([np.nonzero(mask.ravel())[0], idxs])
+                    x0[idxs] = x
+                    f, df = _multinomial_loss_grad(x0, *args)[0:2]
+                    df = df[idxs]
+                else:
+                    f, df = _multinomial_loss_grad(x, *args)[0:2]
+                return f, df
 
         else:
             w0 = w0.T.ravel().copy()
@@ -647,10 +664,19 @@ def func(x, g, *args):
         iprint = [-1, 50, 1, 100, 101][
             np.searchsorted(np.array([0, 1, 2, 3]), verbose)]
         if penalty == 'l2':
-            w0, loss, info = optimize.fmin_l_bfgs_b(
-                func, w0, fprime=None,
+            fit_intercept = (w0.size == n_classes * (n_features + 1))
+            if fit_intercept:
+                idxs = np.arange(n_features * n_classes, n_classes * (n_features + 1))
+            else:
+                idxs = np.array([], dtype=int)
+            idxs = np.concatenate([np.nonzero(coef_mask.ravel())[0], idxs])
+            wp = w0[idxs]
+            wp, loss, info = optimize.fmin_l_bfgs_b(
+                func, wp, fprime=None,
                 args=(X, target, 1. / C, coef_mask, sample_weight),
                 iprint=iprint, pgtol=tol, maxiter=max_iter)
+            w0 = np.zeros_like(w0)
+            w0[idxs] = wp
         else:
             zeros_seen = [0]
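
The patch above replaces multiply-by-mask with index packing: only the unmasked coefficients (plus the intercept entries) are handed to L-BFGS, and the packed solution is scattered back into the full-length parameter vector. A standalone sketch of that gather/scatter pattern (the helper name masked_minimize and the toy quadratic loss are illustrative, not part of the patch):

    import numpy as np
    from scipy import optimize

    def masked_minimize(loss_grad, w0, mask):
        # Optimize only the entries of w0 where mask is True; the rest stay 0.
        # loss_grad(w) must return (loss, grad) for the full-length w.
        idxs = np.nonzero(mask.ravel())[0]

        def packed(wp):
            w = np.zeros_like(w0)
            w[idxs] = wp                 # scatter packed params to full vector
            f, g = loss_grad(w)
            return f, g[idxs]            # gather gradient back to packed shape

        wp, f, info = optimize.fmin_l_bfgs_b(packed, w0[idxs])
        w = np.zeros_like(w0)
        w[idxs] = wp
        return w

    # Toy check: minimize ||w - 1||^2 with half the entries masked out.
    loss_grad = lambda w: (np.sum((w - 1.) ** 2), 2. * (w - 1.))
    mask = np.array([True, False, True, False, True, False])
    w = masked_minimize(loss_grad, np.zeros(6), mask)
    # Masked entries come back exactly 0; unmasked entries converge to 1.

The payoff is that the optimizer works in a space whose dimension is the number of surviving coefficients, instead of repeatedly computing gradients for entries pinned to zero.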
From 5620febb10954943d46266ce94c8a039396b3eb5 Mon Sep 17 00:00:00 2001
From: Jesse Livezey
Date: Fri, 2 Apr 2021 15:45:05 -0700
Subject: [PATCH 2/7] updates

---
 pyuoi/linear_model/logistic.py | 53 +++++++++++++++++++++++-----------
 1 file changed, 36 insertions(+), 17 deletions(-)

diff --git a/pyuoi/linear_model/logistic.py b/pyuoi/linear_model/logistic.py
index 12bb8fd..6290989 100644
--- a/pyuoi/linear_model/logistic.py
+++ b/pyuoi/linear_model/logistic.py
@@ -622,11 +622,9 @@ def func(x, *args):
                 X, Y, _, mask, _ = args
                 if mask is not None:
                     x0 = np.zeros_like(w0)
-                    n_classes = Y.shape[1]
-                    n_samples, n_features = X.shape
-                    fit_intercept = (w0.size == n_classes * (n_features + 1))
                     if fit_intercept:
-                        idxs = np.arange(n_features * n_classes, n_classes * (n_features + 1))
+                        idxs = np.arange(n_features * n_classes,
+                                         n_classes * (n_features + 1))
                     else:
                         idxs = np.array([], dtype=int)
                     idxs = np.concatenate([np.nonzero(mask.ravel())[0], idxs])
@@ -651,7 +649,21 @@ def func(x, g, *args):
     else:
         target = y_bin
         if penalty == 'l2':
-            func = _logistic_loss_and_grad
+            def func(x, *args):
+                X, Y, _, mask, _ = args
+                if mask is not None:
+                    x0 = np.zeros_like(w0)
+                    if fit_intercept:
+                        idxs = np.array([n_features], dtype=int)
+                    else:
+                        idxs = np.array([], dtype=int)
+                    idxs = np.concatenate([np.nonzero(mask.ravel())[0], idxs])
+                    x0[idxs] = x
+                    f, df = _logistic_loss_and_grad(x0, *args)
+                    df = df[idxs]
+                else:
+                    f, df = _logistic_loss_and_grad(x, *args)
+                return f, df
         else:
             def func(x, g, *args):
                 loss, grad = _logistic_loss_and_grad(x, *args)
@@ -664,19 +676,26 @@ def func(x, g, *args):
         iprint = [-1, 50, 1, 100, 101][
             np.searchsorted(np.array([0, 1, 2, 3]), verbose)]
         if penalty == 'l2':
-            fit_intercept = (w0.size == n_classes * (n_features + 1))
-            if fit_intercept:
-                idxs = np.arange(n_features * n_classes, n_classes * (n_features + 1))
+            if coef_mask is None:
+                w0, loss, info = optimize.fmin_l_bfgs_b(
+                    func, w0, fprime=None,
+                    args=(X, target, 1. / C, coef_mask, sample_weight),
+                    iprint=iprint, pgtol=tol, maxiter=max_iter)
             else:
-                idxs = np.array([], dtype=int)
-            idxs = np.concatenate([np.nonzero(coef_mask.ravel())[0], idxs])
-            wp = w0[idxs]
-            wp, loss, info = optimize.fmin_l_bfgs_b(
-                func, wp, fprime=None,
-                args=(X, target, 1. / C, coef_mask, sample_weight),
-                iprint=iprint, pgtol=tol, maxiter=max_iter)
-            w0 = np.zeros_like(w0)
-            w0[idxs] = wp
+                fit_intercept = (w0.size == n_classes * (n_features + 1))
+                if fit_intercept:
+                    idxs = np.arange(n_features * n_classes,
+                                     n_classes * (n_features + 1))
+                else:
+                    idxs = np.array([], dtype=int)
+                idxs = np.concatenate([np.nonzero(coef_mask.ravel())[0], idxs])
+                wp = w0[idxs]
+                wp, loss, info = optimize.fmin_l_bfgs_b(
+                    func, wp, fprime=None,
+                    args=(X, target, 1. / C, coef_mask, sample_weight),
+                    iprint=iprint, pgtol=tol, maxiter=max_iter)
+                w0 = np.zeros_like(w0)
+                w0[idxs] = wp
         else:
             zeros_seen = [0]
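
Patch 2 extends the same packing to the binary branch and adds a coef_mask-is-None fast path. For the binary case the flat parameter vector is laid out as [coef_0 .. coef_{p-1}, intercept], so the kept-index set is the nonzero mask entries plus, when fit_intercept is True, the final position. A small sketch of that bookkeeping (the helper name kept_indices is illustrative):

    import numpy as np

    def kept_indices(coef_mask, fit_intercept):
        # coef_mask: boolean array, shape (n_features,), for binary/OvR fits.
        idxs = np.nonzero(coef_mask.ravel())[0]
        if fit_intercept:
            # The intercept sits after the coefficients and is always kept.
            idxs = np.concatenate([idxs, [coef_mask.size]])
        return idxs

    mask = np.array([True, False, False, True])
    print(kept_indices(mask, fit_intercept=True))   # -> [0 3 4]
    print(kept_indices(mask, fit_intercept=False))  # -> [0 3]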
From 49620f94e1f85f91188d217134974506bb111e49 Mon Sep 17 00:00:00 2001
From: Jesse Livezey
Date: Mon, 5 Apr 2021 11:22:40 -0700
Subject: [PATCH 3/7] test and LR path with indexing

---
 pyuoi/datasets/__init__.py     |   5 +-
 pyuoi/linear_model/logistic.py |  57 +++----
 tests/test_uoi_l1logistic.py   | 298 ++++++++++++++++++++++++++++++++-
 3 files changed, 322 insertions(+), 38 deletions(-)

diff --git a/pyuoi/datasets/__init__.py b/pyuoi/datasets/__init__.py
index 45f4ae3..9d3ec6b 100644
--- a/pyuoi/datasets/__init__.py
+++ b/pyuoi/datasets/__init__.py
@@ -126,7 +126,10 @@ def make_classification(n_samples=100, n_features=20, n_informative=2,
     if isinstance(random_state, int):
         rng = np.random.RandomState(random_state)
     else:
-        rng = random_state
+        if random_state is None:
+            rng = np.random
+        else:
+            rng = random_state
 
     n_not_informative = n_features - n_informative
     X = rng.randn(n_samples, n_features)

diff --git a/pyuoi/linear_model/logistic.py b/pyuoi/linear_model/logistic.py
index 6290989..f355941 100644
--- a/pyuoi/linear_model/logistic.py
+++ b/pyuoi/linear_model/logistic.py
@@ -617,20 +617,16 @@ def _logistic_regression_path(X, y, Cs=48, fit_intercept=True,
         target = Y_multi
         if penalty == 'l2':
             w0 = w0.ravel()
+            if coef_mask is not None:
+                x0 = np.zeros_like(w0)
 
             def func(x, *args):
                 X, Y, _, mask, _ = args
                 if mask is not None:
-                    x0 = np.zeros_like(w0)
-                    if fit_intercept:
-                        idxs = np.arange(n_features * n_classes,
-                                         n_classes * (n_features + 1))
-                    else:
-                        idxs = np.array([], dtype=int)
-                    idxs = np.concatenate([np.nonzero(mask.ravel())[0], idxs])
-                    x0[idxs] = x
+                    x0[mask] = x
+                    args = args[:3] + (None,) + (args[-1],)
                     f, df = _multinomial_loss_grad(x0, *args)[0:2]
-                    df = df[idxs]
+                    df = df[mask]
                 else:
                     f, df = _multinomial_loss_grad(x, *args)[0:2]
                 return f, df
@@ -649,18 +645,15 @@ def func(x, g, *args):
     else:
         target = y_bin
         if penalty == 'l2':
+            x0 = np.zeros_like(w0)
+
             def func(x, *args):
                 X, Y, _, mask, _ = args
                 if mask is not None:
-                    x0 = np.zeros_like(w0)
-                    if fit_intercept:
-                        idxs = np.array([n_features], dtype=int)
-                    else:
-                        idxs = np.array([], dtype=int)
-                    idxs = np.concatenate([np.nonzero(mask.ravel())[0], idxs])
-                    x0[idxs] = x
+                    x0[mask] = x
+                    args = args[:3] + (None,) + (args[-1],)
                     f, df = _logistic_loss_and_grad(x0, *args)
-                    df = df[idxs]
+                    df = df[mask]
                 else:
                     f, df = _logistic_loss_and_grad(x, *args)
                 return f, df
@@ -682,20 +675,22 @@ def func(x, g, *args):
         iprint = [-1, 50, 1, 100, 101][
             np.searchsorted(np.array([0, 1, 2, 3]), verbose)]
         if penalty == 'l2':
-            fit_intercept = (w0.size == n_classes * (n_features + 1))
             if fit_intercept:
-                idxs = np.arange(n_features * n_classes,
-                                 n_classes * (n_features + 1))
+                if multi_class == 'multinomial':
+                    mask = np.concatenate([coef_mask,
+                                           np.ones(n_classes)[:, np.newaxis]], axis=1)
+                else:
+                    mask = np.concatenate([coef_mask, np.ones(1)])
             else:
-                idxs = np.array([], dtype=int)
-            idxs = np.concatenate([np.nonzero(coef_mask.ravel())[0], idxs])
-            wp = w0[idxs]
+                mask = coef_mask
+            mask = np.nonzero(mask.ravel())[0]
+            wp = w0[mask]
             wp, loss, info = optimize.fmin_l_bfgs_b(
                 func, wp, fprime=None,
-                args=(X, target, 1. / C, coef_mask, sample_weight),
+                args=(X, target, 1. / C, mask, sample_weight),
                 iprint=iprint, pgtol=tol, maxiter=max_iter)
             w0 = np.zeros_like(w0)
-            w0[idxs] = wp
+            w0[mask] = wp
         else:
             zeros_seen = [0]
@@ -734,16 +729,10 @@ def zero_coef(x, *args):
 
         if multi_class == 'multinomial':
             n_classes = max(2, classes.size)
             if penalty == 'l2':
-                multi_w0 = np.reshape(w0, (n_classes, -1))
+                w0 = np.reshape(w0, (n_classes, -1))
             else:
-                multi_w0 = np.reshape(w0, (-1, n_classes)).T
-            if coef_mask is not None:
-                multi_w0[:, :n_features] *= coef_mask
-            coefs.append(multi_w0.copy())
-        else:
-            if coef_mask is not None:
-                w0[:n_features] *= coef_mask
-            coefs.append(w0.copy())
+                w0 = np.reshape(w0, (-1, n_classes)).T
+        coefs.append(w0.copy())
 
         n_iter[i] = n_iter_i
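
In the hunk above, the multinomial parameters are stored as (n_classes, n_features + 1) before raveling, so the kept-index set gains one always-kept intercept column per class. A sketch of that index construction before the test diff below (the helper name and the boolean mask type are illustrative):

    import numpy as np

    def kept_indices_multinomial(coef_mask, fit_intercept):
        # coef_mask: boolean array, shape (n_classes, n_features).
        if fit_intercept:
            intercepts = np.ones((coef_mask.shape[0], 1), dtype=bool)
            mask = np.concatenate([coef_mask, intercepts], axis=1)
        else:
            mask = coef_mask
        return np.nonzero(mask.ravel())[0]

    mask = np.array([[True, False],
                     [False, True],
                     [False, False]])
    print(kept_indices_multinomial(mask, fit_intercept=True))  # -> [0 2 4 5 8]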
diff --git a/tests/test_uoi_l1logistic.py b/tests/test_uoi_l1logistic.py
index 397b551..fd751ea 100644
--- a/tests/test_uoi_l1logistic.py
+++ b/tests/test_uoi_l1logistic.py
@@ -1,18 +1,274 @@
-import pytest
+import pytest, numbers, warnings
 import numpy as np
 from numpy.testing import assert_array_equal, assert_allclose, assert_equal
 from scipy.sparse import rand as sprand
+from scipy import optimize
 
 from pyuoi import UoI_L1Logistic
 from pyuoi.linear_model.logistic import (fit_intercept_fixed_coef,
                                          MaskedCoefLogisticRegression,
-                                         LogisticInterceptFitterNoFeatures)
+                                         LogisticInterceptFitterNoFeatures,
+                                         _logistic_regression_path,
+                                         _multinomial_loss_grad,
+                                         _logistic_loss_and_grad)
 from sklearn.metrics import accuracy_score
-from sklearn.preprocessing import LabelEncoder
+from sklearn.preprocessing import LabelEncoder, OneHotEncoder
+from sklearn.utils import (compute_class_weight,
+                           check_consistent_length, check_array)
+from sklearn.exceptions import ConvergenceWarning
 
 from pyuoi.datasets import make_classification
+from pyuoi.lbfgs import fmin_lbfgs, AllZeroLBFGSError
+
+
+def _logistic_regression_path_old(X, y, Cs=48, fit_intercept=True,
+                                  max_iter=100, tol=1e-4, verbose=0, coef=None,
+                                  class_weight=None, penalty='l2',
+                                  multi_class='auto',
+                                  check_input=True,
+                                  sample_weight=None,
+                                  l1_ratio=None, coef_mask=None):
+    """Compute a Logistic Regression model for a list of regularization
+    parameters.
+
+    This is the original masking-based implementation, kept here so that
+    the new indexing-based version can be checked against it.
+
+    Parameters
+    ----------
+    X : array-like or sparse matrix, shape (n_samples, n_features)
+        Input data.
+    y : array-like, shape (n_samples,) or (n_samples, n_targets)
+        Input data, target values.
+    Cs : int | array-like, shape (n_cs,)
+        List of values for the regularization parameter or integer specifying
+        the number of regularization parameters that should be used. In this
+        case, the parameters will be chosen in a logarithmic scale between
+        1e-4 and 1e4.
+    fit_intercept : bool
+        Whether to fit an intercept for the model. In this case the shape of
+        the returned array is (n_cs, n_features + 1).
+    max_iter : int
+        Maximum number of iterations for the solver.
+    tol : float
+        Stopping criterion. For the newton-cg and lbfgs solvers, the iteration
+        will stop when ``max{|g_i | i = 1, ..., n} <= tol``
+        where ``g_i`` is the i-th component of the gradient.
+    verbose : int
+        For the liblinear and lbfgs solvers set verbose to any positive
+        number for verbosity.
+    coef : array-like, shape (n_features,), default None
+        Initialization value for coefficients of logistic regression.
+        Useless for liblinear solver.
+    class_weight : dict or 'balanced', optional
+        Weights associated with classes in the form ``{class_label: weight}``.
+        If not given, all classes are supposed to have weight one.
+        The "balanced" mode uses the values of y to automatically adjust
+        weights inversely proportional to class frequencies in the input data
+        as ``n_samples / (n_classes * np.bincount(y))``.
+        Note that these weights will be multiplied with sample_weight (passed
+        through the fit method) if sample_weight is specified.
+    multi_class : str, {'multinomial', 'auto'}, default: 'auto'
+        For 'multinomial' the loss minimised is the multinomial loss fit
+        across the entire probability distribution, *even when the data is
+        binary*. 'auto' selects binary if the data is binary
+        and otherwise selects 'multinomial'.
+    check_input : bool, default True
+        If False, the input arrays X and y will not be checked.
+    sample_weight : array-like, shape (n_samples,), optional
+        Array of weights that are assigned to individual samples.
+        If not provided, then each sample is given unit weight.
+    coef_mask : array-like, shape (n_features), (n_classes, n_features) optional
+        Masking array for coef.
+
+    Returns
+    -------
+    coefs : ndarray, shape (n_cs, n_features) or (n_cs, n_features + 1)
+        List of coefficients for the Logistic Regression model. If
+        fit_intercept is set to True then the second dimension will be
+        n_features + 1, where the last item represents the intercept. For
+        ``multi_class='multinomial'``, the shape is (n_classes, n_cs,
+        n_features) or (n_classes, n_cs, n_features + 1).
+    Cs : ndarray
+        Grid of Cs used for cross-validation.
+    n_iter : array, shape (n_cs,)
+        Actual number of iterations for each C.
+    """
+    if isinstance(Cs, numbers.Integral):
+        Cs = np.logspace(-4, 4, Cs)
+
+    # Preprocessing.
+    if check_input:
+        X = check_array(X, accept_sparse='csr', dtype=np.float64,
+                        accept_large_sparse=True)
+        y = check_array(y, ensure_2d=False, dtype=None)
+        check_consistent_length(X, y)
+    _, n_features = X.shape
+
+    classes = np.unique(y)
+
+    if multi_class == 'auto':
+        if len(classes) > 2:
+            multi_class = 'multinomial'
+        else:
+            multi_class = 'ovr'
+
+    # If sample weights exist, convert them to array (support for lists)
+    # and check length
+    # Otherwise set them to 1 for all examples
+    if sample_weight is not None:
+        sample_weight = np.array(sample_weight, dtype=X.dtype, order='C')
+        check_consistent_length(y, sample_weight)
+    else:
+        sample_weight = np.ones(X.shape[0], dtype=X.dtype)
+
+    # If class_weights is a dict (provided by the user), the weights
+    # are assigned to the original labels. If it is "balanced", then
+    # the class_weights are assigned after masking the labels with an OvR.
+    le = LabelEncoder()
+    if isinstance(class_weight, dict) or multi_class == 'multinomial':
+        class_weight_ = compute_class_weight(class_weight, classes=classes, y=y)
+        sample_weight *= class_weight_[le.fit_transform(y)]
+
+    # For doing an ovr, we need to mask the labels first. For the
+    # multinomial case this is not necessary.
+    if multi_class == 'ovr':
+        coef_size = n_features
+        w0 = np.zeros(n_features + int(fit_intercept), dtype=X.dtype)
+        mask_classes = np.array([-1, 1])
+        mask = (y == 1)
+        y_bin = np.ones(y.shape, dtype=X.dtype)
+        y_bin[~mask] = -1.
+        # for compute_class_weight
+
+        if class_weight == "balanced":
+            class_weight_ = compute_class_weight(class_weight,
+                                                 classes=mask_classes,
+                                                 y=y_bin)
+            sample_weight *= class_weight_[le.fit_transform(y_bin)]
+
+    else:
+        coef_size = classes.size * n_features
+        lbin = OneHotEncoder(categories=[range(classes.size)], sparse=False)
+        Y_multi = lbin.fit_transform(y[:, np.newaxis])
+        if Y_multi.shape[1] == 1:
+            Y_multi = np.hstack([1 - Y_multi, Y_multi])
+        w0 = np.zeros((classes.size, n_features + int(fit_intercept)),
+                      dtype=X.dtype)
+        w0[:, -1] = LogisticInterceptFitterNoFeatures(y,
+                                                      classes.size).intercept_
+
+    if coef is not None:
+        # it must work both giving the bias term and not
+        if multi_class == 'ovr':
+            if coef.size not in (n_features, w0.size):
+                raise ValueError(
+                    'Initialization coef is of shape %d, expected shape '
+                    '%d or %d' % (coef.size, n_features, w0.size))
+            w0[:coef.size] = coef
+        else:
+            w0[:, :coef.shape[1]] = coef
+
+    # Mask initial array
+    if coef_mask is not None:
+        if multi_class == 'ovr':
+            w0[:n_features] *= coef_mask
+        else:
+            w0[:, :n_features] *= coef_mask
+
+    if multi_class == 'multinomial':
+        # fmin_l_bfgs_b and newton-cg accepts only ravelled parameters.
+        target = Y_multi
+        if penalty == 'l2':
+            w0 = w0.ravel()
+
+            def func(x, *args):
+                return _multinomial_loss_grad(x, *args)[0:2]
+        else:
+            w0 = w0.T.ravel().copy()
+
+            def inner_func(x, *args):
+                return _multinomial_loss_grad(x, *args)[0:2]
+
+            def func(x, g, *args):
+                x = x.reshape(-1, classes.size).T.ravel()
+                loss, grad = inner_func(x, *args)
+                grad = grad.reshape(classes.size, -1).T.ravel()
+                g[:] = grad
+                return loss
+    else:
+        target = y_bin
+        if penalty == 'l2':
+            func = _logistic_loss_and_grad
+        else:
+            def func(x, g, *args):
+                loss, grad = _logistic_loss_and_grad(x, *args)
+                g[:] = grad
+                return loss
+
+    coefs = list()
+    n_iter = np.zeros(len(Cs), dtype=np.int32)
+    for i, C in enumerate(Cs):
+        iprint = [-1, 50, 1, 100, 101][
+            np.searchsorted(np.array([0, 1, 2, 3]), verbose)]
+        if penalty == 'l2':
+            w0, loss, info = optimize.fmin_l_bfgs_b(
+                func, w0, fprime=None,
+                args=(X, target, 1. / C, coef_mask, sample_weight),
+                iprint=iprint, pgtol=tol, maxiter=max_iter)
+        else:
+            zeros_seen = [0]
+
+            def zero_coef(x, *args):
+                if multi_class == 'multinomial':
+                    x = x.reshape(-1, classes.size)[:-1]
+                else:
+                    x = x[:-1]
+                now_zeros = np.array_equiv(x, 0.)
+                if now_zeros:
+                    zeros_seen[0] += 1
+                else:
+                    zeros_seen[0] = 0
+                if zeros_seen[0] > 1:
+                    return -2048
+            try:
+                w0 = fmin_lbfgs(func, w0, orthantwise_c=1. / C,
+                                args=(X, target, 0., coef_mask, sample_weight),
+                                max_iterations=max_iter,
+                                epsilon=tol,
+                                orthantwise_end=coef_size,
+                                progress=zero_coef)
+            except AllZeroLBFGSError:
+                w0 *= 0.
+            info = None
+        if info is not None and info["warnflag"] == 1:
+            warnings.warn("lbfgs failed to converge. Increase the number "
+                          "of iterations.", ConvergenceWarning)
+        # In scipy <= 1.0.0, nit may exceed maxiter.
+        # See https://github.com/scipy/scipy/issues/7854.
+        if info is None:
+            n_iter_i = -1
+        else:
+            n_iter_i = min(info['nit'], max_iter)
+
+        if multi_class == 'multinomial':
+            n_classes = max(2, classes.size)
+            if penalty == 'l2':
+                multi_w0 = np.reshape(w0, (n_classes, -1))
+            else:
+                multi_w0 = np.reshape(w0, (-1, n_classes)).T
+            if coef_mask is not None:
+                multi_w0[:, :n_features] *= coef_mask
+            coefs.append(multi_w0.copy())
+        else:
+            if coef_mask is not None:
+                w0[:n_features] *= coef_mask
+            coefs.append(w0.copy())
+
+        n_iter[i] = n_iter_i
+
+    return np.array(coefs), np.array(Cs), n_iter
+
+
 def test_fit_intercept_fixed_coef():
@@ -218,6 +474,42 @@ def test_masked_logistic_standardize():
         lr.fit(X, y, coef_mask=mask)
 
 
+def test_masking_with_indexing_binary():
+    """Check that indexing the masks gives the same results as masking with
+    binary logistic regression.
+    """
+    X, y, w, intercept = make_classification(n_samples=1000,
+                                             n_classes=2,
+                                             n_features=20,
+                                             n_informative=10,
+                                             random_state=0)
+    mask = (w != 0.).ravel()
+    coefs, _, _ = _logistic_regression_path(X, y, [10.], coef_mask=mask)
+    coefs_old, _, _ = _logistic_regression_path_old(X, y, [10.], coef_mask=mask)
+    assert_allclose(coefs, coefs_old)
+    coefs, _, _ = _logistic_regression_path(X, y, [10.])
+    coefs_old, _, _ = _logistic_regression_path_old(X, y, [10.])
+    assert_allclose(coefs, coefs_old)
+
+
+def test_masking_with_indexing_multiclass():
+    """Check that indexing the masks gives the same results as masking with
+    multiclass logistic regression.
+    """
+    X, y, w, intercept = make_classification(n_samples=1000,
+                                             n_classes=3,
+                                             n_features=20,
+                                             n_informative=10,
+                                             random_state=0)
+    mask = w != 0.
+    coefs, _, _ = _logistic_regression_path(X, y, [10.], coef_mask=mask)
+    coefs_old, _, _ = _logistic_regression_path_old(X, y, [10.], coef_mask=mask)
+    assert_allclose(coefs, coefs_old)
+    coefs, _, _ = _logistic_regression_path(X, y, [10.])
+    coefs_old, _, _ = _logistic_regression_path_old(X, y, [10.])
+    assert_allclose(coefs, coefs_old)
+
+
 def test_estimation_score_usage():
     """Test the ability to change the estimation score in UoI L1Logistic"""
     methods = ('acc', 'log', 'BIC', 'AIC', 'AICc')

From d11da04c0d31d636966bb73949ed4eb9b85f1852 Mon Sep 17 00:00:00 2001
From: Jesse Livezey
Date: Wed, 28 Apr 2021 10:25:16 -0700
Subject: [PATCH 4/7] clean up args

---
 pyuoi/linear_model/logistic.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyuoi/linear_model/logistic.py b/pyuoi/linear_model/logistic.py
index f355941..0709499 100644
--- a/pyuoi/linear_model/logistic.py
+++ b/pyuoi/linear_model/logistic.py
@@ -621,7 +621,7 @@ def _logistic_regression_path(X, y, Cs=48, fit_intercept=True,
                 x0 = np.zeros_like(w0)
 
             def func(x, *args):
-                X, Y, _, mask, _ = args
+                mask = args[3]
                 if mask is not None:
                     x0[mask] = x
                     args = args[:3] + (None,) + (args[-1],)
@@ -648,7 +648,7 @@ def func(x, g, *args):
             x0 = np.zeros_like(w0)
 
             def func(x, *args):
-                X, Y, _, mask, _ = args
+                mask = args[3]
                 if mask is not None:
                     x0[mask] = x
                     args = args[:3] + (None,) + (args[-1],)
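
The tests added in patch 3 pin the new indexing path against the retained masking implementation coefficient for coefficient. The same invariant can be checked standalone, for example (a sketch assuming pyuoi with these patches installed; the C value of 10. mirrors the tests and is otherwise arbitrary):

    import numpy as np
    from numpy.testing import assert_allclose
    from pyuoi.datasets import make_classification
    from pyuoi.linear_model.logistic import _logistic_regression_path

    X, y, w, intercept = make_classification(n_samples=1000, n_classes=3,
                                             n_features=20, n_informative=10,
                                             random_state=0)
    mask = w != 0.
    coefs, Cs, n_iter = _logistic_regression_path(X, y, [10.], coef_mask=mask)
    # Entries outside the mask never enter the optimizer, so they must come
    # back exactly zero (the intercept column is last and is always free).
    assert_allclose(coefs[0][:, :-1][~mask], 0.)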
From e5155730d8c51059a9e151960a9175dd00508142 Mon Sep 17 00:00:00 2001
From: Jesse Livezey
Date: Wed, 28 Apr 2021 11:08:17 -0700
Subject: [PATCH 5/7] updates for new pip version

---
 LICENSE.txt           | 2 +-
 README.md             | 4 ++--
 bin/generate_build.sh | 2 +-
 requirements.txt      | 2 +-
 setup.py              | 2 +-
 5 files changed, 6 insertions(+), 6 deletions(-)
 mode change 100644 => 100755 bin/generate_build.sh

diff --git a/LICENSE.txt b/LICENSE.txt
index 9eab1af..66c9307 100644
--- a/LICENSE.txt
+++ b/LICENSE.txt
@@ -1,4 +1,4 @@
-PyUol Copyright (c) 2019, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved.
+PyUoI Copyright (c) 2019, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved.
 
 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
 

diff --git a/README.md b/README.md
index ed602a1..1023cae 100644
--- a/README.md
+++ b/README.md
@@ -89,8 +89,8 @@ Please see our ReadTheDocs
 
 # Copyright
 
-PyUol Copyright (c) 2019, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved.
+PyUoI Copyright (c) 2019, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved.
 
-If you have questions about your rights to use or distribute this software, please contact Berkeley Lab's Innovation & Partnerships Office at IPO@lbl.gov referring to " PyUol" (LBNL Ref 2019-157)."
+If you have questions about your rights to use or distribute this software, please contact Berkeley Lab's Innovation & Partnerships Office at IPO@lbl.gov referring to " PyUoI" (LBNL Ref 2019-157)."
 
 NOTICE. This software was developed under funding from the U.S. Department of Energy. As such, the U.S. Government has been granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable, worldwide license in the Software to reproduce, prepare derivative works, and perform publicly and display publicly. The U.S. Government is granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable, worldwide license in the Software to reproduce, prepare derivative works, distribute copies to the public, perform publicly and display publicly, and to permit others to do so.

diff --git a/bin/generate_build.sh b/bin/generate_build.sh
old mode 100644
new mode 100755
index 0d7854e..34f9776
--- a/bin/generate_build.sh
+++ b/bin/generate_build.sh
@@ -1,6 +1,6 @@
 eval "$(conda shell.bash hook)"
 mkdir dist
-for py in 3.6 3.7; do
+for py in 3.6 3.7 3.8; do
     git clone https://github.com/BouchardLab/pyuoi.git
     cd pyuoi
     conda create -y -n temp_build_env python=$py

diff --git a/requirements.txt b/requirements.txt
index 851c57c..369f443 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,3 @@
 numpy>=1.14
 h5py>=2.8
-scikit-learn>=0.20
+scikit-learn>=0.24

diff --git a/setup.py b/setup.py
index 4f1ed95..dcd7285 100644
--- a/setup.py
+++ b/setup.py
@@ -46,7 +46,7 @@ def finalize_options(self):
     # Versions should comply with PEP440.  For a discussion on single-sourcing
     # the version across setup.py and the project code, see
     # https://packaging.python.org/en/latest/single_source_version.html
-    version='1.0.0',
+    version='1.1.0',
 
     description='The Union of Intersections framework in Python.',
     long_description=long_description,
From fb18af3833b31b644c0c1342876ac38525a13b18 Mon Sep 17 00:00:00 2001
From: Jesse Livezey
Date: Wed, 28 Apr 2021 12:00:20 -0700
Subject: [PATCH 6/7] update sklearn version number in docs

---
 README.md                    | 2 +-
 docs/source/installation.rst | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 1023cae..facb121 100644
--- a/README.md
+++ b/README.md
@@ -46,7 +46,7 @@ PyUoI requires
 
 * numpy>=1.14
 * h5py>=2.8
-* scikit-learn>=0.20
+* scikit-learn>=0.24
 
 and optionally
 

diff --git a/docs/source/installation.rst b/docs/source/installation.rst
index 6c0ef1d..d05f75d 100644
--- a/docs/source/installation.rst
+++ b/docs/source/installation.rst
@@ -28,7 +28,7 @@ PyUoI requires
 
   * numpy>=1.14
   * h5py>=2.8
-  * scikit-learn>=0.20
+  * scikit-learn>=0.24
 
 and optionally
 

From f719da9a83fbd8cf57190b1b4f79a36b2bae70ae Mon Sep 17 00:00:00 2001
From: Jesse Livezey
Date: Wed, 28 Apr 2021 12:48:10 -0700
Subject: [PATCH 7/7] tests and fixes for all masked

---
 pyuoi/linear_model/logistic.py | 118 +++++++++++++++++----------------
 tests/test_uoi_l1logistic.py   |  68 +++++++++++++------
 2 files changed, 110 insertions(+), 76 deletions(-)

diff --git a/pyuoi/linear_model/logistic.py b/pyuoi/linear_model/logistic.py
index 0709499..145e8d6 100644
--- a/pyuoi/linear_model/logistic.py
+++ b/pyuoi/linear_model/logistic.py
@@ -666,65 +666,71 @@ def func(x, g, *args):
     coefs = list()
     n_iter = np.zeros(len(Cs), dtype=np.int32)
     for i, C in enumerate(Cs):
-        iprint = [-1, 50, 1, 100, 101][
-            np.searchsorted(np.array([0, 1, 2, 3]), verbose)]
-        if penalty == 'l2':
-            if coef_mask is None:
-                w0, loss, info = optimize.fmin_l_bfgs_b(
-                    func, w0, fprime=None,
-                    args=(X, target, 1. / C, coef_mask, sample_weight),
-                    iprint=iprint, pgtol=tol, maxiter=max_iter)
+        if coef_mask is None or coef_mask.sum():
+            iprint = [-1, 50, 1, 100, 101][
+                np.searchsorted(np.array([0, 1, 2, 3]), verbose)]
+            if penalty == 'l2':
+                if coef_mask is None:
+                    w0, loss, info = optimize.fmin_l_bfgs_b(
+                        func, w0, fprime=None,
+                        args=(X, target, 1. / C, coef_mask, sample_weight),
+                        iprint=iprint, pgtol=tol, maxiter=max_iter)
+                else:
+                    if fit_intercept:
+                        if multi_class == 'multinomial':
+                            mask = [coef_mask,
+                                    np.ones(n_classes)[:, np.newaxis]]
+                            mask = np.concatenate(mask, axis=1)
+                        else:
+                            mask = np.concatenate([coef_mask, np.ones(1)])
+                    else:
+                        mask = coef_mask
+                    mask = np.nonzero(mask.ravel())[0]
+                    wp = w0[mask]
+                    wp, loss, info = optimize.fmin_l_bfgs_b(
+                        func, wp, fprime=None,
+                        args=(X, target, 1. / C, mask, sample_weight),
+                        iprint=iprint, pgtol=tol, maxiter=max_iter)
+                    w0 = np.zeros_like(w0)
+                    w0[mask] = wp
 
-        else:
-            zeros_seen = [0]
-
-            def zero_coef(x, *args):
-                if multi_class == 'multinomial':
-                    x = x.reshape(-1, classes.size)[:-1]
-                else:
-                    x = x[:-1]
-                now_zeros = np.array_equiv(x, 0.)
-                if now_zeros:
-                    zeros_seen[0] += 1
-                else:
-                    zeros_seen[0] = 0
-                if zeros_seen[0] > 1:
-                    return -2048
-            try:
-                w0 = fmin_lbfgs(func, w0, orthantwise_c=1. / C,
-                                args=(X, target, 0., coef_mask, sample_weight),
-                                max_iterations=max_iter,
-                                epsilon=tol,
-                                orthantwise_end=coef_size,
-                                progress=zero_coef)
-            except AllZeroLBFGSError:
-                w0 *= 0.
-            info = None
-        if info is not None and info["warnflag"] == 1:
-            warnings.warn("lbfgs failed to converge. Increase the number "
-                          "of iterations.", ConvergenceWarning)
-        # In scipy <= 1.0.0, nit may exceed maxiter.
-        # See https://github.com/scipy/scipy/issues/7854.
-        if info is None:
-            n_iter_i = -1
-        else:
-            n_iter_i = min(info['nit'], max_iter)
+            else:
+                zeros_seen = [0]
+
+                def zero_coef(x, *args):
+                    if multi_class == 'multinomial':
+                        x = x.reshape(-1, classes.size)[:-1]
+                    else:
+                        x = x[:-1]
+                    now_zeros = np.array_equiv(x, 0.)
+                    if now_zeros:
+                        zeros_seen[0] += 1
+                    else:
+                        zeros_seen[0] = 0
+                    if zeros_seen[0] > 1:
+                        return -2048
+                try:
+                    args = (X, target, 0., coef_mask, sample_weight)
+                    w0 = fmin_lbfgs(func, w0, orthantwise_c=1. / C,
+                                    args=args,
+                                    max_iterations=max_iter,
+                                    epsilon=tol,
+                                    orthantwise_end=coef_size,
+                                    progress=zero_coef)
+                except AllZeroLBFGSError:
+                    w0 *= 0.
+                info = None
+            if info is not None and info["warnflag"] == 1:
+                warnings.warn("lbfgs failed to converge. Increase the number "
+                              "of iterations.", ConvergenceWarning)
+            # In scipy <= 1.0.0, nit may exceed maxiter.
+            # See https://github.com/scipy/scipy/issues/7854.
+            if info is None:
+                n_iter_i = -1
+            else:
+                n_iter_i = min(info['nit'], max_iter)
+
+            n_iter[i] = n_iter_i
 
         if multi_class == 'multinomial':
             n_classes = max(2, classes.size)
@@ -734,8 +740,6 @@ def zero_coef(x, *args):
                 w0 = np.reshape(w0, (-1, n_classes)).T
         coefs.append(w0.copy())
 
-        n_iter[i] = n_iter_i
-
     return np.array(coefs), np.array(Cs), n_iter

diff --git a/tests/test_uoi_l1logistic.py b/tests/test_uoi_l1logistic.py
index fd751ea..84a7d8a 100644
--- a/tests/test_uoi_l1logistic.py
+++ b/tests/test_uoi_l1logistic.py
@@ -474,40 +474,70 @@ def test_masked_logistic_standardize():
         lr.fit(X, y, coef_mask=mask)
 
 
-def test_masking_with_indexing_binary():
+@pytest.mark.parametrize("n_classes,penalty,fit_intercept", [(3, "l2", True),
+                                                             (3, "l2", False),
+                                                             (3, "l1", True),
+                                                             (3, "l1", False),
+                                                             (2, "l2", True),
+                                                             (2, "l2", False),
+                                                             (2, "l1", True),
+                                                             (2, "l1", False)])
+def test_masking_with_indexing(n_classes, penalty, fit_intercept):
     """Check that indexing the masks gives the same results as masking with
-    binary logistic regression.
+    logistic regression.
     """
     X, y, w, intercept = make_classification(n_samples=1000,
-                                             n_classes=2,
+                                             n_classes=n_classes,
                                              n_features=20,
                                              n_informative=10,
                                              random_state=0)
-    mask = (w != 0.).ravel()
-    coefs, _, _ = _logistic_regression_path(X, y, [10.], coef_mask=mask)
-    coefs_old, _, _ = _logistic_regression_path_old(X, y, [10.], coef_mask=mask)
-    assert_allclose(coefs, coefs_old)
-    coefs, _, _ = _logistic_regression_path(X, y, [10.])
-    coefs_old, _, _ = _logistic_regression_path_old(X, y, [10.])
-    assert_allclose(coefs, coefs_old)
+    mask = w != 0.
+    if n_classes == 2:
+        mask = mask.ravel()
+    coefs, _, _ = _logistic_regression_path(X, y, [10.], coef_mask=mask,
+                                            penalty=penalty,
+                                            fit_intercept=fit_intercept)
+    coefs_old, _, _ = _logistic_regression_path_old(X, y, [10.], coef_mask=mask,
+                                                    penalty=penalty,
+                                                    fit_intercept=fit_intercept)
     assert_allclose(coefs, coefs_old)
-    coefs, _, _ = _logistic_regression_path(X, y, [10.])
-    coefs_old, _, _ = _logistic_regression_path_old(X, y, [10.])
+    coefs, _, _ = _logistic_regression_path(X, y, [10.],
+                                            penalty=penalty,
+                                            fit_intercept=fit_intercept)
+    coefs_old, _, _ = _logistic_regression_path_old(X, y, [10.],
+                                                    penalty=penalty,
+                                                    fit_intercept=fit_intercept)
     assert_allclose(coefs, coefs_old)
 
 
-def test_masking_with_indexing_multiclass():
-    """Check that indexing the masks gives the same results as masking with
-    multiclass logistic regression.
-    """
+@pytest.mark.parametrize("n_classes,penalty,fit_intercept", [(3, "l2", True),
+                                                             (3, "l2", False),
+                                                             (3, "l1", True),
+                                                             (3, "l1", False),
+                                                             (2, "l2", True),
+                                                             (2, "l2", False),
+                                                             (2, "l1", True),
+                                                             (2, "l1", False)])
+def test_all_masked_with_indexing(n_classes, penalty, fit_intercept):
+    """Check that masking all of the coefficients either works with an
+    intercept or raises an error.
+    """
     X, y, w, intercept = make_classification(n_samples=1000,
-                                             n_classes=3,
+                                             n_classes=n_classes,
                                              n_features=20,
                                              n_informative=10,
                                              random_state=0)
-    mask = w != 0.
-    coefs, _, _ = _logistic_regression_path(X, y, [10.], coef_mask=mask)
-    coefs_old, _, _ = _logistic_regression_path_old(X, y, [10.], coef_mask=mask)
-    assert_allclose(coefs, coefs_old)
-    coefs, _, _ = _logistic_regression_path(X, y, [10.])
-    coefs_old, _, _ = _logistic_regression_path_old(X, y, [10.])
-    assert_allclose(coefs, coefs_old)
+    mask = np.zeros_like(w)
+    if n_classes == 2:
+        mask = mask.ravel()
+    coefs, _, _ = _logistic_regression_path(X, y, [10.], coef_mask=mask,
+                                            fit_intercept=fit_intercept)
+    if fit_intercept:
+        if n_classes == 2:
+            assert_equal(coefs[0][:-1], 0.)
+        else:
+            assert_equal(coefs[0][:, :-1], 0.)
+    else:
+        assert_equal(coefs[0], 0.)
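
With patch 7, a fully masked model is legal: the coef_mask.sum() guard skips the solver entirely and only the intercept entries can end up nonzero. A quick standalone check along the lines of the new test (a sketch assuming pyuoi with these patches installed; the all-zero mask shape mirrors what the test builds from w):

    import numpy as np
    from numpy.testing import assert_equal
    from pyuoi.datasets import make_classification
    from pyuoi.linear_model.logistic import _logistic_regression_path

    X, y, w, intercept = make_classification(n_samples=1000, n_classes=2,
                                             n_features=20, n_informative=10,
                                             random_state=0)
    mask = np.zeros(20)  # mask out every feature
    coefs, _, _ = _logistic_regression_path(X, y, [10.], coef_mask=mask)
    # All coefficients stay exactly zero; only the trailing intercept entry
    # may be fit, which is what the parametrized test above asserts.
    assert_equal(coefs[0][:-1], 0.)

Moving the n_iter[i] assignment inside the guard also means an all-masked fit reports zero iterations rather than reading a stale n_iter_i.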