From c47797c895359d65a90e6c7cb5969bd82737773d Mon Sep 17 00:00:00 2001 From: Maximiliano Vargas <43217761+mvargas33@users.noreply.github.com> Date: Wed, 15 Sep 2021 19:15:42 +0200 Subject: [PATCH 01/20] Remove 3.9 from compatibility --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 46e5d2c9..c32755ff 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -41,7 +41,7 @@ jobs: strategy: matrix: os: [ubuntu-latest] - python-version: ['3.6', '3.7', '3.8', '3.9'] + python-version: ['3.6', '3.7', '3.8'] steps: - uses: actions/checkout@v2 - name: Set up Python From e07b11a41e6334ee220cb9da6ae93b91107b41f6 Mon Sep 17 00:00:00 2001 From: mvargas33 Date: Fri, 1 Oct 2021 17:19:14 +0200 Subject: [PATCH 02/20] First draft of refactoring BaseMetricLearner and Mahalanobis Learner --- metric_learn/base_metric.py | 129 +++++++++++++++++++++++++++++++++++- 1 file changed, 127 insertions(+), 2 deletions(-) diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py index 721d7ba0..7cfe4dc9 100644 --- a/metric_learn/base_metric.py +++ b/metric_learn/base_metric.py @@ -9,6 +9,7 @@ import numpy as np from abc import ABCMeta, abstractmethod from ._util import ArrayIndexer, check_input, validate_vector +import warnings class BaseMetricLearner(BaseEstimator, metaclass=ABCMeta): @@ -27,7 +28,8 @@ def __init__(self, preprocessor=None): @abstractmethod def score_pairs(self, pairs): - """Returns the score between pairs + """Deprecated. + Returns the score between pairs (can be a similarity, or a distance/metric depending on the algorithm) Parameters @@ -49,6 +51,57 @@ def score_pairs(self, pairs): learner is. """ + @abstractmethod + def pair_similarity(self, pairs): + """Returns the similarity score between pairs. Depending on the algorithm, + this function can return the direct learned similarity score between pairs, + or it can return the inverse of the distance learned between two pairs. + + Parameters + ---------- + pairs : `numpy.ndarray`, shape=(n_samples, 2, n_features) + 3D array of pairs. + + Returns + ------- + scores : `numpy.ndarray` of shape=(n_pairs,) + The score of every pair. + + See Also + -------- + get_metric : a method that returns a function to compute the metric between + two points. The difference with `score_pairs` is that it works on two 1D + arrays and cannot use a preprocessor. Besides, the returned function is + independent of the metric learner and hence is not modified if the metric + learner is. + """ + + @abstractmethod + def pair_distance(self, pairs): + """Returns the distance score between pairs. Depending on the algorithm, + this function can return the direct learned distance (or pseudo-distance) + score between pairs, or it can return the inverse score of the similarity + learned between two pairs. + + Parameters + ---------- + pairs : `numpy.ndarray`, shape=(n_samples, 2, n_features) + 3D array of pairs. + + Returns + ------- + scores : `numpy.ndarray` of shape=(n_pairs,) + The score of every pair. + + See Also + -------- + get_metric : a method that returns a function to compute the metric between + two points. The difference with `score_pairs` is that it works on two 1D + arrays and cannot use a preprocessor. Besides, the returned function is + independent of the metric learner and hence is not modified if the metric + learner is. 
+ """ + def _check_preprocessor(self): """Initializes the preprocessor""" if _is_arraylike(self.preprocessor): @@ -182,7 +235,79 @@ class MahalanobisMixin(BaseMetricLearner, MetricTransformer, """ def score_pairs(self, pairs): - r"""Returns the learned Mahalanobis distance between pairs. + r"""Deprecated. + + Returns the learned Mahalanobis distance between pairs. + + This distance is defined as: :math:`d_M(x, x') = \sqrt{(x-x')^T M (x-x')}` + where ``M`` is the learned Mahalanobis matrix, for every pair of points + ``x`` and ``x'``. This corresponds to the euclidean distance between + embeddings of the points in a new space, obtained through a linear + transformation. Indeed, we have also: :math:`d_M(x, x') = \sqrt{(x_e - + x_e')^T (x_e- x_e')}`, with :math:`x_e = L x` (See + :class:`MahalanobisMixin`). + + Parameters + ---------- + pairs : array-like, shape=(n_pairs, 2, n_features) or (n_pairs, 2) + 3D Array of pairs to score, with each row corresponding to two points, + for 2D array of indices of pairs if the metric learner uses a + preprocessor. + + Returns + ------- + scores : `numpy.ndarray` of shape=(n_pairs,) + The learned Mahalanobis distance for every pair. + + See Also + -------- + get_metric : a method that returns a function to compute the metric between + two points. The difference with `score_pairs` is that it works on two 1D + arrays and cannot use a preprocessor. Besides, the returned function is + independent of the metric learner and hence is not modified if the metric + learner is. + + :ref:`mahalanobis_distances` : The section of the project documentation + that describes Mahalanobis Distances. + """ + dpr_msg = ("score_pairs will be deprecated in the next release. " + "Use pair_similarity to compute similarities, or " + "pair_distances to compute distances.") + warnings.warn(dpr_msg, category=FutureWarning) + return self.pair_distance(pairs) + + def pair_similarity(self, pairs): + """ + Returns the inverse of the learned Mahalanobis distance between pairs. + + Parameters + ---------- + pairs : array-like, shape=(n_pairs, 2, n_features) or (n_pairs, 2) + 3D Array of pairs to score, with each row corresponding to two points, + for 2D array of indices of pairs if the metric learner uses a + preprocessor. + + Returns + ------- + scores : `numpy.ndarray` of shape=(n_pairs,) + The inverse of the learned Mahalanobis distance for every pair. + + See Also + -------- + get_metric : a method that returns a function to compute the metric between + two points. The difference with `score_pairs` is that it works on two 1D + arrays and cannot use a preprocessor. Besides, the returned function is + independent of the metric learner and hence is not modified if the metric + learner is. + + :ref:`mahalanobis_distances` : The section of the project documentation + that describes Mahalanobis Distances. + """ + return -1 * self.pair_distance(pairs) + + def pair_distance(self, pairs): + """ + Returns the learned Mahalanobis distance between pairs. 
This distance is defined as: :math:`d_M(x, x') = \sqrt{(x-x')^T M (x-x')}` where ``M`` is the learned Mahalanobis matrix, for every pair of points From 8210acd02ccb1a3387fa422c5d400b44f6410d53 Mon Sep 17 00:00:00 2001 From: mvargas33 Date: Wed, 6 Oct 2021 12:07:38 +0200 Subject: [PATCH 03/20] Avoid warning related to score_pairs deprecation in tests of pair_calibraiton --- metric_learn/base_metric.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py index 7cfe4dc9..9bece1ef 100644 --- a/metric_learn/base_metric.py +++ b/metric_learn/base_metric.py @@ -486,7 +486,7 @@ def decision_function(self, pairs): pairs = check_input(pairs, type_of_inputs='tuples', preprocessor=self.preprocessor_, estimator=self, tuple_size=self._tuple_size) - return - self.score_pairs(pairs) + return - self.pair_distance(pairs) def score(self, pairs, y): """Computes score of pairs similarity prediction. @@ -756,8 +756,8 @@ def decision_function(self, triplets): triplets = check_input(triplets, type_of_inputs='tuples', preprocessor=self.preprocessor_, estimator=self, tuple_size=self._tuple_size) - return (self.score_pairs(triplets[:, [0, 2]]) - - self.score_pairs(triplets[:, :2])) + return (self.pair_distance(triplets[:, [0, 2]]) - + self.pair_distance(triplets[:, :2])) def score(self, triplets): """Computes score on input triplets. @@ -841,8 +841,8 @@ def decision_function(self, quadruplets): quadruplets = check_input(quadruplets, type_of_inputs='tuples', preprocessor=self.preprocessor_, estimator=self, tuple_size=self._tuple_size) - return (self.score_pairs(quadruplets[:, 2:]) - - self.score_pairs(quadruplets[:, :2])) + return (self.pair_distance(quadruplets[:, 2:]) - + self.pair_distance(quadruplets[:, :2])) def score(self, quadruplets): """Computes score on input quadruplets From 11b5df62b4d69477cd8c4c140744826201e2c810 Mon Sep 17 00:00:00 2001 From: mvargas33 Date: Wed, 6 Oct 2021 12:07:50 +0200 Subject: [PATCH 04/20] Minor fix --- metric_learn/base_metric.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py index 9bece1ef..62694c88 100644 --- a/metric_learn/base_metric.py +++ b/metric_learn/base_metric.py @@ -239,11 +239,11 @@ def score_pairs(self, pairs): Returns the learned Mahalanobis distance between pairs. - This distance is defined as: :math:`d_M(x, x') = \sqrt{(x-x')^T M (x-x')}` + This distance is defined as: :math:`d_M(x, x') = \\sqrt{(x-x')^T M (x-x')}` where ``M`` is the learned Mahalanobis matrix, for every pair of points ``x`` and ``x'``. This corresponds to the euclidean distance between embeddings of the points in a new space, obtained through a linear - transformation. Indeed, we have also: :math:`d_M(x, x') = \sqrt{(x_e - + transformation. Indeed, we have also: :math:`d_M(x, x') = \\sqrt{(x_e - x_e')^T (x_e- x_e')}`, with :math:`x_e = L x` (See :class:`MahalanobisMixin`). @@ -309,11 +309,11 @@ def pair_distance(self, pairs): """ Returns the learned Mahalanobis distance between pairs. - This distance is defined as: :math:`d_M(x, x') = \sqrt{(x-x')^T M (x-x')}` + This distance is defined as: :math:`d_M(x, x') = \\sqrt{(x-x')^T M (x-x')}` where ``M`` is the learned Mahalanobis matrix, for every pair of points ``x`` and ``x'``. This corresponds to the euclidean distance between embeddings of the points in a new space, obtained through a linear - transformation. Indeed, we have also: :math:`d_M(x, x') = \sqrt{(x_e - + transformation. 
Indeed, we have also: :math:`d_M(x, x') = \\sqrt{(x_e - x_e')^T (x_e- x_e')}`, with :math:`x_e = L x` (See :class:`MahalanobisMixin`). From 06b71313d8ff77c61acdbe1ca84e57acbbb69f29 Mon Sep 17 00:00:00 2001 From: mvargas33 Date: Wed, 6 Oct 2021 14:35:10 +0200 Subject: [PATCH 05/20] Replaced score_pairs with pair_distance in tests --- test/test_mahalanobis_mixin.py | 22 +++++++++++----------- test/test_pairs_classifiers.py | 6 +++--- test/test_sklearn_compat.py | 2 +- test/test_utils.py | 16 ++++++++-------- 4 files changed, 23 insertions(+), 23 deletions(-) diff --git a/test/test_mahalanobis_mixin.py b/test/test_mahalanobis_mixin.py index e3d981a4..2ba78a57 100644 --- a/test/test_mahalanobis_mixin.py +++ b/test/test_mahalanobis_mixin.py @@ -27,7 +27,7 @@ @pytest.mark.parametrize('estimator, build_dataset', metric_learners, ids=ids_metric_learners) -def test_score_pairs_pairwise(estimator, build_dataset): +def test_pair_distance_pairwise(estimator, build_dataset): # Computing pairwise scores should return a euclidean distance matrix. input_data, labels, _, X = build_dataset() n_samples = 20 @@ -36,7 +36,7 @@ def test_score_pairs_pairwise(estimator, build_dataset): set_random_state(model) model.fit(*remove_y(estimator, input_data, labels)) - pairwise = model.score_pairs(np.array(list(product(X, X))))\ + pairwise = model.pair_distance(np.array(list(product(X, X))))\ .reshape(n_samples, n_samples) check_is_distance_matrix(pairwise) @@ -51,8 +51,8 @@ def test_score_pairs_pairwise(estimator, build_dataset): @pytest.mark.parametrize('estimator, build_dataset', metric_learners, ids=ids_metric_learners) -def test_score_pairs_toy_example(estimator, build_dataset): - # Checks that score_pairs works on a toy example +def test_pair_distance_toy_example(estimator, build_dataset): + # Checks that pair_distance works on a toy example input_data, labels, _, X = build_dataset() n_samples = 20 X = X[:n_samples] @@ -64,24 +64,24 @@ def test_score_pairs_toy_example(estimator, build_dataset): distances = np.sqrt(np.sum((embedded_pairs[:, 1] - embedded_pairs[:, 0])**2, axis=-1)) - assert_array_almost_equal(model.score_pairs(pairs), distances) + assert_array_almost_equal(model.pair_distance(pairs), distances) @pytest.mark.parametrize('estimator, build_dataset', metric_learners, ids=ids_metric_learners) -def test_score_pairs_finite(estimator, build_dataset): +def test_pair_distance_finite(estimator, build_dataset): # tests that the score is finite input_data, labels, _, X = build_dataset() model = clone(estimator) set_random_state(model) model.fit(*remove_y(estimator, input_data, labels)) pairs = np.array(list(product(X, X))) - assert np.isfinite(model.score_pairs(pairs)).all() + assert np.isfinite(model.pair_distance(pairs)).all() @pytest.mark.parametrize('estimator, build_dataset', metric_learners, ids=ids_metric_learners) -def test_score_pairs_dim(estimator, build_dataset): +def test_pair_distance_dim(estimator, build_dataset): # scoring of 3D arrays should return 1D array (several tuples), # and scoring of 2D arrays (one tuple) should return an error (like # scikit-learn's error when scoring 1D arrays) @@ -90,13 +90,13 @@ def test_score_pairs_dim(estimator, build_dataset): set_random_state(model) model.fit(*remove_y(estimator, input_data, labels)) tuples = np.array(list(product(X, X))) - assert model.score_pairs(tuples).shape == (tuples.shape[0],) + assert model.pair_distance(tuples).shape == (tuples.shape[0],) context = make_context(estimator) msg = ("3D array of formed tuples expected{}. 
Found 2D array " "instead:\ninput={}. Reshape your data and/or use a preprocessor.\n" .format(context, tuples[1])) with pytest.raises(ValueError) as raised_error: - model.score_pairs(tuples[1]) + model.pair_distance(tuples[1]) assert str(raised_error.value) == msg @@ -140,7 +140,7 @@ def test_embed_dim(estimator, build_dataset): "instead:\ninput={}. Reshape your data and/or use a " "preprocessor.\n".format(context, X[0])) with pytest.raises(ValueError) as raised_error: - model.score_pairs(model.transform(X[0, :])) + model.pair_distance(model.transform(X[0, :])) assert str(raised_error.value) == err_msg # we test that the shape is also OK when doing dimensionality reduction if hasattr(model, 'n_components'): diff --git a/test/test_pairs_classifiers.py b/test/test_pairs_classifiers.py index 824bb622..7023fbea 100644 --- a/test/test_pairs_classifiers.py +++ b/test/test_pairs_classifiers.py @@ -49,7 +49,7 @@ def test_predict_monotonous(estimator, build_dataset, pairs_train, pairs_test, y_train, y_test = train_test_split(input_data, labels) estimator.fit(pairs_train, y_train) - distances = estimator.score_pairs(pairs_test) + distances = estimator.pair_distance(pairs_test) predictions = estimator.predict(pairs_test) min_dissimilar = np.min(distances[predictions == -1]) max_similar = np.max(distances[predictions == 1]) @@ -65,7 +65,7 @@ def test_predict_monotonous(estimator, build_dataset, def test_raise_not_fitted_error_if_not_fitted(estimator, build_dataset, with_preprocessor): """Test that a NotFittedError is raised if someone tries to use - score_pairs, decision_function, get_metric, transform or + pair_distance, decision_function, get_metric, transform or get_mahalanobis_matrix on input data and the metric learner has not been fitted.""" input_data, labels, preprocessor, _ = build_dataset(with_preprocessor) @@ -73,7 +73,7 @@ def test_raise_not_fitted_error_if_not_fitted(estimator, build_dataset, estimator.set_params(preprocessor=preprocessor) set_random_state(estimator) with pytest.raises(NotFittedError): - estimator.score_pairs(input_data) + estimator.pair_distance(input_data) with pytest.raises(NotFittedError): estimator.decision_function(input_data) with pytest.raises(NotFittedError): diff --git a/test/test_sklearn_compat.py b/test/test_sklearn_compat.py index 3ad69712..d6077a16 100644 --- a/test/test_sklearn_compat.py +++ b/test/test_sklearn_compat.py @@ -148,7 +148,7 @@ def test_array_like_inputs(estimator, build_dataset, with_preprocessor): pairs = np.array([[X[0], X[1]], [X[0], X[2]]]) pairs_variants, _ = generate_array_like(pairs) for pairs_variant in pairs_variants: - estimator.score_pairs(pairs_variant) + estimator.pair_distance(pairs_variant) @pytest.mark.parametrize('with_preprocessor', [True, False]) diff --git a/test/test_utils.py b/test/test_utils.py index 072b94c5..39515f78 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -834,8 +834,8 @@ def test_error_message_tuple_size(estimator, _): @pytest.mark.parametrize('estimator, _', metric_learners, ids=ids_metric_learners) -def test_error_message_t_score_pairs(estimator, _): - """tests that if you want to score_pairs on triplets for instance, it returns +def test_error_message_t_pair_distance(estimator, _): + """tests that if you want to pair_distance on triplets for instance, it returns the right error message """ estimator = clone(estimator) @@ -844,7 +844,7 @@ def test_error_message_t_score_pairs(estimator, _): triplets = np.array([[[1.3, 6.3], [3., 6.8], [6.5, 4.4]], [[1.9, 5.3], [1., 7.8], [3.2, 1.2]]]) with 
pytest.raises(ValueError) as raised_err: - estimator.score_pairs(triplets) + estimator.pair_distance(triplets) expected_msg = ("Tuples of 2 element(s) expected{}. Got tuples of 3 " "element(s) instead (shape=(2, 3, 2)):\ninput={}.\n" .format(make_context(estimator), triplets)) @@ -930,17 +930,17 @@ def test_same_with_or_without_preprocessor(estimator, build_dataset): method)(formed_test) assert np.array(output_with_prep == output_with_prep_formed).all() - # test score_pairs + # test pair_distance idx1 = np.array([[0, 2], [5, 3]], dtype=int) - output_with_prep = estimator_with_preprocessor.score_pairs( + output_with_prep = estimator_with_preprocessor.pair_distance( indicators_to_transform[idx1]) - output_without_prep = estimator_without_preprocessor.score_pairs( + output_without_prep = estimator_without_preprocessor.pair_distance( formed_points_to_transform[idx1]) assert np.array(output_with_prep == output_without_prep).all() - output_with_prep = estimator_with_preprocessor.score_pairs( + output_with_prep = estimator_with_preprocessor.pair_distance( indicators_to_transform[idx1]) - output_without_prep = estimator_with_prep_formed.score_pairs( + output_without_prep = estimator_with_prep_formed.pair_distance( formed_points_to_transform[idx1]) assert np.array(output_with_prep == output_without_prep).all() From d5cb8b49da42d42d2f869b9ad6116918f5528337 Mon Sep 17 00:00:00 2001 From: mvargas33 Date: Wed, 6 Oct 2021 14:35:51 +0200 Subject: [PATCH 06/20] Replace score_pairs with pair_distance inb docs. --- doc/introduction.rst | 2 +- doc/supervised.rst | 4 ++-- doc/weakly_supervised.rst | 10 +++++----- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/doc/introduction.rst b/doc/introduction.rst index 7d9f52d0..6faf021d 100644 --- a/doc/introduction.rst +++ b/doc/introduction.rst @@ -141,7 +141,7 @@ to the following resources: .. :math:`D`-dimensional learned metric space :math:`X L^{\top}`, .. in which standard Euclidean distances may be used. .. - ``transform(X)``, which applies the aforementioned transformation. -.. - ``score_pairs(pairs)`` which returns the distance between pairs of +.. - ``pair_distance(pairs)`` which returns the distance between pairs of .. points. ``pairs`` should be a 3D array-like of pairs of shape ``(n_pairs, .. 2, n_features)``, or it can be a 2D array-like of pairs indicators of .. shape ``(n_pairs, 2)`` (see section :ref:`preprocessor_section` for more diff --git a/doc/supervised.rst b/doc/supervised.rst index c6d8b68b..cde7cb86 100644 --- a/doc/supervised.rst +++ b/doc/supervised.rst @@ -69,9 +69,9 @@ Also, as explained before, our metric learners has learn a distance between points. You can use this distance in two main ways: - You can either return the distance between pairs of points using the - `score_pairs` function: + `pair_distance` function: ->>> nca.score_pairs([[[3.5, 3.6], [5.6, 2.4]], [[1.2, 4.2], [2.1, 6.4]]]) +>>> nca.pair_distance([[[3.5, 3.6], [5.6, 2.4]], [[1.2, 4.2], [2.1, 6.4]]]) array([0.49627072, 3.65287282]) - Or you can return a function that will return the distance (in the new diff --git a/doc/weakly_supervised.rst b/doc/weakly_supervised.rst index 174210b8..c98b9ea0 100644 --- a/doc/weakly_supervised.rst +++ b/doc/weakly_supervised.rst @@ -160,9 +160,9 @@ Also, as explained before, our metric learner has learned a distance between points. 
You can use this distance in two main ways: - You can either return the distance between pairs of points using the - `score_pairs` function: + `pair_distance` function: ->>> mmc.score_pairs([[[3.5, 3.6, 5.2], [5.6, 2.4, 6.7]], +>>> mmc.pair_distance([[[3.5, 3.6, 5.2], [5.6, 2.4, 6.7]], ... [[1.2, 4.2, 7.7], [2.1, 6.4, 0.9]]]) array([7.27607365, 0.88853014]) @@ -344,7 +344,7 @@ returns the `sklearn.metrics.roc_auc_score` (which is threshold-independent). .. note:: See :ref:`fit_ws` for more details on metric learners functions that are - not specific to learning on pairs, like `transform`, `score_pairs`, + not specific to learning on pairs, like `transform`, `pair_distance`, `get_metric` and `get_mahalanobis_matrix`. Algorithms @@ -691,7 +691,7 @@ of triplets that have the right predicted ordering. .. note:: See :ref:`fit_ws` for more details on metric learners functions that are - not specific to learning on pairs, like `transform`, `score_pairs`, + not specific to learning on pairs, like `transform`, `pair_distance`, `get_metric` and `get_mahalanobis_matrix`. @@ -859,7 +859,7 @@ of quadruplets have the right predicted ordering. .. note:: See :ref:`fit_ws` for more details on metric learners functions that are - not specific to learning on pairs, like `transform`, `score_pairs`, + not specific to learning on pairs, like `transform`, `pair_distance`, `get_metric` and `get_mahalanobis_matrix`. From 2f61e7bc823709f26873eb5cc5ae63c56b6af6e6 Mon Sep 17 00:00:00 2001 From: mvargas33 Date: Fri, 8 Oct 2021 11:27:41 +0200 Subject: [PATCH 07/20] Fix weird commit --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index c32755ff..46e5d2c9 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -41,7 +41,7 @@ jobs: strategy: matrix: os: [ubuntu-latest] - python-version: ['3.6', '3.7', '3.8'] + python-version: ['3.6', '3.7', '3.8', '3.9'] steps: - uses: actions/checkout@v2 - name: Set up Python From 5f68ed2120ff24212212482c69bac1817b102631 Mon Sep 17 00:00:00 2001 From: mvargas33 Date: Fri, 8 Oct 2021 11:38:43 +0200 Subject: [PATCH 08/20] Update classifiers to use pair_similarity --- metric_learn/base_metric.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py index 62694c88..b317db49 100644 --- a/metric_learn/base_metric.py +++ b/metric_learn/base_metric.py @@ -486,7 +486,7 @@ def decision_function(self, pairs): pairs = check_input(pairs, type_of_inputs='tuples', preprocessor=self.preprocessor_, estimator=self, tuple_size=self._tuple_size) - return - self.pair_distance(pairs) + return self.pair_similarity(pairs) def score(self, pairs, y): """Computes score of pairs similarity prediction. @@ -756,8 +756,8 @@ def decision_function(self, triplets): triplets = check_input(triplets, type_of_inputs='tuples', preprocessor=self.preprocessor_, estimator=self, tuple_size=self._tuple_size) - return (self.pair_distance(triplets[:, [0, 2]]) - - self.pair_distance(triplets[:, :2])) + return (self.pair_similarity(triplets[:, :2]) - + self.pair_similarity(triplets[:, [0, 2]])) def score(self, triplets): """Computes score on input triplets. 
@@ -841,8 +841,8 @@ def decision_function(self, quadruplets): quadruplets = check_input(quadruplets, type_of_inputs='tuples', preprocessor=self.preprocessor_, estimator=self, tuple_size=self._tuple_size) - return (self.pair_distance(quadruplets[:, 2:]) - - self.pair_distance(quadruplets[:, :2])) + return (self.pair_similarity(quadruplets[:, :2]) - + self.pair_similarity(quadruplets[:, 2:])) def score(self, quadruplets): """Computes score on input quadruplets From 3d6450b950fd94707e172e993c39ea885a1f1cd9 Mon Sep 17 00:00:00 2001 From: mvargas33 Date: Fri, 8 Oct 2021 14:06:28 +0200 Subject: [PATCH 09/20] Updated rst docs --- doc/supervised.rst | 28 ++++++++++++++++++++++++++-- doc/weakly_supervised.rst | 23 +++++++++++++++++++++++ 2 files changed, 49 insertions(+), 2 deletions(-) diff --git a/doc/supervised.rst b/doc/supervised.rst index cde7cb86..ea5f5d29 100644 --- a/doc/supervised.rst +++ b/doc/supervised.rst @@ -71,8 +71,8 @@ points. You can use this distance in two main ways: - You can either return the distance between pairs of points using the `pair_distance` function: ->>> nca.pair_distance([[[3.5, 3.6], [5.6, 2.4]], [[1.2, 4.2], [2.1, 6.4]]]) -array([0.49627072, 3.65287282]) +>>> nca.pair_distance([[[3.5, 3.6], [5.6, 2.4]], [[1.2, 4.2], [2.1, 6.4]], [[3.3, 7.8], [10.9, 0.1]]]) +array([0.49627072, 3.65287282, 6.06079877]) - Or you can return a function that will return the distance (in the new space) between two 1D arrays (the coordinates of the points in the original @@ -82,6 +82,29 @@ array([0.49627072, 3.65287282]) >>> metric_fun([3.5, 3.6], [5.6, 2.4]) 0.4962707194621285 +- Alternatively, you can use `pair_similarity` to return the **score** between + points, the more the **score**, the closer the pairs and vice-versa. For + Mahalanobis learners, it is equal to the inverse of the distance. + +>>> score = nca.pair_similarity([[[3.5, 3.6], [5.6, 2.4]], [[1.2, 4.2], [2.1, 6.4]], [[3.3, 7.8], [10.9, 0.1]]]) +>>> score +array([-0.49627072, -3.65287282, -6.06079877]) + + This is useful because `pair_similarity` matches the **score** sematic of + scikit-learn's `Classification matrics `_. + For instance, given a labeled data, you can pass the labels and the + **score** of your data to get the ROC curve. + +>>> from sklearn.metrics import roc_curve +>>> fpr, tpr, thresholds = roc_curve(['dog', 'cat', 'dog'], score, pos_label='dog') +>>> fpr +array([0., 0., 1., 1.]) +>>> tpr +array([0. , 0.5, 0.5, 1. ]) +>>> +>>> thresholds +array([ 0.50372928, -0.49627072, -3.65287282, -6.06079877]) + .. note:: If the metric learner that you use learns a :ref:`Mahalanobis distance @@ -105,6 +128,7 @@ All supervised algorithms are scikit-learn estimators scikit-learn model selection routines (`sklearn.model_selection.cross_val_score`, `sklearn.model_selection.GridSearchCV`, etc). +You can also use methods from `sklearn.metrics` that rely on y_scores. Algorithms ========== diff --git a/doc/weakly_supervised.rst b/doc/weakly_supervised.rst index c98b9ea0..66a34dff 100644 --- a/doc/weakly_supervised.rst +++ b/doc/weakly_supervised.rst @@ -175,6 +175,29 @@ array([7.27607365, 0.88853014]) >>> metric_fun([3.5, 3.6, 5.2], [5.6, 2.4, 6.7]) 7.276073646278203 +- Alternatively, you can use `pair_similarity` to return the **score** between + points, the more the **score**, the closer the pairs and vice-versa. For + Mahalanobis learners, it is equal to the inverse of the distance. 
+ +>>> score = mmc.pair_similarity([[[3.5, 3.6], [5.6, 2.4]], [[1.2, 4.2], [2.1, 6.4]], [[3.3, 7.8], [10.9, 0.1]]]) +>>> score +array([-0.49627072, -3.65287282, -6.06079877]) + + This is useful because `pair_similarity` matches the **score** sematic of + scikit-learn's `Classification matrics `_. + For instance, given a labeled data, you can pass the labels and the + **score** of your data to get the ROC curve. + +>>> from sklearn.metrics import roc_curve +>>> fpr, tpr, thresholds = roc_curve(['dog', 'cat', 'dog'], score, pos_label='dog') +>>> fpr +array([0., 0., 1., 1.]) +>>> tpr +array([0. , 0.5, 0.5, 1. ]) +>>> +>>> thresholds +array([ 0.50372928, -0.49627072, -3.65287282, -6.06079877]) + .. note:: If the metric learner that you use learns a :ref:`Mahalanobis distance From 7bce493b4ad9f9f56d68d11200fc695147a9a606 Mon Sep 17 00:00:00 2001 From: mvargas33 Date: Fri, 8 Oct 2021 15:15:28 +0200 Subject: [PATCH 10/20] Fix identation --- metric_learn/base_metric.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py index b317db49..59a8a302 100644 --- a/metric_learn/base_metric.py +++ b/metric_learn/base_metric.py @@ -756,7 +756,7 @@ def decision_function(self, triplets): triplets = check_input(triplets, type_of_inputs='tuples', preprocessor=self.preprocessor_, estimator=self, tuple_size=self._tuple_size) - return (self.pair_similarity(triplets[:, :2]) - + return (self.pair_similarity(triplets[:, :2]) - self.pair_similarity(triplets[:, [0, 2]])) def score(self, triplets): @@ -841,7 +841,7 @@ def decision_function(self, quadruplets): quadruplets = check_input(quadruplets, type_of_inputs='tuples', preprocessor=self.preprocessor_, estimator=self, tuple_size=self._tuple_size) - return (self.pair_similarity(quadruplets[:, :2]) - + return (self.pair_similarity(quadruplets[:, :2]) - self.pair_similarity(quadruplets[:, 2:])) def score(self, quadruplets): From 7e6584a186fd2405814caada6d9422c1e8626f6d Mon Sep 17 00:00:00 2001 From: mvargas33 Date: Mon, 11 Oct 2021 14:07:59 +0200 Subject: [PATCH 11/20] Update docs of score_pairs, get_metric --- metric_learn/base_metric.py | 102 +++++++++++++++++++++++------------- 1 file changed, 66 insertions(+), 36 deletions(-) diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py index 59a8a302..2d48ca65 100644 --- a/metric_learn/base_metric.py +++ b/metric_learn/base_metric.py @@ -28,7 +28,14 @@ def __init__(self, preprocessor=None): @abstractmethod def score_pairs(self, pairs): - """Deprecated. + """ + .. deprecated:: 0.6.3 Refer to `pair_distance` and `pair_similarity`. + + .. warning:: + This method will be deleted in 0.6.4. Please refer to `pair_distance` + or `pair_similarity`. This change will occur in order to add learners + that don't necessarly learn a Mahalanobis distance. + Returns the score between pairs (can be a similarity, or a distance/metric depending on the algorithm) @@ -45,17 +52,22 @@ def score_pairs(self, pairs): See Also -------- get_metric : a method that returns a function to compute the metric between - two points. The difference with `score_pairs` is that it works on two 1D - arrays and cannot use a preprocessor. Besides, the returned function is - independent of the metric learner and hence is not modified if the metric - learner is. + two points. The difference with `score_pairs` is that it works on two + 1D arrays and cannot use a preprocessor. 
Besides, the returned function + is independent of the metric learner and hence is not modified if the + metric learner is. """ @abstractmethod def pair_similarity(self, pairs): - """Returns the similarity score between pairs. Depending on the algorithm, - this function can return the direct learned similarity score between pairs, - or it can return the inverse of the distance learned between two pairs. + """ + .. versionadded:: 0.6.3 Compute the similarity score bewteen pairs + + Returns the similarity score between pairs. Depending on the algorithm, + this method can return the learned similarity score between pairs, + or the inverse of the distance learned between two pairs. The more the + score, the more similar the pairs. All learners have access to this + method. Parameters ---------- @@ -70,18 +82,21 @@ def pair_similarity(self, pairs): See Also -------- get_metric : a method that returns a function to compute the metric between - two points. The difference with `score_pairs` is that it works on two 1D - arrays and cannot use a preprocessor. Besides, the returned function is - independent of the metric learner and hence is not modified if the metric - learner is. + two points. The difference with `pair_similarity` is that it works on two + 1D arrays and cannot use a preprocessor. Besides, the returned function + is independent of the metric learner and hence is not modified if the + metric learner is. """ @abstractmethod def pair_distance(self, pairs): - """Returns the distance score between pairs. Depending on the algorithm, - this function can return the direct learned distance (or pseudo-distance) - score between pairs, or it can return the inverse score of the similarity - learned between two pairs. + """ + .. versionadded:: 0.6.3 Compute the distance score between pairs + + Returns the distance score between pairs. For Mahalanobis learners, it + returns the pseudo-distance bewtween pairs. It is not available for + learners that does not learn a distance or pseudo-distance, an error + will be shown instead. Parameters ---------- @@ -96,10 +111,10 @@ def pair_distance(self, pairs): See Also -------- get_metric : a method that returns a function to compute the metric between - two points. The difference with `score_pairs` is that it works on two 1D - arrays and cannot use a preprocessor. Besides, the returned function is - independent of the metric learner and hence is not modified if the metric - learner is. + two points. The difference with `pair_distance` is that it works on two + 1D arrays and cannot use a preprocessor. Besides, the returned function + is independent of the metric learner and hence is not modified if the + metric learner is. """ def _check_preprocessor(self): @@ -156,7 +171,9 @@ def _prepare_inputs(self, X, y=None, type_of_inputs='classic', @abstractmethod def get_metric(self): """Returns a function that takes as input two 1D arrays and outputs the - learned metric score on these two points. + learned metric score on these two points. Depending on the algorithm, it + can return the distance or similarity function between pairs. It always + returns what the specific algorithm learns. This function will be independent from the metric learner that learned it (it will not be modified if the initial metric learner is modified), @@ -189,7 +206,12 @@ def get_metric(self): See Also -------- - score_pairs : a method that returns the metric score between several pairs + pair_distance : a method that returns the distance score between several pairs + of points. 
Unlike `get_metric`, this is a method of the metric learner + and therefore can change if the metric learner changes. Besides, it can + use the metric learner's preprocessor, and works on concatenated arrays. + + pair_similarity : a method that returns the similarity score between several pairs of points. Unlike `get_metric`, this is a method of the metric learner and therefore can change if the metric learner changes. Besides, it can use the metric learner's preprocessor, and works on concatenated arrays. @@ -235,7 +257,14 @@ class MahalanobisMixin(BaseMetricLearner, MetricTransformer, """ def score_pairs(self, pairs): - r"""Deprecated. + r""" + .. deprecated:: 0.6.3 + This method is deprecated. Please use `pair_distance` instead. + + .. warning:: + This method will be deleted in 0.6.4. Please refer to `pair_distance` + or `pair_similarity`. This change will occur in order to add learners + that don't necessarly learn a Mahalanobis distance. Returns the learned Mahalanobis distance between pairs. @@ -262,15 +291,16 @@ def score_pairs(self, pairs): See Also -------- get_metric : a method that returns a function to compute the metric between - two points. The difference with `score_pairs` is that it works on two 1D - arrays and cannot use a preprocessor. Besides, the returned function is - independent of the metric learner and hence is not modified if the metric - learner is. + two points. The difference with `score_pairs` is that it works on two + 1D arrays and cannot use a preprocessor. Besides, the returned function + is independent of the metric learner and hence is not modified if the + metric learner is. :ref:`mahalanobis_distances` : The section of the project documentation that describes Mahalanobis Distances. + """ - dpr_msg = ("score_pairs will be deprecated in the next release. " + dpr_msg = ("score_pairs will be deprecated in release 0.6.3. " "Use pair_similarity to compute similarities, or " "pair_distances to compute distances.") warnings.warn(dpr_msg, category=FutureWarning) @@ -295,10 +325,10 @@ def pair_similarity(self, pairs): See Also -------- get_metric : a method that returns a function to compute the metric between - two points. The difference with `score_pairs` is that it works on two 1D - arrays and cannot use a preprocessor. Besides, the returned function is - independent of the metric learner and hence is not modified if the metric - learner is. + two points. The difference with `pair_similarity` is that it works on two + 1D arrays and cannot use a preprocessor. Besides, the returned function + is independent of the metric learner and hence is not modified if the + metric learner is. :ref:`mahalanobis_distances` : The section of the project documentation that describes Mahalanobis Distances. @@ -332,10 +362,10 @@ def pair_distance(self, pairs): See Also -------- get_metric : a method that returns a function to compute the metric between - two points. The difference with `score_pairs` is that it works on two 1D - arrays and cannot use a preprocessor. Besides, the returned function is - independent of the metric learner and hence is not modified if the metric - learner is. + two points. The difference with `pair_distance` is that it works on two + 1D arrays and cannot use a preprocessor. Besides, the returned function + is independent of the metric learner and hence is not modified if the + metric learner is. :ref:`mahalanobis_distances` : The section of the project documentation that describes Mahalanobis Distances. 
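
The docstrings in the patch above split the old `score_pairs` into `pair_distance` and `pair_similarity`. The following is a minimal illustrative sketch (not part of the patch series) of how the three methods relate after this patch, assuming a metric-learn build that includes this branch; `NCA` and the iris dataset are arbitrary choices for the example, and note that a later patch in this series renames `pair_similarity` to `pair_score`:

    # Illustrative sketch only: assumes a metric-learn install built from this branch.
    import warnings
    import numpy as np
    from sklearn.datasets import load_iris
    from metric_learn import NCA

    X, y = load_iris(return_X_y=True)
    nca = NCA(max_iter=100)
    nca.fit(X, y)

    # Three pairs of points, shaped (n_pairs, 2, n_features).
    pairs = np.stack([X[[0, 1]], X[[0, 50]], X[[0, 100]]])

    dist = nca.pair_distance(pairs)    # learned pseudo-distance for every pair
    sim = nca.pair_similarity(pairs)   # similarity score: the opposite of the distance

    # For Mahalanobis learners the two views are consistent.
    assert np.allclose(sim, -dist)

    # score_pairs still works, but emits a FutureWarning and forwards to pair_distance.
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        legacy = nca.score_pairs(pairs)
    assert np.allclose(legacy, dist)
    assert any(issubclass(w.category, FutureWarning) for w in caught)

The same relationship is what the later patches rely on when they rewrite the pair, triplet and quadruplet classifiers' `decision_function` in terms of these scores.
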
From 0b58f455dc6bab5557ac65c58ebb2befa67e93d9 Mon Sep 17 00:00:00 2001 From: mvargas33 Date: Mon, 11 Oct 2021 14:23:10 +0200 Subject: [PATCH 12/20] Add deprecation Test. Fix identation --- metric_learn/base_metric.py | 27 ++++++++++++++------------- test/test_base_metric.py | 24 ++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 13 deletions(-) diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py index 2d48ca65..b9e540c6 100644 --- a/metric_learn/base_metric.py +++ b/metric_learn/base_metric.py @@ -62,7 +62,7 @@ def score_pairs(self, pairs): def pair_similarity(self, pairs): """ .. versionadded:: 0.6.3 Compute the similarity score bewteen pairs - + Returns the similarity score between pairs. Depending on the algorithm, this method can return the learned similarity score between pairs, or the inverse of the distance learned between two pairs. The more the @@ -92,7 +92,7 @@ def pair_similarity(self, pairs): def pair_distance(self, pairs): """ .. versionadded:: 0.6.3 Compute the distance score between pairs - + Returns the distance score between pairs. For Mahalanobis learners, it returns the pseudo-distance bewtween pairs. It is not available for learners that does not learn a distance or pseudo-distance, an error @@ -206,15 +206,17 @@ def get_metric(self): See Also -------- - pair_distance : a method that returns the distance score between several pairs - of points. Unlike `get_metric`, this is a method of the metric learner - and therefore can change if the metric learner changes. Besides, it can - use the metric learner's preprocessor, and works on concatenated arrays. - - pair_similarity : a method that returns the similarity score between several pairs - of points. Unlike `get_metric`, this is a method of the metric learner - and therefore can change if the metric learner changes. Besides, it can - use the metric learner's preprocessor, and works on concatenated arrays. + pair_distance : a method that returns the distance score between several + pairs of points. Unlike `get_metric`, this is a method of the metric + learner and therefore can change if the metric learner changes. Besides, + it can use the metric learner's preprocessor, and works on concatenated + arrays. + + pair_similarity : a method that returns the similarity score between + several pairs of points. Unlike `get_metric`, this is a method of the + metric learner and therefore can change if the metric learner changes. + Besides, it can use the metric learner's preprocessor, and works on + concatenated arrays. """ @@ -261,7 +263,7 @@ def score_pairs(self, pairs): .. deprecated:: 0.6.3 This method is deprecated. Please use `pair_distance` instead. - .. warning:: + .. warning:: This method will be deleted in 0.6.4. Please refer to `pair_distance` or `pair_similarity`. This change will occur in order to add learners that don't necessarly learn a Mahalanobis distance. @@ -298,7 +300,6 @@ def score_pairs(self, pairs): :ref:`mahalanobis_distances` : The section of the project documentation that describes Mahalanobis Distances. - """ dpr_msg = ("score_pairs will be deprecated in release 0.6.3. 
" "Use pair_similarity to compute similarities, or " diff --git a/test/test_base_metric.py b/test/test_base_metric.py index 67f9b6a0..e96afe23 100644 --- a/test/test_base_metric.py +++ b/test/test_base_metric.py @@ -1,3 +1,4 @@ +from numpy.core.numeric import array_equal import pytest import re import unittest @@ -274,5 +275,28 @@ def test_n_components(estimator, build_dataset): 'Invalid n_components, must be in [1, {}]'.format(X.shape[1])) +@pytest.mark.parametrize('estimator, build_dataset', metric_learners, + ids=ids_metric_learners) +def test_score_pairs_warning(estimator, build_dataset): + """Tests that score_pairs returns a FutureWarning regarding deprecation. + Also that score_pairs and pair_distance have the same behaviour""" + input_data, labels, _, X = build_dataset() + model = clone(estimator) + set_random_state(model) + + # we fit the metric learner on it and then we call score_apirs on some + # points + model.fit(*remove_y(model, input_data, labels)) + + msg = ("score_pairs will be deprecated in release 0.6.3. " + "Use pair_similarity to compute similarities, or " + "pair_distances to compute distances.") + with pytest.warns(FutureWarning) as raised_warning: + score = model.score_pairs([[X[0], X[1]], ]) + dist = model.pair_distance([[X[0], X[1]], ]) + assert array_equal(score, dist) + assert np.any([str(warning.message) == msg for warning in raised_warning]) + + if __name__ == '__main__': unittest.main() From 8c559705a735f2160d630b4ad4062fce27cac9ad Mon Sep 17 00:00:00 2001 From: mvargas33 Date: Tue, 19 Oct 2021 11:59:53 +0200 Subject: [PATCH 13/20] Fixed changes requested 1 --- doc/introduction.rst | 23 ----------------------- doc/supervised.rst | 25 +++++++------------------ doc/weakly_supervised.rst | 30 +++++++++--------------------- metric_learn/base_metric.py | 20 ++++++++++---------- test/test_base_metric.py | 2 +- 5 files changed, 27 insertions(+), 73 deletions(-) diff --git a/doc/introduction.rst b/doc/introduction.rst index 6faf021d..e9ff0015 100644 --- a/doc/introduction.rst +++ b/doc/introduction.rst @@ -123,26 +123,3 @@ to the following resources: Survey `_ (2012) - **Book:** `Metric Learning `_ (2015) - -.. Methods [TO MOVE TO SUPERVISED/WEAK SECTIONS] -.. ============================================= - -.. Currently, each metric learning algorithm supports the following methods: - -.. - ``fit(...)``, which learns the model. -.. - ``get_mahalanobis_matrix()``, which returns a Mahalanobis matrix -.. - ``get_metric()``, which returns a function that takes as input two 1D - arrays and outputs the learned metric score on these two points -.. :math:`M = L^{\top}L` such that distance between vectors ``x`` and -.. ``y`` can be computed as :math:`\sqrt{\left(x-y\right)M\left(x-y\right)}`. -.. - ``components_from_metric(metric)``, which returns a transformation matrix -.. :math:`L \in \mathbb{R}^{D \times d}`, which can be used to convert a -.. data matrix :math:`X \in \mathbb{R}^{n \times d}` to the -.. :math:`D`-dimensional learned metric space :math:`X L^{\top}`, -.. in which standard Euclidean distances may be used. -.. - ``transform(X)``, which applies the aforementioned transformation. -.. - ``pair_distance(pairs)`` which returns the distance between pairs of -.. points. ``pairs`` should be a 3D array-like of pairs of shape ``(n_pairs, -.. 2, n_features)``, or it can be a 2D array-like of pairs indicators of -.. shape ``(n_pairs, 2)`` (see section :ref:`preprocessor_section` for more -.. details). 
\ No newline at end of file diff --git a/doc/supervised.rst b/doc/supervised.rst index ea5f5d29..6d3070dc 100644 --- a/doc/supervised.rst +++ b/doc/supervised.rst @@ -83,27 +83,17 @@ array([0.49627072, 3.65287282, 6.06079877]) 0.4962707194621285 - Alternatively, you can use `pair_similarity` to return the **score** between - points, the more the **score**, the closer the pairs and vice-versa. For - Mahalanobis learners, it is equal to the inverse of the distance. + pairs of points, the larger the **score**, the more similar the pair + and vice-versa. For Mahalanobis learners, it is equal to the opposite + of the distance. >>> score = nca.pair_similarity([[[3.5, 3.6], [5.6, 2.4]], [[1.2, 4.2], [2.1, 6.4]], [[3.3, 7.8], [10.9, 0.1]]]) >>> score array([-0.49627072, -3.65287282, -6.06079877]) - This is useful because `pair_similarity` matches the **score** sematic of - scikit-learn's `Classification matrics `_. - For instance, given a labeled data, you can pass the labels and the - **score** of your data to get the ROC curve. - ->>> from sklearn.metrics import roc_curve ->>> fpr, tpr, thresholds = roc_curve(['dog', 'cat', 'dog'], score, pos_label='dog') ->>> fpr -array([0., 0., 1., 1.]) ->>> tpr -array([0. , 0.5, 0.5, 1. ]) ->>> ->>> thresholds -array([ 0.50372928, -0.49627072, -3.65287282, -6.06079877]) + This is useful because `pair_similarity` matches the **score** semantic of + scikit-learn's `Classification metrics + `_. .. note:: @@ -116,7 +106,6 @@ array([ 0.50372928, -0.49627072, -3.65287282, -6.06079877]) array([[0.43680409, 0.89169412], [0.89169412, 1.9542479 ]]) -.. TODO: remove the "like it is the case etc..." if it's not the case anymore Scikit-learn compatibility -------------------------- @@ -128,7 +117,7 @@ All supervised algorithms are scikit-learn estimators scikit-learn model selection routines (`sklearn.model_selection.cross_val_score`, `sklearn.model_selection.GridSearchCV`, etc). -You can also use methods from `sklearn.metrics` that rely on y_scores. +You can also use some of the scoring functions from `sklearn.metrics`. Algorithms ========== diff --git a/doc/weakly_supervised.rst b/doc/weakly_supervised.rst index 66a34dff..f7db61ed 100644 --- a/doc/weakly_supervised.rst +++ b/doc/weakly_supervised.rst @@ -176,27 +176,17 @@ array([7.27607365, 0.88853014]) 7.276073646278203 - Alternatively, you can use `pair_similarity` to return the **score** between - points, the more the **score**, the closer the pairs and vice-versa. For - Mahalanobis learners, it is equal to the inverse of the distance. + pairs of points, the larger the **score**, the more similar the pair + and vice-versa. For Mahalanobis learners, it is equal to the opposite + of the distance. >>> score = mmc.pair_similarity([[[3.5, 3.6], [5.6, 2.4]], [[1.2, 4.2], [2.1, 6.4]], [[3.3, 7.8], [10.9, 0.1]]]) >>> score array([-0.49627072, -3.65287282, -6.06079877]) - This is useful because `pair_similarity` matches the **score** sematic of - scikit-learn's `Classification matrics `_. - For instance, given a labeled data, you can pass the labels and the - **score** of your data to get the ROC curve. - ->>> from sklearn.metrics import roc_curve ->>> fpr, tpr, thresholds = roc_curve(['dog', 'cat', 'dog'], score, pos_label='dog') ->>> fpr -array([0., 0., 1., 1.]) ->>> tpr -array([0. , 0.5, 0.5, 1. ]) ->>> ->>> thresholds -array([ 0.50372928, -0.49627072, -3.65287282, -6.06079877]) + This is useful because `pair_similarity` matches the **score** semantic of + scikit-learn's `Classification metrics + `_. .. 
note:: @@ -210,8 +200,6 @@ array([[ 0.58603894, -5.69883982, -1.66614919], [-5.69883982, 55.41743549, 16.20219519], [-1.66614919, 16.20219519, 4.73697721]]) -.. TODO: remove the "like it is the case etc..." if it's not the case anymore - .. _sklearn_compat_ws: Prediction and scoring @@ -368,7 +356,7 @@ returns the `sklearn.metrics.roc_auc_score` (which is threshold-independent). .. note:: See :ref:`fit_ws` for more details on metric learners functions that are not specific to learning on pairs, like `transform`, `pair_distance`, - `get_metric` and `get_mahalanobis_matrix`. + `pair_similarity`, `get_metric` and `get_mahalanobis_matrix`. Algorithms ---------- @@ -715,7 +703,7 @@ of triplets that have the right predicted ordering. .. note:: See :ref:`fit_ws` for more details on metric learners functions that are not specific to learning on pairs, like `transform`, `pair_distance`, - `get_metric` and `get_mahalanobis_matrix`. + `pair_similarity`, `get_metric` and `get_mahalanobis_matrix`. @@ -883,7 +871,7 @@ of quadruplets have the right predicted ordering. .. note:: See :ref:`fit_ws` for more details on metric learners functions that are not specific to learning on pairs, like `transform`, `pair_distance`, - `get_metric` and `get_mahalanobis_matrix`. + `pair_similarity`, `get_metric` and `get_mahalanobis_matrix`. diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py index 99defc97..10e873ca 100644 --- a/metric_learn/base_metric.py +++ b/metric_learn/base_metric.py @@ -29,12 +29,12 @@ def __init__(self, preprocessor=None): @abstractmethod def score_pairs(self, pairs): """ - .. deprecated:: 0.6.3 Refer to `pair_distance` and `pair_similarity`. + .. deprecated:: 0.7.0 Refer to `pair_distance` and `pair_similarity`. .. warning:: - This method will be deleted in 0.6.4. Please refer to `pair_distance` + This method will be removed in 0.8.0. Please refer to `pair_distance` or `pair_similarity`. This change will occur in order to add learners - that don't necessarly learn a Mahalanobis distance. + that don't necessarily learn a Mahalanobis distance. Returns the score between pairs (can be a similarity, or a distance/metric depending on the algorithm) @@ -61,7 +61,7 @@ def score_pairs(self, pairs): @abstractmethod def pair_similarity(self, pairs): """ - .. versionadded:: 0.6.3 Compute the similarity score bewteen pairs + .. versionadded:: 0.7.0 Compute the similarity score between pairs Returns the similarity score between pairs. Depending on the algorithm, this method can return the learned similarity score between pairs, @@ -91,10 +91,10 @@ def pair_similarity(self, pairs): @abstractmethod def pair_distance(self, pairs): """ - .. versionadded:: 0.6.3 Compute the distance score between pairs + .. versionadded:: 0.7.0 Compute the distance score between pairs Returns the distance score between pairs. For Mahalanobis learners, it - returns the pseudo-distance bewtween pairs. It is not available for + returns the pseudo-distance between pairs. It is not available for learners that does not learn a distance or pseudo-distance, an error will be shown instead. @@ -260,13 +260,13 @@ class MahalanobisMixin(BaseMetricLearner, MetricTransformer, def score_pairs(self, pairs): r""" - .. deprecated:: 0.6.3 + .. deprecated:: 0.7.0 This method is deprecated. Please use `pair_distance` instead. .. warning:: - This method will be deleted in 0.6.4. Please refer to `pair_distance` + This method will be removed in 0.8.0. Please refer to `pair_distance` or `pair_similarity`. 
This change will occur in order to add learners - that don't necessarly learn a Mahalanobis distance. + that don't necessarily learn a Mahalanobis distance. Returns the learned Mahalanobis distance between pairs. @@ -301,7 +301,7 @@ def score_pairs(self, pairs): :ref:`mahalanobis_distances` : The section of the project documentation that describes Mahalanobis Distances. """ - dpr_msg = ("score_pairs will be deprecated in release 0.6.3. " + dpr_msg = ("score_pairs will be deprecated in release 0.7.0. " "Use pair_similarity to compute similarities, or " "pair_distances to compute distances.") warnings.warn(dpr_msg, category=FutureWarning) diff --git a/test/test_base_metric.py b/test/test_base_metric.py index e96afe23..bcdb4320 100644 --- a/test/test_base_metric.py +++ b/test/test_base_metric.py @@ -288,7 +288,7 @@ def test_score_pairs_warning(estimator, build_dataset): # points model.fit(*remove_y(model, input_data, labels)) - msg = ("score_pairs will be deprecated in release 0.6.3. " + msg = ("score_pairs will be deprecated in release 0.7.0. " "Use pair_similarity to compute similarities, or " "pair_distances to compute distances.") with pytest.warns(FutureWarning) as raised_warning: From 787a8d16df04e05f176da61d1ad6d668872d7300 Mon Sep 17 00:00:00 2001 From: mvargas33 Date: Tue, 19 Oct 2021 15:47:29 +0200 Subject: [PATCH 14/20] Fixed changes requested 2 --- doc/supervised.rst | 10 ++--- doc/weakly_supervised.rst | 12 ++--- metric_learn/base_metric.py | 81 ++++++++++++++++++---------------- test/test_base_metric.py | 4 +- test/test_pairs_classifiers.py | 20 +++++---- test/test_sklearn_compat.py | 11 ++++- 6 files changed, 77 insertions(+), 61 deletions(-) diff --git a/doc/supervised.rst b/doc/supervised.rst index 6d3070dc..28fdf02b 100644 --- a/doc/supervised.rst +++ b/doc/supervised.rst @@ -82,18 +82,18 @@ array([0.49627072, 3.65287282, 6.06079877]) >>> metric_fun([3.5, 3.6], [5.6, 2.4]) 0.4962707194621285 -- Alternatively, you can use `pair_similarity` to return the **score** between +- Alternatively, you can use `pair_score` to return the **score** between pairs of points, the larger the **score**, the more similar the pair and vice-versa. For Mahalanobis learners, it is equal to the opposite of the distance. ->>> score = nca.pair_similarity([[[3.5, 3.6], [5.6, 2.4]], [[1.2, 4.2], [2.1, 6.4]], [[3.3, 7.8], [10.9, 0.1]]]) +>>> score = nca.pair_score([[[3.5, 3.6], [5.6, 2.4]], [[1.2, 4.2], [2.1, 6.4]], [[3.3, 7.8], [10.9, 0.1]]]) >>> score array([-0.49627072, -3.65287282, -6.06079877]) - This is useful because `pair_similarity` matches the **score** semantic of - scikit-learn's `Classification metrics - `_. +This is useful because `pair_score` matches the **score** semantic of +scikit-learn's `Classification metrics +`_. .. note:: diff --git a/doc/weakly_supervised.rst b/doc/weakly_supervised.rst index f7db61ed..13346c22 100644 --- a/doc/weakly_supervised.rst +++ b/doc/weakly_supervised.rst @@ -175,16 +175,16 @@ array([7.27607365, 0.88853014]) >>> metric_fun([3.5, 3.6, 5.2], [5.6, 2.4, 6.7]) 7.276073646278203 -- Alternatively, you can use `pair_similarity` to return the **score** between +- Alternatively, you can use `pair_score` to return the **score** between pairs of points, the larger the **score**, the more similar the pair and vice-versa. For Mahalanobis learners, it is equal to the opposite of the distance. 
->>> score = mmc.pair_similarity([[[3.5, 3.6], [5.6, 2.4]], [[1.2, 4.2], [2.1, 6.4]], [[3.3, 7.8], [10.9, 0.1]]]) +>>> score = mmc.pair_score([[[3.5, 3.6], [5.6, 2.4]], [[1.2, 4.2], [2.1, 6.4]], [[3.3, 7.8], [10.9, 0.1]]]) >>> score array([-0.49627072, -3.65287282, -6.06079877]) - This is useful because `pair_similarity` matches the **score** semantic of + This is useful because `pair_score` matches the **score** semantic of scikit-learn's `Classification metrics `_. @@ -356,7 +356,7 @@ returns the `sklearn.metrics.roc_auc_score` (which is threshold-independent). .. note:: See :ref:`fit_ws` for more details on metric learners functions that are not specific to learning on pairs, like `transform`, `pair_distance`, - `pair_similarity`, `get_metric` and `get_mahalanobis_matrix`. + `pair_score`, `get_metric` and `get_mahalanobis_matrix`. Algorithms ---------- @@ -703,7 +703,7 @@ of triplets that have the right predicted ordering. .. note:: See :ref:`fit_ws` for more details on metric learners functions that are not specific to learning on pairs, like `transform`, `pair_distance`, - `pair_similarity`, `get_metric` and `get_mahalanobis_matrix`. + `pair_score`, `get_metric` and `get_mahalanobis_matrix`. @@ -871,7 +871,7 @@ of quadruplets have the right predicted ordering. .. note:: See :ref:`fit_ws` for more details on metric learners functions that are not specific to learning on pairs, like `transform`, `pair_distance`, - `pair_similarity`, `get_metric` and `get_mahalanobis_matrix`. + `pair_score`, `get_metric` and `get_mahalanobis_matrix`. diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py index 10e873ca..c04754e3 100644 --- a/metric_learn/base_metric.py +++ b/metric_learn/base_metric.py @@ -41,8 +41,10 @@ def score_pairs(self, pairs): Parameters ---------- - pairs : `numpy.ndarray`, shape=(n_samples, 2, n_features) - 3D array of pairs. + pairs : array-like, shape=(n_pairs, 2, n_features) or (n_pairs, 2) + 3D Array of pairs to score, with each row corresponding to two points, + for 2D array of indices of pairs if the metric learner uses a + preprocessor. Returns ------- @@ -52,27 +54,29 @@ def score_pairs(self, pairs): See Also -------- get_metric : a method that returns a function to compute the metric between - two points. The difference with `score_pairs` is that it works on two - 1D arrays and cannot use a preprocessor. Besides, the returned function - is independent of the metric learner and hence is not modified if the - metric learner is. + two points. The difference between `pair_score` and `pair_distance` is + that it works on two 1D arrays and cannot use a preprocessor. Besides, + the returned function is independent of the metric learner and hence is + not modified if the metric learner is. """ @abstractmethod - def pair_similarity(self, pairs): + def pair_score(self, pairs): """ .. versionadded:: 0.7.0 Compute the similarity score between pairs - Returns the similarity score between pairs. Depending on the algorithm, - this method can return the learned similarity score between pairs, - or the inverse of the distance learned between two pairs. The more the - score, the more similar the pairs. All learners have access to this + Returns the similarity score between pairs of points. Depending on the + algorithm, this method can return the learned similarity score between + pairs, or the opposite of the distance learned between pairs. The larger + the score, the more similar the pair. All learners have access to this method. 
Parameters ---------- - pairs : `numpy.ndarray`, shape=(n_samples, 2, n_features) - 3D array of pairs. + pairs : array-like, shape=(n_pairs, 2, n_features) or (n_pairs, 2) + 3D Array of pairs to score, with each row corresponding to two points, + for 2D array of indices of pairs if the metric learner uses a + preprocessor. Returns ------- @@ -82,7 +86,7 @@ def pair_similarity(self, pairs): See Also -------- get_metric : a method that returns a function to compute the metric between - two points. The difference with `pair_similarity` is that it works on two + two points. The difference with `pair_score` is that it works on two 1D arrays and cannot use a preprocessor. Besides, the returned function is independent of the metric learner and hence is not modified if the metric learner is. @@ -91,17 +95,18 @@ def pair_similarity(self, pairs): @abstractmethod def pair_distance(self, pairs): """ - .. versionadded:: 0.7.0 Compute the distance score between pairs + .. versionadded:: 0.7.0 Compute the distance between pairs - Returns the distance score between pairs. For Mahalanobis learners, it - returns the pseudo-distance between pairs. It is not available for - learners that does not learn a distance or pseudo-distance, an error - will be shown instead. + Returns the (pseudo) distance between pairs, when available. For metric + learners that do not learn a (pseudo) distance, an error is thrown + instead. Parameters ---------- - pairs : `numpy.ndarray`, shape=(n_samples, 2, n_features) - 3D array of pairs. + pairs : array-like, shape=(n_pairs, 2, n_features) or (n_pairs, 2) + 3D Array of pairs to score, with each row corresponding to two points, + for 2D array of indices of pairs if the metric learner uses a + preprocessor. Returns ------- @@ -170,10 +175,10 @@ def _prepare_inputs(self, X, y=None, type_of_inputs='classic', @abstractmethod def get_metric(self): - """Returns a function that takes as input two 1D arrays and outputs the - learned metric score on these two points. Depending on the algorithm, it - can return the distance or similarity function between pairs. It always - returns what the specific algorithm learns. + """Returns a function that takes as input two 1D arrays and outputs + the value of the learned metric on these two points. Depending on the + algorithm, it can return a distance or a score function between pairs. + It always returns what the specific algorithm learns. This function will be independent from the metric learner that learned it (it will not be modified if the initial metric learner is modified), @@ -206,13 +211,13 @@ def get_metric(self): See Also -------- - pair_distance : a method that returns the distance score between several + pair_distance : a method that returns the distance between several pairs of points. Unlike `get_metric`, this is a method of the metric learner and therefore can change if the metric learner changes. Besides, it can use the metric learner's preprocessor, and works on concatenated arrays. - pair_similarity : a method that returns the similarity score between + pair_score : a method that returns the similarity score between several pairs of points. Unlike `get_metric`, this is a method of the metric learner and therefore can change if the metric learner changes. Besides, it can use the metric learner's preprocessor, and works on @@ -265,7 +270,7 @@ def score_pairs(self, pairs): .. warning:: This method will be removed in 0.8.0. Please refer to `pair_distance` - or `pair_similarity`. 
This change will occur in order to add learners + or `pair_score`. This change will occur in order to add learners that don't necessarily learn a Mahalanobis distance. Returns the learned Mahalanobis distance between pairs. @@ -302,14 +307,14 @@ def score_pairs(self, pairs): that describes Mahalanobis Distances. """ dpr_msg = ("score_pairs will be deprecated in release 0.7.0. " - "Use pair_similarity to compute similarities, or " + "Use pair_score to compute similarity scores, or " "pair_distances to compute distances.") warnings.warn(dpr_msg, category=FutureWarning) return self.pair_distance(pairs) - def pair_similarity(self, pairs): + def pair_score(self, pairs): """ - Returns the inverse of the learned Mahalanobis distance between pairs. + Returns the opposite of the learned Mahalanobis distance between pairs. Parameters ---------- @@ -321,12 +326,12 @@ def pair_similarity(self, pairs): Returns ------- scores : `numpy.ndarray` of shape=(n_pairs,) - The inverse of the learned Mahalanobis distance for every pair. + The opposite of the learned Mahalanobis distance for every pair. See Also -------- get_metric : a method that returns a function to compute the metric between - two points. The difference with `pair_similarity` is that it works on two + two points. The difference with `pair_score` is that it works on two 1D arrays and cannot use a preprocessor. Besides, the returned function is independent of the metric learner and hence is not modified if the metric learner is. @@ -517,7 +522,7 @@ def decision_function(self, pairs): pairs = check_input(pairs, type_of_inputs='tuples', preprocessor=self.preprocessor_, estimator=self, tuple_size=self._tuple_size) - return self.pair_similarity(pairs) + return self.pair_score(pairs) def score(self, pairs, y): """Computes score of pairs similarity prediction. @@ -787,8 +792,8 @@ def decision_function(self, triplets): triplets = check_input(triplets, type_of_inputs='tuples', preprocessor=self.preprocessor_, estimator=self, tuple_size=self._tuple_size) - return (self.pair_similarity(triplets[:, :2]) - - self.pair_similarity(triplets[:, [0, 2]])) + return (self.pair_score(triplets[:, :2]) - + self.pair_score(triplets[:, [0, 2]])) def score(self, triplets): """Computes score on input triplets. @@ -872,8 +877,8 @@ def decision_function(self, quadruplets): quadruplets = check_input(quadruplets, type_of_inputs='tuples', preprocessor=self.preprocessor_, estimator=self, tuple_size=self._tuple_size) - return (self.pair_similarity(quadruplets[:, :2]) - - self.pair_similarity(quadruplets[:, 2:])) + return (self.pair_score(quadruplets[:, :2]) - + self.pair_score(quadruplets[:, 2:])) def score(self, quadruplets): """Computes score on input quadruplets diff --git a/test/test_base_metric.py b/test/test_base_metric.py index bcdb4320..5a409fe1 100644 --- a/test/test_base_metric.py +++ b/test/test_base_metric.py @@ -284,12 +284,12 @@ def test_score_pairs_warning(estimator, build_dataset): model = clone(estimator) set_random_state(model) - # we fit the metric learner on it and then we call score_apirs on some + # We fit the metric learner on it and then we call score_pairs on some # points model.fit(*remove_y(model, input_data, labels)) msg = ("score_pairs will be deprecated in release 0.7.0. 
" - "Use pair_similarity to compute similarities, or " + "Use pair_score to compute similarity scores, or " "pair_distances to compute distances.") with pytest.warns(FutureWarning) as raised_warning: score = model.score_pairs([[X[0], X[1]], ]) diff --git a/test/test_pairs_classifiers.py b/test/test_pairs_classifiers.py index 7023fbea..4df3b2f1 100644 --- a/test/test_pairs_classifiers.py +++ b/test/test_pairs_classifiers.py @@ -49,14 +49,14 @@ def test_predict_monotonous(estimator, build_dataset, pairs_train, pairs_test, y_train, y_test = train_test_split(input_data, labels) estimator.fit(pairs_train, y_train) - distances = estimator.pair_distance(pairs_test) + scores = estimator.pair_score(pairs_test) predictions = estimator.predict(pairs_test) - min_dissimilar = np.min(distances[predictions == -1]) - max_similar = np.max(distances[predictions == 1]) - assert max_similar <= min_dissimilar - separator = np.mean([min_dissimilar, max_similar]) - assert (predictions[distances > separator] == -1).all() - assert (predictions[distances < separator] == 1).all() + max_dissimilar = np.max(scores[predictions == -1]) + min_similar = np.min(scores[predictions == 1]) + assert max_dissimilar <= min_similar + separator = np.mean([max_dissimilar, min_similar]) + assert (predictions[scores < separator] == -1).all() + assert (predictions[scores > separator] == 1).all() @pytest.mark.parametrize('with_preprocessor', [True, False]) @@ -65,15 +65,17 @@ def test_predict_monotonous(estimator, build_dataset, def test_raise_not_fitted_error_if_not_fitted(estimator, build_dataset, with_preprocessor): """Test that a NotFittedError is raised if someone tries to use - pair_distance, decision_function, get_metric, transform or + pair_score, score_pairs, decision_function, get_metric, transform or get_mahalanobis_matrix on input data and the metric learner has not been fitted.""" input_data, labels, preprocessor, _ = build_dataset(with_preprocessor) estimator = clone(estimator) estimator.set_params(preprocessor=preprocessor) set_random_state(estimator) + with pytest.raises(NotFittedError): # Remove in 0.8.0 + estimator.score_pairs(input_data) with pytest.raises(NotFittedError): - estimator.pair_distance(input_data) + estimator.pair_score(input_data) with pytest.raises(NotFittedError): estimator.decision_function(input_data) with pytest.raises(NotFittedError): diff --git a/test/test_sklearn_compat.py b/test/test_sklearn_compat.py index d6077a16..9ad1482f 100644 --- a/test/test_sklearn_compat.py +++ b/test/test_sklearn_compat.py @@ -147,8 +147,17 @@ def test_array_like_inputs(estimator, build_dataset, with_preprocessor): pairs = np.array([[X[0], X[1]], [X[0], X[2]]]) pairs_variants, _ = generate_array_like(pairs) + msg = "" + # Todo in 0.7.0: Change 'msg' for the message that says "This learner does + # not have pair_distance" for pairs_variant in pairs_variants: - estimator.pair_distance(pairs_variant) + estimator.pair_score(pairs_variant) # All learners have pair_score + # But all of them will have pair_distance + with pytest.raises(Exception) as raised_exception: + estimator.pair_distance(pairs_variant) + if raised_exception is not None: + assert msg == raised_exception.value.args[0] + @pytest.mark.parametrize('with_preprocessor', [True, False]) From e14f956888f0b2b952450b1e2d076daa403fc8b2 Mon Sep 17 00:00:00 2001 From: mvargas33 Date: Tue, 19 Oct 2021 16:03:25 +0200 Subject: [PATCH 15/20] Add equivalence test, p_dist == p_score --- test/test_mahalanobis_mixin.py | 20 +++++++++++++++++++- 1 file changed, 19 
insertions(+), 1 deletion(-) diff --git a/test/test_mahalanobis_mixin.py b/test/test_mahalanobis_mixin.py index 2ba78a57..5b1ebe2e 100644 --- a/test/test_mahalanobis_mixin.py +++ b/test/test_mahalanobis_mixin.py @@ -3,7 +3,7 @@ import pytest import numpy as np from numpy.linalg import LinAlgError -from numpy.testing import assert_array_almost_equal, assert_allclose +from numpy.testing import assert_array_almost_equal, assert_allclose, assert_array_equal from scipy.spatial.distance import pdist, squareform, mahalanobis from scipy.stats import ortho_group from sklearn import clone @@ -24,6 +24,24 @@ RNG = check_random_state(0) +@pytest.mark.parametrize('estimator, build_dataset', metric_learners, + ids=ids_metric_learners) +def test_pair_distance_pair_score_equivalent(estimator, build_dataset): + """ + For Mahalanobis learners, pair_score should be equivalent to the + opposite of the pair_distance result. + """ + input_data, labels, _, X = build_dataset() + n_samples = 20 + X = X[:n_samples] + model = clone(estimator) + set_random_state(model) + model.fit(*remove_y(estimator, input_data, labels)) + + distances = model.pair_distance(np.array(list(product(X, X)))) + scores_1 = -1 * model.pair_score(np.array(list(product(X, X)))) + + assert_array_equal(distances, scores_1) @pytest.mark.parametrize('estimator, build_dataset', metric_learners, ids=ids_metric_learners) From 0941a320f6a71cac0fa8223ac753ce57ada69604 Mon Sep 17 00:00:00 2001 From: mvargas33 Date: Tue, 19 Oct 2021 16:36:40 +0200 Subject: [PATCH 16/20] Fix tests and identation. --- test/test_mahalanobis_mixin.py | 7 ++- test/test_pairs_classifiers.py | 2 +- test/test_sklearn_compat.py | 19 ++++---- test/test_utils.py | 79 +++++++++++++++++++++++++--------- 4 files changed, 75 insertions(+), 32 deletions(-) diff --git a/test/test_mahalanobis_mixin.py b/test/test_mahalanobis_mixin.py index 5b1ebe2e..f52f4592 100644 --- a/test/test_mahalanobis_mixin.py +++ b/test/test_mahalanobis_mixin.py @@ -3,7 +3,8 @@ import pytest import numpy as np from numpy.linalg import LinAlgError -from numpy.testing import assert_array_almost_equal, assert_allclose, assert_array_equal +from numpy.testing import assert_array_almost_equal, assert_allclose, \ + assert_array_equal from scipy.spatial.distance import pdist, squareform, mahalanobis from scipy.stats import ortho_group from sklearn import clone @@ -24,6 +25,7 @@ RNG = check_random_state(0) + @pytest.mark.parametrize('estimator, build_dataset', metric_learners, ids=ids_metric_learners) def test_pair_distance_pair_score_equivalent(estimator, build_dataset): @@ -40,9 +42,10 @@ def test_pair_distance_pair_score_equivalent(estimator, build_dataset): distances = model.pair_distance(np.array(list(product(X, X)))) scores_1 = -1 * model.pair_score(np.array(list(product(X, X)))) - + assert_array_equal(distances, scores_1) + @pytest.mark.parametrize('estimator, build_dataset', metric_learners, ids=ids_metric_learners) def test_pair_distance_pairwise(estimator, build_dataset): diff --git a/test/test_pairs_classifiers.py b/test/test_pairs_classifiers.py index 4df3b2f1..714cbd08 100644 --- a/test/test_pairs_classifiers.py +++ b/test/test_pairs_classifiers.py @@ -72,7 +72,7 @@ def test_raise_not_fitted_error_if_not_fitted(estimator, build_dataset, estimator = clone(estimator) estimator.set_params(preprocessor=preprocessor) set_random_state(estimator) - with pytest.raises(NotFittedError): # Remove in 0.8.0 + with pytest.raises(NotFittedError): # Remove in 0.8.0 estimator.score_pairs(input_data) with 
pytest.raises(NotFittedError): estimator.pair_score(input_data) diff --git a/test/test_sklearn_compat.py b/test/test_sklearn_compat.py index 9ad1482f..738ddc86 100644 --- a/test/test_sklearn_compat.py +++ b/test/test_sklearn_compat.py @@ -147,17 +147,20 @@ def test_array_like_inputs(estimator, build_dataset, with_preprocessor): pairs = np.array([[X[0], X[1]], [X[0], X[2]]]) pairs_variants, _ = generate_array_like(pairs) - msg = "" - # Todo in 0.7.0: Change 'msg' for the message that says "This learner does - # not have pair_distance" + + not_implemented_msg = "" + # Todo in 0.7.0: Change 'not_implemented_msg' for the message that says + # "This learner does not have pair_distance" + for pairs_variant in pairs_variants: - estimator.pair_score(pairs_variant) # All learners have pair_score - # But all of them will have pair_distance - with pytest.raises(Exception) as raised_exception: + estimator.pair_score(pairs_variant) # All learners have pair_score + + # But not all of them will have pair_distance + try: estimator.pair_distance(pairs_variant) - if raised_exception is not None: - assert msg == raised_exception.value.args[0] + except Exception as raised_exception: + assert raised_exception.value.args[0] == not_implemented_msg @pytest.mark.parametrize('with_preprocessor', [True, False]) diff --git a/test/test_utils.py b/test/test_utils.py index 39515f78..83bdd86a 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -834,9 +834,9 @@ def test_error_message_tuple_size(estimator, _): @pytest.mark.parametrize('estimator, _', metric_learners, ids=ids_metric_learners) -def test_error_message_t_pair_distance(estimator, _): - """tests that if you want to pair_distance on triplets for instance, it returns - the right error message +def test_error_message_t_pair_distance_or_score(estimator, _): + """Tests that if you want to pair_distance or pair_score on triplets + for instance, it returns the right error message """ estimator = clone(estimator) set_random_state(estimator) @@ -844,12 +844,22 @@ def test_error_message_t_pair_distance(estimator, _): triplets = np.array([[[1.3, 6.3], [3., 6.8], [6.5, 4.4]], [[1.9, 5.3], [1., 7.8], [3.2, 1.2]]]) with pytest.raises(ValueError) as raised_err: - estimator.pair_distance(triplets) + estimator.pair_score(triplets) expected_msg = ("Tuples of 2 element(s) expected{}. Got tuples of 3 " "element(s) instead (shape=(2, 3, 2)):\ninput={}.\n" .format(make_context(estimator), triplets)) assert str(raised_err.value) == expected_msg + not_implemented_msg = "" + # Todo in 0.7.0: Change 'not_implemented_msg' for the message that says + # "This learner does not have pair_distance" + + # One exception will trigger for sure + with pytest.raises(Exception) as raised_exception: + estimator.pair_distance(triplets) + err_value = raised_exception.value.args[0] + assert err_value == expected_msg or err_value == not_implemented_msg + def test_preprocess_tuples_simple_example(): """Test the preprocessor on a very simple example of tuples to ensure the @@ -930,32 +940,59 @@ def test_same_with_or_without_preprocessor(estimator, build_dataset): method)(formed_test) assert np.array(output_with_prep == output_with_prep_formed).all() - # test pair_distance + # Test pair_score, all learners have it. 
idx1 = np.array([[0, 2], [5, 3]], dtype=int) - output_with_prep = estimator_with_preprocessor.pair_distance( + output_with_prep = estimator_with_preprocessor.pair_score( indicators_to_transform[idx1]) - output_without_prep = estimator_without_preprocessor.pair_distance( + output_without_prep = estimator_without_preprocessor.pair_score( formed_points_to_transform[idx1]) assert np.array(output_with_prep == output_without_prep).all() - output_with_prep = estimator_with_preprocessor.pair_distance( + output_with_prep = estimator_with_preprocessor.pair_score( indicators_to_transform[idx1]) - output_without_prep = estimator_with_prep_formed.pair_distance( + output_without_prep = estimator_with_prep_formed.pair_score( formed_points_to_transform[idx1]) assert np.array(output_with_prep == output_without_prep).all() - # test transform - output_with_prep = estimator_with_preprocessor.transform( - indicators_to_transform) - output_without_prep = estimator_without_preprocessor.transform( - formed_points_to_transform) - assert np.array(output_with_prep == output_without_prep).all() - - output_with_prep = estimator_with_preprocessor.transform( - indicators_to_transform) - output_without_prep = estimator_with_prep_formed.transform( - formed_points_to_transform) - assert np.array(output_with_prep == output_without_prep).all() + # Test pair_distance + not_implemented_msg = "" + # Todo in 0.7.0: Change 'not_implemented_msg' for the message that says + # "This learner does not have pair_distance" + try: + output_with_prep = estimator_with_preprocessor.pair_distance( + indicators_to_transform[idx1]) + output_without_prep = estimator_without_preprocessor.pair_distance( + formed_points_to_transform[idx1]) + assert np.array(output_with_prep == output_without_prep).all() + + output_with_prep = estimator_with_preprocessor.pair_distance( + indicators_to_transform[idx1]) + output_without_prep = estimator_with_prep_formed.pair_distance( + formed_points_to_transform[idx1]) + assert np.array(output_with_prep == output_without_prep).all() + + except Exception as raised_exception: + assert raised_exception.value.args[0] == not_implemented_msg + + # Test transform + not_implemented_msg = "" + # Todo in 0.7.0: Change 'not_implemented_msg' for the message that says + # "This learner does not have transform" + try: + output_with_prep = estimator_with_preprocessor.transform( + indicators_to_transform) + output_without_prep = estimator_without_preprocessor.transform( + formed_points_to_transform) + assert np.array(output_with_prep == output_without_prep).all() + + output_with_prep = estimator_with_preprocessor.transform( + indicators_to_transform) + output_without_prep = estimator_with_prep_formed.transform( + formed_points_to_transform) + assert np.array(output_with_prep == output_without_prep).all() + + except Exception as raised_exception: + assert raised_exception.value.args[0] == not_implemented_msg def test_check_collapsed_pairs_raises_no_error(): From b019d857c443d69b0c263f37ca2b03ba45f4a2a1 Mon Sep 17 00:00:00 2001 From: mvargas33 Date: Wed, 20 Oct 2021 11:26:46 +0200 Subject: [PATCH 17/20] Fixed changes requested 3 --- doc/supervised.rst | 5 ++--- doc/weakly_supervised.rst | 5 ++--- metric_learn/base_metric.py | 29 ++++++++++++++--------------- test/test_mahalanobis_mixin.py | 4 ++-- 4 files changed, 20 insertions(+), 23 deletions(-) diff --git a/doc/supervised.rst b/doc/supervised.rst index 28fdf02b..e27b58ec 100644 --- a/doc/supervised.rst +++ b/doc/supervised.rst @@ -83,9 +83,8 @@ array([0.49627072, 3.65287282, 
6.06079877]) 0.4962707194621285 - Alternatively, you can use `pair_score` to return the **score** between - pairs of points, the larger the **score**, the more similar the pair - and vice-versa. For Mahalanobis learners, it is equal to the opposite - of the distance. + pairs of points (the larger the score, the more similar the pair). + For Mahalanobis learners, it is equal to the opposite of the distance. >>> score = nca.pair_score([[[3.5, 3.6], [5.6, 2.4]], [[1.2, 4.2], [2.1, 6.4]], [[3.3, 7.8], [10.9, 0.1]]]) >>> score diff --git a/doc/weakly_supervised.rst b/doc/weakly_supervised.rst index 13346c22..02ea4ef6 100644 --- a/doc/weakly_supervised.rst +++ b/doc/weakly_supervised.rst @@ -176,9 +176,8 @@ array([7.27607365, 0.88853014]) 7.276073646278203 - Alternatively, you can use `pair_score` to return the **score** between - pairs of points, the larger the **score**, the more similar the pair - and vice-versa. For Mahalanobis learners, it is equal to the opposite - of the distance. + pairs of points (the larger the score, the more similar the pair). + For Mahalanobis learners, it is equal to the opposite of the distance. >>> score = mmc.pair_score([[[3.5, 3.6], [5.6, 2.4]], [[1.2, 4.2], [2.1, 6.4]], [[3.3, 7.8], [10.9, 0.1]]]) >>> score diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py index c04754e3..84a22ad3 100644 --- a/metric_learn/base_metric.py +++ b/metric_learn/base_metric.py @@ -54,10 +54,10 @@ def score_pairs(self, pairs): See Also -------- get_metric : a method that returns a function to compute the metric between - two points. The difference between `pair_score` and `pair_distance` is - that it works on two 1D arrays and cannot use a preprocessor. Besides, - the returned function is independent of the metric learner and hence is - not modified if the metric learner is. + two points. The difference between `score_pairs` is that it works on two + 1D arrays and cannot use a preprocessor. Besides, the returned function + is independent of the metric learner and hence is not modified if the + metric learner is. """ @abstractmethod @@ -65,11 +65,10 @@ def pair_score(self, pairs): """ .. versionadded:: 0.7.0 Compute the similarity score between pairs - Returns the similarity score between pairs of points. Depending on the - algorithm, this method can return the learned similarity score between - pairs, or the opposite of the distance learned between pairs. The larger - the score, the more similar the pair. All learners have access to this - method. + Returns the similarity score between pairs of points (the larger the score, + the more similar the pair). For metric learners that learn a distance, + the score is simply the opposite of the distance between pairs. All learners + have access to this method. Parameters ---------- @@ -104,14 +103,14 @@ def pair_distance(self, pairs): Parameters ---------- pairs : array-like, shape=(n_pairs, 2, n_features) or (n_pairs, 2) - 3D Array of pairs to score, with each row corresponding to two points, - for 2D array of indices of pairs if the metric learner uses a - preprocessor. + 3D Array of pairs for which to compute the distance, with each + row corresponding to two points, for 2D array of indices of pairs + if the metric learner uses a preprocessor. Returns ------- scores : `numpy.ndarray` of shape=(n_pairs,) - The score of every pair. + The distance between every pair. 
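To make the meaning of "distance" concrete, here is a hedged sketch (the learner choice, variable names and toy data are illustrative, not part of the API): for a Mahalanobis learner, `pair_distance` should agree with the stand-alone function returned by `get_metric()` and with an explicit sqrt((x - x')^T M (x - x')) built from `get_mahalanobis_matrix()`.

    import numpy as np
    from metric_learn import NCA

    X = np.array([[3.5, 3.6], [5.6, 2.4],
                  [1.2, 4.2], [2.1, 6.4],
                  [3.3, 7.8], [10.9, 0.1]])
    y = np.array([0, 0, 1, 1, 2, 2])
    nca = NCA().fit(X, y)

    u, v = X[0], X[1]
    d_api = nca.pair_distance(np.array([[u, v]]))[0]   # pair-based API
    d_fun = nca.get_metric()(u, v)                     # stand-alone metric function
    M = nca.get_mahalanobis_matrix()
    d_manual = np.sqrt((u - v).dot(M).dot(u - v))      # sqrt((x - x')^T M (x - x'))

    assert np.isclose(d_api, d_fun) and np.isclose(d_api, d_manual)
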
See Also -------- @@ -177,8 +176,8 @@ def _prepare_inputs(self, X, y=None, type_of_inputs='classic', def get_metric(self): """Returns a function that takes as input two 1D arrays and outputs the value of the learned metric on these two points. Depending on the - algorithm, it can return a distance or a score function between pairs. - It always returns what the specific algorithm learns. + algorithm, it can return a distance or a similarity function between + pairs. This function will be independent from the metric learner that learned it (it will not be modified if the initial metric learner is modified), diff --git a/test/test_mahalanobis_mixin.py b/test/test_mahalanobis_mixin.py index f52f4592..64ab10b4 100644 --- a/test/test_mahalanobis_mixin.py +++ b/test/test_mahalanobis_mixin.py @@ -41,9 +41,9 @@ def test_pair_distance_pair_score_equivalent(estimator, build_dataset): model.fit(*remove_y(estimator, input_data, labels)) distances = model.pair_distance(np.array(list(product(X, X)))) - scores_1 = -1 * model.pair_score(np.array(list(product(X, X)))) + scores = model.pair_score(np.array(list(product(X, X)))) - assert_array_equal(distances, scores_1) + assert_array_equal(distances, -1 * scores) @pytest.mark.parametrize('estimator, build_dataset', metric_learners, From 74df897171a041a5fb6d0be5c85df770792bdbec Mon Sep 17 00:00:00 2001 From: mvargas33 Date: Wed, 20 Oct 2021 15:28:06 +0200 Subject: [PATCH 18/20] Fix identation --- metric_learn/base_metric.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py index 84a22ad3..edbd5ed2 100644 --- a/metric_learn/base_metric.py +++ b/metric_learn/base_metric.py @@ -67,8 +67,8 @@ def pair_score(self, pairs): Returns the similarity score between pairs of points (the larger the score, the more similar the pair). For metric learners that learn a distance, - the score is simply the opposite of the distance between pairs. All learners - have access to this method. + the score is simply the opposite of the distance between pairs. All + learners have access to this method. 
Parameters ---------- From c62a4e741728d6e6924abd45c60f1961dfcf6894 Mon Sep 17 00:00:00 2001 From: mvargas33 Date: Thu, 21 Oct 2021 10:41:36 +0200 Subject: [PATCH 19/20] Last requested changes --- test/test_base_metric.py | 2 +- test/test_mahalanobis_mixin.py | 2 +- test/test_sklearn_compat.py | 1 - 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/test/test_base_metric.py b/test/test_base_metric.py index 5a409fe1..baa585b9 100644 --- a/test/test_base_metric.py +++ b/test/test_base_metric.py @@ -295,7 +295,7 @@ def test_score_pairs_warning(estimator, build_dataset): score = model.score_pairs([[X[0], X[1]], ]) dist = model.pair_distance([[X[0], X[1]], ]) assert array_equal(score, dist) - assert np.any([str(warning.message) == msg for warning in raised_warning]) + assert any([str(warning.message) == msg for warning in raised_warning]) if __name__ == '__main__': diff --git a/test/test_mahalanobis_mixin.py b/test/test_mahalanobis_mixin.py index 64ab10b4..e2aa1e4d 100644 --- a/test/test_mahalanobis_mixin.py +++ b/test/test_mahalanobis_mixin.py @@ -646,7 +646,7 @@ def test_singular_covariance_init_of_non_strict_pd(estimator, build_dataset): 'preprocessing step.') with pytest.warns(UserWarning) as raised_warning: model.fit(input_data, labels) - assert np.any([str(warning.message) == msg for warning in raised_warning]) + assert any([str(warning.message) == msg for warning in raised_warning]) M, _ = _initialize_metric_mahalanobis(X, init='covariance', random_state=RNG, return_inverse=True, diff --git a/test/test_sklearn_compat.py b/test/test_sklearn_compat.py index 738ddc86..b08fcf25 100644 --- a/test/test_sklearn_compat.py +++ b/test/test_sklearn_compat.py @@ -158,7 +158,6 @@ def test_array_like_inputs(estimator, build_dataset, with_preprocessor): # But not all of them will have pair_distance try: estimator.pair_distance(pairs_variant) - except Exception as raised_exception: assert raised_exception.value.args[0] == not_implemented_msg From 249e0fe7ca4a250f554a92fae9f5da213d48600d Mon Sep 17 00:00:00 2001 From: mvargas33 Date: Thu, 21 Oct 2021 10:53:28 +0200 Subject: [PATCH 20/20] Last small detail --- metric_learn/base_metric.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py index edbd5ed2..e7dbd608 100644 --- a/metric_learn/base_metric.py +++ b/metric_learn/base_metric.py @@ -29,11 +29,11 @@ def __init__(self, preprocessor=None): @abstractmethod def score_pairs(self, pairs): """ - .. deprecated:: 0.7.0 Refer to `pair_distance` and `pair_similarity`. + .. deprecated:: 0.7.0 Refer to `pair_distance` and `pair_score`. .. warning:: This method will be removed in 0.8.0. Please refer to `pair_distance` - or `pair_similarity`. This change will occur in order to add learners + or `pair_score`. This change will occur in order to add learners that don't necessarily learn a Mahalanobis distance. Returns the score between pairs
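For existing user code, a hedged migration sketch (again with an illustrative `NCA` setup): during the transition, `score_pairs` still answers, but it emits a `FutureWarning` and returns exactly what `pair_distance` returns.

    import warnings
    import numpy as np
    from metric_learn import NCA

    X = np.array([[3.5, 3.6], [5.6, 2.4],
                  [1.2, 4.2], [2.1, 6.4],
                  [3.3, 7.8], [10.9, 0.1]])
    y = np.array([0, 0, 1, 1, 2, 2])
    nca = NCA().fit(X, y)

    pairs = np.array([[X[0], X[1]]])
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        old = nca.score_pairs(pairs)      # deprecated entry point
    new = nca.pair_distance(pairs)        # replacement

    assert any(issubclass(w.category, FutureWarning) for w in caught)
    assert np.array_equal(old, new)
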