Add pairwise_scores_pyo3()

dustalov · Aug 17, 2024 · 496c25d · 496c25d
1 parent a65f49b
commit 496c25d
Show file tree

Hide file tree

Showing 7 changed files with 85 additions and 29 deletions.
diff --git a/python/evalica/__init__.py b/python/evalica/__init__.py
@@ -24,13 +24,15 @@
     matrices_pyo3,
     newman_pyo3,
     pagerank_pyo3,
+    pairwise_scores_pyo3,
 )
 from .naive import bradley_terry as bradley_terry_naive
 from .naive import counting as counting_naive
 from .naive import eigen as eigen_naive
 from .naive import elo as elo_naive
 from .naive import newman as newman_naive
 from .naive import pagerank as pagerank_naive
+from .naive import pairwise_scores as pairwise_scores_naive
 
 WINNERS = [
     Winner.X,
@@ -762,12 +764,16 @@ def __init__(self, ndim: int) -> None:
         super().__init__(f"scores should be one-dimensional, {ndim} was provided")
 
 
-def pairwise_scores(scores: npt.NDArray[np.float64 | np.int64]) -> npt.NDArray[np.float64]:
+def pairwise_scores(
+    scores: npt.NDArray[np.float64],
+    solver: Literal["naive", "pyo3"] = "pyo3",
+) -> npt.NDArray[np.float64]:
     """
     Estimate the pairwise scores.
 
     Args:
         scores: The element scores.
+        solver: The solver.
 
     Returns:
         The matrix representing pairwise scores between the elements.
@@ -776,15 +782,10 @@ def pairwise_scores(scores: npt.NDArray[np.float64 | np.int64]) -> npt.NDArray[n
     if scores.ndim != 1:
         raise ScoreDimensionError(scores.ndim)
 
-    if not scores.shape[0]:
-        return np.zeros((0, 0), dtype=np.float64)
-
-    pairwise = scores[:, np.newaxis] / (scores + scores[:, np.newaxis])
-
-    if np.isfinite(scores).all():
-        pairwise = np.nan_to_num(pairwise)
+    if solver == "naive":
+        return pairwise_scores_naive(scores)
 
-    return pairwise
+    return pairwise_scores_pyo3(scores)
 
 
 def pairwise_frame(scores: pd.Series[T]) -> pd.DataFrame:  # type: ignore[type-var]

diff --git a/python/evalica/evalica.pyi b/python/evalica/evalica.pyi
@@ -32,6 +32,9 @@ def matrices_pyo3(
 ) -> tuple[npt.NDArray[np.int64], npt.NDArray[np.int64]]: ...
 
 
+def pairwise_scores_pyo3(scores: npt.ArrayLike) -> npt.NDArray[np.float64]: ...
+
+
 def counting_pyo3(
         xs: npt.ArrayLike,
         ys: npt.ArrayLike,

diff --git a/python/evalica/naive.py b/python/evalica/naive.py
@@ -9,6 +9,14 @@
 
 if TYPE_CHECKING:
     from collections.abc import Collection
+    from typing import Any
+
+
+def pairwise_scores(scores: npt.NDArray[np.number[Any]]) -> npt.NDArray[np.float64]:
+    if not scores.shape[0]:  # no elements: return an empty 0x0 matrix
+        return np.zeros((0, 0))
+    # For 1-D scores, cell [i, j] = scores[i] / (scores[i] + scores[j]); nan_to_num maps NaN to 0.0.
+    return np.nan_to_num(scores[:, np.newaxis] / (scores + scores[:, np.newaxis]))
 
 
 def counting(

diff --git a/python/evalica/test_evalica.py b/python/evalica/test_evalica.py
@@ -18,6 +18,8 @@
 from conftest import Comparison, comparisons
 
 if TYPE_CHECKING:
+    from typing import Literal
+
     import pandas as pd
     from pytest_codspeed import BenchmarkFixture
 
@@ -519,33 +521,34 @@ def test_llmfao_pairwise_scores(llmfao: Comparison, benchmark: BenchmarkFixture)
 @given(arrays(dtype=np.float64, shape=array_shapes(max_dims=1, min_side=0)))
 def test_pairwise_scores(scores: npt.NDArray[np.float64]) -> None:
     with np.errstate(all="ignore"):
-        pairwise = evalica.pairwise_scores(scores)
-
-    assert pairwise.dtype == scores.dtype
-    assert pairwise.shape == (len(scores), len(scores))
+        pairwise_pyo3 = evalica.pairwise_scores(scores, solver="pyo3")
+        pairwise_naive = evalica.pairwise_scores(scores, solver="naive")
 
-    if np.isfinite(scores).all():
+    for pairwise in (pairwise_pyo3, pairwise_naive):
+        assert pairwise.dtype == scores.dtype
+        assert pairwise.shape == (len(scores), len(scores))
         assert np.isfinite(pairwise).all()
-    else:
-        assert not np.isfinite(pairwise).all()
 
 
-def test_pairwise_scores_empty() -> None:
-    pairwise = evalica.pairwise_scores(np.zeros(0, dtype=np.float64))
+@pytest.mark.parametrize("solver", ["pyo3", "naive"])
+def test_pairwise_scores_empty(solver: Literal["pyo3", "naive"]) -> None:
+    pairwise = evalica.pairwise_scores(np.zeros(0, dtype=np.float64), solver=solver)
+
     assert pairwise.dtype == np.float64
     assert pairwise.shape == (0, 0)
 
 
+@pytest.mark.parametrize("solver", ["pyo3", "naive"])
 @given(array_shapes())
-def test_pairwise_scores_shape(shape: tuple[int, ...]) -> None:
-    scores = np.zeros(shape, dtype=np.int64)
+def test_pairwise_scores_shape(solver: Literal["pyo3", "naive"], shape: tuple[int, ...]) -> None:
+    scores = np.zeros(shape)
 
     if len(shape) == 1:
         with np.errstate(all="ignore"):
-            evalica.pairwise_scores(scores)
+            evalica.pairwise_scores(scores, solver=solver)
     else:
-        with pytest.raises(evalica.ScoreDimensionError):
-            evalica.pairwise_scores(scores)
+        with pytest.raises(ValueError):  # noqa: PT011
+            evalica.pairwise_scores(scores, solver=solver)
 
 
 @given(series(dtype=np.float64))

diff --git a/src/counting.rs b/src/counting.rs
@@ -3,8 +3,8 @@ use std::ops::AddAssign;
 use ndarray::{Array1, ArrayView1, Axis, ErrorKind, ShapeError};
 use num_traits::{Float, Num};
 
-use crate::{check_lengths, check_total, Winner};
 use crate::utils::{matrices, nan_mean, nan_to_num};
+use crate::{check_lengths, check_total, Winner};
 
 pub fn counting<A: Num + Copy + AddAssign>(
     xs: &ArrayView1<usize>,

diff --git a/src/python.rs b/src/python.rs
@@ -1,12 +1,13 @@
 use numpy::{Element, IntoPyArray, PyArray1, PyArray2, PyArrayDescr, PyArrayLike1};
 use pyo3::create_exception;
+use pyo3::exceptions::PyValueError;
 use pyo3::prelude::*;
 
 use crate::bradley_terry::{bradley_terry, newman};
 use crate::counting::{average_win_rate, counting};
 use crate::elo::elo;
 use crate::linalg::{eigen, pagerank};
-use crate::utils::matrices;
+use crate::utils::{matrices, pairwise_scores};
 use crate::Winner;
 
 #[pymethods]
@@ -34,7 +35,7 @@ unsafe impl Element for Winner {
     }
 }
 
-create_exception!(evalica, LengthMismatchError, pyo3::exceptions::PyValueError);
+create_exception!(evalica, LengthMismatchError, PyValueError);
 
 #[pyfunction]
 fn matrices_pyo3<'py>(
@@ -53,6 +54,16 @@ fn matrices_pyo3<'py>(
     }
 }
 
+#[pyfunction]
+fn pairwise_scores_pyo3<'py>(
+    py: Python,
+    scores: PyArrayLike1<'py, f64>,
+) -> PyResult<Py<PyArray2<f64>>> {
+    let pairwise = pairwise_scores(&scores.as_array()); // delegates to crate::utils::pairwise_scores
+    // Convert the matrix into a NumPy array; there is no error path here, so this is always `Ok`.
+    Ok(pairwise.into_pyarray_bound(py).unbind())
+}
+
 #[pyfunction]
 fn counting_pyo3<'py>(
     py: Python,
@@ -276,6 +287,7 @@ fn evalica(py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> {
         py.get_type_bound::<LengthMismatchError>(),
     )?;
     m.add_function(wrap_pyfunction!(matrices_pyo3, m)?)?;
+    m.add_function(wrap_pyfunction!(pairwise_scores_pyo3, m)?)?;
     m.add_function(wrap_pyfunction!(counting_pyo3, m)?)?;
     m.add_function(wrap_pyfunction!(average_win_rate_pyo3, m)?)?;
     m.add_function(wrap_pyfunction!(bradley_terry_pyo3, m)?)?;

diff --git a/src/utils.rs b/src/utils.rs
@@ -3,7 +3,7 @@ use std::hash::Hash;
 use std::num::FpCategory;
 use std::ops::AddAssign;
 
-use ndarray::{Array1, Array2, ArrayView1, ErrorKind, ShapeError};
+use ndarray::{Array, Array2, ArrayView1, Dimension, ErrorKind, ShapeError};
 use num_traits::{Float, Num};
 
 use crate::Winner;
@@ -54,7 +54,7 @@ pub fn one_nan_to_num<A: Float>(x: A, nan: A) -> A {
     }
 }
 
-pub fn nan_to_num<A: Float>(xs: &mut Array1<A>, nan: A) {
+pub fn nan_to_num<A: Float, D: Dimension>(xs: &mut Array<A, D>, nan: A) {
     xs.map_inplace(|x| *x = one_nan_to_num(*x, nan));
 }
 
@@ -114,6 +114,24 @@ pub fn matrices<A: Num + Copy + AddAssign, B: Num + Copy + AddAssign>(
     Ok((wins, ties))
 }
 
+pub fn pairwise_scores<A: Float>(scores: &ArrayView1<A>) -> Array2<A> {
+    if scores.is_empty() {
+        return Array2::zeros((0, 0));
+    }
+    // The result is a square matrix with one row and one column per input score.
+    let len = scores.len();
+
+    let mut pairwise = Array2::zeros((len, len));
+    // Cell (i, j) is the share of scores[i] in the pair sum scores[i] + scores[j].
+    for ((i, j), value) in pairwise.indexed_iter_mut() {
+        *value = scores[i] / (scores[i] + scores[j]);
+    }
+    // Post-process with `nan_to_num`, passing zero as its `nan` argument (e.g. for 0 / 0 cells).
+    nan_to_num(&mut pairwise, A::zero());
+
+    pairwise
+}
+
 #[cfg(test)]
 pub mod fixtures {
     use crate::Winner;
@@ -145,7 +163,7 @@ pub mod fixtures {
 mod tests {
     use ndarray::array;
 
-    use super::{index, matrices, Winner};
+    use super::{index, matrices, pairwise_scores, Winner};
 
     #[test]
     fn test_index() {
@@ -189,4 +207,15 @@ mod tests {
         assert_eq!(wins, expected_wins);
         assert_eq!(ties, expected_ties);
     }
+
+    #[test]
+    fn test_pairwise_scores() {
+        let scores = array![0.0, 1.0, 3.0];
+        // Cell (i, j) is scores[i] / (scores[i] + scores[j]); the 0 / 0 cell at (0, 0) is expected as 0.
+        let expected = array![[0.00, 0.00, 0.00], [1.00, 0.50, 0.25], [1.00, 0.75, 0.50]];
+        // `pairwise_scores` returns the matrix directly (not a `Result`), so there is nothing to unwrap.
+        let actual = pairwise_scores(&scores.view());
+
+        assert_eq!(actual, expected);
+    }
 }