diff --git a/doc/releases.md b/doc/releases.md index 81dac60d7..373b4573e 100644 --- a/doc/releases.md +++ b/doc/releases.md @@ -4,11 +4,19 @@ See the [GitHub milestone](https://github.com/lenskit/lkpy/milestone/5) for full change list. +### Infrastructure Updates + +- Dropped support for Python 3.5 +- Removed `*args` from `Algorithm.fit`, so additional data must be provided via keyword arguments +- Made `Algorithm.fit` implementations consistently take `**kwargs` for hybrid flexibility + ### Algorithm Updates +- Substantial performance and stability improvements to item-item - Added a coordinate descent solver to explicit-feedback ALS and made it the default. The old LU-based solver is still available with `method='lu'`. - Added a conjugate gradient solver to implicit-feedback ALS and made it the default. +- Added a random recommender ## 0.7.0 diff --git a/lenskit/algorithms/__init__.py b/lenskit/algorithms/__init__.py index 31be94aa9..a3d2c7dea 100644 --- a/lenskit/algorithms/__init__.py +++ b/lenskit/algorithms/__init__.py @@ -21,14 +21,15 @@ class Algorithm(metaclass=ABCMeta): """ @abstractmethod - def fit(self, ratings, *args, **kwargs): + def fit(self, ratings, **kwargs): """ Train a model using the specified ratings (or similar) data. Args: ratings(pandas.DataFrame): The ratings data. - args: Additional training data the algorithm may require. - kwargs: Additional training data the algorithm may require. + kwargs: Additional training data the algorithm may require. Algorithms should + avoid using the same keyword arguments for different purposes, so that + they can be more easily hybridized. Returns: The algorithm object. diff --git a/lenskit/algorithms/als.py b/lenskit/algorithms/als.py index 4f9a8d310..6a28c6805 100644 --- a/lenskit/algorithms/als.py +++ b/lenskit/algorithms/als.py @@ -302,7 +302,7 @@ def __init__(self, features, *, iterations=20, reg=0.1, damping=5, bias=True, me self.progress = progress if progress is not None else util.no_progress self._random = rand - def fit(self, ratings): + def fit(self, ratings, **kwargs): """ Run ALS to train a model. @@ -488,7 +488,7 @@ def __init__(self, features, *, iterations=20, reg=0.1, weight=40, method='cg', self._random = rand self.progress = progress if progress is not None else util.no_progress - def fit(self, ratings): + def fit(self, ratings, **kwargs): self.timer = util.Stopwatch() current, uctx, ictx = self._initial_model(ratings) diff --git a/lenskit/algorithms/basic.py b/lenskit/algorithms/basic.py index 275fcccb1..350584bd6 100644 --- a/lenskit/algorithms/basic.py +++ b/lenskit/algorithms/basic.py @@ -69,22 +69,22 @@ def __init__(self, items=True, users=True, damping=0.0): check.check_value(self.item_damping >= 0, "item damping value {} must be nonnegative", self.item_damping) - def fit(self, data): + def fit(self, ratings, **kwargs): """ Train the bias model on some rating data. Args: - data (DataFrame): a data frame of ratings. Must have at least `user`, - `item`, and `rating` columns. + ratings (DataFrame): a data frame of ratings. Must have at least `user`, + `item`, and `rating` columns. Returns: Bias: the fit bias object. """ - _logger.info('building bias model for %d ratings', len(data)) - self.mean_ = data.rating.mean() + _logger.info('building bias model for %d ratings', len(ratings)) + self.mean_ = ratings.rating.mean() _logger.info('global mean: %.3f', self.mean_) - nrates = data.assign(rating=lambda df: df.rating - self.mean_) + nrates = ratings.assign(rating=lambda df: df.rating - self.mean_) if self.items: group = nrates.groupby('item').rating @@ -167,7 +167,7 @@ def __init__(self, selector=None): else: self.selector = selector - def fit(self, ratings): + def fit(self, ratings, **kwargs): pop = ratings.groupby('item').user.count() pop.name = 'score' self.item_pop_ = pop.astype('float64') @@ -236,9 +236,9 @@ def __init__(self, algorithms, *others): else: self.algorithms = [algorithms] - def fit(self, ratings, *args, **kwargs): + def fit(self, ratings, **kwargs): for algo in self.algorithms: - algo.fit(ratings, *args, **kwargs) + algo.fit(ratings, **kwargs) return self @@ -285,7 +285,7 @@ def __init__(self, predictor, selector=None): self.predictor = predictor self.selector = selector if selector is not None else UnratedItemCandidateSelector() - def fit(self, ratings, *args, **kwargs): + def fit(self, ratings, **kwargs): """ Fit the recommender. @@ -296,8 +296,8 @@ def fit(self, ratings, *args, **kwargs): args, kwargs: Additional arguments for the predictor to use in its training process. """ - self.predictor.fit(ratings, *args, **kwargs) - self.selector.fit(ratings) + self.predictor.fit(ratings, **kwargs) + self.selector.fit(ratings, **kwargs) return self def recommend(self, user, n=None, candidates=None, ratings=None): @@ -339,7 +339,7 @@ class UnratedItemCandidateSelector(CandidateSelector): users_ = None user_items_ = None - def fit(self, ratings): + def fit(self, ratings, **kwargs): r2 = ratings[['user', 'item']] sparse = sparse_ratings(r2) _logger.info('trained unrated candidate selector for %d ratings', sparse.matrix.nnz) @@ -388,8 +388,8 @@ def __init__(self, selector=None, random_state=None): self.random_state = random_state self.items = None - def fit(self, ratings, *args, **kwargs): - self.selector.fit(ratings) + def fit(self, ratings, **kwargs): + self.selector.fit(ratings, **kwargs) items = pd.DataFrame(ratings['item'].unique(), columns=['item']) self.items = items return self diff --git a/lenskit/algorithms/funksvd.py b/lenskit/algorithms/funksvd.py index 2c9ea86f5..00bcf0ea7 100644 --- a/lenskit/algorithms/funksvd.py +++ b/lenskit/algorithms/funksvd.py @@ -221,7 +221,7 @@ def __init__(self, features, iterations=100, *, lrate=0.001, reg=0.015, else: self.bias = bias - def fit(self, ratings): + def fit(self, ratings, **kwargs): """ Train a FunkSVD model. diff --git a/lenskit/algorithms/hpf.py b/lenskit/algorithms/hpf.py index 40c7a2ed3..4f788cb5f 100644 --- a/lenskit/algorithms/hpf.py +++ b/lenskit/algorithms/hpf.py @@ -22,7 +22,7 @@ def __init__(self, features, **kwargs): self.features = features self._kwargs = kwargs - def fit(self, ratings): + def fit(self, ratings, **kwargs): import hpfrec users = pd.Index(ratings.user.unique()) diff --git a/lenskit/algorithms/implicit.py b/lenskit/algorithms/implicit.py index c4df255bd..43c2cb988 100644 --- a/lenskit/algorithms/implicit.py +++ b/lenskit/algorithms/implicit.py @@ -31,7 +31,7 @@ class BaseRec(Recommender, Predictor): def __init__(self, delegate): self.delegate = delegate - def fit(self, ratings): + def fit(self, ratings, **kwargs): matrix, users, items = sparse_ratings(ratings, scipy=True) iur = matrix.T.tocsr() diff --git a/lenskit/algorithms/item_knn.py b/lenskit/algorithms/item_knn.py index 5c91d6daa..5f1d09f27 100644 --- a/lenskit/algorithms/item_knn.py +++ b/lenskit/algorithms/item_knn.py @@ -303,7 +303,7 @@ def __init__(self, nnbrs, min_nbrs=1, min_sim=1.0e-6, save_nbrs=None, self.center = center self.aggregate = aggregate - def fit(self, ratings): + def fit(self, ratings, **kwargs): """ Train a model. diff --git a/lenskit/algorithms/user_knn.py b/lenskit/algorithms/user_knn.py index 20c4ad23d..61903853e 100644 --- a/lenskit/algorithms/user_knn.py +++ b/lenskit/algorithms/user_knn.py @@ -124,7 +124,7 @@ def __init__(self, nnbrs, min_nbrs=1, min_sim=0, center=True, aggregate='weighte self.center = center self.aggregate = intern(aggregate) - def fit(self, ratings): + def fit(self, ratings, **kwargs): """ "Train" a user-user CF model. This memorizes the rating data in a format that is usable for future computations.