Merge pull request #128 from mdekstrand/feature/fit-kwargs
Make the fit signature consistent
mdekstrand authored Dec 9, 2019
2 parents 6c9d250 + d50e7ad commit fd8ca53
Showing 9 changed files with 34 additions and 25 deletions.
8 changes: 8 additions & 0 deletions doc/releases.md
@@ -4,11 +4,19 @@

See the [GitHub milestone](https://github.com/lenskit/lkpy/milestone/5) for full change list.

### Infrastructure Updates

- Dropped support for Python 3.5
- Removed `*args` from `Algorithm.fit`, so additional data must be provided via keyword arguments
- Made `Algorithm.fit` implementations consistently take `**kwargs` for hybrid flexibility

### Algorithm Updates

- Substantial performance and stability improvements to item-item
- Added a coordinate descent solver to explicit-feedback ALS and made it the default. The old
LU-based solver is still available with `method='lu'`.
- Added a conjugate gradient solver to implicit-feedback ALS and made it the default.
- Added a random recommender

## 0.7.0

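The release-note entries above describe the core API change: `Algorithm.fit` no longer takes positional `*args`, so any extra training data has to be supplied as keyword arguments, and implementations accept it (and may ignore it) via `**kwargs`. Below is a minimal sketch of the new calling convention; the `sample_weight` keyword is purely illustrative and is not consumed by any built-in algorithm.

```python
import pandas as pd
from lenskit.algorithms.basic import Bias

# Toy ratings frame with the columns the algorithms expect.
ratings = pd.DataFrame({
    'user':   [1, 1, 2, 2, 3],
    'item':   [10, 20, 10, 30, 20],
    'rating': [4.0, 3.5, 5.0, 2.0, 4.5],
})

algo = Bias(damping=5)

# Extra training data is keyword-only now; Bias accepts **kwargs and simply
# ignores keywords it does not use, so this call is harmless.
algo.fit(ratings, sample_weight=[1.0, 1.0, 2.0, 1.0, 1.0])

# Passing the extra data positionally is no longer part of the signature:
# algo.fit(ratings, [1.0, 1.0, 2.0, 1.0, 1.0])  # TypeError after this change
```
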
7 changes: 4 additions & 3 deletions lenskit/algorithms/__init__.py
@@ -21,14 +21,15 @@ class Algorithm(metaclass=ABCMeta):
"""

@abstractmethod
def fit(self, ratings, *args, **kwargs):
def fit(self, ratings, **kwargs):
"""
Train a model using the specified ratings (or similar) data.
Args:
ratings(pandas.DataFrame): The ratings data.
args: Additional training data the algorithm may require.
kwargs: Additional training data the algorithm may require.
kwargs: Additional training data the algorithm may require. Algorithms should
avoid using the same keyword arguments for different purposes, so that
they can be more easily hybridized.
Returns:
The algorithm object.
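
For implementers, the updated docstring spells out the contract: every `fit` takes the ratings frame plus `**kwargs`, and should pull out only the keywords it understands while leaving the rest alone. A sketch of a conforming subclass; the class name and the `sample_weight` keyword are made up for illustration.

```python
import pandas as pd
from lenskit.algorithms import Algorithm

class MeanOnly(Algorithm):
    """Illustrative algorithm following the consistent fit signature."""

    def fit(self, ratings, **kwargs):
        # Use the keyword data this algorithm understands; ignore the rest.
        weights = kwargs.get('sample_weight')
        if weights is not None:
            weights = pd.Series(weights, index=ratings.index)
            self.mean_ = (ratings['rating'] * weights).sum() / weights.sum()
        else:
            self.mean_ = ratings['rating'].mean()
        return self
```

Keeping the keyword name specific to this algorithm, rather than something generic, follows the docstring's advice and avoids collisions when the algorithm is combined into a hybrid.
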
4 changes: 2 additions & 2 deletions lenskit/algorithms/als.py
@@ -302,7 +302,7 @@ def __init__(self, features, *, iterations=20, reg=0.1, damping=5, bias=True, me
self.progress = progress if progress is not None else util.no_progress
self._random = rand

def fit(self, ratings):
def fit(self, ratings, **kwargs):
"""
Run ALS to train a model.
@@ -488,7 +488,7 @@ def __init__(self, features, *, iterations=20, reg=0.1, weight=40, method='cg',
self._random = rand
self.progress = progress if progress is not None else util.no_progress

def fit(self, ratings):
def fit(self, ratings, **kwargs):
self.timer = util.Stopwatch()
current, uctx, ictx = self._initial_model(ratings)

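Connecting these hunks to the release notes: explicit-feedback ALS now defaults to the coordinate-descent solver (with the old LU solver still selectable via `method='lu'`), and implicit-feedback ALS defaults to conjugate gradient (`method='cg'` above). A rough usage sketch, assuming the usual lenskit class names `BiasedMF` and `ImplicitMF`; the tiny ratings frame is only there to keep the example self-contained.

```python
import pandas as pd
from lenskit.algorithms.als import BiasedMF, ImplicitMF

ratings = pd.DataFrame({
    'user':   [1, 1, 2, 2, 3, 3],
    'item':   [10, 20, 10, 30, 20, 30],
    'rating': [4.0, 3.5, 5.0, 2.0, 4.5, 3.0],
})

explicit_cd = BiasedMF(2, iterations=5, reg=0.1)      # coordinate descent (new default)
explicit_lu = BiasedMF(2, iterations=5, method='lu')  # older LU-based solver
implicit_cg = ImplicitMF(2, iterations=5)             # conjugate gradient (new default)

# fit still takes the ratings frame; extra keywords are now accepted and ignored.
explicit_cd.fit(ratings)
```
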
30 changes: 15 additions & 15 deletions lenskit/algorithms/basic.py
@@ -69,22 +69,22 @@ def __init__(self, items=True, users=True, damping=0.0):
check.check_value(self.item_damping >= 0, "item damping value {} must be nonnegative",
self.item_damping)

def fit(self, data):
def fit(self, ratings, **kwargs):
"""
Train the bias model on some rating data.
Args:
data (DataFrame): a data frame of ratings. Must have at least `user`,
`item`, and `rating` columns.
ratings (DataFrame): a data frame of ratings. Must have at least `user`,
`item`, and `rating` columns.
Returns:
Bias: the fit bias object.
"""

_logger.info('building bias model for %d ratings', len(data))
self.mean_ = data.rating.mean()
_logger.info('building bias model for %d ratings', len(ratings))
self.mean_ = ratings.rating.mean()
_logger.info('global mean: %.3f', self.mean_)
nrates = data.assign(rating=lambda df: df.rating - self.mean_)
nrates = ratings.assign(rating=lambda df: df.rating - self.mean_)

if self.items:
group = nrates.groupby('item').rating
@@ -167,7 +167,7 @@ def __init__(self, selector=None):
else:
self.selector = selector

def fit(self, ratings):
def fit(self, ratings, **kwargs):
pop = ratings.groupby('item').user.count()
pop.name = 'score'
self.item_pop_ = pop.astype('float64')
@@ -236,9 +236,9 @@ def __init__(self, algorithms, *others):
else:
self.algorithms = [algorithms]

def fit(self, ratings, *args, **kwargs):
def fit(self, ratings, **kwargs):
for algo in self.algorithms:
algo.fit(ratings, *args, **kwargs)
algo.fit(ratings, **kwargs)

return self
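
This forwarding is the "hybrid flexibility" the release notes mention: `Fallback` (the `__init__(self, algorithms, *others)` shown above) now hands the same `**kwargs` to every component, and each component picks out the keywords it recognizes. A sketch with two made-up components, one reading a hypothetical `item_features` keyword and one ignoring it.

```python
import pandas as pd
from lenskit.algorithms.basic import Fallback

class ContentScorer:
    """Made-up component that consumes an item_features keyword."""
    def fit(self, ratings, item_features=None, **kwargs):
        self.features_ = item_features
        return self

class PlainScorer:
    """Made-up component that only needs the ratings."""
    def fit(self, ratings, **kwargs):
        self.n_ratings_ = len(ratings)
        return self

ratings = pd.DataFrame({'user': [1, 2], 'item': [10, 20], 'rating': [4.0, 3.0]})
features = pd.DataFrame({'item': [10, 20], 'genre': ['a', 'b']})

hybrid = Fallback(ContentScorer(), PlainScorer())
# One fit call distributes the keyword data: the content scorer keeps
# item_features, and the plain scorer accepts and ignores it.
hybrid.fit(ratings, item_features=features)
```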

@@ -285,7 +285,7 @@ def __init__(self, predictor, selector=None):
self.predictor = predictor
self.selector = selector if selector is not None else UnratedItemCandidateSelector()

def fit(self, ratings, *args, **kwargs):
def fit(self, ratings, **kwargs):
"""
Fit the recommender.
@@ -296,8 +296,8 @@ def fit(self, ratings, *args, **kwargs):
args, kwargs:
Additional arguments for the predictor to use in its training process.
"""
self.predictor.fit(ratings, *args, **kwargs)
self.selector.fit(ratings)
self.predictor.fit(ratings, **kwargs)
self.selector.fit(ratings, **kwargs)
return self

def recommend(self, user, n=None, candidates=None, ratings=None):
@@ -339,7 +339,7 @@ class UnratedItemCandidateSelector(CandidateSelector):
users_ = None
user_items_ = None

def fit(self, ratings):
def fit(self, ratings, **kwargs):
r2 = ratings[['user', 'item']]
sparse = sparse_ratings(r2)
_logger.info('trained unrated candidate selector for %d ratings', sparse.matrix.nnz)
@@ -388,8 +388,8 @@ def __init__(self, selector=None, random_state=None):
self.random_state = random_state
self.items = None

def fit(self, ratings, *args, **kwargs):
self.selector.fit(ratings)
def fit(self, ratings, **kwargs):
self.selector.fit(ratings, **kwargs)
items = pd.DataFrame(ratings['item'].unique(), columns=['item'])
self.items = items
return self
2 changes: 1 addition & 1 deletion lenskit/algorithms/funksvd.py
@@ -221,7 +221,7 @@ def __init__(self, features, iterations=100, *, lrate=0.001, reg=0.015,
else:
self.bias = bias

def fit(self, ratings):
def fit(self, ratings, **kwargs):
"""
Train a FunkSVD model.
2 changes: 1 addition & 1 deletion lenskit/algorithms/hpf.py
@@ -22,7 +22,7 @@ def __init__(self, features, **kwargs):
self.features = features
self._kwargs = kwargs

def fit(self, ratings):
def fit(self, ratings, **kwargs):
import hpfrec

users = pd.Index(ratings.user.unique())
2 changes: 1 addition & 1 deletion lenskit/algorithms/implicit.py
@@ -31,7 +31,7 @@ class BaseRec(Recommender, Predictor):
def __init__(self, delegate):
self.delegate = delegate

def fit(self, ratings):
def fit(self, ratings, **kwargs):
matrix, users, items = sparse_ratings(ratings, scipy=True)
iur = matrix.T.tocsr()

2 changes: 1 addition & 1 deletion lenskit/algorithms/item_knn.py
@@ -303,7 +303,7 @@ def __init__(self, nnbrs, min_nbrs=1, min_sim=1.0e-6, save_nbrs=None,
self.center = center
self.aggregate = aggregate

def fit(self, ratings):
def fit(self, ratings, **kwargs):
"""
Train a model.
2 changes: 1 addition & 1 deletion lenskit/algorithms/user_knn.py
@@ -124,7 +124,7 @@ def __init__(self, nnbrs, min_nbrs=1, min_sim=0, center=True, aggregate='weighte
self.center = center
self.aggregate = intern(aggregate)

def fit(self, ratings):
def fit(self, ratings, **kwargs):
"""
"Train" a user-user CF model. This memorizes the rating data in a format that is usable
for future computations.
