Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature json serialization #129

Open
wants to merge 40 commits into
base: cython-wrapper
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
d818cee
init serialization
AlexJoz Nov 30, 2017
a2bf716
submodule update
AlexJoz Nov 30, 2017
14756ac
temp test change
AlexJoz Nov 30, 2017
0ee1e50
upd submodule
AlexJoz Nov 30, 2017
51bd404
upd submodule
AlexJoz Dec 2, 2017
c3eb7a7
update core2
Feb 13, 2018
654bdf5
use docker
Feb 14, 2018
de5d0a3
add .
Feb 14, 2018
1febbcb
update
Feb 14, 2018
ec484c1
cache externals
Feb 14, 2018
385abdd
cache only openblas
Feb 14, 2018
3b29ad3
mv to before_install
Feb 14, 2018
028da05
rm cache
Feb 14, 2018
99fcae2
smoke test
Feb 14, 2018
04a9fa4
rm smoke test
Feb 14, 2018
25138f3
clean out Dockerfile
Feb 14, 2018
f3b269e
Merge pull request #131 from ibayer/fix_json
Feb 14, 2018
9fe4170
update fastFM-core2
Aug 7, 2018
3eef4be
Merge pull request #139 from ibayer/update_core
Aug 7, 2018
3b2ca6c
update core2
Aug 20, 2018
a6a022c
add loss and solver to json
Aug 21, 2018
8171cf7
Merge pull request #140 from ibayer/update_core2
Aug 21, 2018
d7517f9
update core2
Aug 22, 2018
ed32da0
add factory methods for Data and Model
Aug 22, 2018
0f75dba
add col-major support to cpp fit
Aug 22, 2018
1f0d958
add test for cpp sgd
Aug 22, 2018
204db6f
fix train_test_split depreciation warining
Aug 22, 2018
b30f70a
replace predict with cpp version
Aug 23, 2018
c9daed8
put intercept in np array
Aug 29, 2018
f8d4eec
replace sgd solver (use core2 now)
Aug 29, 2018
230e688
remove sgd wrapper for C solver
Aug 29, 2018
53577c5
Merge pull request #142 from ibayer/replace_sgd_solver
Aug 29, 2018
916ad29
update core2
Sep 21, 2018
fae6d7a
upgrade cd regression to core2
Sep 21, 2018
6d928e0
refactor class predict
Sep 21, 2018
bda4cad
rm als core (C) code
Sep 21, 2018
2832f59
Merge pull request #143 from ibayer/irls
Oct 7, 2018
8074170
update fastfm-core2
Oct 14, 2018
7b145d5
fix import
Oct 14, 2018
4cc2fa4
Merge pull request #144 from ibayer/update_core2
Oct 14, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
.git
Dockerfile
66 changes: 4 additions & 62 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,79 +1,21 @@
language: c


env:
- TRAVIS_PYTHON_VERSION="2.7"
- TRAVIS_PYTHON_VERSION="3.4"
- TRAVIS_PYTHON_VERSION="3.5"

matrix:
include:
- os: linux
dist: trusty
# - os: osx
# osx_image: xcode8.3
- TRAVIS_PYTHON_VERSION="3.6"

git:
submodules: false

before_install:
# fastFM-core depends on cblas
- if [ "$TRAVIS_OS_NAME" = "linux" ]; then sudo apt-get update -qq; sudo apt-get install -y libopenblas-dev; fi
- if [[ "$TRAVIS_PYTHON_VERSION" =~ "^2" ]]; then
if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then
wget https://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh;
else
wget https://repo.continuum.io/miniconda/Miniconda-latest-MacOSX-x86_64.sh -O miniconda.sh;
fi
else
if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then
wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh;
else
wget https://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -O miniconda.sh;
fi
fi
- bash miniconda.sh -b -p $HOME/miniconda
- export PATH="$HOME/miniconda/bin:$PATH"
- hash -r
- conda config --set always_yes yes --set changeps1 no
- conda update -q conda
# Useful for debugging any issues with conda
- conda info -a
- conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION cython numpy pandas scipy scikit-learn nose
- source activate test-environment

# use credentials
- sed -i -e "s|[email protected]:|https://[email protected]/|" .gitmodules

install:
- git submodule update --init --recursive
- cd fastFM-core2
- |
if [ "$TRAVIS_OS_NAME" = "linux" ]; then
cmake -H. -B_lib -DCMAKE_BUILD_TYPE=Debug -DFASTFM_MINIMAL=ON -DCMAKE_DEBUG_POSTFIX=d
cmake --build _lib
else
cmake -H. -B_lib -DFASTFM_MINIMAL=ON -GXcode
cmake --build _lib --config Debug
mv _lib/fastFM/Debug/libfastFMd.a _lib/fastFM/
fi
- ls _lib/fastFM
- cd ..
- make
- python setup.py bdist_wheel
- pip install dist/*.whl
- docker build -t fastfm-test .

script:
- nosetests

before_deploy:
- export RELEASE_PKG_FILE=$(ls dist/*.whl)
- echo "deploying $RELEASE_PKG_FILE to GitHub releases"
- docker run --rm -i -v $(pwd):/fastfm/ fastfm-test /bin/bash -s < docker_run_tests.sh

deploy:
provider: releases
api_key:
secure: AJcZoe2+OiMJ4VlSkASAeMc/ii0ZRnj2PFaaL7zlSbx1THMpY/49U5BSyqX1PQioPSlTV3ZsIXI3u7KyqoXIQSXWzAuaBzpLTLS85fGSuTvUuexmaJtKU92OC143tuVVLCPnjC992+1uyctjrxMSqgoaUolfYkEftt5RGrMIKl2duGfDXrPXIueHSl8FQGXkmlY6NqkRx2v5kxsAjFcurvwTNU8ptJ84jVKjrE6t1IB61vp2eUcqVR/z6Lwau6mdvIybglnbH4lCMXP98zEIibLA8vbn3XxrC+0uU7Kjz37K6/CsJEPNL5tujJDMRKAupnrkgPsAGTpsAn6O6uLUz0ISgcen8R6KJ7cBli+cq08OZ3JLLoJpqkni62YVSQV+uYkQk9b5Pu09vUTOozJMnOqLSj9hVIswyxGiFPcTFskMgqMdx15M59gd0YpXH633YqwBgRmWNsctp4BKnTaE3iGW6aZc8lrXxpL7qcVAosjmpjLp3jiPXVSRdYf0yHl6pDUj5ZVyu27kAn1/I9JL0nH19zjXF2tUlEjuT9ydHwnhmsgBN/V+JhZxi7ZeEbOZfY1MfekKM/NwSRehVEp/J0XWqWg+kIXRU/rqY1/w0vLVNFeQirpEjUp39eCBydXeS3Bik8uANW2UTxojJo3LBfLLoAT8ZWFb3YrIBAYkzjc=
file: "${RELEASE_PKG_FILE}"
skip_cleanup: true
on:
tags: true
31 changes: 31 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Test image for fastFM CI: Ubuntu 16.04 with the native build toolchain and a
# conda environment holding the Python scientific stack (used by .travis.yml).
FROM ubuntu:16.04

MAINTAINER Immanuel Bayer

# Force a UTF-8 locale so Python text I/O is predictable inside the container.
ENV LANG=C.UTF-8 LC_ALL=C.UTF-8

USER root

# Build prerequisites (cmake/git/compilers) plus runtime libs needed by conda.
RUN apt-get update --fix-missing && apt-get install -y wget bzip2 ca-certificates \
libglib2.0-0 libxext6 libsm6 libxrender1 \
build-essential cmake git

# Download and install miniconda.
RUN echo 'export PATH=/opt/conda/bin:$PATH' > /etc/profile.d/conda.sh && \
wget --quiet https://repo.continuum.io/miniconda/Miniconda2-4.3.27-Linux-x86_64.sh -O ~/miniconda.sh && \
/bin/bash ~/miniconda.sh -b -p /opt/conda && \
rm ~/miniconda.sh

ENV PATH /opt/conda/bin:$PATH

# Non-interactive conda: auto-confirm every prompt, keep the default prompt.
RUN conda config --set always_yes yes --set changeps1 no
RUN conda update -q conda

# Setup test virtual env
# TRAVIS_PYTHON_VERSION is forwarded from the Travis build matrix at
# `docker build` time; defaults to Python 3 for local builds.
ARG TRAVIS_PYTHON_VERSION=3
ENV PY_VERSION=$TRAVIS_PYTHON_VERSION

RUN conda update -q conda && \
conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION \
cython numpy pandas scipy scikit-learn nose

7 changes: 6 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
PYTHON ?= python

all:
( cd fastFM-core2 ; \
cmake -H. -B_lib -DCMAKE_BUILD_TYPE=Debug -DCMAKE_DEBUG_POSTFIX=d; \
cmake --build _lib; )
( cd fastFM-core ; $(MAKE) lib )
python setup.py build_ext --inplace
$(PYTHON) setup.py build_ext --inplace

.PHONY : clean
clean:
Expand Down
6 changes: 6 additions & 0 deletions docker_run_tests.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Entry point run inside the fastfm-test Docker container (invoked from
# .travis.yml via `docker run ... /bin/bash -s < docker_run_tests.sh`):
# activates the conda test env, builds the extension from the checkout
# mounted at /fastfm, installs it and runs the nose test suite.
source activate test-environment
# Build fastFM-core
cd /fastfm/
make
pip install .
nosetests
2 changes: 1 addition & 1 deletion fastFM-core2
70 changes: 37 additions & 33 deletions fastFM/als.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@
# License: BSD 3 clause

import ffm
import ffm2
import numpy as np
from sklearn.base import RegressorMixin
from .validation import check_consistent_length, check_array
from .base import (FactorizationMachine, BaseFMClassifier,
_validate_class_labels, _check_warm_start)
_validate_class_labels, _check_warm_start,
_init_parameter, _settings_factory)


class FMRegression(FactorizationMachine, RegressorMixin):
Expand Down Expand Up @@ -63,9 +65,11 @@ def __init__(self, n_iter=100, init_stdev=0.1, rank=8, random_state=123,
self.l2_reg_w = l2_reg_w
self.l2_reg_V = l2_reg_V
self.l2_reg = l2_reg
self.task = "regression"
self.loss = "squared"
self.solver = "cd"
self.iter_count = 0

def fit(self, X_train, y_train, n_more_iter=0):
def fit(self, X, y, n_more_iter=0):
""" Fit model with specified loss.

Parameters
Expand All @@ -78,27 +82,24 @@ def fit(self, X_train, y_train, n_more_iter=0):
Number of iterations to continue from the current Coefficients.

"""
check_consistent_length(X, y)
y = check_array(y, ensure_2d=False, dtype=np.float64)

check_consistent_length(X_train, y_train)
y_train = check_array(y_train, ensure_2d=False, dtype=np.float64)
X = check_array(X, accept_sparse="csc", dtype=np.float64)
n_features = X.shape[1]

X_train = check_array(X_train, accept_sparse="csc", dtype=np.float64,
order="F")
self.n_iter = self.n_iter + n_more_iter
if self.iter_count == 0:
self.w0_, self.w_, self.V_ = _init_parameter(self, n_features)

if n_more_iter > 0:
_check_warm_start(self, X_train)
self.warm_start = True
if n_more_iter != 0:
_check_warm_start(self, X)
self.n_iter = n_more_iter

self.w0_, self.w_, self.V_ = ffm.ffm_als_fit(self, X_train, y_train)
settings_dict = _settings_factory(self)
ffm2.ffm_fit(self.w0_, self.w_, self.V_, X, y, self.rank,
settings_dict)

if self.iter_count != 0:
self.iter_count = self.iter_count + n_more_iter
else:
self.iter_count = self.n_iter

# reset to default setting
self.warm_start = False
self.iter_count += self.n_iter
return self


Expand Down Expand Up @@ -158,34 +159,37 @@ def __init__(self, n_iter=100, init_stdev=0.1, rank=8, random_state=123,
self.l2_reg_w = l2_reg_w
self.l2_reg_V = l2_reg_V
self.l2_reg = l2_reg
self.task = "classification"
self.loss = "squared"
self.solver = "cd"
self.iter_count = 0

def fit(self, X_train, y_train):
def fit(self, X, y):
""" Fit model with specified loss.

Parameters
----------
X : scipy.sparse.csc_matrix, (n_samples, n_features)

y : float | ndarray, shape = (n_samples, )
the targets have to be encodes as {-1, 1}.
the targets have to be encoded as {0, 1}.
"""
check_consistent_length(X_train, y_train)
X_train = check_array(X_train, accept_sparse="csc", dtype=np.float64,
order="F")
y_train = _validate_class_labels(y_train)
check_consistent_length(X, y)

X = check_array(X, accept_sparse="csc", dtype=np.float64,
order="F")
y = _validate_class_labels(y)

self.classes_ = np.unique(y)

self.classes_ = np.unique(y_train)
if len(self.classes_) != 2:
raise ValueError("This solver only supports binary classification"
" but the data contains"
" class: %r" % self.classes_)

# fastFM-core expects labels to be in {-1,1}
y_train = y_train.copy()
i_class1 = (y_train == self.classes_[0])
y_train[i_class1] = -1
y_train[~i_class1] = 1
self.w0_, self.w_, self.V_ = _init_parameter(self, X.shape[1])

settings_dict = _settings_factory(self)
ffm2.ffm_fit(self.w0_, self.w_, self.V_, X, y, self.rank,
settings_dict)

self.w0_, self.w_, self.V_ = ffm.ffm_als_fit(self, X_train, y_train)
return self
50 changes: 43 additions & 7 deletions fastFM/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,40 @@
import numpy as np
import scipy.sparse as sp
from scipy.stats import norm
from scipy.special import expit as sigmoid
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils import check_random_state

import ffm2
from .validation import check_array
import ffm


def _init_parameter(fm, n_features):
    """Draw the initial model parameters for a factorization machine.

    The bias ``w0`` and the linear weights ``w`` start at zero; the
    pairwise factors ``V`` are sampled from a normal distribution with
    standard deviation ``fm.init_stdev``, seeded via ``fm.random_state``.

    Returns the triple ``(w0, w, V)`` with shapes ``(1,)``,
    ``(n_features,)`` and ``(fm.rank, n_features)``.
    """
    rng = check_random_state(fm.random_state)
    bias = np.zeros(1, dtype=np.float64)
    linear = np.zeros(n_features, dtype=np.float64)
    factors = rng.normal(loc=0.0, scale=fm.init_stdev,
                         size=(fm.rank, n_features))
    return bias, linear, factors


def _settings_factory(fm):
settings_dict = fm.get_params()
settings_dict['loss'] = fm.loss
settings_dict['solver'] = fm.solver

# TODO align naming
settings_dict['iter'] = int(settings_dict['n_iter'])
del settings_dict['n_iter']

return settings_dict


def _validate_class_labels(y):
assert len(set(y)) == 2
assert y.min() == -1
assert y.max() == 1
return check_array(y, ensure_2d=False, dtype=np.float64)
assert len(set(y)) == 2
assert y.min() == -1
assert y.max() == 1
return check_array(y, ensure_2d=False, dtype=np.float64)


def _check_warm_start(fm, X):
Expand Down Expand Up @@ -82,6 +105,7 @@ def __init__(self, n_iter=100, init_stdev=0.1, rank=8, random_state=123,
self.step_size = 0
self.copy_X = copy_X


def predict(self, X_test):
""" Return predictions

Expand All @@ -99,12 +123,12 @@ def predict(self, X_test):
order="F")
assert sp.isspmatrix_csc(X_test)
assert X_test.shape[1] == len(self.w_)
return ffm.ffm_predict(self.w0_, self.w_, self.V_, X_test)
return ffm2.ffm_predict(self.w0_, self.w_, self.V_, X_test)


class BaseFMClassifier(FactorizationMachine, ClassifierMixin):

def predict(self, X_test):
def predict(self, X_test, threshold=0.5):
""" Return predictions

Parameters
Expand All @@ -117,6 +141,13 @@ def predict(self, X_test):
y : array, shape (n_samples)
Class labels
"""

if self.loss == "logistic":
y_proba = self.predict_proba(X_test)
y_binary = np.ones_like(y_proba, dtype=np.float64)
y_binary[y_proba < threshold] = -1
return y_binary

y_proba = norm.cdf(super(BaseFMClassifier, self).predict(X_test))
# convert probs to labels
y_pred = np.zeros_like(y_proba, dtype=np.float64) + self.classes_[0]
Expand All @@ -136,5 +167,10 @@ def predict_proba(self, X_test):
y : array, shape (n_samples)
Class Probability for the class with smaller label.
"""

if self.loss == "logistic":
pred = ffm2.ffm_predict(self.w0_, self.w_, self.V_, X_test)
return sigmoid(pred)

pred = super(BaseFMClassifier, self).predict(X_test)
return norm.cdf(pred)
1 change: 1 addition & 0 deletions fastFM/bpr.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,4 +92,5 @@ def fit(self, X, pairs):
assert pairs.max() <= X.shape[1]
assert pairs.min() >= 0
self.w0_, self.w_, self.V_ = ffm.ffm_fit_sgd_bpr(self, X, pairs)
self.w0_ = np.array([self.w0_], dtype=np.float64)
return self
6 changes: 0 additions & 6 deletions fastFM/cffm.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -33,15 +33,9 @@ cdef extern from "../fastFM-core/include/ffm.h":

void ffm_predict(double *w_0, double * w, double * V, cs_di *X, double *y_pred, int k)

void ffm_als_fit(double *w_0, double *w, double *V,
cs_di *X, double *y, ffm_param *param)

void ffm_mcmc_fit_predict(double *w_0, double *w, double *V,
cs_di *X_train, cs_di *X_test, double *y_train, double *y_pred,
ffm_param *param)

void ffm_sgd_fit(double *w_0, double *w, double *V,
cs_di *X, double *y, ffm_param *param)

void ffm_sgd_bpr_fit(double *w_0, double *w, double *V,
cs_di *X, double *pairs, int n_pairs, ffm_param *param)
Loading