Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature json serialization #129

Open
wants to merge 40 commits into
base: cython-wrapper
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
d818cee
init serialization
AlexJoz Nov 30, 2017
a2bf716
submodule update
AlexJoz Nov 30, 2017
14756ac
temp test change
AlexJoz Nov 30, 2017
0ee1e50
upd submodule
AlexJoz Nov 30, 2017
51bd404
upd submodule
AlexJoz Dec 2, 2017
c3eb7a7
update core2
Feb 13, 2018
654bdf5
use docker
Feb 14, 2018
de5d0a3
add .
Feb 14, 2018
1febbcb
update
Feb 14, 2018
ec484c1
cache externals
Feb 14, 2018
385abdd
cache only openblas
Feb 14, 2018
3b29ad3
mv to before_install
Feb 14, 2018
028da05
rm cache
Feb 14, 2018
99fcae2
smoke test
Feb 14, 2018
04a9fa4
rm smoke test
Feb 14, 2018
25138f3
clean out Dockerfile
Feb 14, 2018
f3b269e
Merge pull request #131 from ibayer/fix_json
Feb 14, 2018
9fe4170
update fastFM-core2
Aug 7, 2018
3eef4be
Merge pull request #139 from ibayer/update_core
Aug 7, 2018
3b2ca6c
update core2
Aug 20, 2018
a6a022c
add loss and solver to json
Aug 21, 2018
8171cf7
Merge pull request #140 from ibayer/update_core2
Aug 21, 2018
d7517f9
update core2
Aug 22, 2018
ed32da0
add factory methods for Data and Model
Aug 22, 2018
0f75dba
add col-major support to cpp fit
Aug 22, 2018
1f0d958
add test for cpp sgd
Aug 22, 2018
204db6f
fix train_test_split depreciation warining
Aug 22, 2018
b30f70a
replace predict with cpp version
Aug 23, 2018
c9daed8
put intercept in np array
Aug 29, 2018
f8d4eec
replace sgd solver (use core2 now)
Aug 29, 2018
230e688
remove sgd wrapper for C solver
Aug 29, 2018
53577c5
Merge pull request #142 from ibayer/replace_sgd_solver
Aug 29, 2018
916ad29
update core2
Sep 21, 2018
fae6d7a
upgrade cd regression to core2
Sep 21, 2018
6d928e0
refactor class predict
Sep 21, 2018
bda4cad
rm als core (C) code
Sep 21, 2018
2832f59
Merge pull request #143 from ibayer/irls
Oct 7, 2018
8074170
update fastfm-core2
Oct 14, 2018
7b145d5
fix import
Oct 14, 2018
4cc2fa4
Merge pull request #144 from ibayer/update_core2
Oct 14, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
.git
Dockerfile
66 changes: 4 additions & 62 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,79 +1,21 @@
language: c


env:
- TRAVIS_PYTHON_VERSION="2.7"
- TRAVIS_PYTHON_VERSION="3.4"
- TRAVIS_PYTHON_VERSION="3.5"

matrix:
include:
- os: linux
dist: trusty
# - os: osx
# osx_image: xcode8.3
- TRAVIS_PYTHON_VERSION="3.6"

git:
submodules: false

before_install:
# fastFM-core depends on cblas
- if [ "$TRAVIS_OS_NAME" = "linux" ]; then sudo apt-get update -qq; sudo apt-get install -y libopenblas-dev; fi
- if [[ "$TRAVIS_PYTHON_VERSION" =~ "^2" ]]; then
if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then
wget https://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh;
else
wget https://repo.continuum.io/miniconda/Miniconda-latest-MacOSX-x86_64.sh -O miniconda.sh;
fi
else
if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then
wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh;
else
wget https://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -O miniconda.sh;
fi
fi
- bash miniconda.sh -b -p $HOME/miniconda
- export PATH="$HOME/miniconda/bin:$PATH"
- hash -r
- conda config --set always_yes yes --set changeps1 no
- conda update -q conda
# Useful for debugging any issues with conda
- conda info -a
- conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION cython numpy pandas scipy scikit-learn nose
- source activate test-environment

# use credentials
- sed -i -e "s|[email protected]:|https://[email protected]/|" .gitmodules

install:
- git submodule update --init --recursive
- cd fastFM-core2
- |
if [ "$TRAVIS_OS_NAME" = "linux" ]; then
cmake -H. -B_lib -DCMAKE_BUILD_TYPE=Debug -DFASTFM_MINIMAL=ON -DCMAKE_DEBUG_POSTFIX=d
cmake --build _lib
else
cmake -H. -B_lib -DFASTFM_MINIMAL=ON -GXcode
cmake --build _lib --config Debug
mv _lib/fastFM/Debug/libfastFMd.a _lib/fastFM/
fi
- ls _lib/fastFM
- cd ..
- make
- python setup.py bdist_wheel
- pip install dist/*.whl
- docker build -t fastfm-test .

script:
- nosetests

before_deploy:
- export RELEASE_PKG_FILE=$(ls dist/*.whl)
- echo "deploying $RELEASE_PKG_FILE to GitHub releases"
- docker run --rm -i -v $(pwd):/fastfm/ fastfm-test /bin/bash -s < docker_run_tests.sh

deploy:
provider: releases
api_key:
secure: AJcZoe2+OiMJ4VlSkASAeMc/ii0ZRnj2PFaaL7zlSbx1THMpY/49U5BSyqX1PQioPSlTV3ZsIXI3u7KyqoXIQSXWzAuaBzpLTLS85fGSuTvUuexmaJtKU92OC143tuVVLCPnjC992+1uyctjrxMSqgoaUolfYkEftt5RGrMIKl2duGfDXrPXIueHSl8FQGXkmlY6NqkRx2v5kxsAjFcurvwTNU8ptJ84jVKjrE6t1IB61vp2eUcqVR/z6Lwau6mdvIybglnbH4lCMXP98zEIibLA8vbn3XxrC+0uU7Kjz37K6/CsJEPNL5tujJDMRKAupnrkgPsAGTpsAn6O6uLUz0ISgcen8R6KJ7cBli+cq08OZ3JLLoJpqkni62YVSQV+uYkQk9b5Pu09vUTOozJMnOqLSj9hVIswyxGiFPcTFskMgqMdx15M59gd0YpXH633YqwBgRmWNsctp4BKnTaE3iGW6aZc8lrXxpL7qcVAosjmpjLp3jiPXVSRdYf0yHl6pDUj5ZVyu27kAn1/I9JL0nH19zjXF2tUlEjuT9ydHwnhmsgBN/V+JhZxi7ZeEbOZfY1MfekKM/NwSRehVEp/J0XWqWg+kIXRU/rqY1/w0vLVNFeQirpEjUp39eCBydXeS3Bik8uANW2UTxojJo3LBfLLoAT8ZWFb3YrIBAYkzjc=
file: "${RELEASE_PKG_FILE}"
skip_cleanup: true
on:
tags: true
31 changes: 31 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Test image for fastFM CI: Ubuntu 16.04 with the native build toolchain and a
# conda environment holding the Python scientific stack (used by .travis.yml).
FROM ubuntu:16.04

MAINTAINER Immanuel Bayer

# Force a UTF-8 locale so Python text I/O is predictable inside the container.
ENV LANG=C.UTF-8 LC_ALL=C.UTF-8

USER root

# Build prerequisites (cmake/git/compilers) plus runtime libs needed by conda.
RUN apt-get update --fix-missing && apt-get install -y wget bzip2 ca-certificates \
libglib2.0-0 libxext6 libsm6 libxrender1 \
build-essential cmake git

# Download and install miniconda.
RUN echo 'export PATH=/opt/conda/bin:$PATH' > /etc/profile.d/conda.sh && \
wget --quiet https://repo.continuum.io/miniconda/Miniconda2-4.3.27-Linux-x86_64.sh -O ~/miniconda.sh && \
/bin/bash ~/miniconda.sh -b -p /opt/conda && \
rm ~/miniconda.sh

ENV PATH /opt/conda/bin:$PATH

# Non-interactive conda: auto-confirm every prompt, keep the default prompt.
RUN conda config --set always_yes yes --set changeps1 no
RUN conda update -q conda

# Setup test virtual env
# TRAVIS_PYTHON_VERSION is forwarded from the Travis build matrix at
# `docker build` time; defaults to Python 3 for local builds.
ARG TRAVIS_PYTHON_VERSION=3
ENV PY_VERSION=$TRAVIS_PYTHON_VERSION

RUN conda update -q conda && \
conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION \
cython numpy pandas scipy scikit-learn nose

7 changes: 6 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
PYTHON ?= python

all:
( cd fastFM-core2 ; \
cmake -H. -B_lib -DCMAKE_BUILD_TYPE=Debug -DCMAKE_DEBUG_POSTFIX=d; \
cmake --build _lib; )
( cd fastFM-core ; $(MAKE) lib )
python setup.py build_ext --inplace
$(PYTHON) setup.py build_ext --inplace

.PHONY : clean
clean:
Expand Down
6 changes: 6 additions & 0 deletions docker_run_tests.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Entry point run inside the fastfm-test Docker container (invoked from
# .travis.yml via `docker run ... /bin/bash -s < docker_run_tests.sh`):
# activates the conda test env, builds the extension from the checkout
# mounted at /fastfm, installs it and runs the nose test suite.
source activate test-environment
# Build fastFM-core
cd /fastfm/
make
pip install .
nosetests
2 changes: 1 addition & 1 deletion fastFM-core2
70 changes: 37 additions & 33 deletions fastFM/als.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@
# License: BSD 3 clause

import ffm
import ffm2
import numpy as np
from sklearn.base import RegressorMixin
from .validation import check_consistent_length, check_array
from .base import (FactorizationMachine, BaseFMClassifier,
_validate_class_labels, _check_warm_start)
_validate_class_labels, _check_warm_start,
_init_parameter, _settings_factory)


class FMRegression(FactorizationMachine, RegressorMixin):
Expand Down Expand Up @@ -63,9 +65,11 @@ def __init__(self, n_iter=100, init_stdev=0.1, rank=8, random_state=123,
self.l2_reg_w = l2_reg_w
self.l2_reg_V = l2_reg_V
self.l2_reg = l2_reg
self.task = "regression"
self.loss = "squared"
self.solver = "cd"
self.iter_count = 0

def fit(self, X_train, y_train, n_more_iter=0):
def fit(self, X, y, n_more_iter=0):
""" Fit model with specified loss.

Parameters
Expand All @@ -78,27 +82,24 @@ def fit(self, X_train, y_train, n_more_iter=0):
Number of iterations to continue from the current Coefficients.

"""
check_consistent_length(X, y)
y = check_array(y, ensure_2d=False, dtype=np.float64)

check_consistent_length(X_train, y_train)
y_train = check_array(y_train, ensure_2d=False, dtype=np.float64)
X = check_array(X, accept_sparse="csc", dtype=np.float64)
n_features = X.shape[1]

X_train = check_array(X_train, accept_sparse="csc", dtype=np.float64,
order="F")
self.n_iter = self.n_iter + n_more_iter
if self.iter_count == 0:
self.w0_, self.w_, self.V_ = _init_parameter(self, n_features)

if n_more_iter > 0:
_check_warm_start(self, X_train)
self.warm_start = True
if n_more_iter != 0:
_check_warm_start(self, X)
self.n_iter = n_more_iter

self.w0_, self.w_, self.V_ = ffm.ffm_als_fit(self, X_train, y_train)
settings_dict = _settings_factory(self)
ffm2.ffm_fit(self.w0_, self.w_, self.V_, X, y, self.rank,
settings_dict)

if self.iter_count != 0:
self.iter_count = self.iter_count + n_more_iter
else:
self.iter_count = self.n_iter

# reset to default setting
self.warm_start = False
self.iter_count += self.n_iter
return self


Expand Down Expand Up @@ -158,34 +159,37 @@ def __init__(self, n_iter=100, init_stdev=0.1, rank=8, random_state=123,
self.l2_reg_w = l2_reg_w
self.l2_reg_V = l2_reg_V
self.l2_reg = l2_reg
self.task = "classification"
self.loss = "squared"
self.solver = "cd"
self.iter_count = 0

def fit(self, X_train, y_train):
def fit(self, X, y):
""" Fit model with specified loss.

Parameters
----------
X : scipy.sparse.csc_matrix, (n_samples, n_features)

y : float | ndarray, shape = (n_samples, )
the targets have to be encodes as {-1, 1}.
the targets have to be encoded as {0, 1}.
"""
check_consistent_length(X_train, y_train)
X_train = check_array(X_train, accept_sparse="csc", dtype=np.float64,
order="F")
y_train = _validate_class_labels(y_train)
check_consistent_length(X, y)

X = check_array(X, accept_sparse="csc", dtype=np.float64,
order="F")
y = _validate_class_labels(y)

self.classes_ = np.unique(y)

self.classes_ = np.unique(y_train)
if len(self.classes_) != 2:
raise ValueError("This solver only supports binary classification"
" but the data contains"
" class: %r" % self.classes_)

# fastFM-core expects labels to be in {-1,1}
y_train = y_train.copy()
i_class1 = (y_train == self.classes_[0])
y_train[i_class1] = -1
y_train[~i_class1] = 1
self.w0_, self.w_, self.V_ = _init_parameter(self, X.shape[1])

settings_dict = _settings_factory(self)
ffm2.ffm_fit(self.w0_, self.w_, self.V_, X, y, self.rank,
settings_dict)

self.w0_, self.w_, self.V_ = ffm.ffm_als_fit(self, X_train, y_train)
return self
50 changes: 43 additions & 7 deletions fastFM/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,40 @@
import numpy as np
import scipy.sparse as sp
from scipy.stats import norm
from scipy.special import expit as sigmoid
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils import check_random_state

import ffm2
from .validation import check_array
import ffm


def _init_parameter(fm, n_features):
    """Draw the initial model parameters for a factorization machine.

    The bias ``w0`` and the linear weights ``w`` start at zero; the
    pairwise factors ``V`` are sampled from a normal distribution with
    standard deviation ``fm.init_stdev``, seeded via ``fm.random_state``.

    Returns the triple ``(w0, w, V)`` with shapes ``(1,)``,
    ``(n_features,)`` and ``(fm.rank, n_features)``.
    """
    rng = check_random_state(fm.random_state)
    bias = np.zeros(1, dtype=np.float64)
    linear = np.zeros(n_features, dtype=np.float64)
    factors = rng.normal(loc=0.0, scale=fm.init_stdev,
                         size=(fm.rank, n_features))
    return bias, linear, factors


def _settings_factory(fm):
settings_dict = fm.get_params()
settings_dict['loss'] = fm.loss
settings_dict['solver'] = fm.solver

# TODO align naming
settings_dict['iter'] = int(settings_dict['n_iter'])
del settings_dict['n_iter']

return settings_dict


def _validate_class_labels(y):
assert len(set(y)) == 2
assert y.min() == -1
assert y.max() == 1
return check_array(y, ensure_2d=False, dtype=np.float64)
assert len(set(y)) == 2
assert y.min() == -1
assert y.max() == 1
return check_array(y, ensure_2d=False, dtype=np.float64)


def _check_warm_start(fm, X):
Expand Down Expand Up @@ -82,6 +105,7 @@ def __init__(self, n_iter=100, init_stdev=0.1, rank=8, random_state=123,
self.step_size = 0
self.copy_X = copy_X


def predict(self, X_test):
""" Return predictions

Expand All @@ -99,12 +123,12 @@ def predict(self, X_test):
order="F")
assert sp.isspmatrix_csc(X_test)
assert X_test.shape[1] == len(self.w_)
return ffm.ffm_predict(self.w0_, self.w_, self.V_, X_test)
return ffm2.ffm_predict(self.w0_, self.w_, self.V_, X_test)


class BaseFMClassifier(FactorizationMachine, ClassifierMixin):

def predict(self, X_test):
def predict(self, X_test, threshold=0.5):
""" Return predictions

Parameters
Expand All @@ -117,6 +141,13 @@ def predict(self, X_test):
y : array, shape (n_samples)
Class labels
"""

if self.loss == "logistic":
y_proba = self.predict_proba(X_test)
y_binary = np.ones_like(y_proba, dtype=np.float64)
y_binary[y_proba < threshold] = -1
return y_binary

y_proba = norm.cdf(super(BaseFMClassifier, self).predict(X_test))
# convert probs to labels
y_pred = np.zeros_like(y_proba, dtype=np.float64) + self.classes_[0]
Expand All @@ -136,5 +167,10 @@ def predict_proba(self, X_test):
y : array, shape (n_samples)
Class Probability for the class with smaller label.
"""

if self.loss == "logistic":
pred = ffm2.ffm_predict(self.w0_, self.w_, self.V_, X_test)
return sigmoid(pred)

pred = super(BaseFMClassifier, self).predict(X_test)
return norm.cdf(pred)
1 change: 1 addition & 0 deletions fastFM/bpr.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,4 +92,5 @@ def fit(self, X, pairs):
assert pairs.max() <= X.shape[1]
assert pairs.min() >= 0
self.w0_, self.w_, self.V_ = ffm.ffm_fit_sgd_bpr(self, X, pairs)
self.w0_ = np.array([self.w0_], dtype=np.float64)
return self
6 changes: 0 additions & 6 deletions fastFM/cffm.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -33,15 +33,9 @@ cdef extern from "../fastFM-core/include/ffm.h":

void ffm_predict(double *w_0, double * w, double * V, cs_di *X, double *y_pred, int k)

void ffm_als_fit(double *w_0, double *w, double *V,
cs_di *X, double *y, ffm_param *param)

void ffm_mcmc_fit_predict(double *w_0, double *w, double *V,
cs_di *X_train, cs_di *X_test, double *y_train, double *y_pred,
ffm_param *param)

void ffm_sgd_fit(double *w_0, double *w, double *V,
cs_di *X, double *y, ffm_param *param)

void ffm_sgd_bpr_fit(double *w_0, double *w, double *V,
cs_di *X, double *pairs, int n_pairs, ffm_param *param)
Loading