From d818ceec3fb781a604ef108f4789fbaef9dcb678 Mon Sep 17 00:00:00 2001 From: AlexJoz Date: Thu, 30 Nov 2017 19:49:39 +0200 Subject: [PATCH 01/34] init serialization --- fastFM/cpp_ffm.pxd | 2 ++ fastFM/ffm2.pyx | 7 +++++-- fastFM/tests/test_ffm.py | 23 +++++++++++++---------- 3 files changed, 20 insertions(+), 12 deletions(-) diff --git a/fastFM/cpp_ffm.pxd b/fastFM/cpp_ffm.pxd index 8d8af41..177abe2 100644 --- a/fastFM/cpp_ffm.pxd +++ b/fastFM/cpp_ffm.pxd @@ -2,11 +2,13 @@ # License: BSD 3 clause #distutils: language=c++ +from libcpp.string cimport string cdef extern from "../fastFM-core2/fastFM/fastfm.h" namespace "fastfm": cdef cppclass Settings: Settings() + Settings(string settings) cdef cppclass Model: Model() diff --git a/fastFM/ffm2.pyx b/fastFM/ffm2.pyx index 20af616..2d233e0 100644 --- a/fastFM/ffm2.pyx +++ b/fastFM/ffm2.pyx @@ -4,10 +4,12 @@ cimport cpp_ffm from cpp_ffm cimport Settings, Data, Model, predict, fit from libcpp.memory cimport nullptr +from libcpp.string cimport string cimport numpy as np import numpy as np + def ffm_predict(double w_0, double[:] w, np.ndarray[np.float64_t, ndim = 2] V, X): assert X.shape[1] == len(w) @@ -50,8 +52,9 @@ def ffm_predict(double w_0, double[:] w, return y def ffm_als_fit(double w_0, double[:] w, np.ndarray[np.float64_t, ndim = 2] V, - X, double[:] y, int rank): + X, double[:] y, int rank, settings): assert X.shape[0] == len(y) # test shapes + n_features = X.shape[1] n_samples = X.shape[0] nnz = X.count_nonzero() @@ -71,7 +74,7 @@ def ffm_als_fit(double w_0, double[:] w, np.ndarray[np.float64_t, ndim = 2] V, m.add_parameter(&w[0], n_features) m.add_parameter( V.data, rank, n_features, 2) - cdef Settings* s = new Settings() + cdef Settings* s = new Settings(settings) cpp_ffm.fit(s, m, d) diff --git a/fastFM/tests/test_ffm.py b/fastFM/tests/test_ffm.py index 3a1cde2..02bd5b6 100644 --- a/fastFM/tests/test_ffm.py +++ b/fastFM/tests/test_ffm.py @@ -1,5 +1,6 @@ # Author: Immanuel Bayer # License: BSD 3 clause +import json import numpy as np import scipy.sparse as sp @@ -50,16 +51,18 @@ def test_ffm2_fit(): V_init = np.copy(V) rank = 2 - y_pred = ffm2.ffm_predict(w0, w, V, X) - msqr_before = mean_squared_error(y, y_pred) + # y_pred = ffm2.ffm_predict(w0, w, V, X) + # msqr_before = mean_squared_error(y, y_pred) - w0, w, V = ffm2.ffm_als_fit(w0, w, V, X, y, rank) + jsn = json.dumps({'n_iter': 1000, 'rank': 2}) - y_pred = ffm2.ffm_predict(w0, w, V, X) - msqr_after = mean_squared_error(y, y_pred) + w0, w, V = ffm2.ffm_als_fit(w0, w, V, X, y, rank, jsn.encode()) - assert(w0 != w0_init) - # FIXME: use np.all instead np.any after we can set solver params from python - assert (np.any(w != w_init)) - assert (np.any(V != V_init)) - assert(msqr_before > msqr_after) + # y_pred = ffm2.ffm_predict(w0, w, V, X) + # msqr_after = mean_squared_error(y, y_pred) + # + # assert(w0 != w0_init) + # # FIXME: use np.all instead np.any after we can set solver params from python + # assert (np.any(w != w_init)) + # assert (np.any(V != V_init)) + # assert(msqr_before > msqr_after) From a2bf716d501925f68cc0929703757a6eda2506e2 Mon Sep 17 00:00:00 2001 From: AlexJoz Date: Thu, 30 Nov 2017 19:50:00 +0200 Subject: [PATCH 02/34] submodule update --- fastFM-core2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fastFM-core2 b/fastFM-core2 index 832fa76..0adee8f 160000 --- a/fastFM-core2 +++ b/fastFM-core2 @@ -1 +1 @@ -Subproject commit 832fa76f2677e401710d6ac7512390e6b8f8a0ad +Subproject commit 0adee8fd798ec68f61bee33f1c84856f7f55ae41 From 
14756ac2b1991e08d18cd07f4cbf4a15d9fb9e7f Mon Sep 17 00:00:00 2001 From: AlexJoz Date: Thu, 30 Nov 2017 23:16:22 +0200 Subject: [PATCH 03/34] temp test change --- fastFM/tests/test_ffm.py | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/fastFM/tests/test_ffm.py b/fastFM/tests/test_ffm.py index 02bd5b6..0d59ac1 100644 --- a/fastFM/tests/test_ffm.py +++ b/fastFM/tests/test_ffm.py @@ -51,18 +51,14 @@ def test_ffm2_fit(): V_init = np.copy(V) rank = 2 - # y_pred = ffm2.ffm_predict(w0, w, V, X) - # msqr_before = mean_squared_error(y, y_pred) + y_pred = ffm2.ffm_predict(w0, w, V, X) + msqr_before = mean_squared_error(y, y_pred) + + jsn = json.dumps({'n_iter': 1000, 'l2_reg_w': 0.1, 'l2_reg_V': 0.2}).encode() - jsn = json.dumps({'n_iter': 1000, 'rank': 2}) + w0, w, V = ffm2.ffm_als_fit(w0, w, V, X, y, rank, jsn) - w0, w, V = ffm2.ffm_als_fit(w0, w, V, X, y, rank, jsn.encode()) + y_pred = ffm2.ffm_predict(w0, w, V, X) + msqr_after = mean_squared_error(y, y_pred) - # y_pred = ffm2.ffm_predict(w0, w, V, X) - # msqr_after = mean_squared_error(y, y_pred) - # - # assert(w0 != w0_init) - # # FIXME: use np.all instead np.any after we can set solver params from python - # assert (np.any(w != w_init)) - # assert (np.any(V != V_init)) - # assert(msqr_before > msqr_after) + assert(msqr_before > msqr_after) From 0ee1e50ad20dd2bb34f5a5b88c268995528bc473 Mon Sep 17 00:00:00 2001 From: AlexJoz Date: Thu, 30 Nov 2017 23:16:38 +0200 Subject: [PATCH 04/34] upd submodule --- fastFM-core2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fastFM-core2 b/fastFM-core2 index 0adee8f..edb50b0 160000 --- a/fastFM-core2 +++ b/fastFM-core2 @@ -1 +1 @@ -Subproject commit 0adee8fd798ec68f61bee33f1c84856f7f55ae41 +Subproject commit edb50b052536d2933fdbf5e566d29344d9ccca46 From 51bd404cf6481e696db754af26bbfb80f695d4ab Mon Sep 17 00:00:00 2001 From: AlexJoz Date: Sat, 2 Dec 2017 17:53:19 +0200 Subject: [PATCH 05/34] upd submodule --- fastFM-core2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fastFM-core2 b/fastFM-core2 index edb50b0..1ed2c8a 160000 --- a/fastFM-core2 +++ b/fastFM-core2 @@ -1 +1 @@ -Subproject commit edb50b052536d2933fdbf5e566d29344d9ccca46 +Subproject commit 1ed2c8aecc08991fa192b5b9ec2d1cbb943fdb1f From c3eb7a7ffab4596841962c9d03b02c96345d0236 Mon Sep 17 00:00:00 2001 From: Immanuel Bayer Date: Tue, 13 Feb 2018 23:12:28 +0100 Subject: [PATCH 06/34] update core2 --- fastFM-core2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fastFM-core2 b/fastFM-core2 index 1ed2c8a..bad9e33 160000 --- a/fastFM-core2 +++ b/fastFM-core2 @@ -1 +1 @@ -Subproject commit 1ed2c8aecc08991fa192b5b9ec2d1cbb943fdb1f +Subproject commit bad9e33d0c1016971270333c266df8541fbc0103 From 654bdf5e5d571869d3e7985db47e690f2c118ce3 Mon Sep 17 00:00:00 2001 From: Immanuel Bayer Date: Wed, 14 Feb 2018 05:33:10 +0100 Subject: [PATCH 07/34] use docker --- .dockerignore | 2 ++ .travis.yml | 66 +++------------------------------------------ Dockerfile | 45 +++++++++++++++++++++++++++++++ Makefile | 7 ++++- docker_run_tests.sh | 6 +++++ 5 files changed, 62 insertions(+), 64 deletions(-) create mode 100644 .dockerignore create mode 100644 Dockerfile create mode 100644 docker_run_tests.sh diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..331d387 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,2 @@ +.git +Dockerfile diff --git a/.travis.yml b/.travis.yml index b3220eb..31ab87a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,75 +5,15 @@ env: - 
TRAVIS_PYTHON_VERSION="3.4" - TRAVIS_PYTHON_VERSION="3.5" -matrix: - include: - - os: linux - dist: trusty - # - os: osx - # osx_image: xcode8.3 - git: submodules: false -before_install: - # fastFM-core depends on cblas - - if [ "$TRAVIS_OS_NAME" = "linux" ]; then sudo apt-get update -qq; sudo apt-get install -y libopenblas-dev; fi - - if [[ "$TRAVIS_PYTHON_VERSION" =~ "^2" ]]; then - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then - wget https://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh; - else - wget https://repo.continuum.io/miniconda/Miniconda-latest-MacOSX-x86_64.sh -O miniconda.sh; - fi - else - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then - wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh; - else - wget https://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -O miniconda.sh; - fi - fi - - bash miniconda.sh -b -p $HOME/miniconda - - export PATH="$HOME/miniconda/bin:$PATH" - - hash -r - - conda config --set always_yes yes --set changeps1 no - - conda update -q conda - # Useful for debugging any issues with conda - - conda info -a - - conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION cython numpy pandas scipy scikit-learn nose - - source activate test-environment - +install: # use credentials - sed -i -e "s|git@github.com:|https://$CI_TOKEN@github.com/|" .gitmodules - -install: - git submodule update --init --recursive - - cd fastFM-core2 - - | - if [ "$TRAVIS_OS_NAME" = "linux" ]; then - cmake -H. -B_lib -DCMAKE_BUILD_TYPE=Debug -DFASTFM_MINIMAL=ON -DCMAKE_DEBUG_POSTFIX=d - cmake --build _lib - else - cmake -H. -B_lib -DFASTFM_MINIMAL=ON -GXcode - cmake --build _lib --config Debug - mv _lib/fastFM/Debug/libfastFMd.a _lib/fastFM/ - fi - - ls _lib/fastFM - - cd .. - - make - - python setup.py bdist_wheel - - pip install dist/*.whl + - docker build -t fastfm-test script: - - nosetests - -before_deploy: - - export RELEASE_PKG_FILE=$(ls dist/*.whl) - - echo "deploying $RELEASE_PKG_FILE to GitHub releases" + - docker run --rm -i -v $(pwd):/fastfm/ fastfm-test /bin/bash -s < docker_run_tests.sh -deploy: - provider: releases - api_key: - secure: AJcZoe2+OiMJ4VlSkASAeMc/ii0ZRnj2PFaaL7zlSbx1THMpY/49U5BSyqX1PQioPSlTV3ZsIXI3u7KyqoXIQSXWzAuaBzpLTLS85fGSuTvUuexmaJtKU92OC143tuVVLCPnjC992+1uyctjrxMSqgoaUolfYkEftt5RGrMIKl2duGfDXrPXIueHSl8FQGXkmlY6NqkRx2v5kxsAjFcurvwTNU8ptJ84jVKjrE6t1IB61vp2eUcqVR/z6Lwau6mdvIybglnbH4lCMXP98zEIibLA8vbn3XxrC+0uU7Kjz37K6/CsJEPNL5tujJDMRKAupnrkgPsAGTpsAn6O6uLUz0ISgcen8R6KJ7cBli+cq08OZ3JLLoJpqkni62YVSQV+uYkQk9b5Pu09vUTOozJMnOqLSj9hVIswyxGiFPcTFskMgqMdx15M59gd0YpXH633YqwBgRmWNsctp4BKnTaE3iGW6aZc8lrXxpL7qcVAosjmpjLp3jiPXVSRdYf0yHl6pDUj5ZVyu27kAn1/I9JL0nH19zjXF2tUlEjuT9ydHwnhmsgBN/V+JhZxi7ZeEbOZfY1MfekKM/NwSRehVEp/J0XWqWg+kIXRU/rqY1/w0vLVNFeQirpEjUp39eCBydXeS3Bik8uANW2UTxojJo3LBfLLoAT8ZWFb3YrIBAYkzjc= - file: "${RELEASE_PKG_FILE}" - skip_cleanup: true - on: - tags: true diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..526044f --- /dev/null +++ b/Dockerfile @@ -0,0 +1,45 @@ +FROM ubuntu:16.04 + +MAINTAINER Immanuel Bayer + +ENV LANG=C.UTF-8 LC_ALL=C.UTF-8 + +USER root + +RUN apt-get update --fix-missing && apt-get install -y wget bzip2 ca-certificates \ + libglib2.0-0 libxext6 libsm6 libxrender1 \ + build-essential cmake git + +# Build fastfm-core + +# Download and install miniconda. 
+RUN echo 'export PATH=/opt/conda/bin:$PATH' > /etc/profile.d/conda.sh && \ + wget --quiet https://repo.continuum.io/miniconda/Miniconda2-4.3.27-Linux-x86_64.sh -O ~/miniconda.sh && \ + /bin/bash ~/miniconda.sh -b -p /opt/conda && \ + rm ~/miniconda.sh + +ENV PATH /opt/conda/bin:$PATH + +RUN conda config --set always_yes yes --set changeps1 no +RUN conda update -q conda + +# Setup test virtual env + +ARG TRAVIS_PYTHON_VERSION=3 +ENV PY_VERSION=$TRAVIS_PYTHON_VERSION + +RUN conda update -q conda && \ + conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION \ + cython numpy pandas scipy scikit-learn nose + +#RUN echo 'source activate test-environment' > /tmp/activate_env.sh && \ +# /bin/bash /tmp/activate_env.sh && rm /tmp/activate_env.sh + +#RUN [ “/bin/bash”, “-c”, “source activate test-environment && python setup.py develop” ] +#RUN ["/bin/bash", "-c", "source activate test-environment && conda info -a"] + +#ADD fastFM-core2/ /fastfm/fastFM-core2/ + +# Build and install fastfm + +# Run tests diff --git a/Makefile b/Makefile index 613a534..a853a2e 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,11 @@ +PYTHON ?= python + all: + ( cd fastFM-core2 ; \ + cmake -H. -B_lib -DCMAKE_BUILD_TYPE=Debug -DFASTFM_MINIMAL=ON -DCMAKE_DEBUG_POSTFIX=d; \ + cmake --build _lib; ) ( cd fastFM-core ; $(MAKE) lib ) - python setup.py build_ext --inplace + $(PYTHON) setup.py build_ext --inplace .PHONY : clean clean: diff --git a/docker_run_tests.sh b/docker_run_tests.sh new file mode 100644 index 0000000..1e3aa0c --- /dev/null +++ b/docker_run_tests.sh @@ -0,0 +1,6 @@ +source activate test-environment +# Build fastFM-core +cd /fastfm/ +make +pip install . +nosetests From de5d0a360a24f5dbc65a1f440f84feb9a15c2455 Mon Sep 17 00:00:00 2001 From: Immanuel Bayer Date: Wed, 14 Feb 2018 05:36:46 +0100 Subject: [PATCH 08/34] add . --- .travis.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index 31ab87a..bb77459 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,8 +2,7 @@ language: c env: - TRAVIS_PYTHON_VERSION="2.7" - - TRAVIS_PYTHON_VERSION="3.4" - - TRAVIS_PYTHON_VERSION="3.5" + - TRAVIS_PYTHON_VERSION="3.6" git: submodules: false @@ -12,7 +11,7 @@ install: # use credentials - sed -i -e "s|git@github.com:|https://$CI_TOKEN@github.com/|" .gitmodules - git submodule update --init --recursive - - docker build -t fastfm-test + - docker build -t fastfm-test . 
script: - docker run --rm -i -v $(pwd):/fastfm/ fastfm-test /bin/bash -s < docker_run_tests.sh From 1febbcb7393de3bcac32863694f997cf491987c9 Mon Sep 17 00:00:00 2001 From: Immanuel Bayer Date: Wed, 14 Feb 2018 05:51:56 +0100 Subject: [PATCH 09/34] update --- fastFM-core2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fastFM-core2 b/fastFM-core2 index bad9e33..cc64fe8 160000 --- a/fastFM-core2 +++ b/fastFM-core2 @@ -1 +1 @@ -Subproject commit bad9e33d0c1016971270333c266df8541fbc0103 +Subproject commit cc64fe86d100aed7473d53697dfd0fa81a182759 From ec484c1796fbb4e576c0c453d59332e32d661131 Mon Sep 17 00:00:00 2001 From: Immanuel Bayer Date: Wed, 14 Feb 2018 06:05:31 +0100 Subject: [PATCH 10/34] cache externals --- .travis.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.travis.yml b/.travis.yml index bb77459..82efb0a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,5 +1,9 @@ language: c +cache: + directories: + - fastFM-core/externals + env: - TRAVIS_PYTHON_VERSION="2.7" - TRAVIS_PYTHON_VERSION="3.6" From 385abdd5771e921f073b9d08fc6fae1b67e1f5cf Mon Sep 17 00:00:00 2001 From: Immanuel Bayer Date: Wed, 14 Feb 2018 06:11:23 +0100 Subject: [PATCH 11/34] cache only openblas --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 82efb0a..9a9dbf0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,7 +2,7 @@ language: c cache: directories: - - fastFM-core/externals + - fastFM-core/externals/OpenBLAS env: - TRAVIS_PYTHON_VERSION="2.7" From 3b29ad35ed2bc95bff673e3822d0a1ed45e36fd2 Mon Sep 17 00:00:00 2001 From: Immanuel Bayer Date: Wed, 14 Feb 2018 06:14:32 +0100 Subject: [PATCH 12/34] mv to before_install --- .travis.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 9a9dbf0..5c230cb 100644 --- a/.travis.yml +++ b/.travis.yml @@ -11,9 +11,11 @@ env: git: submodules: false -install: +before_install: # use credentials - sed -i -e "s|git@github.com:|https://$CI_TOKEN@github.com/|" .gitmodules + +install: - git submodule update --init --recursive - docker build -t fastfm-test . 
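A note on the Docker-based CI set up in PATCH 07 and adjusted in the patches above: the same build-and-test cycle that Travis runs can be reproduced locally. This is a minimal sketch, assuming Docker is installed and the private submodules are reachable; it simply mirrors the install and script steps from .travis.yml, and the container then runs the steps in docker_run_tests.sh (activate the conda test environment, make, pip install ., nosetests):

    git submodule update --init --recursive
    docker build -t fastfm-test .
    docker run --rm -i -v $(pwd):/fastfm/ fastfm-test /bin/bash -s < docker_run_tests.sh
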
From 028da05327a5ff5a9118a4ba55da9d718cac1b2d Mon Sep 17 00:00:00 2001 From: Immanuel Bayer Date: Wed, 14 Feb 2018 06:18:11 +0100 Subject: [PATCH 13/34] rm cache --- .travis.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index 5c230cb..2d97b0b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,8 +1,5 @@ language: c -cache: - directories: - - fastFM-core/externals/OpenBLAS env: - TRAVIS_PYTHON_VERSION="2.7" From 99fcae20c6699d671f8d91b5d1795a29ef5b52f6 Mon Sep 17 00:00:00 2001 From: Immanuel Bayer Date: Wed, 14 Feb 2018 06:32:57 +0100 Subject: [PATCH 14/34] smoke test --- fastFM/tests/test_als.py | 1 + 1 file changed, 1 insertion(+) diff --git a/fastFM/tests/test_als.py b/fastFM/tests/test_als.py index 804812e..4cf2226 100644 --- a/fastFM/tests/test_als.py +++ b/fastFM/tests/test_als.py @@ -60,6 +60,7 @@ def _test_raise_when_input_is_dense(): def test_fm_linear_regression(): + assert False X, y = get_small_data() fm = als.FMRegression(n_iter=1, l2_reg_w=1, l2_reg_V=1, rank=0) From 04a9fa4c96f27892657d42453c74b222226c9856 Mon Sep 17 00:00:00 2001 From: Immanuel Bayer Date: Wed, 14 Feb 2018 06:44:48 +0100 Subject: [PATCH 15/34] rm smoke test --- fastFM/tests/test_als.py | 1 - 1 file changed, 1 deletion(-) diff --git a/fastFM/tests/test_als.py b/fastFM/tests/test_als.py index 4cf2226..804812e 100644 --- a/fastFM/tests/test_als.py +++ b/fastFM/tests/test_als.py @@ -60,7 +60,6 @@ def _test_raise_when_input_is_dense(): def test_fm_linear_regression(): - assert False X, y = get_small_data() fm = als.FMRegression(n_iter=1, l2_reg_w=1, l2_reg_V=1, rank=0) From 25138f337dcbfb34b3f52486789f4891dbc63bfc Mon Sep 17 00:00:00 2001 From: Immanuel Bayer Date: Wed, 14 Feb 2018 06:45:02 +0100 Subject: [PATCH 16/34] clean out Dockerfile --- Dockerfile | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/Dockerfile b/Dockerfile index 526044f..af6af14 100644 --- a/Dockerfile +++ b/Dockerfile @@ -10,8 +10,6 @@ RUN apt-get update --fix-missing && apt-get install -y wget bzip2 ca-certificate libglib2.0-0 libxext6 libsm6 libxrender1 \ build-essential cmake git -# Build fastfm-core - # Download and install miniconda. 
RUN echo 'export PATH=/opt/conda/bin:$PATH' > /etc/profile.d/conda.sh && \ wget --quiet https://repo.continuum.io/miniconda/Miniconda2-4.3.27-Linux-x86_64.sh -O ~/miniconda.sh && \ @@ -24,7 +22,6 @@ RUN conda config --set always_yes yes --set changeps1 no RUN conda update -q conda # Setup test virtual env - ARG TRAVIS_PYTHON_VERSION=3 ENV PY_VERSION=$TRAVIS_PYTHON_VERSION @@ -32,14 +29,3 @@ RUN conda update -q conda && \ conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION \ cython numpy pandas scipy scikit-learn nose -#RUN echo 'source activate test-environment' > /tmp/activate_env.sh && \ -# /bin/bash /tmp/activate_env.sh && rm /tmp/activate_env.sh - -#RUN [ “/bin/bash”, “-c”, “source activate test-environment && python setup.py develop” ] -#RUN ["/bin/bash", "-c", "source activate test-environment && conda info -a"] - -#ADD fastFM-core2/ /fastfm/fastFM-core2/ - -# Build and install fastfm - -# Run tests From 9fe417026ca33effc2120e01d7c24431bf9493c3 Mon Sep 17 00:00:00 2001 From: Immanuel Bayer Date: Tue, 7 Aug 2018 16:10:39 +0200 Subject: [PATCH 17/34] update fastFM-core2 --- fastFM-core2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fastFM-core2 b/fastFM-core2 index cc64fe8..61a7c96 160000 --- a/fastFM-core2 +++ b/fastFM-core2 @@ -1 +1 @@ -Subproject commit cc64fe86d100aed7473d53697dfd0fa81a182759 +Subproject commit 61a7c96ad8582597c75a051c7fdf520945d151fd From 3b2ca6c1a3d2e2d8eed870f972a174b3a5a648b4 Mon Sep 17 00:00:00 2001 From: Immanuel Bayer Date: Mon, 20 Aug 2018 18:54:58 +0200 Subject: [PATCH 18/34] update core2 --- fastFM-core2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fastFM-core2 b/fastFM-core2 index 61a7c96..87be5de 160000 --- a/fastFM-core2 +++ b/fastFM-core2 @@ -1 +1 @@ -Subproject commit 61a7c96ad8582597c75a051c7fdf520945d151fd +Subproject commit 87be5de43d51d1128287afe988975aa1975aabef From a6a022ca28eb416f35ff2475012b73070b1d19df Mon Sep 17 00:00:00 2001 From: Immanuel Bayer Date: Tue, 21 Aug 2018 11:52:24 +0200 Subject: [PATCH 19/34] add loss and solver to json --- fastFM/tests/test_ffm.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fastFM/tests/test_ffm.py b/fastFM/tests/test_ffm.py index 0d59ac1..b2e7749 100644 --- a/fastFM/tests/test_ffm.py +++ b/fastFM/tests/test_ffm.py @@ -54,7 +54,11 @@ def test_ffm2_fit(): y_pred = ffm2.ffm_predict(w0, w, V, X) msqr_before = mean_squared_error(y, y_pred) - jsn = json.dumps({'n_iter': 1000, 'l2_reg_w': 0.1, 'l2_reg_V': 0.2}).encode() + jsn = json.dumps({'solver': 'cd', + 'loss': 'squared', + 'n_iter': 1000, + 'l2_reg_w': 0.1, + 'l2_reg_V': 0.2}).encode() w0, w, V = ffm2.ffm_als_fit(w0, w, V, X, y, rank, jsn) From d7517f943542f23b63be6823db08dea5d1fd8231 Mon Sep 17 00:00:00 2001 From: Immanuel Bayer Date: Wed, 22 Aug 2018 13:34:35 +0200 Subject: [PATCH 20/34] update core2 --- fastFM-core2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fastFM-core2 b/fastFM-core2 index 87be5de..2bfcbcb 160000 --- a/fastFM-core2 +++ b/fastFM-core2 @@ -1 +1 @@ -Subproject commit 87be5de43d51d1128287afe988975aa1975aabef +Subproject commit 2bfcbcba70393a8ebfe413dd719658b69acc2f59 From ed32da01707f024a99fbdc9935b2aa97f3e85cdf Mon Sep 17 00:00:00 2001 From: Immanuel Bayer Date: Wed, 22 Aug 2018 16:22:01 +0200 Subject: [PATCH 21/34] add factory methods for Data and Model --- fastFM/ffm2.pyx | 77 +++++++++++++++++++++------------------- fastFM/tests/test_ffm.py | 11 +++--- 2 files changed, 47 insertions(+), 41 deletions(-) diff --git a/fastFM/ffm2.pyx 
b/fastFM/ffm2.pyx index 2d233e0..cffb0e9 100644 --- a/fastFM/ffm2.pyx +++ b/fastFM/ffm2.pyx @@ -10,11 +10,20 @@ cimport numpy as np import numpy as np -def ffm_predict(double w_0, double[:] w, - np.ndarray[np.float64_t, ndim = 2] V, X): - assert X.shape[1] == len(w) - assert X.shape[1] == V.shape[1] +cdef Model* _model_factory(double* w_0, double[:] w, + np.ndarray[np.float64_t, ndim = 2] V): + + cdef Model *m = new Model() + rank = V.shape[0] + n_features = V.shape[1] + m.add_parameter(w_0) + m.add_parameter(&w[0], n_features) + m.add_parameter( V.data, rank, n_features, 2) + + return m + +cdef Data* _data_factory(X, double[:] y_pred): # get attributes from csc scipy n_features = X.shape[1] n_samples = X.shape[0] @@ -24,25 +33,31 @@ def ffm_predict(double w_0, double[:] w, cdef np.ndarray[int, ndim=1, mode='c'] outer = X.indptr cdef np.ndarray[np.float64_t, ndim=1, mode='c'] data = X.data - assert n_features == w.shape[0] - assert n_features == V.shape[1] + cdef Data *d = new Data() + d.add_design_matrix(n_samples, n_features, nnz, &outer[0], &inner[0], + &data[0]) + d.add_prediction(n_samples, &y_pred[0]) + return d - rank = V.shape[0] - cdef np.ndarray[np.float64_t, ndim=1, mode='c'] y =\ - np.zeros(X.shape[0], dtype=np.float64) +# cython doesn't support function overloading +cdef Data* _data_factory_fit(X, double[:] y_pred, double[:] y_true): + d = _data_factory(X, y_pred) + d.add_target(X.shape[0], &y_true[0]) + return d - cdef Model *m = new Model() - cdef Data *d = new Data() - m.add_parameter(&w_0) - m.add_parameter(&w[0], n_features) - m.add_parameter( V.data, rank, n_features, 2) +def ffm_predict(double w_0, double[:] w, + np.ndarray[np.float64_t, ndim = 2] V, X): + assert X.shape[1] == len(w) + assert X.shape[1] == V.shape[1] - d.add_target(n_samples, &y[0]) - d.add_prediction(n_samples, &y[0]) - d.add_design_matrix(n_samples, n_features, nnz, &outer[0], &inner[0], - &data[0]) + # allocate memory for predictions + cdef np.ndarray[np.float64_t, ndim=1, mode='c'] y =\ + np.zeros(X.shape[0], dtype=np.float64) + + m = _model_factory(&w_0, w, V) + d = _data_factory(X, y) cpp_ffm.predict(m, d) @@ -55,26 +70,14 @@ def ffm_als_fit(double w_0, double[:] w, np.ndarray[np.float64_t, ndim = 2] V, X, double[:] y, int rank, settings): assert X.shape[0] == len(y) # test shapes - n_features = X.shape[1] - n_samples = X.shape[0] - nnz = X.count_nonzero() - - cdef np.ndarray[int, ndim=1, mode='c'] inner = X.indices - cdef np.ndarray[int, ndim=1, mode='c'] outer = X.indptr - cdef np.ndarray[np.float64_t, ndim=1, mode='c'] data = X.data - cdef np.ndarray[np.float64_t, ndim=1, mode='c'] y_pred = np.zeros(n_samples, dtype=np.float64) - - cdef Data* d = new Data() - d.add_design_matrix(n_samples, n_features, nnz, &outer[0], &inner[0], &data[0]) - d.add_target(n_samples, &y[0]) - d.add_prediction(n_samples, &y_pred[0]) + cdef Settings* s = new Settings(settings) + m = _model_factory(&w_0, w, V) - cdef Model* m = new Model() - m.add_parameter(&w_0) - m.add_parameter(&w[0], n_features) - m.add_parameter( V.data, rank, n_features, 2) + # allocate memory for prediction + cdef np.ndarray[np.float64_t, ndim=1, mode='c'] y_pred = np.zeros( + X.shape[0], dtype=np.float64) - cdef Settings* s = new Settings(settings) + d = _data_factory_fit(X, y, y_pred) cpp_ffm.fit(s, m, d) @@ -82,4 +85,4 @@ def ffm_als_fit(double w_0, double[:] w, np.ndarray[np.float64_t, ndim = 2] V, del m del s - return w_0, w, V \ No newline at end of file + return w_0, w, V diff --git a/fastFM/tests/test_ffm.py b/fastFM/tests/test_ffm.py 
index b2e7749..550a286 100644 --- a/fastFM/tests/test_ffm.py +++ b/fastFM/tests/test_ffm.py @@ -44,7 +44,9 @@ def test_ffm2_fit(): w0, w, V, y, X = get_test_problem() w0 = 0 w[:] = 0 - V = np.random.normal(loc=0.0, scale=1.0, size=(2, 2)) + np.random.seed(123) + V = np.random.normal(loc=0.0, scale=1.0, + size=(2, 2)) w0_init = w0 w_init = np.copy(w) @@ -56,13 +58,14 @@ def test_ffm2_fit(): jsn = json.dumps({'solver': 'cd', 'loss': 'squared', - 'n_iter': 1000, - 'l2_reg_w': 0.1, - 'l2_reg_V': 0.2}).encode() + 'iter': 5, + 'l2_reg_w': 0.01, + 'l2_reg_V': 0.02}).encode() w0, w, V = ffm2.ffm_als_fit(w0, w, V, X, y, rank, jsn) y_pred = ffm2.ffm_predict(w0, w, V, X) msqr_after = mean_squared_error(y, y_pred) + assert w0 != 0 assert(msqr_before > msqr_after) From 0f75dbaa5e2d3d68eb8de77662c013792c504eab Mon Sep 17 00:00:00 2001 From: Immanuel Bayer Date: Wed, 22 Aug 2018 22:34:08 +0200 Subject: [PATCH 22/34] add col-major support to cpp fit --- fastFM/cpp_ffm.pxd | 4 +++- fastFM/ffm2.pyx | 22 ++++++++++++++++++---- fastFM/tests/test_ffm.py | 13 ++++++------- 3 files changed, 27 insertions(+), 12 deletions(-) diff --git a/fastFM/cpp_ffm.pxd b/fastFM/cpp_ffm.pxd index 177abe2..14b29ad 100644 --- a/fastFM/cpp_ffm.pxd +++ b/fastFM/cpp_ffm.pxd @@ -3,6 +3,7 @@ #distutils: language=c++ from libcpp.string cimport string +from libcpp cimport bool cdef extern from "../fastFM-core2/fastFM/fastfm.h" namespace "fastfm": @@ -21,7 +22,8 @@ cdef extern from "../fastFM-core2/fastFM/fastfm.h" namespace "fastfm": cdef cppclass Data: Data() void add_design_matrix(int n_samples, int n_features, int nnz, - int* outer_ptr, int* inter_ptr, double* data) + int* outer_ptr, int* inter_ptr, double* data, + bool is_col_major) void add_target(const int n_samples, double *data) void add_prediction(const int n_samples, double* data) diff --git a/fastFM/ffm2.pyx b/fastFM/ffm2.pyx index cffb0e9..ba04e27 100644 --- a/fastFM/ffm2.pyx +++ b/fastFM/ffm2.pyx @@ -1,11 +1,15 @@ # Author: Immanuel Bayer # License: BSD 3 clause +import json + cimport cpp_ffm from cpp_ffm cimport Settings, Data, Model, predict, fit from libcpp.memory cimport nullptr from libcpp.string cimport string +import scipy.sparse as sp + cimport numpy as np import numpy as np @@ -29,13 +33,21 @@ cdef Data* _data_factory(X, double[:] y_pred): n_samples = X.shape[0] nnz = X.count_nonzero() + is_col_major = None + if sp.isspmatrix_csc(X): + is_col_major = True + elif sp.isspmatrix_csr(X): + is_col_major = False + else: + raise "matrix format is not supported" + cdef np.ndarray[int, ndim=1, mode='c'] inner = X.indices cdef np.ndarray[int, ndim=1, mode='c'] outer = X.indptr cdef np.ndarray[np.float64_t, ndim=1, mode='c'] data = X.data cdef Data *d = new Data() d.add_design_matrix(n_samples, n_features, nnz, &outer[0], &inner[0], - &data[0]) + &data[0], is_col_major) d.add_prediction(n_samples, &y_pred[0]) return d @@ -66,11 +78,13 @@ def ffm_predict(double w_0, double[:] w, return y -def ffm_als_fit(double w_0, double[:] w, np.ndarray[np.float64_t, ndim = 2] V, - X, double[:] y, int rank, settings): +def ffm_fit(double w_0, double[:] w, np.ndarray[np.float64_t, ndim = 2] V, + X, double[:] y, int rank, dict settings): + if not isinstance(settings, dict): + raise "settings must be of type dict" assert X.shape[0] == len(y) # test shapes - cdef Settings* s = new Settings(settings) + cdef Settings* s = new Settings(json.dumps(settings).encode()) m = _model_factory(&w_0, w, V) # allocate memory for prediction diff --git a/fastFM/tests/test_ffm.py b/fastFM/tests/test_ffm.py 
index 550a286..73b714e 100644 --- a/fastFM/tests/test_ffm.py +++ b/fastFM/tests/test_ffm.py @@ -1,6 +1,5 @@ # Author: Immanuel Bayer # License: BSD 3 clause -import json import numpy as np import scipy.sparse as sp @@ -56,13 +55,13 @@ def test_ffm2_fit(): y_pred = ffm2.ffm_predict(w0, w, V, X) msqr_before = mean_squared_error(y, y_pred) - jsn = json.dumps({'solver': 'cd', - 'loss': 'squared', - 'iter': 5, - 'l2_reg_w': 0.01, - 'l2_reg_V': 0.02}).encode() + settings = {'solver': 'cd', + 'loss': 'squared', + 'iter': 5, + 'l2_reg_w': 0.01, + 'l2_reg_V': 0.02} - w0, w, V = ffm2.ffm_als_fit(w0, w, V, X, y, rank, jsn) + w0, w, V = ffm2.ffm_fit(w0, w, V, X, y, rank, settings) y_pred = ffm2.ffm_predict(w0, w, V, X) msqr_after = mean_squared_error(y, y_pred) From 1f0d958069d3bf43797ce2df2e1d507fb080c6a2 Mon Sep 17 00:00:00 2001 From: Immanuel Bayer Date: Wed, 22 Aug 2018 22:49:08 +0200 Subject: [PATCH 23/34] add test for cpp sgd --- fastFM/ffm2.pyx | 9 ++------- fastFM/tests/test_ffm.py | 33 +++++++++++++++++++++++++++++---- 2 files changed, 31 insertions(+), 11 deletions(-) diff --git a/fastFM/ffm2.pyx b/fastFM/ffm2.pyx index ba04e27..85b1a6c 100644 --- a/fastFM/ffm2.pyx +++ b/fastFM/ffm2.pyx @@ -33,12 +33,7 @@ cdef Data* _data_factory(X, double[:] y_pred): n_samples = X.shape[0] nnz = X.count_nonzero() - is_col_major = None - if sp.isspmatrix_csc(X): - is_col_major = True - elif sp.isspmatrix_csr(X): - is_col_major = False - else: + if not (sp.isspmatrix_csc(X) or sp.isspmatrix_csr(X)): raise "matrix format is not supported" cdef np.ndarray[int, ndim=1, mode='c'] inner = X.indices @@ -47,7 +42,7 @@ cdef Data* _data_factory(X, double[:] y_pred): cdef Data *d = new Data() d.add_design_matrix(n_samples, n_features, nnz, &outer[0], &inner[0], - &data[0], is_col_major) + &data[0], sp.isspmatrix_csc(X)) d.add_prediction(n_samples, &y_pred[0]) return d diff --git a/fastFM/tests/test_ffm.py b/fastFM/tests/test_ffm.py index 73b714e..5f467b5 100644 --- a/fastFM/tests/test_ffm.py +++ b/fastFM/tests/test_ffm.py @@ -39,7 +39,7 @@ def test_ffm2_predict_w0(): y_pred = ffm2.ffm_predict(w0, w, V, X) assert_equal(y_pred, w0) -def test_ffm2_fit(): +def test_ffm2_fit_als(): w0, w, V, y, X = get_test_problem() w0 = 0 w[:] = 0 @@ -47,9 +47,6 @@ def test_ffm2_fit(): V = np.random.normal(loc=0.0, scale=1.0, size=(2, 2)) - w0_init = w0 - w_init = np.copy(w) - V_init = np.copy(V) rank = 2 y_pred = ffm2.ffm_predict(w0, w, V, X) @@ -68,3 +65,31 @@ def test_ffm2_fit(): assert w0 != 0 assert(msqr_before > msqr_after) + +def test_ffm2_fit_sgd(): + w0, w, V, y, X = get_test_problem() + w0 = 0 + w[:] = 0 + np.random.seed(123) + V = np.random.normal(loc=0.0, scale=1.0, + size=(2, 2)) + + rank = 2 + + y_pred = ffm2.ffm_predict(w0, w, V, X) + msqr_before = mean_squared_error(y, y_pred) + + settings = {'solver': 'sgd', + 'loss': 'squared', + 'step_size': 0.001, + 'n_epoch': 5, + 'l2_reg_w': 0.01, + 'l2_reg_V': 0.02} + + w0, w, V = ffm2.ffm_fit(w0, w, V, sp.csr_matrix(X), y, rank, settings) + + y_pred = ffm2.ffm_predict(w0, w, V, X) + msqr_after = mean_squared_error(y, y_pred) + + assert w0 != 0 + assert(msqr_before > msqr_after) From 204db6f53d17fca4ad17a6df742d250ead252591 Mon Sep 17 00:00:00 2001 From: Immanuel Bayer Date: Wed, 22 Aug 2018 23:12:45 +0200 Subject: [PATCH 24/34] fix train_test_split depreciation warining --- fastFM/tests/test_als.py | 4 +--- fastFM/tests/test_datasets.py | 3 +-- fastFM/tests/test_mcmc.py | 3 +-- 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/fastFM/tests/test_als.py 
b/fastFM/tests/test_als.py index 804812e..a6306e4 100644 --- a/fastFM/tests/test_als.py +++ b/fastFM/tests/test_als.py @@ -7,6 +7,7 @@ from fastFM import als from fastFM.datasets import make_user_item_regression from sklearn.metrics import mean_squared_error +from sklearn.model_selection import train_test_split from sklearn.utils.testing import assert_almost_equal @@ -95,7 +96,6 @@ def test_fm_classification(): def test_als_warm_start(): X, y, coef = make_user_item_regression(label_stdev=0) - from sklearn.cross_validation import train_test_split X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.33, random_state=42) X_train = sp.csc_matrix(X_train) @@ -123,9 +123,7 @@ def test_als_warm_start(): def test_warm_start_path(): - X, y, coef = make_user_item_regression(label_stdev=.4) - from sklearn.cross_validation import train_test_split X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.33, random_state=42) X_train = sp.csc_matrix(X_train) diff --git a/fastFM/tests/test_datasets.py b/fastFM/tests/test_datasets.py index 34b4dfb..e4e3a0a 100644 --- a/fastFM/tests/test_datasets.py +++ b/fastFM/tests/test_datasets.py @@ -3,13 +3,13 @@ from fastFM.datasets import make_user_item_regression from sklearn.metrics import mean_squared_error +from sklearn.model_selection import train_test_split import scipy.sparse as sp def test_make_user_item_regression(): from fastFM.mcmc import FMRegression X, y, coef = make_user_item_regression(label_stdev=0) - from sklearn.cross_validation import train_test_split X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.33, random_state=42) @@ -19,7 +19,6 @@ def test_make_user_item_regression(): # generate data with noisy lables X, y, coef = make_user_item_regression(label_stdev=2) - from sklearn.cross_validation import train_test_split X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.33, random_state=42) diff --git a/fastFM/tests/test_mcmc.py b/fastFM/tests/test_mcmc.py index be6e8ef..095db9d 100644 --- a/fastFM/tests/test_mcmc.py +++ b/fastFM/tests/test_mcmc.py @@ -7,6 +7,7 @@ from fastFM import mcmc from fastFM.datasets import make_user_item_regression from sklearn.metrics import mean_squared_error +from sklearn.cross_validation import train_test_split from sklearn.utils.testing import assert_almost_equal, assert_array_equal @@ -83,7 +84,6 @@ def test_fm_classification_proba(): def test_mcmc_warm_start(): X, y, coef = make_user_item_regression(label_stdev=0) - from sklearn.cross_validation import train_test_split X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.33, random_state=44) X_train = sp.csc_matrix(X_train) @@ -106,7 +106,6 @@ def test_mcmc_warm_start(): def test_find_init_stdev(): X, y, coef = make_user_item_regression(label_stdev=.5) - from sklearn.cross_validation import train_test_split X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.33, random_state=44) X_train = sp.csc_matrix(X_train) From b30f70ab6fa1a0db3da0ff144a0e22692d9e660e Mon Sep 17 00:00:00 2001 From: Immanuel Bayer Date: Thu, 23 Aug 2018 11:52:46 +0200 Subject: [PATCH 25/34] replace predict with cpp version --- fastFM/base.py | 4 ++-- fastFM/ffm2.pyx | 43 ++++++++++++++++++++++++++++++++++++++-- fastFM/tests/test_sgd.py | 15 ++++++++++++++ 3 files changed, 58 insertions(+), 4 deletions(-) diff --git a/fastFM/base.py b/fastFM/base.py index ae7604c..f776608 100644 --- a/fastFM/base.py +++ b/fastFM/base.py @@ -6,8 +6,8 @@ from scipy.stats import norm from sklearn.base import 
BaseEstimator, ClassifierMixin +import ffm2 from .validation import check_array -import ffm def _validate_class_labels(y): @@ -99,7 +99,7 @@ def predict(self, X_test): order="F") assert sp.isspmatrix_csc(X_test) assert X_test.shape[1] == len(self.w_) - return ffm.ffm_predict(self.w0_, self.w_, self.V_, X_test) + return ffm2.ffm_predict_self(self, X_test) class BaseFMClassifier(FactorizationMachine, ClassifierMixin): diff --git a/fastFM/ffm2.pyx b/fastFM/ffm2.pyx index 85b1a6c..a0e083d 100644 --- a/fastFM/ffm2.pyx +++ b/fastFM/ffm2.pyx @@ -27,6 +27,28 @@ cdef Model* _model_factory(double* w_0, double[:] w, return m +cdef Model* _model_factory_self(fm): + + n_features = fm.w_.shape[0] + cdef double w_0 + cdef np.ndarray[np.float64_t, ndim=1, mode='c'] w + cdef np.ndarray[np.float64_t, ndim=2, mode='c'] V + + w_0 = 0 if fm.ignore_w_0 else fm.w0_ + w = np.zeros(n_features, dtype=np.float64) if fm.ignore_w else fm.w_ + V = np.zeros((fm.rank, n_features), dtype=np.float64)\ + if fm.rank == 0 else fm.V_ + + cdef Model *m = new Model() + rank = V.shape[0] + n_features = V.shape[1] + m.add_parameter(&w_0) + m.add_parameter(&w[0], n_features) + m.add_parameter( V.data, rank, n_features, 2) + + return m + + cdef Data* _data_factory(X, double[:] y_pred): # get attributes from csc scipy n_features = X.shape[1] @@ -73,10 +95,26 @@ def ffm_predict(double w_0, double[:] w, return y + +def ffm_predict_self(fm, X): + # allocate memory for predictions + cdef np.ndarray[np.float64_t, ndim=1, mode='c'] y =\ + np.zeros(X.shape[0], dtype=np.float64) + + m = _model_factory_self(fm) + d = _data_factory(X, y) + + cpp_ffm.predict(m, d) + + del m + del d + + return y + + def ffm_fit(double w_0, double[:] w, np.ndarray[np.float64_t, ndim = 2] V, X, double[:] y, int rank, dict settings): - if not isinstance(settings, dict): - raise "settings must be of type dict" + assert isinstance(settings, dict) assert X.shape[0] == len(y) # test shapes cdef Settings* s = new Settings(json.dumps(settings).encode()) @@ -90,6 +128,7 @@ def ffm_fit(double w_0, double[:] w, np.ndarray[np.float64_t, ndim = 2] V, cpp_ffm.fit(s, m, d) + del d del m del s diff --git a/fastFM/tests/test_sgd.py b/fastFM/tests/test_sgd.py index 949414f..8448dcf 100644 --- a/fastFM/tests/test_sgd.py +++ b/fastFM/tests/test_sgd.py @@ -6,6 +6,7 @@ from sklearn import metrics from sklearn.datasets import make_regression from sklearn.utils.testing import assert_almost_equal +from numpy.testing import assert_equal from fastFM import sgd from fastFM import als @@ -28,6 +29,20 @@ def get_test_problem(task='regression'): return w0, w, V, y, X +def test_sgd_predict(): + w0, w, V, y, X = get_test_problem() + X_train = sp.csc_matrix(X) + + fm = sgd.FMRegression(rank=V.shape[0]) + + # set model parameter + fm.w0_ = w0 + fm.w_ = w + fm.V_ = V + y_pred = fm.predict(X_train) + assert_equal(y_pred, y) + + def test_sgd_regression_small_example(): w0, w, V, y, X = get_test_problem() X_test = X.copy() From c9daed86fc9b269989cd1fb151afc4a8b709292f Mon Sep 17 00:00:00 2001 From: Immanuel Bayer Date: Wed, 29 Aug 2018 13:07:49 +0200 Subject: [PATCH 26/34] put intercept in np array --- fastFM/als.py | 2 ++ fastFM/bpr.py | 1 + fastFM/ffm2.pyx | 72 +++++++++++++++++++++++++--------------- fastFM/mcmc.py | 2 ++ fastFM/sgd.py | 2 ++ fastFM/tests/test_als.py | 2 +- fastFM/tests/test_ffm.py | 8 ++--- fastFM/tests/test_sgd.py | 2 +- 8 files changed, 58 insertions(+), 33 deletions(-) diff --git a/fastFM/als.py b/fastFM/als.py index 659076d..84691a1 100644 --- a/fastFM/als.py +++ 
b/fastFM/als.py @@ -91,6 +91,7 @@ def fit(self, X_train, y_train, n_more_iter=0): self.warm_start = True self.w0_, self.w_, self.V_ = ffm.ffm_als_fit(self, X_train, y_train) + self.w0_ = np.array([self.w0_], dtype=np.float64) if self.iter_count != 0: self.iter_count = self.iter_count + n_more_iter @@ -188,4 +189,5 @@ def fit(self, X_train, y_train): y_train[~i_class1] = 1 self.w0_, self.w_, self.V_ = ffm.ffm_als_fit(self, X_train, y_train) + self.w0_ = np.array([self.w0_], dtype=np.float64) return self diff --git a/fastFM/bpr.py b/fastFM/bpr.py index 2805fdc..be092fc 100644 --- a/fastFM/bpr.py +++ b/fastFM/bpr.py @@ -92,4 +92,5 @@ def fit(self, X, pairs): assert pairs.max() <= X.shape[1] assert pairs.min() >= 0 self.w0_, self.w_, self.V_ = ffm.ffm_fit_sgd_bpr(self, X, pairs) + self.w0_ = np.array([self.w0_], dtype=np.float64) return self diff --git a/fastFM/ffm2.pyx b/fastFM/ffm2.pyx index a0e083d..a0ff4cd 100644 --- a/fastFM/ffm2.pyx +++ b/fastFM/ffm2.pyx @@ -14,13 +14,26 @@ cimport numpy as np import numpy as np -cdef Model* _model_factory(double* w_0, double[:] w, +cdef Settings* _settings_factory(fm): + settings_dict = fm.get_params() + cdef Settings* s = new Settings(json.dumps(settings_dict).encode()) + return s + + +def _init_parameter(fm, n_features): + fm.w0_ = np.zeros(1, dtype=np.float64) + fm.w_ = np.zeros(n_features, dtype=np.float64) + fm.V_ = np.zeros((fm.rank, n_features), dtype=np.float64) + return fm.w0_, fm.w_, fm.V_ + + +cdef Model* _model_factory(double[:] w_0, double[:] w, np.ndarray[np.float64_t, ndim = 2] V): cdef Model *m = new Model() rank = V.shape[0] n_features = V.shape[1] - m.add_parameter(w_0) + m.add_parameter(&w_0[0]) m.add_parameter(&w[0], n_features) m.add_parameter( V.data, rank, n_features, 2) @@ -30,19 +43,14 @@ cdef Model* _model_factory(double* w_0, double[:] w, cdef Model* _model_factory_self(fm): n_features = fm.w_.shape[0] - cdef double w_0 - cdef np.ndarray[np.float64_t, ndim=1, mode='c'] w - cdef np.ndarray[np.float64_t, ndim=2, mode='c'] V - - w_0 = 0 if fm.ignore_w_0 else fm.w0_ - w = np.zeros(n_features, dtype=np.float64) if fm.ignore_w else fm.w_ - V = np.zeros((fm.rank, n_features), dtype=np.float64)\ - if fm.rank == 0 else fm.V_ + cdef np.ndarray[np.float64_t, ndim=1, mode='c'] w_0 = fm.w0_ + cdef np.ndarray[np.float64_t, ndim=1, mode='c'] w = fm.w_ + cdef np.ndarray[np.float64_t, ndim=2, mode='c'] V = fm.V_ cdef Model *m = new Model() rank = V.shape[0] n_features = V.shape[1] - m.add_parameter(&w_0) + m.add_parameter(&w_0[0]) m.add_parameter(&w[0], n_features) m.add_parameter( V.data, rank, n_features, 2) @@ -76,7 +84,7 @@ cdef Data* _data_factory_fit(X, double[:] y_pred, double[:] y_true): return d -def ffm_predict(double w_0, double[:] w, +def ffm_predict(double [:] w_0, double[:] w, np.ndarray[np.float64_t, ndim = 2] V, X): assert X.shape[1] == len(w) assert X.shape[1] == V.shape[1] @@ -85,7 +93,7 @@ def ffm_predict(double w_0, double[:] w, cdef np.ndarray[np.float64_t, ndim=1, mode='c'] y =\ np.zeros(X.shape[0], dtype=np.float64) - m = _model_factory(&w_0, w, V) + m = _model_factory(w_0, w, V) d = _data_factory(X, y) cpp_ffm.predict(m, d) @@ -97,34 +105,44 @@ def ffm_predict(double w_0, double[:] w, def ffm_predict_self(fm, X): - # allocate memory for predictions - cdef np.ndarray[np.float64_t, ndim=1, mode='c'] y =\ - np.zeros(X.shape[0], dtype=np.float64) + return ffm_predict(fm.w0_, fm.w_, fm.V_, X) - m = _model_factory_self(fm) - d = _data_factory(X, y) - cpp_ffm.predict(m, d) +def ffm_fit(double [:] w_0, double[:] w, 
np.ndarray[np.float64_t, ndim = 2] V, + X, double[:] y, int rank, dict settings): + assert isinstance(settings, dict) + assert X.shape[0] == len(y) # test shapes + + cdef Settings* s = new Settings(json.dumps(settings).encode()) + m = _model_factory(w_0, w, V) + + # allocate memory for prediction + cdef np.ndarray[np.float64_t, ndim=1, mode='c'] y_pred = np.zeros( + X.shape[0], dtype=np.float64) + + d = _data_factory_fit(X, y, y_pred) + + cpp_ffm.fit(s, m, d) - del m del d + del m + del s - return y + return w_0, w, V -def ffm_fit(double w_0, double[:] w, np.ndarray[np.float64_t, ndim = 2] V, - X, double[:] y, int rank, dict settings): - assert isinstance(settings, dict) +def ffm_fit_self(fm, X, double[:] y): assert X.shape[0] == len(y) # test shapes - cdef Settings* s = new Settings(json.dumps(settings).encode()) - m = _model_factory(&w_0, w, V) + _init_parameter(fm, X.shape[1]) + m = _model_factory(fm.w0_, fm.w_, fm.V_) # allocate memory for prediction cdef np.ndarray[np.float64_t, ndim=1, mode='c'] y_pred = np.zeros( X.shape[0], dtype=np.float64) d = _data_factory_fit(X, y, y_pred) + s = _settings_factory(fm) cpp_ffm.fit(s, m, d) @@ -133,4 +151,4 @@ def ffm_fit(double w_0, double[:] w, np.ndarray[np.float64_t, ndim = 2] V, del m del s - return w_0, w, V + return fm.w0_, fm.w_, fm.V_ diff --git a/fastFM/mcmc.py b/fastFM/mcmc.py index 5880864..dcf2129 100644 --- a/fastFM/mcmc.py +++ b/fastFM/mcmc.py @@ -122,6 +122,7 @@ def fit_predict(self, X_train, y_train, X_test, n_more_iter=0): coef, y_pred = ffm.ffm_mcmc_fit_predict(self, X_train, X_test, y_train) self.w0_, self.w_, self.V_ = coef + self.w0_ = np.array([self.w0_], dtype=np.float64) self.prediction_ = y_pred self.warm_start = False @@ -231,4 +232,5 @@ def fit_predict_proba(self, X_train, y_train, X_test): coef, y_pred = ffm.ffm_mcmc_fit_predict(self, X_train, X_test, y_train) self.w0_, self.w_, self.V_ = coef + self.w0_ = np.array([self.w0_], dtype=np.float64) return y_pred diff --git a/fastFM/sgd.py b/fastFM/sgd.py index 27f9810..4cd37b8 100644 --- a/fastFM/sgd.py +++ b/fastFM/sgd.py @@ -91,6 +91,7 @@ def fit(self, X, y): X = check_array(X, accept_sparse="csc", dtype=np.float64) self.w0_, self.w_, self.V_ = ffm.ffm_sgd_fit(self, X, y) + self.w0_ = np.array([self.w0_], dtype=np.float64) return self @@ -188,4 +189,5 @@ def fit(self, X, y): X = check_array(X, accept_sparse="csc", dtype=np.float64) self.w0_, self.w_, self.V_ = ffm.ffm_sgd_fit(self, X, y) + self.w0_ = np.array([self.w0_], dtype=np.float64) return self diff --git a/fastFM/tests/test_als.py b/fastFM/tests/test_als.py index a6306e4..28366fe 100644 --- a/fastFM/tests/test_als.py +++ b/fastFM/tests/test_als.py @@ -21,7 +21,7 @@ def get_test_problem(task='regression'): V = np.array([[6, 0], [5, 8]], dtype=np.float64) w = np.array([9, 2], dtype=np.float64) - w0 = 2 + w0 = np.array([2], dtype=np.float64) if task == 'classification': y_labels = np.ones_like(y) y_labels[y < np.median(y)] = -1 diff --git a/fastFM/tests/test_ffm.py b/fastFM/tests/test_ffm.py index 5f467b5..c836540 100644 --- a/fastFM/tests/test_ffm.py +++ b/fastFM/tests/test_ffm.py @@ -19,7 +19,7 @@ def get_test_problem(): V = np.array([[6, 0], [5, 8]], dtype=np.float64) w = np.array([9, 2], dtype=np.float64) - w0 = 2 + w0 = np.array([2], dtype=np.float64) return w0, w, V, y, X def test_ffm_predict(): @@ -37,11 +37,11 @@ def test_ffm2_predict_w0(): w[:] = 0 V[:, :] = 0 y_pred = ffm2.ffm_predict(w0, w, V, X) - assert_equal(y_pred, w0) + assert_equal(y_pred[0], w0) def test_ffm2_fit_als(): w0, w, V, y, X = 
get_test_problem() - w0 = 0 + w0[:] = 0 w[:] = 0 np.random.seed(123) V = np.random.normal(loc=0.0, scale=1.0, @@ -68,7 +68,7 @@ def test_ffm2_fit_als(): def test_ffm2_fit_sgd(): w0, w, V, y, X = get_test_problem() - w0 = 0 + w0[:] = 0 w[:] = 0 np.random.seed(123) V = np.random.normal(loc=0.0, scale=1.0, diff --git a/fastFM/tests/test_sgd.py b/fastFM/tests/test_sgd.py index 8448dcf..08fc0c4 100644 --- a/fastFM/tests/test_sgd.py +++ b/fastFM/tests/test_sgd.py @@ -21,7 +21,7 @@ def get_test_problem(task='regression'): V = np.array([[6, 0], [5, 8]], dtype=np.float64) w = np.array([9, 2], dtype=np.float64) - w0 = 2 + w0 = np.array([2], dtype=np.float64) if task == 'classification': y_labels = np.ones_like(y) y_labels[y < np.median(y)] = -1 From f8d4eec07ed26fecbc459d65e76d565f183bf01a Mon Sep 17 00:00:00 2001 From: Immanuel Bayer Date: Wed, 29 Aug 2018 21:56:00 +0200 Subject: [PATCH 27/34] replace sgd solver (use core2 now) --- fastFM/base.py | 2 +- fastFM/ffm2.pyx | 79 ++++++----------------------- fastFM/sgd.py | 104 +++++++++++++++++++++++++++++---------- fastFM/tests/test_ffm.py | 14 ++++-- fastFM/tests/test_sgd.py | 51 +++++++++++-------- 5 files changed, 133 insertions(+), 117 deletions(-) diff --git a/fastFM/base.py b/fastFM/base.py index f776608..07713bf 100644 --- a/fastFM/base.py +++ b/fastFM/base.py @@ -99,7 +99,7 @@ def predict(self, X_test): order="F") assert sp.isspmatrix_csc(X_test) assert X_test.shape[1] == len(self.w_) - return ffm2.ffm_predict_self(self, X_test) + return ffm2.ffm_predict(self.w0_, self.w_, self.V_, X_test) class BaseFMClassifier(FactorizationMachine, ClassifierMixin): diff --git a/fastFM/ffm2.pyx b/fastFM/ffm2.pyx index a0ff4cd..4d2cc0d 100644 --- a/fastFM/ffm2.pyx +++ b/fastFM/ffm2.pyx @@ -14,50 +14,22 @@ cimport numpy as np import numpy as np -cdef Settings* _settings_factory(fm): - settings_dict = fm.get_params() - cdef Settings* s = new Settings(json.dumps(settings_dict).encode()) - return s - - -def _init_parameter(fm, n_features): - fm.w0_ = np.zeros(1, dtype=np.float64) - fm.w_ = np.zeros(n_features, dtype=np.float64) - fm.V_ = np.zeros((fm.rank, n_features), dtype=np.float64) - return fm.w0_, fm.w_, fm.V_ - - -cdef Model* _model_factory(double[:] w_0, double[:] w, +cdef Model* _model_factory(np.ndarray[np.float64_t, ndim = 1] w_0, + np.ndarray[np.float64_t, ndim = 1] w, np.ndarray[np.float64_t, ndim = 2] V): cdef Model *m = new Model() rank = V.shape[0] n_features = V.shape[1] - m.add_parameter(&w_0[0]) - m.add_parameter(&w[0], n_features) - m.add_parameter( V.data, rank, n_features, 2) - - return m - - -cdef Model* _model_factory_self(fm): - n_features = fm.w_.shape[0] - cdef np.ndarray[np.float64_t, ndim=1, mode='c'] w_0 = fm.w0_ - cdef np.ndarray[np.float64_t, ndim=1, mode='c'] w = fm.w_ - cdef np.ndarray[np.float64_t, ndim=2, mode='c'] V = fm.V_ - - cdef Model *m = new Model() - rank = V.shape[0] - n_features = V.shape[1] - m.add_parameter(&w_0[0]) - m.add_parameter(&w[0], n_features) + m.add_parameter( w_0.data) + m.add_parameter( w.data, n_features) m.add_parameter( V.data, rank, n_features, 2) return m -cdef Data* _data_factory(X, double[:] y_pred): +cdef Data* _data_factory(X, np.ndarray[np.float64_t, ndim = 1] y_pred): # get attributes from csc scipy n_features = X.shape[1] n_samples = X.shape[0] @@ -78,14 +50,16 @@ cdef Data* _data_factory(X, double[:] y_pred): # cython doesn't support function overloading -cdef Data* _data_factory_fit(X, double[:] y_pred, double[:] y_true): +cdef Data* _data_factory_fit(X, np.ndarray[np.float64_t, ndim = 1] 
y_true, + np.ndarray[np.float64_t, ndim = 1] y_pred): d = _data_factory(X, y_pred) d.add_target(X.shape[0], &y_true[0]) return d -def ffm_predict(double [:] w_0, double[:] w, - np.ndarray[np.float64_t, ndim = 2] V, X): +def ffm_predict(np.ndarray[np.float64_t, ndim = 1] w_0, + np.ndarray[np.float64_t, ndim = 1] w, + np.ndarray[np.float64_t, ndim = 2] V, X): assert X.shape[1] == len(w) assert X.shape[1] == V.shape[1] @@ -104,12 +78,10 @@ def ffm_predict(double [:] w_0, double[:] w, return y -def ffm_predict_self(fm, X): - return ffm_predict(fm.w0_, fm.w_, fm.V_, X) - - -def ffm_fit(double [:] w_0, double[:] w, np.ndarray[np.float64_t, ndim = 2] V, - X, double[:] y, int rank, dict settings): +def ffm_fit(np.ndarray[np.float64_t, ndim = 1] w_0, + np.ndarray[np.float64_t, ndim = 1] w, + np.ndarray[np.float64_t, ndim = 2] V, + X, np.ndarray[np.float64_t, ndim = 1] y, int rank, dict settings): assert isinstance(settings, dict) assert X.shape[0] == len(y) # test shapes @@ -129,26 +101,3 @@ def ffm_fit(double [:] w_0, double[:] w, np.ndarray[np.float64_t, ndim = 2] V, del s return w_0, w, V - - -def ffm_fit_self(fm, X, double[:] y): - assert X.shape[0] == len(y) # test shapes - - _init_parameter(fm, X.shape[1]) - m = _model_factory(fm.w0_, fm.w_, fm.V_) - - # allocate memory for prediction - cdef np.ndarray[np.float64_t, ndim=1, mode='c'] y_pred = np.zeros( - X.shape[0], dtype=np.float64) - - d = _data_factory_fit(X, y, y_pred) - s = _settings_factory(fm) - - cpp_ffm.fit(s, m, d) - - - del d - del m - del s - - return fm.w0_, fm.w_, fm.V_ diff --git a/fastFM/sgd.py b/fastFM/sgd.py index 4cd37b8..25f15f1 100644 --- a/fastFM/sgd.py +++ b/fastFM/sgd.py @@ -2,13 +2,35 @@ # License: BSD 3 clause -import ffm +import ffm2 import numpy as np +from scipy.special import expit as sigmoid from sklearn.base import RegressorMixin +from sklearn.utils import check_random_state from .validation import check_array, check_consistent_length from .base import (FactorizationMachine, BaseFMClassifier, _validate_class_labels) +def _init_parameter(fm, n_features): + generator = check_random_state(fm.random_state) + w0 = np.zeros(1, dtype=np.float64) + w = np.zeros(n_features, dtype=np.float64) + V = generator.normal(loc=0.0, scale=fm.init_stdev, + size=(fm.rank, n_features)) + return w0, w, V + + +def _settings_factory(fm): + settings_dict = fm.get_params() + settings_dict['loss'] = fm.loss + settings_dict['solver'] = fm.solver + + # TODO align naming + settings_dict['iter'] = settings_dict['n_iter'] + del settings_dict['n_iter'] + + return settings_dict + class FMRegression(FactorizationMachine, RegressorMixin): @@ -56,7 +78,7 @@ class FMRegression(FactorizationMachine, RegressorMixin): Coefficients of second order factor matrix. """ - def __init__(self, n_iter=100, init_stdev=0.1, rank=8, random_state=123, + def __init__(self, n_iter=0, n_epoch=10, init_stdev=0.1, rank=8, random_state=123, l2_reg_w=0.1, l2_reg_V=0.1, l2_reg=0, step_size=0.1): super(FMRegression, self).\ __init__(n_iter=n_iter, init_stdev=init_stdev, rank=rank, @@ -69,7 +91,9 @@ def __init__(self, n_iter=100, init_stdev=0.1, rank=8, random_state=123, self.l2_reg_V = l2_reg_V self.l2_reg = l2_reg self.step_size = step_size - self.task = "regression" + self.loss = 'squared' + self.solver = 'sgd' + self.n_epoch = n_epoch def fit(self, X, y): """ Fit model with specified loss. 
@@ -85,13 +109,12 @@ def fit(self, X, y): check_consistent_length(X, y) y = check_array(y, ensure_2d=False, dtype=np.float64) - # The sgd solver expects a transposed design matrix in column major - # order (csc_matrix). - X = X.T # creates a copy - X = check_array(X, accept_sparse="csc", dtype=np.float64) - - self.w0_, self.w_, self.V_ = ffm.ffm_sgd_fit(self, X, y) - self.w0_ = np.array([self.w0_], dtype=np.float64) + X = check_array(X, accept_sparse="csr", dtype=np.float64) + n_features = X.shape[1] + settings_dict = _settings_factory(self) + self.w0_, self.w_, self.V_ = _init_parameter(self, n_features) + ffm2.ffm_fit(self.w0_, self.w_, self.V_, X, y, self.rank, + settings_dict) return self @@ -141,7 +164,8 @@ class FMClassification(BaseFMClassifier): Coefficients of second order factor matrix. """ - def __init__(self, n_iter=100, init_stdev=0.1, rank=8, random_state=123, + def __init__(self, n_iter=100, n_epoch=10, init_stdev=0.1, rank=8, + random_state=123, l2_reg_w=0, l2_reg_V=0, l2_reg=None, step_size=0.1): super(FMClassification, self).\ __init__(n_iter=n_iter, init_stdev=init_stdev, rank=rank, @@ -154,7 +178,9 @@ def __init__(self, n_iter=100, init_stdev=0.1, rank=8, random_state=123, self.l2_reg_V = l2_reg_V self.l2_reg = l2_reg self.step_size = step_size - self.task = "classification" + self.loss = 'logistic' + self.solver = 'sgd' + self.n_epoch = n_epoch def fit(self, X, y): """ Fit model with specified loss. @@ -174,20 +200,48 @@ def fit(self, X, y): " but the data contains" " class: %r" % self.classes_) - # fastFM-core expects labels to be in {-1,1} - y_train = y.copy() - i_class1 = (y_train == self.classes_[0]) - y_train[i_class1] = -1 - y_train[~i_class1] = 1 - check_consistent_length(X, y) y = y.astype(np.float64) - # The sgd solver expects a transposed design matrix in column major - # order (csc_matrix). - X = X.T # creates a copy - X = check_array(X, accept_sparse="csc", dtype=np.float64) - - self.w0_, self.w_, self.V_ = ffm.ffm_sgd_fit(self, X, y) - self.w0_ = np.array([self.w0_], dtype=np.float64) + X = check_array(X, accept_sparse="csr", dtype=np.float64) + n_features = X.shape[1] + settings_dict = _settings_factory(self) + self.w0_, self.w_, self.V_ = _init_parameter(self, n_features) + ffm2.ffm_fit(self.w0_, self.w_, self.V_, X, y, self.rank, + settings_dict) return self + + def predict_proba(self, X_test): + """ Return probabilities + + Parameters + ---------- + X : scipy.sparse.csr_matrix, (n_samples, n_features) + + Returns + ------ + + y : array, shape (n_samples) + Class Probability for the class with smaller label. 
+ """ + + pred = ffm2.ffm_predict(self.w0_, self.w_, self.V_, X_test) + return sigmoid(pred) + + def predict(self, X_test, threshold=0.5): + """ Return predictions + + Parameters + ---------- + X : scipy.sparse.csr_matrix, (n_samples, n_features) + + Returns + ------ + + y : array, shape (n_samples) + Class labels + """ + y_proba = self.predict_proba(X_test) + y_binary = np.ones_like(y_proba, dtype=np.float64) + y_binary[y_proba < threshold] = -1 + return y_binary diff --git a/fastFM/tests/test_ffm.py b/fastFM/tests/test_ffm.py index c836540..7ebcb5d 100644 --- a/fastFM/tests/test_ffm.py +++ b/fastFM/tests/test_ffm.py @@ -46,7 +46,6 @@ def test_ffm2_fit_als(): np.random.seed(123) V = np.random.normal(loc=0.0, scale=1.0, size=(2, 2)) - rank = 2 y_pred = ffm2.ffm_predict(w0, w, V, X) @@ -54,11 +53,11 @@ def test_ffm2_fit_als(): settings = {'solver': 'cd', 'loss': 'squared', - 'iter': 5, + 'iter': 500, 'l2_reg_w': 0.01, 'l2_reg_V': 0.02} - w0, w, V = ffm2.ffm_fit(w0, w, V, X, y, rank, settings) + ffm2.ffm_fit(w0, w, V, X, y, rank, settings) y_pred = ffm2.ffm_predict(w0, w, V, X) msqr_after = mean_squared_error(y, y_pred) @@ -71,7 +70,7 @@ def test_ffm2_fit_sgd(): w0[:] = 0 w[:] = 0 np.random.seed(123) - V = np.random.normal(loc=0.0, scale=1.0, + V = np.random.normal(loc=0.0, scale=0.01, size=(2, 2)) rank = 2 @@ -81,7 +80,7 @@ def test_ffm2_fit_sgd(): settings = {'solver': 'sgd', 'loss': 'squared', - 'step_size': 0.001, + 'step_size': 0.0001, 'n_epoch': 5, 'l2_reg_w': 0.01, 'l2_reg_V': 0.02} @@ -93,3 +92,8 @@ def test_ffm2_fit_sgd(): assert w0 != 0 assert(msqr_before > msqr_after) + + +if __name__ == "__main__": + # test_ffm2_fit_sgd() + test_ffm2_fit_als() diff --git a/fastFM/tests/test_sgd.py b/fastFM/tests/test_sgd.py index 08fc0c4..a7ab349 100644 --- a/fastFM/tests/test_sgd.py +++ b/fastFM/tests/test_sgd.py @@ -45,25 +45,28 @@ def test_sgd_predict(): def test_sgd_regression_small_example(): w0, w, V, y, X = get_test_problem() - X_test = X.copy() - X_train = sp.csc_matrix(X) + X_test = sp.csr_matrix(X) + X_train = sp.csr_matrix(X) + + fm = sgd.FMRegression(n_epoch=31, + init_stdev=0.01, l2_reg_w=0.01, + l2_reg_V=0.002, rank=2, + step_size=0.0001, random_state=123) - fm = sgd.FMRegression(n_iter=10000, - init_stdev=0.01, l2_reg_w=0.5, l2_reg_V=50.5, rank=2, - step_size=0.0001) + X_train.shape[0] == y.shape[0] fm.fit(X_train, y) y_pred = fm.predict(X_test) - assert metrics.r2_score(y_pred, y) > 0.99 + assert metrics.r2_score(y_pred, y) > 0.95 def test_first_order_sgd_vs_als_regression(): X, y = make_regression(n_samples=100, n_features=50, random_state=123) X = sp.csc_matrix(X) - fm_sgd = sgd.FMRegression(n_iter=900, init_stdev=0.01, l2_reg_w=0.0, - l2_reg_V=50.5, rank=0, step_size=0.01) - fm_als = als.FMRegression(n_iter=10, l2_reg_w=0, l2_reg_V=0, rank=0) + fm_sgd = sgd.FMRegression(n_epoch=20, init_stdev=0.01, l2_reg_w=0.01, + l2_reg_V=0.02, rank=0, step_size=0.01) + fm_als = als.FMRegression(n_iter=10, l2_reg_w=0.01, l2_reg_V=0.02, rank=0) y_pred_sgd = fm_sgd.fit(X, y).predict(X) y_pred_als = fm_als.fit(X, y).predict(X) @@ -78,11 +81,12 @@ def test_second_order_sgd_vs_als_regression(): X, y = make_regression(n_samples=100, n_features=50, random_state=123) X = sp.csc_matrix(X) - fm_sgd = sgd.FMRegression(n_iter=50000, init_stdev=0.00, l2_reg_w=0.0, - l2_reg_V=50.5, rank=2, step_size=0.0002) - fm_als = als.FMRegression(n_iter=10, l2_reg_w=0, l2_reg_V=0, rank=2) + fm_sgd = sgd.FMRegression(n_epoch=300, init_stdev=0.1, l2_reg_w=0.01, + l2_reg_V=0.05, rank=2, step_size=0.00001) + fm_als = 
als.FMRegression(n_iter=10, l2_reg_w=0.01, l2_reg_V=0.05, + rank=2) - y_pred_als = fm_als.fit(X, y).predict(X) + y_pred_als = fm_als.fit(sp.csr_matrix(X), y).predict(X) y_pred_sgd = fm_sgd.fit(X, y).predict(X) score_als = metrics.r2_score(y_pred_als, y) @@ -93,15 +97,17 @@ def test_second_order_sgd_vs_als_regression(): def test_sgd_classification_small_example(): w0, w, V, y, X = get_test_problem(task='classification') - X_test = X.copy() - X_train = sp.csc_matrix(X) + X_test = sp.csr_matrix(X) + X_train = sp.csr_matrix(X) - fm = sgd.FMClassification(n_iter=1000, - init_stdev=0.1, l2_reg_w=0, l2_reg_V=0, rank=2, - step_size=0.1) + fm = sgd.FMClassification(n_epoch=100, + init_stdev=0.01, l2_reg_w=0.02, l2_reg_V=0.02, + rank=2, step_size=0.01) fm.fit(X_train, y) + y_pred = fm.predict_proba(X_test) + assert metrics.roc_auc_score(y, y_pred) > 0.95 + y_pred = fm.predict(X_test) - print(y_pred) assert metrics.accuracy_score(y, y_pred) > 0.95 @@ -118,6 +124,9 @@ def test_clone(): if __name__ == '__main__': + # test_sgd_fit_small_example() + # test_sgd_fit_small_example() test_sgd_regression_small_example() - test_first_order_sgd_vs_als_regression() - test_second_order_sgd_vs_als_regression() + + # test_first_order_sgd_vs_als_regression() + # test_second_order_sgd_vs_als_regression() From 230e6881fd6681cbc69560a59b1ca4839cabb667 Mon Sep 17 00:00:00 2001 From: Immanuel Bayer Date: Wed, 29 Aug 2018 21:57:30 +0200 Subject: [PATCH 28/34] remove sgd wrapper for C solver --- fastFM/cffm.pxd | 3 --- fastFM/ffm.pyx | 27 +-------------------------- 2 files changed, 1 insertion(+), 29 deletions(-) diff --git a/fastFM/cffm.pxd b/fastFM/cffm.pxd index 9e600ea..19c0f55 100644 --- a/fastFM/cffm.pxd +++ b/fastFM/cffm.pxd @@ -40,8 +40,5 @@ cdef extern from "../fastFM-core/include/ffm.h": cs_di *X_train, cs_di *X_test, double *y_train, double *y_pred, ffm_param *param) - void ffm_sgd_fit(double *w_0, double *w, double *V, - cs_di *X, double *y, ffm_param *param) - void ffm_sgd_bpr_fit(double *w_0, double *w, double *V, cs_di *X, double *pairs, int n_pairs, ffm_param *param) diff --git a/fastFM/ffm.pyx b/fastFM/ffm.pyx index 344ceb3..9a43c3d 100644 --- a/fastFM/ffm.pyx +++ b/fastFM/ffm.pyx @@ -114,31 +114,6 @@ def ffm_als_fit(fm, X, double[:] y): return w_0, w, V -def ffm_sgd_fit(fm, X, double[:] y): - """ - The sgd solver expects a transposed design matrix in column major order - (csc_matrix) Samples are stored in columns, this allows fast sample by - sample access. 
- """ - assert X.shape[1] == len(y) # test shapes - n_features = X.shape[0] - X_ = CsMatrix(X) - pt_X = PyCapsule_GetPointer(X_, "CsMatrix") - param = FFMParam(fm) - pt_param = PyCapsule_GetPointer(param, "FFMParam") - - # allocate the coefs - cdef double w_0 = 0 - cdef np.ndarray[np.float64_t, ndim=1, mode='c'] w =\ - np.zeros(n_features, dtype=np.float64) - cdef np.ndarray[np.float64_t, ndim=2, mode='c'] V =\ - np.zeros((fm.rank, n_features), dtype=np.float64) - - cffm.ffm_sgd_fit(&w_0, w.data, V.data, - pt_X, &y[0], pt_param) - return w_0, w, V - - def ffm_fit_sgd_bpr(fm, X, np.ndarray[np.float64_t, ndim=2, mode='c'] pairs): n_features = X.shape[0] X_ = CsMatrix(X) @@ -208,4 +183,4 @@ def ffm_mcmc_fit_predict(fm, X_train, X_test, double[:] y): &y[0], y_pred.data, pt_param) fm.hyper_param_ = hyper_param - return (w_0, w, V), y_pred \ No newline at end of file + return (w_0, w, V), y_pred From 916ad29bba6bcb664befb040ea5e757e5b1956e4 Mon Sep 17 00:00:00 2001 From: Immanuel Bayer Date: Fri, 21 Sep 2018 11:51:56 +0200 Subject: [PATCH 29/34] update core2 --- Makefile | 2 +- fastFM-core2 | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index a853a2e..40d5f8a 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ PYTHON ?= python all: ( cd fastFM-core2 ; \ - cmake -H. -B_lib -DCMAKE_BUILD_TYPE=Debug -DFASTFM_MINIMAL=ON -DCMAKE_DEBUG_POSTFIX=d; \ + cmake -H. -B_lib -DCMAKE_BUILD_TYPE=Debug -DCMAKE_DEBUG_POSTFIX=d; \ cmake --build _lib; ) ( cd fastFM-core ; $(MAKE) lib ) $(PYTHON) setup.py build_ext --inplace diff --git a/fastFM-core2 b/fastFM-core2 index 2bfcbcb..dec8227 160000 --- a/fastFM-core2 +++ b/fastFM-core2 @@ -1 +1 @@ -Subproject commit 2bfcbcba70393a8ebfe413dd719658b69acc2f59 +Subproject commit dec822708d015e3d16ee14aafcc3cc7d58306a3b From fae6d7af206857f4089773b54f2a8805ddc48dce Mon Sep 17 00:00:00 2001 From: Immanuel Bayer Date: Fri, 21 Sep 2018 20:44:30 +0200 Subject: [PATCH 30/34] upgrade cd regression to core2 --- fastFM/als.py | 40 ++++++++++++++++++------------------- fastFM/base.py | 30 ++++++++++++++++++++++++---- fastFM/sgd.py | 23 ++------------------- fastFM/tests/test_als.py | 43 +++++++++++++++------------------------- 4 files changed, 64 insertions(+), 72 deletions(-) diff --git a/fastFM/als.py b/fastFM/als.py index 84691a1..7aae854 100644 --- a/fastFM/als.py +++ b/fastFM/als.py @@ -2,11 +2,13 @@ # License: BSD 3 clause import ffm +import ffm2 import numpy as np from sklearn.base import RegressorMixin from .validation import check_consistent_length, check_array from .base import (FactorizationMachine, BaseFMClassifier, - _validate_class_labels, _check_warm_start) + _validate_class_labels, _check_warm_start, + _init_parameter, _settings_factory) class FMRegression(FactorizationMachine, RegressorMixin): @@ -63,9 +65,11 @@ def __init__(self, n_iter=100, init_stdev=0.1, rank=8, random_state=123, self.l2_reg_w = l2_reg_w self.l2_reg_V = l2_reg_V self.l2_reg = l2_reg - self.task = "regression" + self.loss = "squared" + self.solver = "cd" + self.iter_count = 0 - def fit(self, X_train, y_train, n_more_iter=0): + def fit(self, X, y, n_more_iter=0): """ Fit model with specified loss. Parameters @@ -78,28 +82,24 @@ def fit(self, X_train, y_train, n_more_iter=0): Number of iterations to continue from the current Coefficients. 
""" + check_consistent_length(X, y) + y = check_array(y, ensure_2d=False, dtype=np.float64) - check_consistent_length(X_train, y_train) - y_train = check_array(y_train, ensure_2d=False, dtype=np.float64) + X = check_array(X, accept_sparse="csc", dtype=np.float64) + n_features = X.shape[1] - X_train = check_array(X_train, accept_sparse="csc", dtype=np.float64, - order="F") - self.n_iter = self.n_iter + n_more_iter + if self.iter_count == 0: + self.w0_, self.w_, self.V_ = _init_parameter(self, n_features) - if n_more_iter > 0: - _check_warm_start(self, X_train) - self.warm_start = True + if n_more_iter != 0: + _check_warm_start(self, X) + self.n_iter = n_more_iter - self.w0_, self.w_, self.V_ = ffm.ffm_als_fit(self, X_train, y_train) - self.w0_ = np.array([self.w0_], dtype=np.float64) - - if self.iter_count != 0: - self.iter_count = self.iter_count + n_more_iter - else: - self.iter_count = self.n_iter + settings_dict = _settings_factory(self) + ffm2.ffm_fit(self.w0_, self.w_, self.V_, X, y, self.rank, + settings_dict) - # reset to default setting - self.warm_start = False + self.iter_count += self.n_iter return self diff --git a/fastFM/base.py b/fastFM/base.py index 07713bf..dc4526e 100644 --- a/fastFM/base.py +++ b/fastFM/base.py @@ -5,16 +5,38 @@ import scipy.sparse as sp from scipy.stats import norm from sklearn.base import BaseEstimator, ClassifierMixin +from sklearn.utils import check_random_state import ffm2 from .validation import check_array +def _init_parameter(fm, n_features): + generator = check_random_state(fm.random_state) + w0 = np.zeros(1, dtype=np.float64) + w = np.zeros(n_features, dtype=np.float64) + V = generator.normal(loc=0.0, scale=fm.init_stdev, + size=(fm.rank, n_features)) + return w0, w, V + + +def _settings_factory(fm): + settings_dict = fm.get_params() + settings_dict['loss'] = fm.loss + settings_dict['solver'] = fm.solver + + # TODO align naming + settings_dict['iter'] = int(settings_dict['n_iter']) + del settings_dict['n_iter'] + + return settings_dict + + def _validate_class_labels(y): - assert len(set(y)) == 2 - assert y.min() == -1 - assert y.max() == 1 - return check_array(y, ensure_2d=False, dtype=np.float64) + assert len(set(y)) == 2 + assert y.min() == -1 + assert y.max() == 1 + return check_array(y, ensure_2d=False, dtype=np.float64) def _check_warm_start(fm, X): diff --git a/fastFM/sgd.py b/fastFM/sgd.py index 25f15f1..2ae8f87 100644 --- a/fastFM/sgd.py +++ b/fastFM/sgd.py @@ -9,27 +9,8 @@ from sklearn.utils import check_random_state from .validation import check_array, check_consistent_length from .base import (FactorizationMachine, BaseFMClassifier, - _validate_class_labels) - -def _init_parameter(fm, n_features): - generator = check_random_state(fm.random_state) - w0 = np.zeros(1, dtype=np.float64) - w = np.zeros(n_features, dtype=np.float64) - V = generator.normal(loc=0.0, scale=fm.init_stdev, - size=(fm.rank, n_features)) - return w0, w, V - - -def _settings_factory(fm): - settings_dict = fm.get_params() - settings_dict['loss'] = fm.loss - settings_dict['solver'] = fm.solver - - # TODO align naming - settings_dict['iter'] = settings_dict['n_iter'] - del settings_dict['n_iter'] - - return settings_dict + _validate_class_labels, + _init_parameter, _settings_factory) class FMRegression(FactorizationMachine, RegressorMixin): diff --git a/fastFM/tests/test_als.py b/fastFM/tests/test_als.py index 28366fe..2313ce3 100644 --- a/fastFM/tests/test_als.py +++ b/fastFM/tests/test_als.py @@ -124,47 +124,35 @@ def test_als_warm_start(): def 
test_warm_start_path(): X, y, coef = make_user_item_regression(label_stdev=.4) - X_train, X_test, y_train, y_test = train_test_split( - X, y, test_size=0.33, random_state=42) - X_train = sp.csc_matrix(X_train) - X_test = sp.csc_matrix(X_test) - n_iter = 10 + X = sp.csc_matrix(X) rank = 4 seed = 333 step_size = 1 + n_iter = 10 l2_reg_w = 0 l2_reg_V = 0 fm = als.FMRegression(n_iter=0, l2_reg_w=l2_reg_w, l2_reg_V=l2_reg_V, rank=rank, random_state=seed) - # initalize coefs - fm.fit(X_train, y_train) - rmse_train = [] - rmse_test = [] - for i in range(1, n_iter): - fm.fit(X_train, y_train, n_more_iter=step_size) - rmse_train.append(np.sqrt(mean_squared_error( - fm.predict(X_train), y_train))) - rmse_test.append(np.sqrt(mean_squared_error( - fm.predict(X_test), y_test))) + rmse = [] + for _ in range(1, n_iter): + fm.fit(X, y, n_more_iter=step_size) + rmse.append(np.sqrt(mean_squared_error( + fm.predict(X), y))) print('------- restart ----------') - values = np.arange(1, n_iter) - rmse_test_re = [] - rmse_train_re = [] - for i in values: + rmse_re = [] + for i in range(1, n_iter): fm = als.FMRegression(n_iter=i, l2_reg_w=l2_reg_w, l2_reg_V=l2_reg_V, rank=rank, random_state=seed) - fm.fit(X_train, y_train) - rmse_test_re.append(np.sqrt(mean_squared_error( - fm.predict(X_test), y_test))) - rmse_train_re.append(np.sqrt(mean_squared_error( - fm.predict(X_train), y_train))) + fm.fit(X, y) + rmse_re.append(np.sqrt(mean_squared_error( + fm.predict(X), y))) - assert_almost_equal(rmse_train, rmse_train_re) - assert_almost_equal(rmse_test, rmse_test_re) + assert len(rmse) == len(rmse_re) + assert_almost_equal(rmse, rmse_re) def test_clone(): @@ -181,4 +169,5 @@ def test_clone(): if __name__ == '__main__': # test_fm_regression_only_w0() - test_fm_linear_regression() + # test_fm_linear_regression() + test_warm_start_path() From 6d928e0432e94518157ae0b0d13758dd7496edbf Mon Sep 17 00:00:00 2001 From: Immanuel Bayer Date: Fri, 21 Sep 2018 21:32:00 +0200 Subject: [PATCH 31/34] refactor class predict --- fastFM/als.py | 32 +++++++++++++++++--------------- fastFM/base.py | 16 +++++++++++++++- fastFM/sgd.py | 35 ----------------------------------- 3 files changed, 32 insertions(+), 51 deletions(-) diff --git a/fastFM/als.py b/fastFM/als.py index 7aae854..f24744c 100644 --- a/fastFM/als.py +++ b/fastFM/als.py @@ -159,9 +159,11 @@ def __init__(self, n_iter=100, init_stdev=0.1, rank=8, random_state=123, self.l2_reg_w = l2_reg_w self.l2_reg_V = l2_reg_V self.l2_reg = l2_reg - self.task = "classification" + self.loss = "squared" + self.solver = "cd" + self.iter_count = 0 - def fit(self, X_train, y_train): + def fit(self, X, y): """ Fit model with specified loss. Parameters @@ -169,25 +171,25 @@ def fit(self, X_train, y_train): X : scipy.sparse.csc_matrix, (n_samples, n_features) y : float | ndarray, shape = (n_samples, ) - the targets have to be encodes as {-1, 1}. + the targets have to be encodes as {0, 1}. 
""" - check_consistent_length(X_train, y_train) - X_train = check_array(X_train, accept_sparse="csc", dtype=np.float64, - order="F") - y_train = _validate_class_labels(y_train) + check_consistent_length(X, y) + + X = check_array(X, accept_sparse="csc", dtype=np.float64, + order="F") + y = _validate_class_labels(y) + + self.classes_ = np.unique(y) - self.classes_ = np.unique(y_train) if len(self.classes_) != 2: raise ValueError("This solver only supports binary classification" " but the data contains" " class: %r" % self.classes_) - # fastFM-core expects labels to be in {-1,1} - y_train = y_train.copy() - i_class1 = (y_train == self.classes_[0]) - y_train[i_class1] = -1 - y_train[~i_class1] = 1 + self.w0_, self.w_, self.V_ = _init_parameter(self, X.shape[1]) + + settings_dict = _settings_factory(self) + ffm2.ffm_fit(self.w0_, self.w_, self.V_, X, y, self.rank, + settings_dict) - self.w0_, self.w_, self.V_ = ffm.ffm_als_fit(self, X_train, y_train) - self.w0_ = np.array([self.w0_], dtype=np.float64) return self diff --git a/fastFM/base.py b/fastFM/base.py index dc4526e..19aaef2 100644 --- a/fastFM/base.py +++ b/fastFM/base.py @@ -4,6 +4,7 @@ import numpy as np import scipy.sparse as sp from scipy.stats import norm +from scipy.special import expit as sigmoid from sklearn.base import BaseEstimator, ClassifierMixin from sklearn.utils import check_random_state @@ -104,6 +105,7 @@ def __init__(self, n_iter=100, init_stdev=0.1, rank=8, random_state=123, self.step_size = 0 self.copy_X = copy_X + def predict(self, X_test): """ Return predictions @@ -126,7 +128,7 @@ def predict(self, X_test): class BaseFMClassifier(FactorizationMachine, ClassifierMixin): - def predict(self, X_test): + def predict(self, X_test, threshold=0.5): """ Return predictions Parameters @@ -139,6 +141,13 @@ def predict(self, X_test): y : array, shape (n_samples) Class labels """ + + if self.loss == "logistic": + y_proba = self.predict_proba(X_test) + y_binary = np.ones_like(y_proba, dtype=np.float64) + y_binary[y_proba < threshold] = -1 + return y_binary + y_proba = norm.cdf(super(BaseFMClassifier, self).predict(X_test)) # convert probs to labels y_pred = np.zeros_like(y_proba, dtype=np.float64) + self.classes_[0] @@ -158,5 +167,10 @@ def predict_proba(self, X_test): y : array, shape (n_samples) Class Probability for the class with smaller label. """ + + if self.loss == "logistic": + pred = ffm2.ffm_predict(self.w0_, self.w_, self.V_, X_test) + return sigmoid(pred) + pred = super(BaseFMClassifier, self).predict(X_test) return norm.cdf(pred) diff --git a/fastFM/sgd.py b/fastFM/sgd.py index 2ae8f87..74d179e 100644 --- a/fastFM/sgd.py +++ b/fastFM/sgd.py @@ -191,38 +191,3 @@ def fit(self, X, y): ffm2.ffm_fit(self.w0_, self.w_, self.V_, X, y, self.rank, settings_dict) return self - - def predict_proba(self, X_test): - """ Return probabilities - - Parameters - ---------- - X : scipy.sparse.csr_matrix, (n_samples, n_features) - - Returns - ------ - - y : array, shape (n_samples) - Class Probability for the class with smaller label. 
- """ - - pred = ffm2.ffm_predict(self.w0_, self.w_, self.V_, X_test) - return sigmoid(pred) - - def predict(self, X_test, threshold=0.5): - """ Return predictions - - Parameters - ---------- - X : scipy.sparse.csr_matrix, (n_samples, n_features) - - Returns - ------ - - y : array, shape (n_samples) - Class labels - """ - y_proba = self.predict_proba(X_test) - y_binary = np.ones_like(y_proba, dtype=np.float64) - y_binary[y_proba < threshold] = -1 - return y_binary From bda4cad62e0d54a6b884cc6fc487b1bf37289657 Mon Sep 17 00:00:00 2001 From: Immanuel Bayer Date: Fri, 21 Sep 2018 21:45:43 +0200 Subject: [PATCH 32/34] rm als core (C) code --- fastFM/cffm.pxd | 3 --- fastFM/ffm.pyx | 26 -------------------------- 2 files changed, 29 deletions(-) diff --git a/fastFM/cffm.pxd b/fastFM/cffm.pxd index 19c0f55..655d100 100644 --- a/fastFM/cffm.pxd +++ b/fastFM/cffm.pxd @@ -33,9 +33,6 @@ cdef extern from "../fastFM-core/include/ffm.h": void ffm_predict(double *w_0, double * w, double * V, cs_di *X, double *y_pred, int k) - void ffm_als_fit(double *w_0, double *w, double *V, - cs_di *X, double *y, ffm_param *param) - void ffm_mcmc_fit_predict(double *w_0, double *w, double *V, cs_di *X_train, cs_di *X_test, double *y_train, double *y_pred, ffm_param *param) diff --git a/fastFM/ffm.pyx b/fastFM/ffm.pyx index 9a43c3d..26ef1d2 100644 --- a/fastFM/ffm.pyx +++ b/fastFM/ffm.pyx @@ -88,32 +88,6 @@ def ffm_predict(double w_0, double[:] w, return y -def ffm_als_fit(fm, X, double[:] y): - assert X.shape[0] == len(y) # test shapes - n_features = X.shape[1] - X_ = CsMatrix(X) - pt_X = PyCapsule_GetPointer(X_, "CsMatrix") - param = FFMParam(fm) - pt_param = PyCapsule_GetPointer(param, "FFMParam") - cdef double w_0 - cdef np.ndarray[np.float64_t, ndim=1, mode='c'] w - cdef np.ndarray[np.float64_t, ndim=2, mode='c'] V - - if fm.warm_start: - w_0 = 0 if fm.ignore_w_0 else fm.w0_ - w = np.zeros(n_features, dtype=np.float64) if fm.ignore_w else fm.w_ - V = np.zeros((fm.rank, n_features), dtype=np.float64)\ - if fm.rank == 0 else fm.V_ - else: - w_0 = 0 - w = np.zeros(n_features, dtype=np.float64) - V = np.zeros((fm.rank, n_features), dtype=np.float64) - - cffm.ffm_als_fit(&w_0, w.data, V.data, - pt_X, &y[0], pt_param) - return w_0, w, V - - def ffm_fit_sgd_bpr(fm, X, np.ndarray[np.float64_t, ndim=2, mode='c'] pairs): n_features = X.shape[0] X_ = CsMatrix(X) From 8074170292fe563058b6d0df102ca5603de0784e Mon Sep 17 00:00:00 2001 From: Immanuel Bayer Date: Sun, 14 Oct 2018 22:55:32 +0200 Subject: [PATCH 33/34] update fastfm-core2 --- fastFM-core2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fastFM-core2 b/fastFM-core2 index dec8227..d28a678 160000 --- a/fastFM-core2 +++ b/fastFM-core2 @@ -1 +1 @@ -Subproject commit dec822708d015e3d16ee14aafcc3cc7d58306a3b +Subproject commit d28a678de422d3fb5593df114072764a53607b41 From 7b145d5799dcfd362670a1e5728097fb2304adc7 Mon Sep 17 00:00:00 2001 From: Immanuel Bayer Date: Sun, 14 Oct 2018 23:39:01 +0200 Subject: [PATCH 34/34] fix import --- fastFM/datasets.py | 2 +- fastFM/tests/test_mcmc.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fastFM/datasets.py b/fastFM/datasets.py index 5832155..2402069 100644 --- a/fastFM/datasets.py +++ b/fastFM/datasets.py @@ -43,7 +43,7 @@ def make_user_item_regression(random_state=123, n_user=20, n_item=20, if __name__ == '__main__': X, y, coef = make_user_item_regression(n_user=5, n_item=5, rank=2, label_stdev=2) - from sklearn.cross_validation import train_test_split + from sklearn.model_selection import 
train_test_split X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.33, random_state=42) diff --git a/fastFM/tests/test_mcmc.py b/fastFM/tests/test_mcmc.py index 095db9d..503455f 100644 --- a/fastFM/tests/test_mcmc.py +++ b/fastFM/tests/test_mcmc.py @@ -7,7 +7,7 @@ from fastFM import mcmc from fastFM.datasets import make_user_item_regression from sklearn.metrics import mean_squared_error -from sklearn.cross_validation import train_test_split +from sklearn.model_selection import train_test_split from sklearn.utils.testing import assert_almost_equal, assert_array_equal
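
With the series applied, both solvers are driven through the core2 bindings:
_init_parameter allocates w0_, w_ and V_, _settings_factory turns the
estimator's get_params() output into the settings dict consumed by
ffm2.ffm_fit (mapping n_iter to 'iter' and adding 'loss' and 'solver'), and
the logistic-loss classifiers return sigmoid probabilities plus {-1, 1}
labels. The snippet below is a minimal usage sketch of the refactored
estimators, in the spirit of the updated tests; the toy data and the concrete
hyperparameter values are illustrative assumptions, not taken from the
patches themselves.

    import numpy as np
    import scipy.sparse as sp
    from sklearn.metrics import r2_score, roc_auc_score

    from fastFM import als, sgd

    # Illustrative toy problem (assumption): 100 samples, 10 features.
    rng = np.random.RandomState(123)
    X = sp.csc_matrix(rng.rand(100, 10))
    y = rng.rand(100)

    # CD/ALS regression: n_iter is forwarded as the solver's 'iter' setting.
    reg = als.FMRegression(n_iter=50, init_stdev=0.1, rank=2,
                           l2_reg_w=0.01, l2_reg_V=0.02, random_state=123)
    reg.fit(X, y)
    print(r2_score(y, reg.predict(X)))

    # Warm start: continue from the current coefficients for 10 more iterations.
    reg.fit(X, y, n_more_iter=10)

    # SGD classification with the logistic loss; labels are expected in {-1, 1}.
    y_bin = np.where(y > np.median(y), 1.0, -1.0)
    clf = sgd.FMClassification(n_epoch=100, init_stdev=0.01, rank=2,
                               l2_reg_w=0.02, l2_reg_V=0.02, step_size=0.01,
                               random_state=123)
    clf.fit(sp.csr_matrix(X), y_bin)
    proba = clf.predict_proba(sp.csr_matrix(X))   # sigmoid of the raw FM score
    labels = clf.predict(sp.csr_matrix(X))        # thresholded at 0.5 by default
    print(roc_auc_score(y_bin, proba))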