Skip to content

Commit 723db6e

Browse files
author
Immanuel Bayer
committed
init fastFM python wrapper
0 parents  commit 723db6e

15 files changed

+962
-0
lines changed

.gitignore

+17
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# C
2+
*.swp
3+
*.o
4+
*.a
5+
*.so
6+
*.zip
7+
# latex
8+
*.aux
9+
*.bbl
10+
*.blg
11+
*.dvi
12+
*.log
13+
*.toc
14+
# python
15+
*.pyc
16+
fastFM/ffm.c
17+
fastFM.egg-info/

.gitmodules

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
[submodule "fastFM-core"]
2+
path = fastFM-core
3+
url = git@github.com:ibayer/fastFM-core.git

Makefile

+10
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# Neither target produces a file with its own name, so both are phony.
.PHONY : all clean

# Build the fastFM-core C library first, then the Cython extension in place.
all:
	$(MAKE) -C fastFM-core/src/C lib
	python setup.py build_ext --inplace

# Remove build artefacts.  Every recipe line runs in its own shell, so a
# stand-alone `cd fastFM/` line would not affect the commands after it;
# the package directory is therefore spelled out in each path instead.
clean:
	rm -f *.so fastFM/*.so
	rm -rf build/
	rm -f fastFM/ffm.c

fastFM-core

Submodule fastFM-core added at c57dffc

fastFM/__init__.py

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
from .fastfm import FactorizationMachine
2+
from . import utils
3+
from . import transform

fastFM/cffm.pxd

+51
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
# cffm.pxd
2+
#
3+
# Declarations of "external" C functions and structures
4+
# distutils: include_dirs = /usr/include/suitesparse
5+
6+
# Sparse matrix struct and norm helper declared in the header "suitesparse/cs.h".
cdef extern from "suitesparse/cs.h":
7+
ctypedef struct cs_di: # matrix in compressed-column or triplet form
8+
int nzmax # maximum number of entries
9+
int m # number of rows
10+
int n # number of columns
11+
int *p # column pointers (size n+1) or col indices (size nzmax)
12+
int *i # row indices, size nzmax
13+
double *x # numerical values, size nzmax
14+
int nz # number of entries in triplet matrix, -1 for compressed-col
15+
16+
double cs_di_norm(const cs_di *X) # max colsum
17+
18+
# Structs and functions declared in the fastFM-core header fast_fm.h
# (path is relative to this file, inside the fastFM-core checkout).
cdef extern from "./../fastFM-core/src/C/fast_fm.h":
19+
ctypedef struct ffm_vector:
20+
int size
21+
double *data
22+
int owner # presumably flags whether the struct owns `data` -- TODO confirm in fast_fm.h
23+
double ffm_mean_squared_error(ffm_vector *a, ffm_vector *b)
24+
25+
ctypedef struct ffm_matrix: # row order array
26+
int size0 # number of rows
27+
int size1 # number of cols
28+
double *data # pointer to data array
29+
int owner # presumably flags whether the struct owns `data` -- TODO confirm in fast_fm.h
30+
double ffm_matrix_get(ffm_matrix * X, int i, int j)
31+
32+
# Solver settings handed to the fit functions below.
# NOTE(review): field meanings are not visible here; confirm against fast_fm.h.
ctypedef struct ffm_param:
33+
int n_iter
34+
int k
35+
double init_sigma
36+
int TASK
37+
int SOLVER
38+
double stepsize
39+
int rng_seed
40+
int n_comparison
41+
42+
# All model coefficients (w_0, w, V) are exchanged through raw double pointers.
void ffm_predict(double *w_0, double * w, double * V, cs_di *X, double *y_pred, int k)
43+
44+
# Takes the regularization weights lambda_w / lambda_V by value.
void ffm_fit(double *w_0, double *w, double *V,
45+
cs_di *X, double *y,
46+
double lambda_w, double lambda_V, ffm_param *param)
47+
48+
# Unlike ffm_fit, lambda_w / lambda_V are passed as pointers here, together
# with the additional pointer arguments alpha, mu_w and mu_V.
void ffm_mcmc_fit_predict(double *w_0, double *w, double *V,
49+
cs_di *X_train, cs_di *X_test, double *y_train, double *y_pred,
50+
double *lambda_w, double *lambda_V,
51+
double *alpha, double *mu_w, double *mu_V, ffm_param *param)

fastFM/fastfm.py

+194
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,194 @@
1+
import numpy as np
2+
import ffm
3+
import scipy.sparse as sp
4+
from scipy.stats import norm
5+
from sklearn.utils import assert_all_finite
6+
7+
8+
class FactorizationMachine(object):
    """Linear model combined with factorized coefficients for second order
    interactions between features.

    Parameters
    ----------
    max_iter : int, optional
        The number of samples for the MCMC sampler, number of iterations over
        the training set for ALS and number of steps for SGD.

    init_stdev : float, optional
        Spread (sigma) of the random initialization of the factorized
        parameters.

    solver : 'mcmc' | 'als' | 'sgd'
        Selects the solver. `als` and `sgd` are used through `fit` /
        `predict`; `mcmc` is only available through `fit_predict`.
        Note that for ranking (BPR) only `sgd` is implemented.

    task : 'regression' | 'classification' | 'ranking'
        Specifies the loss function, l2 loss for `regression`, a binary
        loss for `classification` (scores are mapped to probabilities with
        the normal CDF) and BPR for `ranking`.

    rank_pair : int
        The rank of the factorization used for the second order interactions.

    lambda_V : float
        L2 penalty weight for pairwise coefficients.

    lambda_w : float
        L2 penalty weight for linear coefficients.

    step_size : float
        Stepsize for the SGD solver, the solver uses a fixed step size and
        might require a tuning of the number of iterations `max_iter`.

    random_state : int, optional
        The seed of the pseudo random number generator that
        initializes the parameters and mcmc chain.

    Attributes
    ----------
    w0_ : float
        bias term

    w_ : float | array, shape = (n_features)
        Coefficients for linear combination.

    V_ : float | array, shape = (rank_pair, n_features)
        Coefficients of second order factor matrix.

    All three attributes are `None` until a model has been fitted.
    """

    def __init__(self, max_iter=100, init_stdev=0.1, solver='mcmc',
                 task='regression', rank_pair=0, lambda_V=1, lambda_w=1,
                 step_size=0.1, random_state=123):
        self.max_iter = max_iter
        self.random_state = random_state
        self.init_stdev = init_stdev
        self.solver = solver
        self.task = task
        self.step_size = step_size
        self.lambda_V = lambda_V
        self.lambda_w = lambda_w
        self.rank_pair = rank_pair
        # Fitted coefficients, filled in by fit() / fit_predict().
        self.w0_ = None
        self.w_ = None
        self.V_ = None

    @staticmethod
    def _check_binary_labels(y_train):
        """Assert that `y_train` holds exactly the two labels -1 and 1."""
        assert len(set(y_train)) == 2
        assert y_train.min() == -1
        assert y_train.max() == 1

    def fit(self, X_train, y_train):
        """ Fit model with specified loss.

        Parameters
        ----------
        X_train : scipy.sparse.csc_matrix, (n_samples, n_features)

        y_train : float | ndarray, shape = (n_samples, )

        Returns
        -------
        self : FactorizationMachine
            The fitted estimator, so calls can be chained.

        Raises
        ------
        Exception
            If `task` is unknown, or (via `_fit`) if the solver is `mcmc`
            or not implemented.
        """
        assert_all_finite(X_train)
        assert_all_finite(y_train)
        if self.task in ('classification', 'regression'):
            self._fit(X_train, y_train)
        elif self.task == 'ranking':
            assert y_train.max() <= X_train.shape[1]
            self.w0_, self.w_, self.V_ = ffm.ffm_fit_ranking(self,
                                                             X_train, y_train)
        else:
            raise Exception("task unknown")
        return self

    def _fit(self, X_train, y_train):
        """Run the als / sgd solver and store the fitted coefficients."""
        if self.task == 'classification':
            self._check_binary_labels(y_train)
        if self.solver in ('als', 'sgd'):
            assert sp.isspmatrix_csc(X_train)
            self.w0_, self.w_, self.V_ = ffm.ffm_fit(self, X_train, y_train)
        elif self.solver == 'mcmc':
            raise Exception("mcmc can only be used with fit_predict")
        else:
            raise Exception("solver not implemented")

    def predict(self, X_test):
        """ Return predictions

        Parameters
        ----------
        X_test : scipy.sparse.csc_matrix, (n_samples, n_features)

        Returns
        -------
        T : array, shape (n_samples)
            Raw predictions for regression, the argsort of the predictions
            for ranking, and the labels -1 / 1 for classification.
        """
        assert_all_finite(X_test)
        assert sp.isspmatrix_csc(X_test)
        assert X_test.shape[1] == len(self.w_)
        pred = ffm.ffm_predict(self.w0_, self.w_, self.V_, X_test)
        if self.task == 'regression':
            return pred
        if self.task == 'ranking':
            return np.argsort(pred)
        y_pred = norm.cdf(pred)
        # convert probs to labels
        y_pred[y_pred < 0.5] = -1
        y_pred[y_pred >= 0.5] = 1
        return y_pred

    def predict_proba(self, X_test):
        """ Return probabilities

        Parameters
        ----------
        X_test : scipy.sparse.csc_matrix, (n_samples, n_features)

        Returns
        -------
        T : array, shape (n_samples)
            Class Probabilities

        Raises
        ------
        Exception
            If the model was configured for regression.
        """
        assert_all_finite(X_test)
        assert sp.isspmatrix_csc(X_test)
        if self.task == 'regression':
            raise Exception("Regression model can't return probabilities")
        # Same feature-count guard as predict(): the coefficients are handed
        # to the compiled predictor together with X_test.
        assert X_test.shape[1] == len(self.w_)
        return norm.cdf(ffm.ffm_predict(self.w0_, self.w_, self.V_, X_test))

    def fit_predict(self, X_train, y_train, X_test):
        """Return average of posterior estimates of the test samples.
        Use only with MCMC!

        Parameters
        ----------
        X_train : scipy.sparse.csc_matrix, (n_samples, n_features)

        y_train : array, shape (n_samples)

        X_test : scipy.sparse.csc_matrix, (n_test_samples, n_features)

        Returns
        -------
        T : array, shape (n_test_samples)

        Raises
        ------
        Exception
            If the solver is not `mcmc`.
        """
        if self.task == 'classification':
            self._check_binary_labels(y_train)
        assert_all_finite(X_train)
        assert_all_finite(X_test)
        assert_all_finite(y_train)
        # Both matrices are documented as CSC and go to the same solver call.
        assert sp.isspmatrix_csc(X_train)
        assert sp.isspmatrix_csc(X_test)
        assert X_train.shape[1] == X_test.shape[1]
        assert X_train.shape[0] == len(y_train)
        if self.solver == 'mcmc':
            coef, y_pred = ffm.ffm_mcmc_fit_predict(self, X_train,
                                                    X_test, y_train)
            self.w0_, self.w_, self.V_ = coef
            return y_pred
        else:
            raise Exception("use only with mcmc")

0 commit comments

Comments
 (0)