diff --git a/glove/glove.py b/glove/glove.py
index d2340b4..3c298bf 100644
--- a/glove/glove.py
+++ b/glove/glove.py
@@ -43,6 +43,8 @@ def __init__(self, no_components=30, learning_rate=0.05,
         self.dictionary = None
         self.inverse_dictionary = None
+
+        self.global_loss = None
 
     def fit(self, matrix, epochs=5, no_threads=2, verbose=False):
         """
@@ -86,19 +88,22 @@ def fit(self, matrix, epochs=5, no_threads=2, verbose=False):
             # Shuffle the coocurrence matrix
             np.random.shuffle(shuffle_indices)
-
-            fit_vectors(self.word_vectors,
-                        self.vectors_sum_gradients,
-                        self.word_biases,
-                        self.biases_sum_gradients,
-                        matrix.row,
-                        matrix.col,
-                        matrix.data,
-                        shuffle_indices,
-                        self.learning_rate,
-                        self.max_count,
-                        self.alpha,
-                        int(no_threads))
+
+            self.global_loss = fit_vectors(self.word_vectors,
+                                           self.vectors_sum_gradients,
+                                           self.word_biases,
+                                           self.biases_sum_gradients,
+                                           matrix.row,
+                                           matrix.col,
+                                           matrix.data,
+                                           shuffle_indices,
+                                           self.learning_rate,
+                                           self.max_count,
+                                           self.alpha,
+                                           int(no_threads))
+
+            if verbose:
+                print('Global loss: %.5f' % self.global_loss)
 
         if not np.isfinite(self.word_vectors).all():
             raise Exception('Non-finite values in word vectors. '
diff --git a/glove/glove_cython.pyx b/glove/glove_cython.pyx
index d67f036..a5ddc27 100644
--- a/glove/glove_cython.pyx
+++ b/glove/glove_cython.pyx
@@ -50,6 +50,9 @@ def fit_vectors(double[:, ::1] wordvec,
     # Loss and gradient variables.
     cdef double prediction, entry_weight, loss
+
+    # Global loss accumulator and the unweighted loss term.
+    cdef double global_loss = 0.0, loss_unweighted
 
     # Iteration variables
     cdef int i, j, shuffle_index
@@ -74,7 +77,12 @@ def fit_vectors(double[:, ::1] wordvec,
             # Compute loss and the example weight.
             entry_weight = double_min(1.0, (count / max_count)) ** alpha
-            loss = entry_weight * (prediction - c_log(count))
+
+            loss_unweighted = prediction - c_log(count)
+            loss = entry_weight * loss_unweighted
+
+            # Update the weighted global loss
+            global_loss += 0.5 * loss * loss_unweighted
 
             # Update step: apply gradients and reproject
             # onto the unit sphere.
@@ -100,7 +108,8 @@ def fit_vectors(double[:, ::1] wordvec,
             learning_rate = initial_learning_rate / sqrt(wordbias_sum_gradients[word_b])
             wordbias[word_b] -= learning_rate * loss
             wordbias_sum_gradients[word_b] += loss ** 2
-
+
+    return global_loss
 
 def transform_paragraph(double[:, ::1] wordvec,
                         double[::1] wordbias,
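
For reference, the quantity that fit_vectors now accumulates and returns is the weighted GloVe objective summed over all observed cooccurrence entries: 0.5 * f(X_ij) * (prediction - log X_ij)^2, where f is the entry weight derived from max_count and alpha. Below is a minimal single-threaded NumPy sketch of the same computation; the helper name and the prediction formula dot(w_i, w_j) + b_i + b_j are assumptions for illustration, not part of this patch.

import numpy as np

def reference_global_loss(word_vectors, word_biases, row, col, counts,
                          max_count=100.0, alpha=0.75):
    # Hypothetical reference version of the loss that fit_vectors
    # accumulates; assumes prediction = w_i . w_j + b_i + b_j.
    total = 0.0
    for i, j, count in zip(row, col, counts):
        prediction = (np.dot(word_vectors[i], word_vectors[j])
                      + word_biases[i] + word_biases[j])
        entry_weight = min(1.0, count / max_count) ** alpha
        total += 0.5 * entry_weight * (prediction - np.log(count)) ** 2
    return total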
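
With the patch applied, the per-epoch loss can be monitored via verbose=True. A usage sketch, assuming the package exposes Glove at the top level and that fit accepts a scipy.sparse.coo_matrix of cooccurrence counts (which the matrix.row / matrix.col / matrix.data accesses above suggest):

import numpy as np
import scipy.sparse as sp
from glove import Glove

# Tiny symmetric cooccurrence matrix for four words, purely illustrative.
cooccurrences = sp.coo_matrix(np.array([[0., 2., 1., 0.],
                                        [2., 0., 3., 1.],
                                        [1., 3., 0., 2.],
                                        [0., 1., 2., 0.]]))

model = Glove(no_components=30, learning_rate=0.05)

# Prints 'Global loss: ...' once per epoch; a steadily decreasing
# value is a quick sanity check that training is converging.
model.fit(cooccurrences, epochs=10, no_threads=2, verbose=True)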