Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ENH] Unified clustering API #3814

Merged
merged 5 commits into from
Jun 21, 2019
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Previous commit
Next commit
Clustering: Fixed tests
PrimozGodec committed Jun 21, 2019
commit 857a29ab27e545873fe6bc94c8306df6948a6a24
41 changes: 11 additions & 30 deletions Orange/tests/test_clustering_dbscan.py
Original file line number Diff line number Diff line change
@@ -3,45 +3,26 @@

import unittest

import Orange
import numpy as np

from Orange.data import Table
from Orange.clustering.dbscan import DBSCAN


class TestDBSCAN(unittest.TestCase):

@classmethod
def setUpClass(cls):
cls.iris = Orange.data.Table('iris')
def setUp(self):
self.iris = Table('iris')
self.dbscan = DBSCAN()

def test_dbscan_parameters(self):
dbscan = DBSCAN(eps=0.1, min_samples=7, metric='euclidean',
algorithm='auto', leaf_size=12, p=None)
c = dbscan(self.iris)
dbscan(self.iris)

def test_predict_table(self):
dbscan = DBSCAN()
c = dbscan(self.iris)
table = self.iris[:20]
p = c(table)
pred = self.dbscan(self.iris)
self.assertEqual(np.ndarray, type(pred))

def test_predict_numpy(self):
dbscan = DBSCAN()
c = dbscan(self.iris)
X = self.iris.X[::20]
p = c(X)

def test_values(self):
dbscan = DBSCAN(eps=1) # it clusters data in two classes
c = dbscan(self.iris)
table = self.iris
p = c(table)

self.assertEqual(2, len(p.domain[0].values))
self.assertSetEqual({"0", "1"}, set(p.domain[0].values))

table.X[0] = [100, 100, 100, 100] # we add a big outlier

p = c(table)

self.assertEqual(3, len(p.domain[0].values))
self.assertSetEqual({"-1", "0", "1"}, set(p.domain[0].values))
model = self.dbscan.fit(self.iris.X)
self.assertEqual(np.ndarray, type(model.labels))
49 changes: 13 additions & 36 deletions Orange/tests/test_clustering_kmeans.py
Original file line number Diff line number Diff line change
@@ -11,55 +11,32 @@


class TestKMeans(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.iris = Orange.data.Table('iris')
def setUp(self):
self.kmeans = KMeans(n_clusters=2)
self.iris = Orange.data.Table('iris')

def test_kmeans(self):
kmeans = KMeans(n_clusters=2)
c = kmeans(self.iris)
X = self.iris.X[:20]
p = c(X)
c = self.kmeans(self.iris)
# First 20 iris belong to one cluster
assert len(set(p.ravel())) == 1
self.assertEqual(1, len(set(c[:20].ravel())))

def test_kmeans_parameters(self):
kmeans = KMeans(n_clusters=10,
max_iter=10,
random_state=42,
tol=0.001,
init='random',
compute_silhouette_score=True)
c = kmeans(self.iris)

def test_predict_single_instance(self):
kmeans = KMeans()
c = kmeans(self.iris)
inst = self.iris[0]
p = c(inst)
kmeans = KMeans(n_clusters=10, max_iter=10, random_state=42, tol=0.001,
init='random')
kmeans(self.iris)

def test_predict_table(self):
kmeans = KMeans()
c = kmeans(self.iris)
table = self.iris[:20]
p = c(table)
self.assertEqual(np.ndarray, type(c))

def test_predict_numpy(self):
kmeans = KMeans()
c = kmeans(self.iris)
X = self.iris.X[::20]
p = c(X)
c = kmeans.fit(self.iris.X)
self.assertEqual(np.ndarray, type(c.labels))

def test_predict_sparse(self):
kmeans = KMeans()
self.iris.X = csc_matrix(self.iris.X[::20])
c = kmeans(self.iris)
X = csc_matrix(self.iris.X[::20])
p = c(X)

def test_silhouette_sparse(self):
"""Test if silhouette gets calculated for sparse data"""
kmeans = KMeans(compute_silhouette_score=True)
sparse_iris = self.iris.copy()
sparse_iris.X = csc_matrix(sparse_iris.X)
c = kmeans(sparse_iris)
self.assertFalse(np.isnan(c.silhouette))
self.assertEqual(np.ndarray, type(c))
20 changes: 6 additions & 14 deletions Orange/tests/test_louvain.py
Original file line number Diff line number Diff line change
@@ -8,19 +8,11 @@
from Orange.clustering.louvain import Louvain


class TestSVMLearner(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.data = Table('iris')
cls.louvain = Louvain()
class TestLouvain(unittest.TestCase):
def setUp(self):
self.data = Table('iris')
self.louvain = Louvain()

def test_orange_table(self):
self.assertIsNone(self.louvain.fit(self.data))
clusters = self.louvain.fit_predict(self.data)
self.assertIn(type(clusters), [list, np.ndarray])

def test_np_array(self):
data_np = self.data.X
self.assertIsNone(self.louvain.fit(data_np))
clusters = self.louvain.fit_predict(data_np)
self.assertIn(type(clusters), [list, np.ndarray])
labels = self.louvain(self.data)
self.assertEqual(np.ndarray, type(labels))