
Commit 4b0ecfe

Merge pull request #1 from jdagilliland/master

Repo cleanup

2 parents f41d89d + 7c45690, commit 4b0ecfe


51 files changed: +3487 -3458 lines

.gitignore
+1 line

@@ -0,0 +1 @@
+*.pyc

PCV/__init__.pyc

-337 Bytes
Binary file not shown.

PCV/classifiers/__init__.pyc

-148 Bytes
Binary file not shown.

PCV/classifiers/bayes.py
+51 -51

@@ -3,61 +3,61 @@

The 51 removed lines and the 51 added lines carry identical text, so this hunk is a whitespace-only cleanup (the rendered view does not show which whitespace changed). The resulting code, re-indented for readability:

class BayesClassifier(object):

    def __init__(self):
        """ Initialize classifier with training data. """

        self.labels = []    # class labels
        self.mean = []      # class mean
        self.var = []       # class variances
        self.n = 0          # nbr of classes

    def train(self,data,labels=None):
        """ Train on data (list of arrays n*dim).
            Labels are optional, default is 0...n-1. """

        if labels==None:
            labels = range(len(data))
        self.labels = labels
        self.n = len(labels)

        for c in data:
            self.mean.append(mean(c,axis=0))
            self.var.append(var(c,axis=0))

    def classify(self,points):
        """ Classify the points by computing probabilities
            for each class and return most probable label. """

        # compute probabilities for each class
        est_prob = array([gauss(m,v,points) for m,v in zip(self.mean,self.var)])

        print 'est prob',est_prob.shape,self.labels
        # get index of highest probability, this gives class label
        ndx = est_prob.argmax(axis=0)

        est_labels = array([self.labels[n] for n in ndx])

        return est_labels, est_prob


def gauss(m,v,x):
    """ Evaluate Gaussian in d-dimensions with independent
        mean m and variance v at the points in (the rows of) x.
        http://en.wikipedia.org/wiki/Multivariate_normal_distribution """

    if len(x.shape)==1:
        n,d = 1,x.shape[0]
    else:
        n,d = x.shape

    # covariance matrix, subtract mean
    S = diag(1/v)
    x = x-m
    # product of probabilities
    y = exp(-0.5*diag(dot(x,dot(S,x.T))))

    # normalize and return
    return y * (2*pi)**(-d/2.0) / ( sqrt(prod(v)) + 1e-6)
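For reference (not part of the diff): with independent components, the density that gauss() evaluates for each input row is the diagonal-covariance multivariate normal, apart from the 1e-6 term added to the normalizer for numerical safety:

    p(x) = (2\pi)^{-d/2} \Big( \prod_{i=1}^{d} v_i \Big)^{-1/2} \exp\Big( -\frac{1}{2} \sum_{i=1}^{d} \frac{(x_i - m_i)^2}{v_i} \Big)

A minimal usage sketch (hypothetical data, not part of this commit), assuming the PCV package is on the path and NumPy is installed; Python 2 is required because the module still uses the print statement:

    # toy data: two 2-D Gaussian clusters with different means
    from numpy import random, vstack
    from PCV.classifiers.bayes import BayesClassifier

    class_1 = random.randn(200, 2) + [5, 5]
    class_2 = random.randn(200, 2)

    bc = BayesClassifier()
    bc.train([class_1, class_2], [1, -1])

    # classify held-out points; classify() returns (labels, per-class probabilities)
    test = vstack((random.randn(5, 2) + [5, 5], random.randn(5, 2)))
    labels, prob = bc.classify(test)
    print(labels)   # expect five 1s followed by five -1s (with high probability)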

PCV/classifiers/knn.py
+27 -27

@@ -1,35 +1,35 @@

As with bayes.py, the 27 removed lines and the 27 added lines carry identical text, so this hunk is also a whitespace-only cleanup. The resulting code, re-indented for readability:

from numpy import *

class KnnClassifier(object):

    def __init__(self,labels,samples):
        """ Initialize classifier with training data. """

        self.labels = labels
        self.samples = samples

    def classify(self,point,k=3):
        """ Classify a point against k nearest
            in the training data, return label. """

        # compute distance to all training points
        dist = array([L2dist(point,s) for s in self.samples])

        # sort them
        ndx = dist.argsort()

        # use dictionary to store the k nearest
        votes = {}
        for i in range(k):
            label = self.labels[ndx[i]]
            votes.setdefault(label,0)
            votes[label] += 1

        return max(votes)


def L2dist(p1,p2):
    return sqrt( sum( (p1-p2)**2) )

def L1dist(v1,v2):
    return sum(abs(v1-v2))
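A minimal usage sketch for the k-NN classifier (hypothetical data, not part of this commit), assuming the PCV package is on the path and NumPy is installed. One detail worth knowing when choosing labels: max(votes) takes the maximum over the dictionary's keys, i.e. the largest label value, not the label with the most votes, so the sketch keeps the two classes well separated:

    # toy data: two 2-D Gaussian clusters with different means
    from numpy import random, vstack
    from PCV.classifiers.knn import KnnClassifier

    class_1 = random.randn(100, 2) + [5, 5]
    class_2 = random.randn(100, 2)

    samples = vstack((class_1, class_2))
    labels = [1] * 100 + [-1] * 100

    knn = KnnClassifier(labels, samples)
    print(knn.classify(class_1[0], k=3))   # expect 1 for a point drawn near (5, 5)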

PCV/clustering/__init__.pyc

-147 Bytes
Binary file not shown.
