-
Notifications
You must be signed in to change notification settings - Fork 0
/
distances.py
78 lines (57 loc) · 1.45 KB
/
distances.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import math
class SenLen(object):
    """Wrap the length of a sized object so lengths can be compared directly.

    The only state is ``val``, the ``len()`` of the object passed to the
    constructor. Comparison and subtraction operate on ``val``.
    """

    def __init__(self, a):
        """Store ``len(a)`` as ``self.val``."""
        self.val = len(a)

    def __eq__(self, b):
        """Return True when both wrappers hold the same length."""
        # Defer to Python's default handling for foreign types so that
        # comparisons such as ``SenLen(x) == None`` stay False, not an error.
        if not isinstance(b, SenLen):
            return NotImplemented
        return self.val == b.val

    def __ne__(self, b):
        """Return True when the two wrapped lengths differ."""
        return self.val != b.val

    def __hash__(self):
        """Hash on the stored length, consistent with ``__eq__``."""
        return hash(self.val)

    def __sub__(self, b):
        """Return the signed difference of the lengths as a plain number."""
        return self.val - b.val

    def __gt__(self, b):
        """Return True when this length is strictly greater than ``b``'s."""
        return self.val > b.val

    # The original spelling ``__gr__`` is not a special method, so ``>`` never
    # reached it. Keep the name as an alias for any caller invoking it directly.
    __gr__ = __gt__

    def maxi(self, a):
        """Return the larger of the two wrapped lengths as a plain number."""
        return max(self.val, a.val)

    def __str__(self):
        """Return the stored length formatted as a decimal integer."""
        return '%d' % self.val
def hamming(string1, string2):
    """Return a Hamming-style similarity between two strings, in [0, 1].

    Positions are compared pairwise over the shared prefix length; every
    mismatching position counts as one difference, and every character by
    which one string is longer than the other counts as one more. The
    difference count is divided by the longer length and subtracted from 1,
    so 1.0 means identical and 0.0 means no position matches.

    Two empty strings are treated as identical and yield 1.0.
    """
    len1 = len(string1)
    len2 = len(string2)
    longest = max(len1, len2)

    # Nothing to compare: avoid dividing by zero and report a perfect match.
    if longest == 0:
        return 1.0

    # Surplus characters of the longer string are all counted as differences.
    diffs = abs(len1 - len2)
    diffs += sum(1 for c1, c2 in zip(string1, string2) if c1 != c2)

    # float() keeps this a true division regardless of interpreter version.
    return 1 - (float(diffs) / longest)
def euclidean(string1, string2):
    """Return a Euclidean-distance-based similarity between two strings.

    Each string is converted to its list of code points (``ord``). The shorter
    list is padded with ``ord('a')`` to the length of the longer one, the
    Euclidean distance between the two lists is computed, and the result is
    ``1 - distance / (26 * longer_length)``.

    Identical strings yield 1.0. Two empty strings are treated as identical
    and also yield 1.0.

    NOTE(review): the factor 26 presumably reflects the lowercase alphabet;
    code points further than 26 apart can push the result below 0 -- confirm
    the expected input range with callers.
    """
    codes1 = [ord(char) for char in string1]
    codes2 = [ord(char) for char in string2]
    longest = max(len(codes1), len(codes2))

    # Nothing to compare: avoid dividing by zero and report a perfect match.
    if longest == 0:
        return 1.0

    # Pad the shorter sequence; multiplying by a zero count is a no-op, so the
    # longer (or equal-length) sequence is left unchanged.
    pad = ord('a')
    codes1 += [pad] * (longest - len(codes1))
    codes2 += [pad] * (longest - len(codes2))

    res = math.sqrt(sum((t1 - t2) ** 2 for t1, t2 in zip(codes1, codes2)))
    max_distance = 26 * longest
    return 1 - (res / max_distance)
def manhattan(string1, string2):
    """Return a Manhattan-distance-based similarity between two strings.

    Each string is converted to its list of code points (``ord``). The shorter
    list is padded with ``ord('a')`` to the length of the longer one, the sum
    of absolute per-position differences is computed, and the result is
    ``1 - distance / (26 * longer_length)``.

    Identical strings yield 1.0. Two empty strings are treated as identical
    and also yield 1.0.

    NOTE(review): the factor 26 presumably reflects the lowercase alphabet;
    code points further than 26 apart can push the result below 0 -- confirm
    the expected input range with callers.
    """
    codes1 = [ord(char) for char in string1]
    codes2 = [ord(char) for char in string2]
    longest = max(len(codes1), len(codes2))

    # Nothing to compare: avoid dividing by zero and report a perfect match.
    if longest == 0:
        return 1.0

    # Pad the shorter sequence; multiplying by a zero count is a no-op, so the
    # longer (or equal-length) sequence is left unchanged.
    pad = ord('a')
    codes1 += [pad] * (longest - len(codes1))
    codes2 += [pad] * (longest - len(codes2))

    res = sum(abs(t1 - t2) for t1, t2 in zip(codes1, codes2))
    max_distance = 26 * longest
    # float() keeps this a true division regardless of interpreter version.
    return 1 - (float(res) / max_distance)