-
Notifications
You must be signed in to change notification settings - Fork 12
/
Copy pathApriori.py
126 lines (93 loc) · 3.72 KB
/
Apriori.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
"""
Copyright (c) 2016 Ariel Barmat
Edited by Reynaldo John Tristan Mahinay Jr.
"""
from itertools import chain, combinations
# Open the data in .csv file format
def open_data(filename):
    """Lazily read a comma-separated transaction file.

    Each line of the file is one transaction.  Surrounding whitespace and
    trailing commas are removed before the line is split on ','.

    :param filename: path of the file to read.
    :return: a generator yielding one frozenset of item strings per line.
        A blank line yields ``frozenset([''])``.
    """
    # The 'U' flag of the former 'rU' mode was removed in Python 3.11;
    # plain text mode already applies universal-newline handling on
    # Python 3.  The ``with`` block closes the file once the generator is
    # exhausted or closed.
    with open(filename, 'r') as csv_file:
        for line in csv_file:
            fields = line.strip().rstrip(',').split(',')
            yield frozenset(fields)
def itemset_from_data(data):
    """Collect the transactions and the single-item candidate sets.

    :param data: iterable of rows, each row an iterable of items.
    :return: ``(itemset, transaction_list)`` where ``itemset`` is a set of
        one-element frozensets (one per distinct truthy item) and
        ``transaction_list`` is a list holding every row as a frozenset.
    """
    transaction_list = []
    singletons = set()
    for transaction in data:
        transaction_list.append(frozenset(transaction))
        # Falsy items (e.g. the empty string from a blank field) are not
        # turned into candidates, but the row itself is still recorded.
        singletons.update(
            frozenset([element]) for element in transaction if element
        )
    return singletons, transaction_list
def itemset_support(transaction_list, itemset, min_support=0):
    """Compute the support of each candidate and keep the frequent ones.

    The support of a candidate is the fraction of transactions that contain
    it as a subset.

    :param transaction_list: sequence of transactions (sets of items).
    :param itemset: iterable of candidate itemsets (set-like objects with
        an ``issubset`` method, hashable so they can be dict keys).
    :param min_support: candidates whose support is below this value are
        dropped.
    :return: dict mapping each retained candidate to its support.  With no
        transactions at all, support is undefined and an empty dict is
        returned.
    """
    total = len(transaction_list)
    if not total:
        # Avoid dividing by zero: nothing can be frequent without data.
        return {}
    supports = {}
    for candidate in itemset:
        hits = sum(1 for row in transaction_list if candidate.issubset(row))
        support = float(hits) / total
        if support >= min_support:
            supports[candidate] = support
    return supports
def frequent_itemset(transaction_list, c_itemset, min_support):
    """Grow frequent itemsets level by level until a level comes up empty.

    :param transaction_list: sequence of transactions.
    :param c_itemset: initial candidate itemsets (the size-1 candidates).
    :param min_support: minimum support passed to ``itemset_support``.
    :return: dict mapping every itemset that passed the support filter, at
        any size, to its support.
    """
    frequent = {}
    size = 1
    level = itemset_support(transaction_list, c_itemset, min_support)
    while level:
        frequent.update(level)
        size += 1
        # Candidates of the next size are built from the survivors of the
        # current level only.
        level = itemset_support(
            transaction_list, joinset(level, size), min_support
        )
    return frequent
def joinset(itemset, k):
    """Build the size-``k`` candidates by pairwise union.

    :param itemset: iterable of itemsets (iterated twice, so it must be
        re-iterable, e.g. a set or a dict keyed by itemsets).
    :param k: required number of items in each resulting candidate.
    :return: set of all unions of two members of ``itemset`` that contain
        exactly ``k`` items.
    """
    candidates = set()
    for left in itemset:
        for right in itemset:
            merged = left.union(right)
            if len(merged) == k:
                candidates.add(merged)
    return candidates
def subsets(itemset):
    """Iterate over every non-empty combination of the items.

    :param itemset: re-iterable collection of items.
    :return: an iterator of tuples, grouped by increasing length from 1 up
        to the number of items (so the last tuple holds every item).
    """
    by_size = []
    # enumerate is used purely as a counter: one combinations object per
    # possible length 1..n, created up front.
    for size, _ in enumerate(itemset, 1):
        by_size.append(combinations(itemset, size))
    return chain(*by_size)
# Generate Association Rules
def get_rules(f_itemset, min_confidence, min_lift):
    """Derive association rules X => Y from a table of frequent itemsets.

    For every itemset with at least two items, each non-empty proper subset
    X is tried as antecedent with Y = itemset - X as consequent:

        confidence(X => Y) = support(X u Y) / support(X)
        lift(X => Y)       = support(X u Y) / (support(X) * support(Y))
                           = confidence(X => Y) / support(Y)

    :param f_itemset: dict mapping frozenset itemsets to their support.  It
        must also contain every non-empty proper subset of each multi-item
        key, otherwise the lookup raises ``KeyError``.
    :param min_confidence: rules below this confidence are dropped.
    :param min_lift: rules below this lift are dropped.
    :return: list of ``(antecedent, consequent, confidence, lift)`` tuples,
        antecedent and consequent being frozensets.
    """
    rules = []
    for item, support in f_itemset.items():
        # Lengths 1 .. len(item) - 1 enumerate exactly the non-empty proper
        # subsets; the range is empty for single-item sets.
        for size in range(1, len(item)):
            for members in combinations(item, size):
                antecedent = frozenset(members)
                consequent = item.difference(antecedent)
                antecedent_support = f_itemset[antecedent]
                consequent_support = f_itemset[consequent]
                if not antecedent_support or not consequent_support:
                    # Confidence/lift are undefined for zero support.
                    continue
                confidence = float(support) / antecedent_support
                # Lift divides the confidence by support(Y) only; dividing
                # by support(X) again would overstate it.
                lift = confidence / consequent_support
                if confidence >= min_confidence and lift >= min_lift:
                    rules.append((antecedent, consequent, confidence, lift))
    return rules
# APRIORI ALGORITHM
def generate_itemsets_rules(data, min_support, min_confidence, min_lift):
    """Run the full pipeline on a transaction file and return the rules.

    :param data: path of the transaction file; it is handed to
        ``open_data``.
    :param min_support: minimum support for an itemset to be kept.
    :param min_confidence: minimum confidence for a rule to be kept.
    :param min_lift: minimum lift for a rule to be kept.
    :return: whatever ``get_rules`` returns for the frequent itemsets found
        in the file.
    """
    # Size-1 candidates plus the materialised list of transactions
    candidates, transactions = itemset_from_data(open_data(data))
    # Every itemset that meets the support threshold
    frequent = frequent_itemset(transactions, candidates, min_support)
    # Rules filtered by confidence and lift
    return get_rules(frequent, min_confidence, min_lift)
# Print the association rules
def print_result(rules):
    """Write the rules to standard output, one line per rule.

    :param rules: iterable of ``(antecedent, consequent, confidence, lift)``
        tuples.  Confidence is printed rounded to 5 decimals, lift to 3.

    The rules are passed through ``sorted`` keyed on the antecedent.  When
    antecedents are frozensets, ``<`` means "proper subset", so rules whose
    antecedents are unrelated keep their incoming relative order.
    """
    print('--Rules--')
    ordered = sorted(rules, key=lambda rule: rule[0])
    for rule in ordered:
        antecedent, consequent, confidence, lift = rule
        line = 'RULES: {} => {} : {} : {}'.format(
            tuple(antecedent),
            tuple(consequent),
            round(confidence, 5),
            round(lift, 3),
        )
        print(line)
# Split the rules into parallel column lists (e.g. for storing them)
def store_result(rules, frequent_itemset):
    """Convert the rules into four parallel lists.

    :param rules: iterable of ``(antecedent, consequent, confidence, lift)``
        tuples.
    :param frequent_itemset: not used by this function; kept so existing
        callers keep working.
    :return: ``(ant, cons, conf, lift)`` where ``ant`` and ``cons`` are
        lists of tuples, ``conf`` holds confidences rounded to 4 decimals
        and ``lift`` holds lifts rounded to 3 decimals.  Entry ``i`` of
        each list belongs to the same rule.
    """
    ant = []
    cons = []
    conf = []
    # Named differently from the loop variable below: reusing ``lift`` for
    # both rebinds the list to a float and breaks ``append``.
    lifts = []
    for antecedent, consequent, confidence, lift in sorted(rules, key=lambda rule: rule[0]):
        ant.append(tuple(antecedent))
        cons.append(tuple(consequent))
        conf.append(round(confidence, 4))
        lifts.append(round(lift, 3))
    return ant, cons, conf, lifts
def mine(csv, default_support=0.014, default_confidence=0.9, default_lift=1):
    """Mine association rules from a transaction file.

    :param csv: path of the comma-separated transaction file.
    :param default_support: minimum support for frequent itemsets.
    :param default_confidence: minimum confidence for a rule.
    :param default_lift: minimum lift for a rule.
    :return: the four parallel lists produced by ``store_result``
        (antecedents, consequents, confidences, lifts).
    """
    # generate_itemsets_rules opens the file itself and returns only the
    # list of rules, so it receives the path (not an already-opened
    # generator) and its result is not unpacked.
    rules = generate_itemsets_rules(
        csv, default_support, default_confidence, default_lift
    )
    # The frequent itemsets are not exposed by generate_itemsets_rules and
    # store_result does not read its second argument.
    return store_result(rules, None)