-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathinsert_mysql_flat.py
64 lines (52 loc) · 1.92 KB
/
insert_mysql_flat.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#!/usr/bin/python
import sys
import csv
import json
import MySQLdb
import operator
# Load the similarity matrix CSV and insert one flattened row per subreddit
# into db1.similar_subreddits: the main subreddit name followed by TOP_N
# similar-subreddit names, TOP_N similarity scores and TOP_N sizes.

# Number of similar subreddits stored per row.  The INSERT placeholder count
# below is derived from this value so the two cannot drift apart.
TOP_N = 50


def _load_json(path):
    """Return the parsed JSON document at *path*, closing the file afterwards."""
    with open(path) as handle:
        return json.load(handle)


def _build_record(main_subreddit, sim_fields, invhash, sizes, top_n=TOP_N):
    """Flatten one CSV row into the parameter tuple for the INSERT statement.

    Args:
        main_subreddit: name of the subreddit the row describes.
        sim_fields: the row's remaining columns, each an "<id>:<similarity>"
            string.
        invhash: mapping from integer subreddit id to subreddit name.
        sizes: mapping from subreddit name to the size value stored with it.
        top_n: how many of the highest-similarity entries to keep (must be
            at least 1).

    Returns:
        A tuple of length 1 + 3 * top_n: the main subreddit, then top_n
        names, top_n similarities and top_n sizes, ordered by descending
        similarity and padded with None when the row has fewer than top_n
        entries.

    Raises:
        KeyError: if an id is missing from *invhash* or a name from *sizes*.
        ValueError, IndexError: if a field is not of the form "<int>:<float>".
    """
    scored = []
    for field in sim_fields:
        parts = field.split(':')
        name = invhash[int(parts[0])]
        scored.append((name, float(parts[1]), sizes[name]))
    # sorted() is stable, so entries with equal similarity keep file order.
    scored = sorted(scored, key=operator.itemgetter(1), reverse=True)[:top_n]
    # Pad short rows so every record has exactly top_n entries per group.
    scored.extend([(None, None, None)] * (top_n - len(scored)))
    # Unpacking (rather than indexing) zip's result works whether zip
    # returns a list or an iterator.
    names, similarities, entry_sizes = zip(*scored)
    return (main_subreddit,) + names + similarities + entry_sizes


subreddit_hash = _load_json('data/subreddit_hash.json')
# Inverse mapping: subreddit id -> subreddit name.  .items() is available on
# both Python 2 and Python 3 dicts.
subreddit_invhash = {v: k for k, v in subreddit_hash.items()}
size_hash = _load_json('data/size_hash.json')

# One placeholder for the main subreddit plus three groups of TOP_N values.
insert_sql = ("INSERT INTO db1.similar_subreddits VALUES ("
              + ",".join(["%s"] * (1 + 3 * TOP_N)) + ")")

db = MySQLdb.connect(host="localhost",  # your host, usually localhost
                     user="web",  # your username
                     passwd="",  # your password
                     db="db1")  # name of the data base
try:
    cur = db.cursor()
    with open('data/similarity_matrix.csv', 'r') as f:
        for line in csv.reader(f):
            # csv.reader yields an empty list for a blank line.
            if not line:
                continue
            main_subreddit = subreddit_invhash[int(line[0])]
            sim_subreddits = line[1:]
            # Skip subreddits with no similarity data: either no further
            # columns at all, or an empty first similarity column.
            if not sim_subreddits or sim_subreddits[0] == '':
                continue
            record = _build_record(main_subreddit, sim_subreddits,
                                   subreddit_invhash, size_hash)
            cur.execute(insert_sql, record)
    # Commit once, after every row has been inserted successfully.
    db.commit()
finally:
    # Always release the connection, including when a row fails part-way.
    db.close()