-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapp_tracker_counter.py
113 lines (92 loc) · 2.66 KB
/
app_tracker_counter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import json
import re
import csv
import os
from collections import defaultdict
# Output CSVs, one per tally.  They are opened in append mode, so rows from
# a re-run are added after whatever the files already contain.
# NOTE(review): confirm appending (rather than overwriting) is intended.
outfile_app_children = open('app_children_naive.csv', 'a')
outfile_app_parents = open('app_parents_naive.csv', 'a')
outfile_app_children_prom = open('app_children_prom.csv', 'a')
outfile_app_parents_prom = open('app_parents_prom.csv', 'a')

# Per-app tracker data; the script later iterates it as data[apk] -> trackers.
with open('5k_static_2.0.json') as data_file:
    data = json.load(data_file)

# Company details, consulted by getParent() as companyinfo[co]['parent'].
# A context manager is used so the file handle is closed once parsed
# (previously the handle was rebound to the parsed dict and never closed).
with open('../curated/company_details.json', 'rb') as company_file:
    companyinfo = json.load(company_file)

# get the app downloads data to estimate rank
with open('top5k-apps-estimated-ranks.csv') as f:
    reader = csv.reader(f)
    rows = list(reader)

# Map column 0 (matched against the apk keys of `data` later) to column 3
# (later parsed with float() as the rank).  Rows too short to carry a
# fourth column, such as blank lines, are skipped instead of raising
# IndexError.
app_ranks = {}
for row in rows:
    if len(row) > 3:
        app_ranks[row[0]] = row[3]

# Tallies keyed by tracker ("child") or by the value getParent() returns
# ("parent").  The *_prom variants accumulate prominence weights instead of
# plain counts.
child_counts = defaultdict(int)
parent_counts = defaultdict(int)
child_counts_prom = defaultdict(int)
parent_counts_prom = defaultdict(int)
def getParent(co, info=None):
    """Return the parent recorded for ``co``, or ``co`` itself.

    ``co`` is returned unchanged when it has no entry in the lookup table,
    when its entry has no ``'parent'`` key, or when the recorded parent is
    empty/None.  Previously an unknown ``co`` made the function fall off
    the end and return None, so every unknown tracker was tallied under a
    single ``None`` key by the callers.

    Args:
        co: key to look up (the script passes tracker names).
        info: optional mapping of ``key -> {'parent': ...}``.  Defaults to
            the module-level ``companyinfo`` so existing one-argument
            calls behave as before.

    Returns:
        The non-empty ``'parent'`` value for ``co`` if there is one,
        otherwise ``co``.
    """
    if info is None:
        info = companyinfo
    entry = info.get(co)
    if not entry:
        return co
    # `or co` covers both a missing 'parent' key and an empty/None parent.
    return entry.get('parent') or co
# Rank assumed for apps missing from app_ranks: the mid-point rank.
# Kept as a float so the weight is computed with true division on Python 2
# too; with the int literal 2500, `1 * 1/rank` evaluated to 0 there and
# unranked apps contributed nothing to the weighted tallies.
MIDPOINT_RANK = 2500.0


def _prominence_weight(apk):
    """Return 1/rank for ``apk``; MIDPOINT_RANK is used if rank is unknown."""
    if apk in app_ranks:
        rank = float(app_ranks[apk])
    else:
        # if rank is unknown, assign mid-point rank
        rank = MIDPOINT_RANK
    return 1.0 / rank


def _write_counts(outfile, counts):
    """Write ``counts`` to ``outfile`` as ``key,value`` lines, then close it."""
    try:
        for key, value in counts.items():
            outfile.write('%s,%s\n' % (key, value))
    finally:
        # Close explicitly so buffered rows are flushed to disk.
        outfile.close()


# One pass over the apps fills all four tallies (previously four separate
# loops over `data` repeated the same lookups).
for apk in data:
    trackers = data[apk]
    print(trackers)
    prominence_weight = _prominence_weight(apk)

    # children: every tracker entry of the app is counted as-is
    for tracker in trackers:
        child_counts[tracker] += 1
        child_counts_prom[tracker] += prominence_weight

    # parents: each distinct parent is counted once per app
    tracker_parents = set(getParent(tracker) for tracker in trackers)
    for tracker_parent in tracker_parents:
        parent_counts[tracker_parent] += 1
        parent_counts_prom[tracker_parent] += prominence_weight

# Diagnostic output, in the same order the original script printed it.
if data:
    # `trackers` only exists if the loop ran; guard avoids a NameError on
    # empty input.
    print(trackers)
print(child_counts)
print(child_counts_prom)
print(parent_counts)
print(parent_counts_prom)

_write_counts(outfile_app_children, child_counts)
_write_counts(outfile_app_parents, parent_counts)
_write_counts(outfile_app_children_prom, child_counts_prom)
_write_counts(outfile_app_parents_prom, parent_counts_prom)

print(app_ranks)