-
Notifications
You must be signed in to change notification settings - Fork 0
/
connected.py
executable file
·40 lines (31 loc) · 1.32 KB
/
connected.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import numpy as np
import set_compiler
set_compiler.install()
import pyximport
pyximport.install()
import CC
import timer
# links.npz can be downloaded from https://s3.amazonaws.com/Harvard-CS205/wikipedia/links.npz
# It is a (130148107, 2) array of np.uint32_t, generated by preprocess_to_npz.py
# Self-links have been removed.
# link names can be downloaded from https://s3.amazonaws.com/Harvard-CS205/wikipedia/titles-sorted.txt
# Note that the code below converts link indices from 1-indexed to 0-indexed.
if __name__ == '__main__':
    # Benchmark driver: load the link graph and the node titles, then run
    # CC.connected_components ten times, reporting the number of distinct
    # labels per run and the accumulated time across all runs.
    with timer.Timer() as t:
        print("Reading links")
        # Close the .npz archive as soon as the array has been extracted.
        # Subtracting 1 converts link indices from 1-indexed to 0-indexed.
        with np.load('links.npz') as archive:
            links = archive['links'] - 1
        print(" {} links in graph".format(links.shape[0]))

        print("Reading names")
        # Use a context manager so the titles file is closed deterministically
        # instead of being left to the garbage collector.
        with open('titles-sorted.txt') as titles_file:
            names = [line.strip() for line in titles_file]
        print(" {} nodes in graph".format(len(names)))
    # NOTE(review): `interval` is read after the timed block has exited --
    # presumably timer.Timer sets it on exit; confirm against timer.py.
    print("Loading: {} seconds\n".format(t.interval))

    num_nodes = len(names)
    total_time = 0
    for i in range(10):
        # Hand every run its own copy of the links ("just in case" the
        # routine modifies its input), so all iterations see the same data.
        links_copy = links.copy()
        # Only the connected-components call is placed inside the timer.
        with timer.Timer() as t:
            labels = CC.connected_components(num_nodes, links_copy)
        # The number of distinct labels is reported as the component count.
        unique_labels = np.unique(labels)
        print("iter: {}: {} components".format(i + 1, unique_labels.size))
        total_time += t.interval
    print("Total time: {} seconds".format(total_time))