forked from Alagaesia93/patent_co_citation_network
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathplot.py
103 lines (94 loc) · 3.61 KB
/
plot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import igraph
import utils
import leidenalg
from sklearn.model_selection import train_test_split
# patents = utils.read_patents()
# patent_classification = utils.read_patent_classification()
# uspatentcitations = utils.read_uspatentcitation()
# patents = utils.merge_patents_and_classification(patents, patent_classification)
# patents = patents.set_index("id")
# range_patents = patents
# range_uspatentcitations = uspatentcitations
# range_train_patents, range_test_patents = train_test_split(patents)
# Script body: load the pre-built graph, partition it with leidenalg, and plot
# the first few resulting subgraphs via the project-local `utils` helpers.

# Upper bound on how many subgraphs get plotted per run.
MAX_PLOTTED_SUBGRAPHS = 10

print("read graph")
# Read_GraphMLz is a constructor-style classmethod, so call it on the class
# instead of instantiating an empty Graph first and discarding it.
g = igraph.Graph.Read_GraphMLz('../Data/graph_with_attributes.xml')

print("read components")
# NOTE: despite the variable name, this is a modularity-based partition
# computed by leidenalg, not a connected-components decomposition.
connected_components = leidenalg.find_partition(g, leidenalg.ModularityVertexPartition)

print("find subgraphs")
subgraphs = connected_components.subgraphs()
# sbg = subgraphs[0]
# sbg.write_graphmlz('data/subgraph.xml')
num_subgraphs = len(subgraphs)  # not used below; kept for interactive inspection

# enumerate() supplies the running index used in the output name, replacing
# the manually incremented counter.
for index, sbg in enumerate(subgraphs[:MAX_PLOTTED_SUBGRAPHS]):
    print(index)
    # cmp_patents = patents[patents.number.isin(sbg.vs["name"])]
    # utils.plot_section_distribution(cmp_patents, name="train_test_"+str(index))
    utils.plot_subgraph(sbg, name=f"train_test_{index}")
# -------------- train/test split over time ranges (disabled experiment)
# delta = 10
# train_percentage = 0.8
# index = 0
#
# my_range = utils.Range(delta, train_percentage, patents['date'].min(), patents['date'].max())
# my_range.print()
# g = igraph.Graph(directed=True)
# global_assigned_patents = dict()
#
# print("start of while")
# while my_range.range_end <= my_range.max_date:
# range_patents, range_train_patents, range_test_patents, range_uspatentcitations = utils.find_range_dataframes_from_beginning(
# my_range, patents, uspatentcitations
# )
# g = utils.add_edges(g, range_uspatentcitations)
# print("finding components")
# connected_components = leidenalg.find_partition(g, leidenalg.ModularityVertexPartition)
# subgraphs = connected_components.subgraphs()
# num_subgraphs = len(subgraphs)
#
# for sbg in subgraphs[0:10]:
# print(index)
# # sbg.write_graphmlz('data/graph_with_attributes_'+str(index)+'.xml')
# index += 1
# cmp_patents = range_patents[range_patents.number.isin(sbg.vs["name"])]
# utils.plot_section_distribution(cmp_patents, name="modeling_whole_time_based_" + str(index))
#
# my_range.increase(delta, train_percentage)
# my_range.print()
# G = igraph.Graph(directed=True)
# --------------- time ranges increased proportionally (disabled experiment)
# delta = 10
# train_percentage = 0.8
# index = 0
#
#
# my_range = utils.Range(delta, train_percentage, patents['date'].min(), patents['date'].max())
# my_range.print()
# G = igraph.Graph(directed=True)
#
# global_assigned_patents = dict()
#
#
# while my_range.range_end <= my_range.max_date:
# range_patents, range_train_patents, range_test_patents, range_uspatentcitations = utils.find_range_dataframes(
# my_range, patents, uspatentcitations
# )
# G = utils.add_edges(G, range_uspatentcitations)
# print("finding components")
# connected_components = leidenalg.find_partition(G, leidenalg.ModularityVertexPartition)
# subgraphs = connected_components.subgraphs()
# num_subgraphs = len(subgraphs)
#
# for sbg in subgraphs[0:10]:
# print(index)
# # sbg.write_graphmlz('data/graph_with_attributes_'+str(index)+'.xml')
# index += 1
# cmp_patents = range_patents[range_patents.number.isin(sbg.vs["name"])]
# utils.plot_section_distribution(cmp_patents, name="modeling_proportional_time_based_" + str(index))
#
# my_range.increase_proportionally(delta, train_percentage)
# my_range.print()
# G = igraph.Graph(directed=True) # restore the initial graph
#