forked from xmweijh/CommunityDetection
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathLouvain.py
285 lines (255 loc) · 10.7 KB
/
Louvain.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
import collections
import random
import time
import networkx as nx
import matplotlib.pyplot as plt
def load_graph(path):
G = collections.defaultdict(dict)
with open(path) as text:
for line in text:
vertices = line.strip().split()
v_i = int(vertices[0])
v_j = int(vertices[1])
w = 1.0 # 数据集有权重的话则读取数据集中的权重
G[v_i][v_j] = w
G[v_j][v_i] = w
return G
# 节点类 存储社区与节点编号信息
class Vertex:
def __init__(self, vid, cid, nodes, k_in=0):
# 节点编号
self._vid = vid
# 社区编号
self._cid = cid
self._nodes = nodes
self._kin = k_in # 结点内部的边的权重
class Louvain:
def __init__(self, G):
self._G = G
self._m = 0 # 边数量 图会凝聚动态变化
self._cid_vertices = {} # 需维护的关于社区的信息(社区编号,其中包含的结点编号的集合)
self._vid_vertex = {} # 需维护的关于结点的信息(结点编号,相应的Vertex实例)
for vid in self._G.keys():
# 刚开始每个点作为一个社区
self._cid_vertices[vid] = {vid}
# 刚开始社区编号就是节点编号
self._vid_vertex[vid] = Vertex(vid, vid, {vid})
# 计算边数 每两个点维护一条边
self._m += sum([1 for neighbor in self._G[vid].keys()
if neighbor > vid])
# 模块度优化阶段
def first_stage(self):
mod_inc = False # 用于判断算法是否可终止
visit_sequence = self._G.keys()
# 随机访问
random.shuffle(list(visit_sequence))
while True:
can_stop = True # 第一阶段是否可终止
# 遍历所有节点
for v_vid in visit_sequence:
# 获得节点的社区编号
v_cid = self._vid_vertex[v_vid]._cid
# k_v节点的权重(度数) 内部与外部边权重之和
k_v = sum(self._G[v_vid].values()) + \
self._vid_vertex[v_vid]._kin
# 存储模块度增益大于0的社区编号
cid_Q = {}
# 遍历节点的邻居
for w_vid in self._G[v_vid].keys():
# 获得该邻居的社区编号
w_cid = self._vid_vertex[w_vid]._cid
if w_cid in cid_Q:
continue
else:
# tot是关联到社区C中的节点的链路上的权重的总和
tot = sum(
[sum(self._G[k].values()) + self._vid_vertex[k]._kin for k in self._cid_vertices[w_cid]])
if w_cid == v_cid:
tot -= k_v
# k_v_in是从节点i连接到C中的节点的链路的总和
k_v_in = sum(
[v for k, v in self._G[v_vid].items() if k in self._cid_vertices[w_cid]])
# 由于只需要知道delta_Q的正负,所以少乘了1/(2*self._m)
delta_Q = k_v_in - k_v * tot / self._m
cid_Q[w_cid] = delta_Q
# 取得最大增益的编号
cid, max_delta_Q = sorted(
cid_Q.items(), key=lambda item: item[1], reverse=True)[0]
if max_delta_Q > 0.0 and cid != v_cid:
# 让该节点的社区编号变为取得最大增益邻居节点的编号
self._vid_vertex[v_vid]._cid = cid
# 在该社区编号下添加该节点
self._cid_vertices[cid].add(v_vid)
# 以前的社区中去除该节点
self._cid_vertices[v_cid].remove(v_vid)
# 模块度还能增加 继续迭代
can_stop = False
mod_inc = True
if can_stop:
break
return mod_inc
# 网络凝聚阶段
def second_stage(self):
cid_vertices = {}
vid_vertex = {}
# 遍历社区和社区内的节点
for cid, vertices in self._cid_vertices.items():
if len(vertices) == 0:
continue
new_vertex = Vertex(cid, cid, set())
# 将该社区内的所有点看做一个点
for vid in vertices:
new_vertex._nodes.update(self._vid_vertex[vid]._nodes)
new_vertex._kin += self._vid_vertex[vid]._kin
# k,v为邻居和它们之间边的权重 计算kin社区内部总权重 这里遍历vid的每一个在社区内的邻居 因为边被两点共享后面还会计算 所以权重/2
for k, v in self._G[vid].items():
if k in vertices:
new_vertex._kin += v / 2.0
# 新的社区与节点编号
cid_vertices[cid] = {cid}
vid_vertex[cid] = new_vertex
G = collections.defaultdict(dict)
# 遍历现在不为空的社区编号 求社区之间边的权重
for cid1, vertices1 in self._cid_vertices.items():
if len(vertices1) == 0:
continue
for cid2, vertices2 in self._cid_vertices.items():
# 找到cid后另一个不为空的社区
if cid2 <= cid1 or len(vertices2) == 0:
continue
edge_weight = 0.0
# 遍历 cid1社区中的点
for vid in vertices1:
# 遍历该点在社区2的邻居已经之间边的权重(即两个社区之间边的总权重 将多条边看做一条边)
for k, v in self._G[vid].items():
if k in vertices2:
edge_weight += v
if edge_weight != 0:
G[cid1][cid2] = edge_weight
G[cid2][cid1] = edge_weight
# 更新社区和点 每个社区看做一个点
self._cid_vertices = cid_vertices
self._vid_vertex = vid_vertex
self._G = G
def get_communities(self):
communities = []
for vertices in self._cid_vertices.values():
if len(vertices) != 0:
c = set()
for vid in vertices:
c.update(self._vid_vertex[vid]._nodes)
communities.append(list(c))
return communities
def execute(self):
iter_time = 1
while True:
iter_time += 1
# 反复迭代,直到网络中任何节点的移动都不能再改善总的 modularity 值为止
mod_inc = self.first_stage()
if mod_inc:
self.second_stage()
else:
break
return self.get_communities()
# 可视化划分结果
def showCommunity(G, partition, pos):
# 划分在同一个社区的用一个符号表示,不同社区之间的边用黑色粗体
cluster = {}
labels = {}
for index, item in enumerate(partition):
for nodeID in item:
labels[nodeID] = r'$' + str(nodeID) + '$' # 设置可视化label
cluster[nodeID] = index # 节点分区号
# 可视化节点
colors = ['r', 'g', 'b', 'y', 'm']
shapes = ['v', 'D', 'o', '^', '<']
for index, item in enumerate(partition):
nx.draw_networkx_nodes(G, pos, nodelist=item,
node_color=colors[index],
node_shape=shapes[index],
node_size=350,
alpha=1)
# 可视化边
edges = {len(partition): []}
for link in G.edges():
# cluster间的link
if cluster[link[0]] != cluster[link[1]]:
edges[len(partition)].append(link)
else:
# cluster内的link
if cluster[link[0]] not in edges:
edges[cluster[link[0]]] = [link]
else:
edges[cluster[link[0]]].append(link)
for index, edgelist in enumerate(edges.values()):
# cluster内
if index < len(partition):
nx.draw_networkx_edges(G, pos,
edgelist=edgelist,
width=1, alpha=0.8, edge_color=colors[index])
else:
# cluster间
nx.draw_networkx_edges(G, pos,
edgelist=edgelist,
width=3, alpha=0.8, edge_color=colors[index])
# 可视化label
nx.draw_networkx_labels(G, pos, labels, font_size=12)
plt.axis('off')
plt.show()
def cal_Q(partition, G): # 计算Q
# 如果为真,则返回3元组(u、v、ddict)中的边缘属性dict。如果为false,则返回2元组(u,v)
m = len(G.edges(None, False))
# print(G.edges(None,False))
# print("=======6666666")
a = []
e = []
for community in partition: # 把每一个联通子图拿出来
t = 0.0
for node in community: # 找出联通子图的每一个顶点
# G.neighbors(node)找node节点的邻接节点
t += len([x for x in G.neighbors(node)])
a.append(t / (2 * m))
# self.zidian[t/(2*m)]=community
for community in partition:
t = 0.0
for i in range(len(community)):
for j in range(len(community)):
if (G.has_edge(community[i], community[j])):
t += 1.0
e.append(t / (2 * m))
q = 0.0
for ei, ai in zip(e, a):
q += (ei - ai ** 2)
return q
class Graph:
graph = nx.DiGraph()
def __init__(self):
self.graph = nx.DiGraph()
def createGraph(self, filename):
file = open(filename, 'r')
for line in file.readlines():
nodes = line.split()
edge = (int(nodes[0]), int(nodes[1]))
self.graph.add_edge(*edge)
return self.graph
if __name__ == '__main__':
# G = load_graph('data/club.txt')
G = load_graph('data/OpenFlights.txt')
obj = Graph()
G1 = obj.createGraph("Data//OpenFlights.txt")
# G1 = nx.karate_club_graph()
# pos = nx.spring_layout(G1)
start_time = time.time()
algorithm = Louvain(G)
communities = algorithm.execute()
end_time = time.time()
# 按照社区大小从大到小排序输出
communities = sorted(communities, key=lambda b: -len(b)) # 按社区大小排序
count = 0
for communitie in communities:
count += 1
print("社区", count, " ", communitie)
print(cal_Q(communities, G1))
print(f'算法执行时间{end_time - start_time}')
# 可视化结果
# showCommunity(G1, communities, pos)