-
Notifications
You must be signed in to change notification settings - Fork 44
/
Copy pathtransfer.py
142 lines (124 loc) · 4.73 KB
/
transfer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
from tqdm import tqdm
from dataset import get_transfer_data
from model import BotGAT
import json
import torch
import torch.nn as nn
from torch_geometric.loader import NeighborLoader
from sklearn.metrics import accuracy_score, f1_score
import random
# Name of the dataset directory under ../../../datasets/ that holds the
# per-domain user lists (domain/user0.json ... domain/user9.json).
dataset = 'Twibot-22'

# idx.json maps every entry found in the domain user files to the integer
# that is later used as a seed-node index for NeighborLoader.
with open('idx.json') as idx_file:
    idx = json.load(idx_file)

# user_idx[k] is the shuffled list of node indices belonging to domain k.
user_idx = []
for index in range(10):
    domain_path = '../../../datasets/{}/domain/user{}.json'.format(dataset, index)
    # Use a context manager so the file handle is closed deterministically
    # instead of being left to the garbage collector.
    with open(domain_path) as domain_file:
        domain_users = json.load(domain_file)
    user_id = [idx[item] for item in domain_users]
    random.shuffle(user_id)
    user_idx.append(user_id)

# Graph object handed to NeighborLoader in train().
data = get_transfer_data()
print('load done.')

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Model hyper-parameters (forwarded to BotGAT).
hidden_dim = 128
dropout = 0.3
# Adam optimizer settings.
lr = 1e-3
weight_decay = 1e-5
# Upper bound on epochs per (train, test) pair.
max_epoch = 1000
# Early-stopping patience: stop after this many consecutive epochs in which
# test accuracy fell below the best value seen so far.
no_up = 50
# Number of seed nodes per NeighborLoader mini-batch.
batch_size = 1024
def forward_one_epoch(model, optimizer, loss_fn, train_loader, test_loader):
    """Run one optimisation pass over ``train_loader`` and then evaluate.

    For every mini-batch only the first ``batch.batch_size`` rows of the
    model output and of ``batch.y`` are scored; the remaining rows are the
    neighbours sampled by the loader (see torch_geometric's NeighborLoader
    documentation) and only contribute through message passing.

    Args:
        model: network called as ``model(des, tweet, num_prop, cat_prop,
            edge_index)``.
        optimizer: optimizer stepping ``model``'s parameters.
        loss_fn: callable ``loss_fn(logits, labels)`` returning a scalar loss.
        train_loader: iterable of batches used for the gradient updates.
        test_loader: iterable of batches passed to ``validation``.

    Returns:
        Tuple ``(train_loss, test_loss, train_acc, test_acc, test_f1)`` where
        the losses are averages weighted by the number of scored nodes.

    Raises:
        ZeroDivisionError: if ``train_loader`` yields no batches.
    """
    model.train()
    labels = []
    preds = []
    ave_loss = 0.0
    cnt = 0.0
    for batch in train_loader:
        optimizer.zero_grad()
        batch = batch.to(device)
        n_batch = batch.batch_size
        out = model(batch.des_embedding,
                    batch.tweet_embedding,
                    batch.num_property_embedding,
                    batch.cat_property_embedding,
                    batch.edge_index)
        label = batch.y[:n_batch]
        out = out[:n_batch]
        # Convert to plain Python ints in one call; extending a list with a
        # tensor would append one 0-d tensor object per element instead.
        labels.extend(label.cpu().tolist())
        preds.extend(out.argmax(-1).cpu().tolist())
        loss = loss_fn(out, label)
        # Weight by batch size so the epoch average is per-node, not per-batch.
        ave_loss += loss.item() * n_batch
        cnt += n_batch
        loss.backward()
        optimizer.step()
    ave_loss /= cnt
    test_loss, test_acc, test_f1 = validation(model, loss_fn, test_loader)
    return ave_loss, test_loss, accuracy_score(labels, preds), test_acc, test_f1
@torch.no_grad()
def validation(model, loss_fn, loader):
    """Evaluate ``model`` on ``loader`` without tracking gradients.

    As in training, only the first ``batch.batch_size`` rows of each batch
    are scored.

    Args:
        model: network called as ``model(des, tweet, num_prop, cat_prop,
            edge_index)``.
        loss_fn: callable ``loss_fn(logits, labels)`` returning a scalar loss.
        loader: iterable of batches to evaluate on.

    Returns:
        Tuple ``(loss, accuracy, f1)``; ``loss`` is the average weighted by
        the number of scored nodes.

    Raises:
        ZeroDivisionError: if ``loader`` yields no batches.
    """
    model.eval()
    labels = []
    preds = []
    ave_loss = 0.0
    cnt = 0.0
    for batch in loader:
        batch = batch.to(device)
        n_batch = batch.batch_size
        out = model(batch.des_embedding,
                    batch.tweet_embedding,
                    batch.num_property_embedding,
                    batch.cat_property_embedding,
                    batch.edge_index)
        label = batch.y[:n_batch]
        out = out[:n_batch]
        # Convert to plain Python ints in one call; extending a list with a
        # tensor would append one 0-d tensor object per element instead.
        labels.extend(label.cpu().tolist())
        preds.extend(out.argmax(-1).cpu().tolist())
        loss = loss_fn(out, label)
        # Weight by batch size so the average is per-node, not per-batch.
        ave_loss += loss.item() * n_batch
        cnt += n_batch
    ave_loss /= cnt
    return ave_loss, accuracy_score(labels, preds), f1_score(labels, preds)
def train(train_id, test_id):
    """Train a fresh BotGAT on one user domain and evaluate it on another.

    Training runs for at most ``max_epoch`` epochs and stops early once the
    test accuracy has failed to reach the best value seen so far for
    ``no_up`` consecutive epochs.

    Args:
        train_id: index into ``user_idx`` selecting the training seed nodes.
        test_id: index into ``user_idx`` selecting the evaluation seed nodes.

    Returns:
        Tuple ``(best_test_acc, f1_at_best_acc)``. Ties in accuracy are
        resolved in favour of the most recent epoch.
    """
    mx = 0
    mx_f1 = 0
    train_idx = torch.tensor(user_idx[train_id], dtype=torch.long)
    test_idx = torch.tensor(user_idx[test_id], dtype=torch.long)
    train_loader = NeighborLoader(data,
                                  num_neighbors=[256] * 4,
                                  batch_size=batch_size,
                                  input_nodes=train_idx)
    test_loader = NeighborLoader(data,
                                 num_neighbors=[256] * 4,
                                 batch_size=batch_size,
                                 input_nodes=test_idx)
    model = BotGAT(hidden_dim=hidden_dim, dropout=dropout).to(device)
    loss_fn = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    # Number of consecutive epochs without matching/improving the best accuracy.
    cnt = 0
    # The context manager closes the progress bar on the early-stopping exit
    # as well as on normal completion.
    with tqdm(range(max_epoch), ncols=0) as pbar:
        pbar.set_description('{} {}'.format(train_id, test_id))
        for _ in pbar:
            train_loss, test_loss, train_acc, test_acc, test_f1 = forward_one_epoch(
                model, optimizer, loss_fn, train_loader, test_loader)
            # '{:.4f}' = four decimals; the previous '{:4f}' only set a minimum
            # field width and printed the default six decimals.
            pbar.set_postfix_str('test acc {:.4f} '
                                 'train acc {:.4f} '
                                 'test loss {:.4f} '
                                 'train loss {:.4f} '
                                 'no up cnt {}'.format(test_acc, train_acc, test_loss, train_loss, cnt))
            if test_acc >= mx:
                mx = test_acc
                mx_f1 = test_f1
                cnt = 0
            else:
                cnt += 1
            if cnt == no_up:
                break
    return mx, mx_f1
if __name__ == '__main__':
    # Evaluate every (train domain, test domain) pair, including i == j, and
    # record the best accuracy / F1 of each run.
    with open('transfer_results.txt', 'w') as fb:
        for i in range(10):
            for j in range(10):
                acc, f1 = train(i, j)
                fb.write('{} train, {} test, acc: {}, f1: {}\n'.format(i, j, acc, f1))
                # Each run can take a long time; flush so finished results
                # survive a crash or interruption of a later run.
                fb.flush()