one_gpu.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from argparse import ArgumentParser

import torch
from torch.optim import Adam
from torch.utils.data import DataLoader, SequentialSampler, TensorDataset
from transformers import AutoModelForCausalLM

from tokenize_data import tokenize_and_load, add_tokenize_data_args

NUM_EPOCHS = 3


def main():
    parser = ArgumentParser('DDP usage example')
    parser.add_argument('--seed', type=int, default=1337)
    parser.add_argument('--batch_size', type=int, default=16)
    # --local_rank is unused in this single-GPU baseline, but DDP launchers
    # pass it, so you need this argument in your scripts for DDP to work
    parser.add_argument('--local_rank', type=int, default=-1, metavar='N',
                        help='Local process rank.')
    parser = add_tokenize_data_args(parser)
    args = parser.parse_args()

    # seed PyTorch's RNG for reproducibility
    torch.manual_seed(args.seed)

    # initialize your model
    model = AutoModelForCausalLM.from_pretrained("sberbank-ai/rugpt3small_based_on_gpt2")

    # send your model to GPU
    model.cuda()

    optimizer = Adam(model.parameters(), lr=1e-5)

    # initialize your dataset
    texts, _ = tokenize_and_load(**vars(args))
    dataset = TensorDataset(torch.LongTensor(texts))

    # initialize the sampler
    sampler = SequentialSampler(dataset)

    # initialize the dataloader
    dataloader = DataLoader(
        dataset=dataset,
        sampler=sampler,
        batch_size=args.batch_size
    )

    # start your training!
    for epoch in range(NUM_EPOCHS):
        # put model in train mode
        model.train()
        for step, batch in enumerate(dataloader):
            # send batch to device; TensorDataset yields a one-element tuple
            batch = batch[0].cuda()

            # forward pass; passing labels makes the model return the LM loss
            out = model(batch, labels=batch)
            loss = out["loss"]

            # backward pass: clear stale gradients, backprop, then step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if step % 40 == 0:
                print('Loss %.3f' % loss.item())


if __name__ == '__main__':
    main()
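
# A minimal invocation sketch. The data-related flags come from
# add_tokenize_data_args in tokenize_data.py, which is not shown here,
# so only the flags defined above are spelled out:
#
#   python one_gpu.py --seed 1337 --batch_size 16 <flags from add_tokenize_data_args>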