Commit 696da9f

del comments
1 parent bcef226 commit 696da9f

File tree

2 files changed (+3, -22 lines)


app/train_roberta.py (-22 lines)
@@ -1,17 +1,13 @@
 import os
 import sys
-import ipdb
-import time
 import random
 import shutil
-import ntpath
 import numpy as np
 import argparse
 
 sys.path.append('..')
 
 from core.task import StoryTuringTest
-from core.utils import load_save_json
 from core.exp_record import ExpRecorder
 from core.semantic_modifier import SemanticModifier
 from exp_config import TRAIN_CONFIG, TRAIN_DEBUG_CONFIG, SYSTEM_CONFIG, SEED_OFFSET, TEST_PERCENT, VAL_PERCENT, \
@@ -24,14 +20,6 @@
 from pytorch_lightning import seed_everything
 
 
-# python3.6 train_roberta.py --epoch 1 --debug_N 100
-# python3.6 train_roberta.py --epoch 20 --per_device_train_batch_size 32 --gradient_accumulation_steps 4
-
-# python3.6 train_roberta.py --epoch 10 --per_device_train_batch_size 32 --gradient_accumulation_steps 4 --data_dir ../data/5billion_sort
-# python3.6 train_roberta.py --epoch 10 --per_device_train_batch_size 32 --gradient_accumulation_steps 4 --data_dir ../data/5billion_sort_no_reverse
-# python3.6 train_roberta.py --epoch 10 --per_device_train_batch_size 32 --gradient_accumulation_steps 4 --data_dir ../data/5billion_sort_unique_no_reverse
-# python3.6 train_roberta.py --epoch 10 --per_device_train_batch_size 32 --gradient_accumulation_steps 4 --data_dir ../data/5billion_shuffle_unique_no_reverse
-
 def compute_metrics(eval_predict):
     predict_prob, labels = eval_predict
     predict_label = np.argmax(predict_prob, axis=1)
@@ -166,11 +154,6 @@ def main():
     else:
         load_complete = True
 
-    # Using the tokenizer's vocab directly doesn't seem to work either: the first entries are ASCII codes, and the rest don't appear to be fully sorted by word frequency
-    # tokenizer_keys = list(tokenizer.vocab.keys())
-    # ipdb.set_trace()
-    # read whole dataset into memory
-
     # (0.) read dataset
     story_turing_test = StoryTuringTest(tokenizer, dataset_name=dataset_name)
     whole_texts, whole_labels = story_turing_test.read_cn_novel_whole_data(data_dir, semantic_change)
@@ -315,11 +298,6 @@ def main():
     model_save_dir = os.path.abspath(model_save_dir)
     model.save_pretrained(model_save_dir)
     print(f"Save best model ckpt to {model_save_dir}")
-    #
-    # train_result_save_path = os.path.join(model_save_dir, 'train_result.json')
-    # test_result_save_path = os.path.join(model_save_dir, 'test_result.json')
-    # load_save_json(train_result_save_path, 'save', data=train_result)
-    # load_save_json(test_result_save_path, 'save', data=test_result)
 
 
 if __name__ == '__main__':
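Note: the `compute_metrics` hook whose opening lines appear in the second hunk follows the Hugging Face `Trainer` convention of receiving a `(predictions, label_ids)` tuple and returning a dict of metric names to values. The diff truncates its body, so the accuracy computation below is a minimal sketch under that assumption, not the repository's actual metric code:

```python
import numpy as np

def compute_metrics(eval_predict):
    # Trainer passes (predictions, label_ids); predictions are per-class scores
    predict_prob, labels = eval_predict
    # take the highest-scoring class for each example
    predict_label = np.argmax(predict_prob, axis=1)
    # returning accuracy is an assumption; the real metric body is not shown
    return {"accuracy": float((predict_label == labels).mean())}
```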

readme.md (+3 lines)
@@ -82,6 +82,9 @@ Arguments for ```run_story_interpret.sh```:
 - ig_n_steps: The number of steps used by the approximation method for Integrated Gradients.
 - use_pad_baseline: Bool value. 1: use the all-[PAD] baseline for Integrated Gradients. 0: use the all-zero baseline.
 
+#### Other
+
+- To create result figures, you may refer to the notebooks in result/vis
 ---
 ### License:
 Our code is under Apache License 2.0.
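As an aside, the `ig_n_steps` and `use_pad_baseline` arguments documented in the readme hunk above map naturally onto Captum's `IntegratedGradients`. The sketch below assumes Captum and uses illustrative names (`attribute_tokens`, `forward_fn`, `pad_embed`); it is not the actual code behind `run_story_interpret.sh`:

```python
import torch
from captum.attr import IntegratedGradients

def attribute_tokens(forward_fn, input_embeds, pad_embed,
                     ig_n_steps=50, use_pad_baseline=1):
    # forward_fn maps embeddings to a scalar score per example
    # (e.g. the probability of the predicted class), as Captum expects
    if use_pad_baseline:
        # 1: repeat the [PAD] embedding at every position as the baseline
        baseline = pad_embed.expand_as(input_embeds)
    else:
        # 0: all-zero baseline
        baseline = torch.zeros_like(input_embeds)
    ig = IntegratedGradients(forward_fn)
    # n_steps controls how finely the straight-line path from baseline to
    # input is discretized when approximating the path integral
    return ig.attribute(input_embeds, baselines=baseline, n_steps=ig_n_steps)
```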
