# wikitext_bpe_overfit.sh
# Continue training until the model overfits; dropout is disabled.
python train.py --task language_modeling \
data-bin/wikitext103-bpe \
--save-dir checkpoints/wikitext103-bpe-overfit \
--arch transformer_lm_wikibpe \
--dropout 0 --attention-dropout 0 --activation-dropout 0 \
--restore-file checkpoints/wikitext103-bpe/checkpoint_best.pt \
--reset-optimizer --reset-dataloader --reset-meters \
--max-update 286000 --optimizer nag --lr 1e-2 --clip-norm 100 \
--max-tokens 3072 --update-freq 3 --tokens-per-sample 3072 --seed 1 \
--sample-break-mode none --skip-invalid-size-inputs-valid-test --ddp-backend=no_c10d --fp16 | tee overfit.log
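# The training log is captured in overfit.log via tee above. A quick way to
# watch convergence (a sketch: this greps fairseq's default progress lines,
# whose exact format may differ across fairseq versions):
grep 'valid' overfit.log | tail -n 5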
# Evaluate the overfit model on the training set and save per-token scores.
python eval_lm.py data-bin/wikitext103-bpe \
--path checkpoints/wikitext103-bpe-overfit-new/checkpoint95.pt \
--sample-break-mode none --max-tokens 3072 \
--softmax-batch 1024 --gen-subset train \
--context-window 1536 --tokens-per-sample 1536 \
--fp16 --save-scores overfit_train_scores.npy
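# Sanity-check the saved scores. This assumes --save-scores writes one
# log-probability per training token as a NumPy array; if those are
# natural-log probabilities, exp(-mean) gives the training perplexity.
python -c "
import numpy as np
s = np.load('overfit_train_scores.npy')  # assumed: per-token log-probs
print('tokens scored:', s.size)
print('mean score:', s.mean(), '-> ppl (if natural log):', float(np.exp(-s.mean())))
"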
# Evaluate the overfit model on the validation set.
python eval_lm.py data-bin/wikitext103-bpe \
--path checkpoints/wikitext103-bpe-overfit-new/checkpoint95.pt \
--sample-break-mode complete --max-tokens 3072 \
--context-window 2560 --softmax-batch 1024 \
--gen-subset valid --bpe subword_nmt --remove-bpe --save-scores overfit_valid_scores.npy
# For comparison, evaluate an early checkpoint (checkpoint2) on the validation set.
python eval_lm.py data-bin/wikitext103-bpe \
--path checkpoints/wikitext103-bpe-overfit/checkpoint2.pt \
--sample-break-mode complete --max-tokens 3072 \
--context-window 2560 --softmax-batch 1024 \
--gen-subset valid --bpe subword_nmt --remove-bpe
# Build the datastore (keys and values) from the overfit checkpoint over the training set.
python eval_lm.py data-bin/wikitext103-bpe \
--path checkpoints/wikitext103-bpe-overfit/checkpoint242.pt \
--sample-break-mode none --max-tokens 3072 \
--softmax-batch 1024 --gen-subset train \
--context-window 1536 --tokens-per-sample 1536 \
--dstore-mmap checkpoints/wikitext103-bpe/dstore_242 --knn-keytype 'last_ffn_input' \
--dstore-size 153225485 --model-overrides "{'knn_keytype': 'last_ffn_input'}" \
--save-knnlm-dstore --fp16 --dstore-fp16
# Build the FAISS index over the datastore.
python build_dstore.py \
--dstore_mmap checkpoints/wikitext103-bpe/dstore_242 \
--dstore_size 153225485 \
--faiss_index checkpoints/wikitext103-bpe/knn_242.index \
--num_keys_to_add_at_a_time 500000 \
--starting_point 0 --dstore-fp16 --dimension 1024
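# Rough size check on the datastore before indexing. This assumes the knnlm
# memmap layout: fp16 keys of shape [dstore_size, 1024] stored at
# <dstore_mmap>_keys.npy, i.e. 153225485 * 1024 * 2 bytes on disk.
python -c "print('expected key bytes:', 153225485 * 1024 * 2)"
ls -l checkpoints/wikitext103-bpe/dstore_242_keys.npy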
# Evaluate with the kNN datastore and index.
# Variant 1: use the FAISS distances directly (no recomputation).
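# kNN-LM interpolates the datastore distribution with the base LM:
#   p(y|x) = lmbda * p_kNN(y|x) + (1 - lmbda) * p_LM(y|x)
# so --lmbda 0.25 puts 25% of the mass on the retrieved neighbors, and
# --k 1024 is the number of neighbors retrieved per token. With
# --knn-sim-func "do_not_recomp_l2" and --no-load-keys, the approximate
# FAISS distances are used as-is instead of recomputing exact L2 distances
# against the stored fp16 keys.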
python eval_lm.py data-bin/wikitext103-bpe \
--path checkpoints/wikitext103-bpe-overfit/checkpoint242.pt \
--sample-break-mode complete --max-tokens 3072 \
--context-window 2560 --softmax-batch 1024 \
--gen-subset valid --dstore-filename checkpoints/wikitext103-bpe/dstore_242 \
--indexfile checkpoints/wikitext103-bpe/knn_242.index \
--model-overrides "{'knn_keytype': 'last_ffn_input'}" \
--k 1024 --lmbda 0.25 --dstore-size 153225485 --knn-keytype last_ffn_input \
--knn-sim-func "do_not_recomp_l2" --no-load-keys \
--probe 32 --knnlm --fp16 --dstore-fp16 --bpe subword_nmt --remove-bpe
# Variant 2: recompute exact L2 distances against the stored keys.
python eval_lm.py data-bin/wikitext103-bpe \
--path checkpoints/wikitext103-bpe-overfit/checkpoint242.pt \
--sample-break-mode complete --max-tokens 3072 \
--context-window 2560 --softmax-batch 1024 \
--gen-subset valid --dstore-filename checkpoints/wikitext103-bpe/dstore_242 \
--indexfile checkpoints/wikitext103-bpe/knn_242.index \
--model-overrides "{'knn_keytype': 'last_ffn_input'}" \
--k 1024 --lmbda 0.25 --dstore-size 153225485 --knn-keytype last_ffn_input \
--probe 32 --knnlm --fp16 --dstore-fp16 --bpe subword_nmt --remove-bpe
### Smaller training data: a 1% subset of WikiText-103
TEXT=examples/language_model/wikitext-103
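# wiki.train.small.tokens.bpe is assumed to already exist. One plausible way
# to produce a 1% subset (an assumption -- the original subsetting procedure
# is not shown in this script) is to keep the first 1% of lines of the BPE'd
# training file:
if [ ! -f "$TEXT/wiki.train.small.tokens.bpe" ]; then
    n_lines=$(wc -l < "$TEXT/wiki.train.tokens.bpe")
    head -n $((n_lines / 100)) "$TEXT/wiki.train.tokens.bpe" \
        > "$TEXT/wiki.train.small.tokens.bpe"
fi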
python preprocess.py \
--only-source \
--srcdict data-bin/wikitext103-bpe/dict.txt \
--trainpref $TEXT/wiki.train.small.tokens.bpe \
--validpref $TEXT/wiki.valid.tokens.bpe \
--testpref $TEXT/wiki.test.tokens.bpe \
--destdir data-bin/wikitext103-bpe-small \
--workers 20
# Baseline: evaluate the best (non-overfit) checkpoint on the validation set.
python eval_lm.py data-bin/wikitext103-bpe-small \
--path checkpoints/wikitext103-bpe/checkpoint_best.pt \
--sample-break-mode complete --max-tokens 3072 \
--context-window 2560 --softmax-batch 1024 \
--gen-subset valid --bpe subword_nmt --remove-bpe
# Build the datastore from the best (non-overfit) checkpoint on the 1% subset.
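# --dstore-size below must match the number of train tokens that get scored
# (1113601 here). A rough cross-check from the raw text, assuming one BPE
# token per whitespace-separated field plus one EOS token per line:
echo "approx train tokens: $(( $(wc -w < "$TEXT/wiki.train.small.tokens.bpe") + $(wc -l < "$TEXT/wiki.train.small.tokens.bpe") ))"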
python eval_lm.py data-bin/wikitext103-bpe-small \
--path checkpoints/wikitext103-bpe/checkpoint_best.pt \
--sample-break-mode none --max-tokens 3072 \
--softmax-batch 1024 --gen-subset train \
--context-window 1536 --tokens-per-sample 1536 \
--dstore-mmap checkpoints/wikitext103-bpe/dstore_small --knn-keytype 'last_ffn_input' \
--dstore-size 1113601 --model-overrides "{'knn_keytype': 'last_ffn_input'}" \
--save-knnlm-dstore --fp16 --dstore-fp16
# Build the FAISS index over the small datastore.
python build_dstore.py \
--dstore_mmap checkpoints/wikitext103-bpe/dstore_small \
--dstore_size 1113601 \
--faiss_index checkpoints/wikitext103-bpe/knn_small.index \
--num_keys_to_add_at_a_time 500000 \
--starting_point 0 --dstore-fp16 --dimension 1024
# Evaluate with the small datastore and index (same two variants as above).
# Variant 1: use the FAISS distances directly (no recomputation).
python eval_lm.py data-bin/wikitext103-bpe-small \
--path checkpoints/wikitext103-bpe/checkpoint_best.pt \
--sample-break-mode complete --max-tokens 3072 \
--context-window 2560 --softmax-batch 1024 \
--gen-subset valid --dstore-filename checkpoints/wikitext103-bpe/dstore_small \
--indexfile checkpoints/wikitext103-bpe/knn_small.index \
--model-overrides "{'knn_keytype': 'last_ffn_input'}" \
--k 1024 --lmbda 0.25 --dstore-size 1113601 --knn-keytype last_ffn_input \
--knn-sim-func "do_not_recomp_l2" --no-load-keys \
--probe 32 --knnlm --fp16 --dstore-fp16 --bpe subword_nmt --remove-bpe
# Variant 2: recompute exact L2 distances against the stored keys.
python eval_lm.py data-bin/wikitext103-bpe-small \
--path checkpoints/wikitext103-bpe/checkpoint_best.pt \
--sample-break-mode complete --max-tokens 3072 \
--context-window 2560 --softmax-batch 1024 \
--gen-subset valid --dstore-filename checkpoints/wikitext103-bpe/dstore_small \
--indexfile checkpoints/wikitext103-bpe/knn_small.index \
--model-overrides "{'knn_keytype': 'last_ffn_input'}" \
--k 1024 --lmbda 0.25 --dstore-size 1113601 --knn-keytype last_ffn_input \
--probe 32 --knnlm --fp16 --dstore-fp16 --bpe subword_nmt --remove-bpe
# Continue training on the 1% subset until the model overfits
# (note: dropout is left at its default here).
CUDA_VISIBLE_DEVICES=2,3 python train.py --task language_modeling \
data-bin/wikitext103-bpe-small \
--save-dir checkpoints/wikitext103-bpe-overfit \
--arch transformer_lm_wikibpe \
--restore-file checkpoints/wikitext103-bpe/checkpoint_best.pt \
--reset-optimizer --reset-dataloader --reset-meters \
--max-update 28600 --optimizer nag --lr 1e-4 --clip-norm 0.1 \
--max-tokens 3072 --update-freq 1 --tokens-per-sample 3072 --seed 1 \
--sample-break-mode none --skip-invalid-size-inputs-valid-test --ddp-backend=no_c10d --fp16
# Evaluate the overfit model on the validation set.
python eval_lm.py data-bin/wikitext103-bpe-small \
--path checkpoints/wikitext103-bpe-overfit/checkpoint10.pt \
--sample-break-mode complete --max-tokens 3072 \
--context-window 2560 --softmax-batch 1024 \
--gen-subset valid --bpe subword_nmt --remove-bpe
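# To track how validation perplexity evolves as the model overfits, one can
# sweep over all saved checkpoints (a sketch; assumes fairseq's default
# checkpoint naming in the save dir):
for ckpt in checkpoints/wikitext103-bpe-overfit/checkpoint[0-9]*.pt; do
    echo "=== $ckpt ==="
    python eval_lm.py data-bin/wikitext103-bpe-small \
        --path "$ckpt" \
        --sample-break-mode complete --max-tokens 3072 \
        --context-window 2560 --softmax-batch 1024 \
        --gen-subset valid --bpe subword_nmt --remove-bpe
done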