forked from PaddlePaddle/Research
-
Notifications
You must be signed in to change notification settings - Fork 0
/
data_preprocess.sh
executable file
·19 lines (15 loc) · 1.09 KB
/
data_preprocess.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
set -eu
set -o pipefail
# Attention! Python 2.7.14 and python3 gives different vocabulary order. We use Python 2.7.14 to preprocess files.
# input files: train.txt valid.txt test.txt
# (these are default filenames, change files name with the following arguments: --train $trainname --valid $validname --test $testname)
# output files: vocab.txt train.coke.txt valid.coke.txt test.coke.txt
python ./bin/kbc_data_preprocess.py --task fb15k --dir ./data/fb15k
python ./bin/kbc_data_preprocess.py --task wn18 --dir ./data/wn18
python ./bin/kbc_data_preprocess.py --task fb15k237 --dir ./data/fb15k237
python ./bin/kbc_data_preprocess.py --task wn18rr --dir ./data/wn18rr
# input files: train dev test
# (these are default filenames, change files name with the following arguments: --train $trainname --valid $validname --test $testname)
# output files: vocab.txt train.coke.txt valid.coke.txt test.coke.txt sen_candli.txt trivial_sen.txt
python ./bin/pathquery_data_preprocess.py --task pathqueryFB --dir ./data/pathqueryFB
python ./bin/pathquery_data_preprocess.py --task pathqueryWN --dir ./data/pathqueryWN