diff --git a/README.md b/README.md
index ff80036..658bc87 100644
--- a/README.md
+++ b/README.md
@@ -116,6 +116,8 @@ python train_wc.py --train_file ./data/np/train.txt.iobes --dev_file ./data/np/t
 
 For other datasets or tasks, you may wanna try different stopping parameters, especially, for smaller dataset, you may want to set ```least_iters``` to a larger value; and for some tasks, if the speed of loss decreasing is too slow, you may want to increase ```lr```.
 
+
+
 ## Benchmarks
 
 Here we compare LM-LSTM-CRF with recent state-of-the-art models on the CoNLL 2000 Chunking dataset, the CoNLL 2003 NER dataset, and the WSJ portion of the PTB POS Tagging dataset. All experiments are conducted on a GTX 1080 GPU.
@@ -149,10 +151,12 @@ When models are only trained on the WSJ portion of the PTB POS Tagging dataset,
 
 We released pre-trained models on these three tasks. The checkpoint file can be downloaded at the following links. Notice that the NER model and Chunking model (coming soon) are trained on both the training set and the development set:
 
 | WSJ-PTB POS Tagging | CoNLL03 NER |
+| ------------------- | ------------------- |
 | [Args](https://drive.google.com/file/d/0B587SdKqutQmYmpiNFp6b1hKWEE/view?usp=sharing) | [Args](https://drive.google.com/file/d/1tGAQ0hu9AsIBdrqFn5fmDQ72Pk1I-o74/view?usp=sharing) |
 | [Model](https://drive.google.com/file/d/0B587SdKqutQmNnR3Nnk1WHdIMG8/view?usp=sharing) | [Model](https://drive.google.com/file/d/1o9kjZV5EcHAhys3GPgl7EPGE5fuXyYjr/view?usp=sharing) |
+
 Also, ```eval_wc.py``` is provided to load and run these checkpoints. Its usage can be accessed by command ````python eval_wc.py -h````, and a running command example is provided below:
 
 ```
 python eval_wc.py --load_arg checkpoint/ner/ner_4_cwlm_lstm_crf.json --load_check_point checkpoint/ner_ner_4_cwlm_lstm_crf.model --gpu 0 --dev_file ./data/ner/testa.txt --test_file ./data/ner/testb.txt
diff --git a/model/utils.py b/model/utils.py
index 05018e9..8599ad6 100644
--- a/model/utils.py
+++ b/model/utils.py
@@ -5,6 +5,8 @@
 .. moduleauthor:: Liyuan Liu, Frank Xu
 """
+
+
 import codecs
 import csv
 import itertools
@@ -419,7 +421,7 @@ def load_embedding_wlm(emb_file, delimiter, feature_map, full_feature_set, casel
     outdoc_embedding_array = list()
     outdoc_word_array = list()
 
-    for line in open(emb_file, 'r'):
+    for line in codecs.open(emb_file, 'r','utf-8'):
         line = line.split(delimiter)
 
         vector = list(map(lambda t: float(t), filter(lambda n: n and not n.isspace(), line[1:])))
diff --git a/train_w.py b/train_w.py
index 4779abf..146ffc6 100644
--- a/train_w.py
+++ b/train_w.py
@@ -22,10 +22,10 @@ if __name__ == "__main__":
     parser = argparse.ArgumentParser(description='Learning with BLSTM-CRF')
     parser.add_argument('--rand_embedding', action='store_true', help='random initialize word embedding')
-    parser.add_argument('--emb_file', default='./embedding/glove.6B.100d.txt', help='path to pre-trained embedding')
-    parser.add_argument('--train_file', default='./data/ner2003/eng.train.iobes', help='path to training file')
-    parser.add_argument('--dev_file', default='./data/ner2003/eng.testa.iobes', help='path to development file')
-    parser.add_argument('--test_file', default='./data/ner2003/eng.testb.iobes', help='path to test file')
+    parser.add_argument('--emb_file', default='./data/glove.6B.100d.txt', help='path to pre-trained embedding')
+    parser.add_argument('--train_file', default='./data/ner2003/eng.train', help='path to training file')
+    parser.add_argument('--dev_file', default='./data/ner2003/eng.testa', help='path to development file')
+    parser.add_argument('--test_file', default='./data/ner2003/eng.testb', help='path to test file')
     parser.add_argument('--gpu', type=int, default=0, help='gpu id, set to -1 if use cpu mode')
     parser.add_argument('--batch_size', type=int, default=10, help='batch size (10)')
     parser.add_argument('--unk', default='unk', help='unknow-token in pre-trained embedding')
@@ -56,8 +56,8 @@
     if args.gpu >= 0:
         torch.cuda.set_device(args.gpu)
 
-    print('setting:')
-    print(args)
+    # print('setting:')
+    # print(args)
 
     # load corpus
     print('loading corpus')
@@ -180,7 +180,10 @@
                 itertools.chain.from_iterable(dataset_loader), mininterval=2,
                 desc=' - Tot it %d (epoch %d)' % (tot_length, args.start_epoch), leave=False, file=sys.stdout):
-            fea_v, tg_v, mask_v = packer.repack_vb(feature, tg, mask)
+            #fea_v, tg_v, mask_v = packer.repack_vb(feature, tg, mask)
+            fea_v, tg_v, mask_v = packer.repack_vb(feature.type(torch.FloatTensor), tg.type(torch.FloatTensor),
+                                                   mask.type(torch.FloatTensor))
+
             ner_model.zero_grad()
             scores, hidden = ner_model.forward(fea_v)
             loss = crit.forward(scores, tg_v, mask_v)
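
Note on the `load_embedding_wlm` hunk in `model/utils.py`: `codecs.open(emb_file, 'r', 'utf-8')` decodes the embedding file explicitly as UTF-8 instead of relying on the interpreter's default encoding, which matters when GloVe/word2vec dumps contain non-ASCII tokens. The sketch below is a minimal, self-contained illustration of the same loading pattern; the helper name `load_vectors` and its defaults are assumptions for illustration, not part of the repository.

```
import codecs

def load_vectors(emb_file, delimiter=' '):
    """Hypothetical helper mirroring the patched loop: decode the embedding
    file explicitly as UTF-8 so non-ASCII tokens do not raise decoding
    errors under a non-UTF-8 locale."""
    vectors = {}
    for line in codecs.open(emb_file, 'r', 'utf-8'):
        parts = line.rstrip('\n').split(delimiter)
        if len(parts) < 2:
            continue  # skip blank or malformed lines
        # first field is the token, the remaining fields are the vector values
        vectors[parts[0]] = [float(v) for v in parts[1:] if v and not v.isspace()]
    return vectors
```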
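Note on the `train_w.py` training-loop hunk: the patch casts the batch tensors to `torch.FloatTensor` before passing them to `packer.repack_vb`. The snippet below only illustrates that cast on stand-in tensors; the names `feature`, `tg`, and `mask` follow the hunk, while the shapes, dummy values, and the dtype remark are assumptions, not repository code.

```
import torch

# Stand-in batch tensors shaped (seq_len, batch); the values are dummies.
feature = torch.randint(0, 100, (10, 4))  # word indices (int64 by default)
tg = torch.randint(0, 17, (10, 4))        # tag indices
mask = torch.ones(10, 4)                  # 1.0 for real tokens, 0.0 for padding

# The cast introduced in the hunk: .type(torch.FloatTensor) returns a
# float32 CPU copy (equivalent to .float() for CPU tensors).
fea_v = feature.type(torch.FloatTensor)
tg_v = tg.type(torch.FloatTensor)
mask_v = mask.type(torch.FloatTensor)

# Caveat: nn.Embedding lookups expect integer (LongTensor) indices, so
# downstream code must convert back if it indexes an embedding table.
print(fea_v.dtype, tg_v.dtype, mask_v.dtype)  # torch.float32 for all three
```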