diff --git a/preprocess-shards.py b/preprocess-shards.py index 4859799..c9602a5 100644 --- a/preprocess-shards.py +++ b/preprocess-shards.py @@ -198,7 +198,7 @@ def convert(srcfile, targetfile, batchsize, seqlength, outfile, total_num_sents, if source_lengths[i] > curr_l: curr_l = source_lengths[i] l_location.append(j+1) - l_location.append(len(sources)) + l_location.append(len(sources)+1) #get batch sizes curr_idx = 1