diff --git a/configs/xlmr_stratify_0.1_3layers_1e-5.json b/configs/xlmr_stratify_0.1_3layers_1e-5.json new file mode 100644 index 00000000..90360747 --- /dev/null +++ b/configs/xlmr_stratify_0.1_3layers_1e-5.json @@ -0,0 +1,43 @@ +{ + "model_name_or_path": "xlm-roberta-base", + "output_dir": "xlmr-normal", + "train_text_path": "data/sentence/train.parquet", + "valid_text_path": "data/sentence/valid.parquet", + "block_size": 512, + "use_bert": true, + "do_train": true, + "do_eval": true, + "evaluation_strategy": "steps", + "per_device_train_batch_size": 64, + "per_device_eval_batch_size": 1, + "gradient_accumulation_steps": 1, + "eval_accumulation_steps": 256, + "dataloader_num_workers": 1, + "preprocessing_num_workers": 32, + "learning_rate": 1e-5, + "save_strategy": "steps", + "fp16": false, + "max_steps": 2000000, + "save_steps": 100000, + "eval_steps": 5000, + "logging_steps": 50, + "report_to": "wandb", + "is_decoder": false, + "remove_unused_columns": false, + "lookahead": null, + "one_sample_per_line": false, + "do_sentence_training": true, + "do_auxiliary_training": true, + "warmup_steps": 5000, + "adapter_warmup_steps": 0, + "adapter_lr_multiplier": 1, + "ngram_order": 1, + "non_punctuation_sample_ratio": 0.1, + "prediction_loss_only": true, + "use_auxiliary": true, + "ddp_timeout": 3600, + "use_subwords": true, + "num_hidden_layers": 3, + "custom_punctuation_file": "punctuation_xlmr_unk.txt", + "log_level": "info" +} \ No newline at end of file diff --git a/configs/xlmr_stratify_0.1_3layers_bs128_no_aux.json b/configs/xlmr_stratify_0.1_3layers_bs128_no_aux.json new file mode 100644 index 00000000..df5154c7 --- /dev/null +++ b/configs/xlmr_stratify_0.1_3layers_bs128_no_aux.json @@ -0,0 +1,44 @@ +{ + "model_name_or_path": "xlm-roberta-base", + "output_dir": "xlmr-normal-bs128-noaux", + "train_text_path": "data/sentence/train.parquet", + "valid_text_path": "data/sentence/valid.parquet", + "block_size": 128, + "eval_stride" : 64, + "use_bert": true, + 
"do_train": true, + "do_eval": true, + "evaluation_strategy": "steps", + "per_device_train_batch_size": 32, + "per_device_eval_batch_size": 32, + "gradient_accumulation_steps": 2, + "eval_accumulation_steps": 8, + "dataloader_num_workers": 4, + "preprocessing_num_workers": 32, + "learning_rate": 1e-4, + "save_strategy": "steps", + "fp16": false, + "max_steps": 2000000, + "save_steps": 100000, + "eval_steps": 5000, + "logging_steps": 50, + "report_to": "wandb", + "is_decoder": false, + "remove_unused_columns": false, + "lookahead": null, + "one_sample_per_line": false, + "do_sentence_training": true, + "do_auxiliary_training": false, + "warmup_steps": 5000, + "adapter_warmup_steps": 0, + "adapter_lr_multiplier": 1, + "ngram_order": 1, + "non_punctuation_sample_ratio": 0.1, + "prediction_loss_only": true, + "use_auxiliary": false, + "ddp_timeout": 3600, + "use_subwords": true, + "num_hidden_layers": 3, + "custom_punctuation_file": "punctuation_xlmr_unk.txt", + "log_level": "info" +} \ No newline at end of file diff --git a/configs/xlmr_stratify_0.1_3layers_bs256_no_aux.json b/configs/xlmr_stratify_0.1_3layers_bs256_no_aux.json new file mode 100644 index 00000000..c8e4b5a9 --- /dev/null +++ b/configs/xlmr_stratify_0.1_3layers_bs256_no_aux.json @@ -0,0 +1,44 @@ +{ + "model_name_or_path": "xlm-roberta-base", + "output_dir": "xlmr-normal-bs256-noaux", + "train_text_path": "data/sentence/train.parquet", + "valid_text_path": "data/sentence/valid.parquet", + "block_size": 256, + "eval_stride" : 128, + "use_bert": true, + "do_train": true, + "do_eval": true, + "evaluation_strategy": "steps", + "per_device_train_batch_size": 32, + "per_device_eval_batch_size": 32, + "gradient_accumulation_steps": 2, + "eval_accumulation_steps": 8, + "dataloader_num_workers": 4, + "preprocessing_num_workers": 32, + "learning_rate": 1e-4, + "save_strategy": "steps", + "fp16": false, + "max_steps": 2000000, + "save_steps": 100000, + "eval_steps": 5000, + "logging_steps": 50, + "report_to": 
"wandb", + "is_decoder": false, + "remove_unused_columns": false, + "lookahead": null, + "one_sample_per_line": false, + "do_sentence_training": true, + "do_auxiliary_training": false, + "warmup_steps": 5000, + "adapter_warmup_steps": 0, + "adapter_lr_multiplier": 1, + "ngram_order": 1, + "non_punctuation_sample_ratio": 0.1, + "prediction_loss_only": true, + "use_auxiliary": false, + "ddp_timeout": 3600, + "use_subwords": true, + "num_hidden_layers": 3, + "custom_punctuation_file": "punctuation_xlmr_unk.txt", + "log_level": "info" +} \ No newline at end of file diff --git a/configs/xlmr_stratify_0.1_3layers_bs64_no_aux_400k.json b/configs/xlmr_stratify_0.1_3layers_bs64_no_aux_400k.json new file mode 100644 index 00000000..399ba139 --- /dev/null +++ b/configs/xlmr_stratify_0.1_3layers_bs64_no_aux_400k.json @@ -0,0 +1,44 @@ +{ + "model_name_or_path": "xlm-roberta-base", + "output_dir": "xlmr-normal-bs64-noaux", + "train_text_path": "data/sentence/train.parquet", + "valid_text_path": "data/sentence/valid.parquet", + "block_size": 64, + "eval_stride" : 32, + "use_bert": true, + "do_train": true, + "do_eval": true, + "evaluation_strategy": "steps", + "per_device_train_batch_size": 32, + "per_device_eval_batch_size": 32, + "gradient_accumulation_steps": 2, + "eval_accumulation_steps": 8, + "dataloader_num_workers": 4, + "preprocessing_num_workers": 32, + "learning_rate": 1e-4, + "save_strategy": "steps", + "fp16": false, + "max_steps": 400000, + "save_steps": 100000, + "eval_steps": 5000, + "logging_steps": 50, + "report_to": "wandb", + "is_decoder": false, + "remove_unused_columns": false, + "lookahead": null, + "one_sample_per_line": false, + "do_sentence_training": true, + "do_auxiliary_training": false, + "warmup_steps": 5000, + "adapter_warmup_steps": 0, + "adapter_lr_multiplier": 1, + "ngram_order": 1, + "non_punctuation_sample_ratio": 0.1, + "prediction_loss_only": true, + "use_auxiliary": false, + "ddp_timeout": 3600, + "use_subwords": true, + "num_hidden_layers": 
3, + "custom_punctuation_file": "punctuation_xlmr_unk.txt", + "log_level": "info" +} \ No newline at end of file diff --git a/configs/xlmr_stratify_0.1_3layers_stride128_400k.json b/configs/xlmr_stratify_0.1_3layers_stride128_400k.json new file mode 100644 index 00000000..3b151152 --- /dev/null +++ b/configs/xlmr_stratify_0.1_3layers_stride128_400k.json @@ -0,0 +1,44 @@ +{ + "model_name_or_path": "xlm-roberta-base", + "output_dir": "xlmr-normal", + "train_text_path": "data/sentence/train.parquet", + "valid_text_path": "data/sentence/valid.parquet", + "block_size": 512, + "eval_stride" : 128, + "use_bert": true, + "do_train": true, + "do_eval": true, + "evaluation_strategy": "steps", + "per_device_train_batch_size": 32, + "per_device_eval_batch_size": 32, + "gradient_accumulation_steps": 2, + "eval_accumulation_steps": 8, + "dataloader_num_workers": 4, + "preprocessing_num_workers": 32, + "learning_rate": 1e-4, + "save_strategy": "steps", + "fp16": false, + "max_steps": 400000, + "save_steps": 100000, + "eval_steps": 5000, + "logging_steps": 50, + "report_to": "wandb", + "is_decoder": false, + "remove_unused_columns": false, + "lookahead": null, + "one_sample_per_line": false, + "do_sentence_training": true, + "do_auxiliary_training": true, + "warmup_steps": 5000, + "adapter_warmup_steps": 0, + "adapter_lr_multiplier": 1, + "ngram_order": 1, + "non_punctuation_sample_ratio": 0.1, + "prediction_loss_only": true, + "use_auxiliary": true, + "ddp_timeout": 3600, + "use_subwords": true, + "num_hidden_layers": 3, + "custom_punctuation_file": "punctuation_xlmr_unk.txt", + "log_level": "info" +} \ No newline at end of file diff --git a/configs/xlmr_stratify_0.1_3layers_stride32_400k.json b/configs/xlmr_stratify_0.1_3layers_stride32_400k.json new file mode 100644 index 00000000..43859182 --- /dev/null +++ b/configs/xlmr_stratify_0.1_3layers_stride32_400k.json @@ -0,0 +1,44 @@ +{ + "model_name_or_path": "xlm-roberta-base", + "output_dir": "xlmr-normal", + "train_text_path": 
"data/sentence/train.parquet", + "valid_text_path": "data/sentence/valid.parquet", + "block_size": 512, + "eval_stride" : 32, + "use_bert": true, + "do_train": true, + "do_eval": true, + "evaluation_strategy": "steps", + "per_device_train_batch_size": 64, + "per_device_eval_batch_size": 1, + "gradient_accumulation_steps": 1, + "eval_accumulation_steps": 256, + "dataloader_num_workers": 1, + "preprocessing_num_workers": 32, + "learning_rate": 1e-4, + "save_strategy": "steps", + "fp16": false, + "max_steps": 400000, + "save_steps": 100000, + "eval_steps": 5000, + "logging_steps": 50, + "report_to": "wandb", + "is_decoder": false, + "remove_unused_columns": false, + "lookahead": null, + "one_sample_per_line": false, + "do_sentence_training": true, + "do_auxiliary_training": true, + "warmup_steps": 5000, + "adapter_warmup_steps": 0, + "adapter_lr_multiplier": 1, + "ngram_order": 1, + "non_punctuation_sample_ratio": 0.1, + "prediction_loss_only": true, + "use_auxiliary": true, + "ddp_timeout": 3600, + "use_subwords": true, + "num_hidden_layers": 3, + "custom_punctuation_file": "punctuation_xlmr_unk.txt", + "log_level": "info" +} \ No newline at end of file