diff --git a/configs/peft/lora_xlmr.json b/configs/peft/lora_xlmr.json
new file mode 100644
index 00000000..8b9a3ab3
--- /dev/null
+++ b/configs/peft/lora_xlmr.json
@@ -0,0 +1,38 @@
+{
+    "model_name_or_path": "xlm-roberta-base",
+    "output_dir": "xlmr-base-3_lora-v2_ep30_s10k",
+    "block_size": 256,
+    "eval_stride": 128,
+    "do_train": true,
+    "do_eval": true,
+    "per_device_train_batch_size": 64,
+    "per_device_eval_batch_size": 32,
+    "gradient_accumulation_steps": 1,
+    "eval_accumulation_steps": 8,
+    "dataloader_num_workers": 1,
+    "preprocessing_num_workers": 1,
+    "learning_rate": 3e-4,
+    "fp16": false,
+    "num_train_epochs": 30,
+    "logging_steps": 50,
+    "report_to": "wandb",
+    "wandb_project": "sentence-peft-v2",
+    "save_steps": 100000000,
+    "remove_unused_columns": false,
+    "one_sample_per_line": false,
+    "do_sentence_training": true,
+    "do_auxiliary_training": false,
+    "warmup_ratio": 0.1,
+    "non_punctuation_sample_ratio": null,
+    "prediction_loss_only": true,
+    "use_auxiliary": false,
+    "ddp_timeout": 3600,
+    "use_subwords": true,
+    "custom_punctuation_file": "punctuation_xlmr_unk.txt",
+    "log_level": "warning",
+    "adapter_config": "lora[r=16,alpha=32,intermediate_lora=True]",
+    "weight_decay": 0.0,
+    "auxiliary_remove_prob": 0.0,
+    "train_adapter": true,
+    "subsample": 10000
+}
\ No newline at end of file
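
For illustration only (not part of the diff): the `adapter_config` field above is a bracketed config string, `lora[r=16,alpha=32,intermediate_lora=True]`. The sketch below shows one way such a string could be split into a method name and keyword arguments after loading the JSON config; the helper `parse_adapter_config` is hypothetical and the file path assumes the repo root, since the actual project presumably delegates this parsing to its adapter library.

```python
# Minimal sketch, assuming a "name[k1=v1,k2=v2,...]" adapter config string.
import json
import re


def parse_adapter_config(spec: str):
    """Split 'name[k1=v1,k2=v2]' into (name, kwargs) with basic type coercion."""
    match = re.fullmatch(r"(\w+)(?:\[(.*)\])?", spec)
    if match is None:
        raise ValueError(f"Unrecognized adapter config spec: {spec}")
    name, arg_str = match.group(1), match.group(2) or ""
    kwargs = {}
    for pair in filter(None, (p.strip() for p in arg_str.split(","))):
        key, value = pair.split("=", 1)
        if value in ("True", "False"):
            kwargs[key] = value == "True"
        else:
            try:
                kwargs[key] = int(value)
            except ValueError:
                kwargs[key] = value  # leave non-integer values as strings
    return name, kwargs


if __name__ == "__main__":
    with open("configs/peft/lora_xlmr.json") as f:
        cfg = json.load(f)
    print(parse_adapter_config(cfg["adapter_config"]))
    # -> ('lora', {'r': 16, 'alpha': 32, 'intermediate_lora': True})
```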