We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
The number of data labels is 4. The eval loss increases while the train loss decreases, so I think the model is overfitting. The code is as below:
"""Fine-tune a SetFit classifier on Chinese JSON-lines data and evaluate it.

Reads `train_cn.txt` / `val_cn.txt` (one JSON object per line with
'text' and 'label' fields), samples 50 examples per class for few-shot
training, and trains a multilingual MiniLM sentence-transformer body
with a cosine-similarity contrastive loss.
"""
import ujson as json
from datasets import Dataset, DatasetDict
from sentence_transformers.losses import CosineSimilarityLoss
from setfit import SetFitModel, SetFitModelCardData, SetFitTrainer, sample_dataset


def convert_files_to_dataset(train_path, val_path):
    """Build a DatasetDict with 'train'/'validation' splits from two JSONL files.

    Args:
        train_path: path to the training JSON-lines file.
        val_path: path to the validation JSON-lines file.

    Returns:
        DatasetDict with keys 'train' and 'validation'.
    """

    def read_file(file_path):
        # Each line is one JSON object; iterate the file lazily instead of
        # materializing readlines() first.
        with open(file_path, 'r', encoding='utf-8') as f:
            return [json.loads(line.strip()) for line in f]

    train_dataset = Dataset.from_list(read_file(train_path))
    val_dataset = Dataset.from_list(read_file(val_path))
    return DatasetDict({'train': train_dataset, 'validation': val_dataset})


train_path = 'train_cn.txt'
val_path = 'val_cn.txt'
dataset = convert_files_to_dataset(train_path, val_path)

# Few-shot setup: keep only 50 examples per class for training.
train_dataset = sample_dataset(dataset["train"], num_samples=50)
print(train_dataset)
eval_dataset = dataset["validation"]
print(eval_dataset)

model = SetFitModel.from_pretrained(
    'sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2',
    model_card_data=SetFitModelCardData(
        language=['en', 'de', 'nl'],
    ),
)

# NOTE(review): SetFitTrainer is deprecated in favor of setfit.Trainer
# (see the DeprecationWarning in the log); kept here to preserve the
# original script's behavior and argument names.
trainer = SetFitTrainer(
    model=model,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    loss_class=CosineSimilarityLoss,
    num_iterations=20,  # 20 contrastive pairs generated per sample
    num_epochs=5,
)
trainer.train()
metrics = trainer.evaluate()
print(metrics)
Dataset({ features: ['text', 'label'], num_rows: 200 }) Dataset({ features: ['text', 'label'], num_rows: 40 }) /usr/local/matrix/conda3/envs/peft/lib/python3.8/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: resume_download is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use force_download=True. warnings.warn( model_head.pkl not found on HuggingFace Hub, initialising classification head with random weights. You should TRAIN this model on a downstream task to use it for predictions and inference. b.py:51: DeprecationWarning: SetFitTrainer has been deprecated and will be removed in v2.0.0 of SetFit. Please use Trainer instead. trainer = SetFitTrainer( Using evaluation_strategy="steps" as eval_steps is defined. Map: 100%|████████████████████████████████████████████████████████████████████████████| 200/200 [00:00<00:00, 12981.44 examples/s] ***** Running training ***** Num unique pairs = 8000 Batch size = 16 Num epochs = 5 Total optimization steps = 2500 0%| | 0/2500 [00:00<?, ?it/s] {'embedding_loss': 0.3173, 'learning_rate': 8e-08, 'epoch': 0.0} | 0/2500 [00:00<?, ?it/s] {'embedding_loss': 0.2875, 'learning_rate': 4.000000000000001e-06, 'epoch': 0.1} {'eval_embedding_loss': 0.236, 'learning_rate': 4.000000000000001e-06, 'epoch': 0.1} {'embedding_loss': 0.2662, 'learning_rate': 8.000000000000001e-06, 'epoch': 0.2} {'eval_embedding_loss': 0.2351, 'learning_rate': 8.000000000000001e-06, 'epoch': 0.2} {'embedding_loss': 0.2741, 'learning_rate': 1.2e-05, 'epoch': 0.3} {'eval_embedding_loss': 0.2319, 'learning_rate': 1.2e-05, 'epoch': 0.3} {'embedding_loss': 0.2747, 'learning_rate': 1.6000000000000003e-05, 'epoch': 0.4} {'eval_embedding_loss': 0.2331, 'learning_rate': 1.6000000000000003e-05, 'epoch': 0.4} {'embedding_loss': 0.196, 'learning_rate': 2e-05, 'epoch': 0.5} {'eval_embedding_loss': 0.2297, 'learning_rate': 2e-05, 'epoch': 0.5} {'embedding_loss': 0.1512, 
'learning_rate': 1.9555555555555557e-05, 'epoch': 0.6} {'eval_embedding_loss': 0.2387, 'learning_rate': 1.9555555555555557e-05, 'epoch': 0.6} {'embedding_loss': 0.0866, 'learning_rate': 1.9111111111111113e-05, 'epoch': 0.7} {'eval_embedding_loss': 0.248, 'learning_rate': 1.9111111111111113e-05, 'epoch': 0.7} {'embedding_loss': 0.0437, 'learning_rate': 1.866666666666667e-05, 'epoch': 0.8} {'eval_embedding_loss': 0.2427, 'learning_rate': 1.866666666666667e-05, 'epoch': 0.8} {'embedding_loss': 0.07, 'learning_rate': 1.8222222222222224e-05, 'epoch': 0.9} {'eval_embedding_loss': 0.2474, 'learning_rate': 1.8222222222222224e-05, 'epoch': 0.9} {'embedding_loss': 0.0332, 'learning_rate': 1.7777777777777777e-05, 'epoch': 1.0} {'eval_embedding_loss': 0.2587, 'learning_rate': 1.7777777777777777e-05, 'epoch': 1.0} {'embedding_loss': 0.0125, 'learning_rate': 1.7333333333333336e-05, 'epoch': 1.1} {'eval_embedding_loss': 0.2573, 'learning_rate': 1.7333333333333336e-05, 'epoch': 1.1} {'embedding_loss': 0.0023, 'learning_rate': 1.688888888888889e-05, 'epoch': 1.2} {'eval_embedding_loss': 0.2648, 'learning_rate': 1.688888888888889e-05, 'epoch': 1.2} {'embedding_loss': 0.0033, 'learning_rate': 1.6444444444444444e-05, 'epoch': 1.3} {'eval_embedding_loss': 0.2659, 'learning_rate': 1.6444444444444444e-05, 'epoch': 1.3} {'embedding_loss': 0.0011, 'learning_rate': 1.6000000000000003e-05, 'epoch': 1.4} {'eval_embedding_loss': 0.2692, 'learning_rate': 1.6000000000000003e-05, 'epoch': 1.4} {'embedding_loss': 0.0007, 'learning_rate': 1.555555555555556e-05, 'epoch': 1.5} {'eval_embedding_loss': 0.2687, 'learning_rate': 1.555555555555556e-05, 'epoch': 1.5} {'embedding_loss': 0.001, 'learning_rate': 1.5111111111111112e-05, 'epoch': 1.6} {'eval_embedding_loss': 0.2739, 'learning_rate': 1.5111111111111112e-05, 'epoch': 1.6} {'embedding_loss': 0.0012, 'learning_rate': 1.4666666666666666e-05, 'epoch': 1.7} {'eval_embedding_loss': 0.2707, 'learning_rate': 1.4666666666666666e-05, 'epoch': 1.7} 
{'embedding_loss': 0.0005, 'learning_rate': 1.4222222222222224e-05, 'epoch': 1.8} {'eval_embedding_loss': 0.2684, 'learning_rate': 1.4222222222222224e-05, 'epoch': 1.8} {'embedding_loss': 0.0006, 'learning_rate': 1.377777777777778e-05, 'epoch': 1.9} {'eval_embedding_loss': 0.2756, 'learning_rate': 1.377777777777778e-05, 'epoch': 1.9} {'embedding_loss': 0.0003, 'learning_rate': 1.3333333333333333e-05, 'epoch': 2.0} {'eval_embedding_loss': 0.2698, 'learning_rate': 1.3333333333333333e-05, 'epoch': 2.0} {'embedding_loss': 0.0007, 'learning_rate': 1.288888888888889e-05, 'epoch': 2.1} {'eval_embedding_loss': 0.2745, 'learning_rate': 1.288888888888889e-05, 'epoch': 2.1} {'embedding_loss': 0.0004, 'learning_rate': 1.2444444444444446e-05, 'epoch': 2.2} {'eval_embedding_loss': 0.2771, 'learning_rate': 1.2444444444444446e-05, 'epoch': 2.2} {'embedding_loss': 0.0005, 'learning_rate': 1.2e-05, 'epoch': 2.3} {'eval_embedding_loss': 0.2742, 'learning_rate': 1.2e-05, 'epoch': 2.3} {'embedding_loss': 0.0007, 'learning_rate': 1.1555555555555556e-05, 'epoch': 2.4} {'eval_embedding_loss': 0.2719, 'learning_rate': 1.1555555555555556e-05, 'epoch': 2.4} {'embedding_loss': 0.0002, 'learning_rate': 1.1111111111111113e-05, 'epoch': 2.5} {'eval_embedding_loss': 0.2782, 'learning_rate': 1.1111111111111113e-05, 'epoch': 2.5} {'embedding_loss': 0.0002, 'learning_rate': 1.0666666666666667e-05, 'epoch': 2.6} {'eval_embedding_loss': 0.2721, 'learning_rate': 1.0666666666666667e-05, 'epoch': 2.6} {'embedding_loss': 0.0002, 'learning_rate': 1.0222222222222223e-05, 'epoch': 2.7} {'eval_embedding_loss': 0.2743, 'learning_rate': 1.0222222222222223e-05, 'epoch': 2.7} {'embedding_loss': 0.0003, 'learning_rate': 9.777777777777779e-06, 'epoch': 2.8} {'eval_embedding_loss': 0.2822, 'learning_rate': 9.777777777777779e-06, 'epoch': 2.8} {'embedding_loss': 0.0003, 'learning_rate': 9.333333333333334e-06, 'epoch': 2.9} {'eval_embedding_loss': 0.2758, 'learning_rate': 9.333333333333334e-06, 'epoch': 2.9} 
{'embedding_loss': 0.0004, 'learning_rate': 8.888888888888888e-06, 'epoch': 3.0} {'eval_embedding_loss': 0.2764, 'learning_rate': 8.888888888888888e-06, 'epoch': 3.0} {'embedding_loss': 0.0004, 'learning_rate': 8.444444444444446e-06, 'epoch': 3.1} {'eval_embedding_loss': 0.2798, 'learning_rate': 8.444444444444446e-06, 'epoch': 3.1} {'embedding_loss': 0.0002, 'learning_rate': 8.000000000000001e-06, 'epoch': 3.2} {'eval_embedding_loss': 0.2769, 'learning_rate': 8.000000000000001e-06, 'epoch': 3.2} {'embedding_loss': 0.0004, 'learning_rate': 7.555555555555556e-06, 'epoch': 3.3} {'eval_embedding_loss': 0.2766, 'learning_rate': 7.555555555555556e-06, 'epoch': 3.3} {'embedding_loss': 0.0002, 'learning_rate': 7.111111111111112e-06, 'epoch': 3.4} {'eval_embedding_loss': 0.2833, 'learning_rate': 7.111111111111112e-06, 'epoch': 3.4} {'embedding_loss': 0.0002, 'learning_rate': 6.666666666666667e-06, 'epoch': 3.5} {'eval_embedding_loss': 0.2755, 'learning_rate': 6.666666666666667e-06, 'epoch': 3.5}```
resume_download
force_download=True
SetFitTrainer
Trainer
evaluation_strategy="steps"
eval_steps
The text was updated successfully, but these errors were encountered:
I would decrease the num_iterations parameter to 5 and see what the behaviour is there.
num_iterations
5
Sorry, something went wrong.
No branches or pull requests
The number of data labels is 4. The eval loss increases while the train loss decreases, so I think the model is overfitting. The code is as below:
code
Dataset({
features: ['text', 'label'],
num_rows: 200
})
Dataset({
features: ['text', 'label'],
num_rows: 40
})
/usr/local/matrix/conda3/envs/peft/lib/python3.8/site-packages/huggingface_hub/file_download.py:1132: FutureWarning:
resume_download
is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use force_download=True
.warnings.warn(
model_head.pkl not found on HuggingFace Hub, initialising classification head with random weights. You should TRAIN this model on a downstream task to use it for predictions and inference.
b.py:51: DeprecationWarning:
SetFitTrainer
has been deprecated and will be removed in v2.0.0 of SetFit. Please use Trainer
instead.trainer = SetFitTrainer(
Using
evaluation_strategy="steps"
as eval_steps
is defined.Map: 100%|████████████████████████████████████████████████████████████████████████████| 200/200 [00:00<00:00, 12981.44 examples/s]
***** Running training *****
Num unique pairs = 8000
Batch size = 16
Num epochs = 5
Total optimization steps = 2500
0%| | 0/2500 [00:00<?, ?it/s]
{'embedding_loss': 0.3173, 'learning_rate': 8e-08, 'epoch': 0.0} | 0/2500 [00:00<?, ?it/s]
{'embedding_loss': 0.2875, 'learning_rate': 4.000000000000001e-06, 'epoch': 0.1}
{'eval_embedding_loss': 0.236, 'learning_rate': 4.000000000000001e-06, 'epoch': 0.1}
{'embedding_loss': 0.2662, 'learning_rate': 8.000000000000001e-06, 'epoch': 0.2}
{'eval_embedding_loss': 0.2351, 'learning_rate': 8.000000000000001e-06, 'epoch': 0.2}
{'embedding_loss': 0.2741, 'learning_rate': 1.2e-05, 'epoch': 0.3}
{'eval_embedding_loss': 0.2319, 'learning_rate': 1.2e-05, 'epoch': 0.3}
{'embedding_loss': 0.2747, 'learning_rate': 1.6000000000000003e-05, 'epoch': 0.4}
{'eval_embedding_loss': 0.2331, 'learning_rate': 1.6000000000000003e-05, 'epoch': 0.4}
{'embedding_loss': 0.196, 'learning_rate': 2e-05, 'epoch': 0.5}
{'eval_embedding_loss': 0.2297, 'learning_rate': 2e-05, 'epoch': 0.5}
{'embedding_loss': 0.1512, 'learning_rate': 1.9555555555555557e-05, 'epoch': 0.6}
{'eval_embedding_loss': 0.2387, 'learning_rate': 1.9555555555555557e-05, 'epoch': 0.6}
{'embedding_loss': 0.0866, 'learning_rate': 1.9111111111111113e-05, 'epoch': 0.7}
{'eval_embedding_loss': 0.248, 'learning_rate': 1.9111111111111113e-05, 'epoch': 0.7}
{'embedding_loss': 0.0437, 'learning_rate': 1.866666666666667e-05, 'epoch': 0.8}
{'eval_embedding_loss': 0.2427, 'learning_rate': 1.866666666666667e-05, 'epoch': 0.8}
{'embedding_loss': 0.07, 'learning_rate': 1.8222222222222224e-05, 'epoch': 0.9}
{'eval_embedding_loss': 0.2474, 'learning_rate': 1.8222222222222224e-05, 'epoch': 0.9}
{'embedding_loss': 0.0332, 'learning_rate': 1.7777777777777777e-05, 'epoch': 1.0}
{'eval_embedding_loss': 0.2587, 'learning_rate': 1.7777777777777777e-05, 'epoch': 1.0}
{'embedding_loss': 0.0125, 'learning_rate': 1.7333333333333336e-05, 'epoch': 1.1}
{'eval_embedding_loss': 0.2573, 'learning_rate': 1.7333333333333336e-05, 'epoch': 1.1}
{'embedding_loss': 0.0023, 'learning_rate': 1.688888888888889e-05, 'epoch': 1.2}
{'eval_embedding_loss': 0.2648, 'learning_rate': 1.688888888888889e-05, 'epoch': 1.2}
{'embedding_loss': 0.0033, 'learning_rate': 1.6444444444444444e-05, 'epoch': 1.3}
{'eval_embedding_loss': 0.2659, 'learning_rate': 1.6444444444444444e-05, 'epoch': 1.3}
{'embedding_loss': 0.0011, 'learning_rate': 1.6000000000000003e-05, 'epoch': 1.4}
{'eval_embedding_loss': 0.2692, 'learning_rate': 1.6000000000000003e-05, 'epoch': 1.4}
{'embedding_loss': 0.0007, 'learning_rate': 1.555555555555556e-05, 'epoch': 1.5}
{'eval_embedding_loss': 0.2687, 'learning_rate': 1.555555555555556e-05, 'epoch': 1.5}
{'embedding_loss': 0.001, 'learning_rate': 1.5111111111111112e-05, 'epoch': 1.6}
{'eval_embedding_loss': 0.2739, 'learning_rate': 1.5111111111111112e-05, 'epoch': 1.6}
{'embedding_loss': 0.0012, 'learning_rate': 1.4666666666666666e-05, 'epoch': 1.7}
{'eval_embedding_loss': 0.2707, 'learning_rate': 1.4666666666666666e-05, 'epoch': 1.7}
{'embedding_loss': 0.0005, 'learning_rate': 1.4222222222222224e-05, 'epoch': 1.8}
{'eval_embedding_loss': 0.2684, 'learning_rate': 1.4222222222222224e-05, 'epoch': 1.8}
{'embedding_loss': 0.0006, 'learning_rate': 1.377777777777778e-05, 'epoch': 1.9}
{'eval_embedding_loss': 0.2756, 'learning_rate': 1.377777777777778e-05, 'epoch': 1.9}
{'embedding_loss': 0.0003, 'learning_rate': 1.3333333333333333e-05, 'epoch': 2.0}
{'eval_embedding_loss': 0.2698, 'learning_rate': 1.3333333333333333e-05, 'epoch': 2.0}
{'embedding_loss': 0.0007, 'learning_rate': 1.288888888888889e-05, 'epoch': 2.1}
{'eval_embedding_loss': 0.2745, 'learning_rate': 1.288888888888889e-05, 'epoch': 2.1}
{'embedding_loss': 0.0004, 'learning_rate': 1.2444444444444446e-05, 'epoch': 2.2}
{'eval_embedding_loss': 0.2771, 'learning_rate': 1.2444444444444446e-05, 'epoch': 2.2}
{'embedding_loss': 0.0005, 'learning_rate': 1.2e-05, 'epoch': 2.3}
{'eval_embedding_loss': 0.2742, 'learning_rate': 1.2e-05, 'epoch': 2.3}
{'embedding_loss': 0.0007, 'learning_rate': 1.1555555555555556e-05, 'epoch': 2.4}
{'eval_embedding_loss': 0.2719, 'learning_rate': 1.1555555555555556e-05, 'epoch': 2.4}
{'embedding_loss': 0.0002, 'learning_rate': 1.1111111111111113e-05, 'epoch': 2.5}
{'eval_embedding_loss': 0.2782, 'learning_rate': 1.1111111111111113e-05, 'epoch': 2.5}
{'embedding_loss': 0.0002, 'learning_rate': 1.0666666666666667e-05, 'epoch': 2.6}
{'eval_embedding_loss': 0.2721, 'learning_rate': 1.0666666666666667e-05, 'epoch': 2.6}
{'embedding_loss': 0.0002, 'learning_rate': 1.0222222222222223e-05, 'epoch': 2.7}
{'eval_embedding_loss': 0.2743, 'learning_rate': 1.0222222222222223e-05, 'epoch': 2.7}
{'embedding_loss': 0.0003, 'learning_rate': 9.777777777777779e-06, 'epoch': 2.8}
{'eval_embedding_loss': 0.2822, 'learning_rate': 9.777777777777779e-06, 'epoch': 2.8}
{'embedding_loss': 0.0003, 'learning_rate': 9.333333333333334e-06, 'epoch': 2.9}
{'eval_embedding_loss': 0.2758, 'learning_rate': 9.333333333333334e-06, 'epoch': 2.9}
{'embedding_loss': 0.0004, 'learning_rate': 8.888888888888888e-06, 'epoch': 3.0}
{'eval_embedding_loss': 0.2764, 'learning_rate': 8.888888888888888e-06, 'epoch': 3.0}
{'embedding_loss': 0.0004, 'learning_rate': 8.444444444444446e-06, 'epoch': 3.1}
{'eval_embedding_loss': 0.2798, 'learning_rate': 8.444444444444446e-06, 'epoch': 3.1}
{'embedding_loss': 0.0002, 'learning_rate': 8.000000000000001e-06, 'epoch': 3.2}
{'eval_embedding_loss': 0.2769, 'learning_rate': 8.000000000000001e-06, 'epoch': 3.2}
{'embedding_loss': 0.0004, 'learning_rate': 7.555555555555556e-06, 'epoch': 3.3}
{'eval_embedding_loss': 0.2766, 'learning_rate': 7.555555555555556e-06, 'epoch': 3.3}
{'embedding_loss': 0.0002, 'learning_rate': 7.111111111111112e-06, 'epoch': 3.4}
{'eval_embedding_loss': 0.2833, 'learning_rate': 7.111111111111112e-06, 'epoch': 3.4}
{'embedding_loss': 0.0002, 'learning_rate': 6.666666666666667e-06, 'epoch': 3.5}
{'eval_embedding_loss': 0.2755, 'learning_rate': 6.666666666666667e-06, 'epoch': 3.5}```
The text was updated successfully, but these errors were encountered: