You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
File "/workspace/global_venv/lib/python3.11/site-packages/autotrain/trainers/clm/train_clm_sft.py", line 16, in train
train_data, valid_data = utils.process_data_with_chat_template(config, tokenizer, train_data, valid_data)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/workspace/global_venv/lib/python3.11/site-packages/autotrain/trainers/clm/utils.py", line 639, in process_data_with_chat_template
train_data = train_data.map(
^^^^^^^^^^^^^^^
File "/workspace/global_venv/lib/python3.11/site-packages/datasets/arrow_dataset.py", line 560, in wrapper
out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/workspace/global_venv/lib/python3.11/site-packages/datasets/arrow_dataset.py", line 3055, in map
for rank, done, content in Dataset._map_single(**dataset_kwargs):
File "/workspace/global_venv/lib/python3.11/site-packages/datasets/arrow_dataset.py", line 3428, in _map_single
example = apply_function_on_filtered_inputs(example, i, offset=offset)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/workspace/global_venv/lib/python3.11/site-packages/datasets/arrow_dataset.py", line 3320, in apply_function_on_filtered_inputs
processed_inputs = function(*fn_args, *additional_args, **fn_kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/workspace/global_venv/lib/python3.11/site-packages/autotrain/trainers/clm/utils.py", line 346, in apply_chat_template
example[config.text_column] = tokenizer.apply_chat_template(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/workspace/global_venv/lib/python3.11/site-packages/transformers/tokenization_utils_base.py", line 1871, in apply_chat_template
rendered_chat = compiled_template.render(
^^^^^^^^^^^^^^^^^^^^^^^^^
File "/workspace/global_venv/lib/python3.11/site-packages/jinja2/environment.py", line 1304, in render
self.environment.handle_exception()
File "/workspace/global_venv/lib/python3.11/site-packages/jinja2/environment.py", line 939, in handle_exception
raise rewrite_traceback_stack(source=source)
File "<template>", line 2, in top-level template code
jinja2.exceptions.UndefinedError: 'str object' has no attribute 'role'
ERROR | 2024-12-13 15:54:50 | autotrain.trainers.common:wrapper:216 - 'str object' has no attribute 'role'
Additional Information
I tried to fine-tune a model with the fineweb-2 dataset, but I got this error and was not able to fix it by changing the parameters.
Please help me.
The text was updated successfully, but these errors were encountered:
Prerequisites
Backend
Local
Interface Used
CLI
CLI Command
# Reproduction script as reported. The names model, sub_dataset, target_modules,
# model_name, hf_username and hf_token are not defined in this snippet.
params = LLMTrainingParams(
model=model,
data_path="HuggingFaceFW/fineweb-edu-score-2",
# NOTE(review): the traceback in this report fails inside
# tokenizer.apply_chat_template(...) while assigning example[config.text_column],
# with "'str object' has no attribute 'role'". Presumably this setting makes the
# SFT trainer render the text column through the tokenizer's chat template, which
# expects role/content messages rather than a plain string -- confirm against the
# AutoTrain documentation.
chat_template="tokenizer",
# Column that the failing apply_chat_template step reads and overwrites
# (example[config.text_column] in the traceback).
text_column="text",
train_split=f"{sub_dataset}:train",
trainer="sft",
epochs=3,
lr=1e-5,
peft=True,
quantization="int4",
target_modules=target_modules,
padding="right",
optimizer="adamw_torch",
scheduler="cosine",
gradient_accumulation=8,
mixed_precision="bf16",
merge_adapter=True,
project_name=model_name,
log="tensorboard",
push_to_hub=True,
username=hf_username,
token=hf_token,
model_max_length=4096,
auto_find_batch_size=True,
)
# The issue form lists the backend as "Local".
backend = "local"
project = AutoTrainProject(params=params, backend=backend, process=True)
project.create()
UI Screenshots & Parameters
No response
Error Logs
File "/workspace/global_venv/lib/python3.11/site-packages/autotrain/trainers/clm/train_clm_sft.py", line 16, in train
train_data, valid_data = utils.process_data_with_chat_template(config, tokenizer, train_data, valid_data)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/workspace/global_venv/lib/python3.11/site-packages/autotrain/trainers/clm/utils.py", line 639, in process_data_with_chat_template
train_data = train_data.map(
^^^^^^^^^^^^^^^
File "/workspace/global_venv/lib/python3.11/site-packages/datasets/arrow_dataset.py", line 560, in wrapper
out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/workspace/global_venv/lib/python3.11/site-packages/datasets/arrow_dataset.py", line 3055, in map
for rank, done, content in Dataset._map_single(**dataset_kwargs):
File "/workspace/global_venv/lib/python3.11/site-packages/datasets/arrow_dataset.py", line 3428, in _map_single
example = apply_function_on_filtered_inputs(example, i, offset=offset)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/workspace/global_venv/lib/python3.11/site-packages/datasets/arrow_dataset.py", line 3320, in apply_function_on_filtered_inputs
processed_inputs = function(*fn_args, *additional_args, **fn_kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/workspace/global_venv/lib/python3.11/site-packages/autotrain/trainers/clm/utils.py", line 346, in apply_chat_template
example[config.text_column] = tokenizer.apply_chat_template(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/workspace/global_venv/lib/python3.11/site-packages/transformers/tokenization_utils_base.py", line 1871, in apply_chat_template
rendered_chat = compiled_template.render(
^^^^^^^^^^^^^^^^^^^^^^^^^
File "/workspace/global_venv/lib/python3.11/site-packages/jinja2/environment.py", line 1304, in render
self.environment.handle_exception()
File "/workspace/global_venv/lib/python3.11/site-packages/jinja2/environment.py", line 939, in handle_exception
raise rewrite_traceback_stack(source=source)
File "<template>", line 2, in top-level template code
jinja2.exceptions.UndefinedError: 'str object' has no attribute 'role'
ERROR | 2024-12-13 15:54:50 | autotrain.trainers.common:wrapper:216 - 'str object' has no attribute 'role'
Additional Information
I tried to fine-tune a model with the fineweb-2 dataset, but I got this error and was not able to fix it by changing the parameters.
Please help me.
The text was updated successfully, but these errors were encountered: