System Info / 系統信息
cuda = 12.1, torch = 2.3.0, transformers = 4.40.0; GPU: Tesla V100-PCIE-32GB
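A quick way to confirm that the active environment really matches these versions (standard `torch`/`transformers` attributes only, nothing specific to this project):

```python
# Print the versions the issue reports, from inside the active environment.
import torch
import transformers

print(torch.__version__)         # reported: 2.3.0
print(torch.version.cuda)        # reported: 12.1
print(transformers.__version__)  # reported: 4.40.0
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))  # reported: Tesla V100-PCIE-32GB
```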
Who can help? / 谁可以帮助到您?
@Btlmd
Information / 问题信息
Reproduction / 复现过程
╭──────────────────────────────────────────────────────────────────────────────────────────────── Traceback (most recent call last) ─────────────────────────────────────────────────────────────────────────────────────────────────╮
│ /home/cug100/d2l-zh/pytorch/hgh/pycharmProject/base-model-test/LLM/ChatGLM3-main/finetune_demo/finetune_hf.py:540 in main │
│ │
│ 537 │ ) │
│ 538 │ │
│ 539 │ if auto_resume_from_checkpoint.upper() == "" or auto_resume_from_checkpoint is None: │
│ ❱ 540 │ │ trainer.train() │
│ 541 │ else: │
│ 542 │ │ def do_rf_checkpoint(sn): │
│ 543 │ │ │ model.gradient_checkpointing_enable() │
│ │
│ /home/cug100/anaconda3/envs/hgh-nlp-envs-llm/lib/python3.10/site-packages/transformers/trainer.py:1859 in train │
│ │
│ 1856 │ │ │ finally: │
│ 1857 │ │ │ │ hf_hub_utils.enable_progress_bars() │
│ 1858 │ │ else: │
│ ❱ 1859 │ │ │ return inner_training_loop( │
│ 1860 │ │ │ │ args=args, │
│ 1861 │ │ │ │ resume_from_checkpoint=resume_from_checkpoint, │
│ 1862 │ │ │ │ trial=trial, │
│ │
│ /home/cug100/anaconda3/envs/hgh-nlp-envs-llm/lib/python3.10/site-packages/transformers/trainer.py:1960 in _inner_training_loop │
│ │
│ 1957 │ │ │ self.optimizer, self.lr_scheduler = deepspeed_init(self, num_training_steps= │
│ 1958 │ │ │
│ 1959 │ │ if not delay_optimizer_creation: │
│ ❱ 1960 │ │ │ self.create_optimizer_and_scheduler(num_training_steps=max_steps) │
│ 1961 │ │ │
│ 1962 │ │ self.state = TrainerState() │
│ 1963 │ │ self.state.is_hyper_param_search = trial is not None │
│ │
│ /home/cug100/anaconda3/envs/hgh-nlp-envs-llm/lib/python3.10/site-packages/transformers/trainer.py:992 in create_optimizer_and_scheduler │
│ │
│ 989 │ │ Trainer's init through `optimizers`, or subclass and override this method (or `c │
│ 990 │ │ `create_scheduler`) in a subclass. │
│ 991 │ │ """ │
│ ❱ 992 │ │ self.create_optimizer() │
│ 993 │ │ if IS_SAGEMAKER_MP_POST_1_10 and smp.state.cfg.fp16: │
│ 994 │ │ │ # If smp >= 1.10 and fp16 is enabled, we unwrap the optimizer │
│ 995 │ │ │ optimizer = self.optimizer.optimizer │
│ │
│ /home/cug100/anaconda3/envs/hgh-nlp-envs-llm/lib/python3.10/site-packages/transformers/trainer.py:1049 in create_optimizer │
│ │
│ 1046 │ │ │ if "optimizer_dict" in optimizer_kwargs: │
│ 1047 │ │ │ │ optimizer_grouped_parameters = optimizer_kwargs.pop("optimizer_dict") │
│ 1048 │ │ │ │
│ ❱ 1049 │ │ │ self.optimizer = optimizer_cls(optimizer_grouped_parameters, **optimizer_kwa │
│ 1050 │ │ │ if optimizer_cls.__name__ == "Adam8bit": │
│ 1051 │ │ │ │ import bitsandbytes │
│ 1052 │
│ │
│ /home/cug100/anaconda3/envs/hgh-nlp-envs-llm/lib/python3.10/site-packages/torch/optim/adamw.py:53 in __init__ │
│ │
│ 50 │ │ │ differentiable=differentiable, │
│ 51 │ │ │ fused=fused, │
│ 52 │ │ ) │
│ ❱ 53 │ │ super().__init__(params, defaults) │
│ 54 │ │ │
│ 55 │ │ if fused: │
│ 56 │ │ │ if differentiable: │
│ │
│ /home/cug100/anaconda3/envs/hgh-nlp-envs-llm/lib/python3.10/site-packages/torch/optim/optimizer.py:284 in __init__ │
│ │
│ 281 │ │ │ param_groups = [{'params': param_groups}] │
│ 282 │ │ │
│ 283 │ │ for param_group in param_groups: │
│ ❱ 284 │ │ │ self.add_param_group(cast(dict, param_group)) │
│ 285 │ │ │
│ 286 │ │ # Allows _cuda_graph_capture_health_check to rig a poor man's TORCH_WARN_ONCE in │
│ 287 │ │ # which I don't think exists │
│ │
│ /home/cug100/anaconda3/envs/hgh-nlp-envs-llm/lib/python3.10/site-packages/torch/_compile.py:22 in inner │
│ │
│ 19 │ │ │
│ 20 │ │ @functools.wraps(fn) │
│ 21 │ │ def inner(*args, **kwargs): │
│ ❱ 22 │ │ │ import torch._dynamo │
│ 23 │ │ │ │
│ 24 │ │ │ return torch._dynamo.disable(fn, recursive)(*args, **kwargs) │
│ 25 │
│ │
│ /home/cug100/anaconda3/envs/hgh-nlp-envs-llm/lib/python3.10/site-packages/torch/_dynamo/__init__.py:2 in <module> │
│ │
│ 1 import torch │
│ ❱ 2 from . import convert_frame, eval_frame, resume_execution │
│ 3 from .backends.registry import list_backends, lookup_backend, register_backend │
│ 4 from .callback import callback_handler, on_compile_end, on_compile_start │
│ 5 from .code_context import code_context │
│ │
│ /home/cug100/anaconda3/envs/hgh-nlp-envs-llm/lib/python3.10/site-packages/torch/_dynamo/convert_frame.py:40 in <module> │
│ │
│ 37 from torch.utils._python_dispatch import _disable_current_modes │
│ 38 from torch.utils._traceback import format_traceback_short │
│ 39 │
│ ❱ 40 from . import config, exc, trace_rules │
│ 41 from .backends.registry import CompilerFn │
│ 42 from .bytecode_analysis import remove_dead_code, remove_pointless_jumps │
│ 43 from .bytecode_transformation import ( │
│ │
│ /home/cug100/anaconda3/envs/hgh-nlp-envs-llm/lib/python3.10/site-packages/torch/_dynamo/trace_rules.py:50 in <module> │
│ │
│ 47 from ..utils import _config_module │
│ 48 from .utils import getfile, hashable, NP_SUPPORTED_MODULES, unwrap_if_wrapper │
│ 49 │
│ ❱ 50 from .variables import ( │
│ 51 │ BuiltinVariable, │
│ 52 │ FunctorchHigherOrderVariable, │
│ 53 │ NestedUserFunctionVariable, │
│ │
│ /home/cug100/anaconda3/envs/hgh-nlp-envs-llm/lib/python3.10/site-packages/torch/_dynamo/variables/__init__.py:34 in <module> │
│ │
│ 31 │ UserFunctionVariable, │
│ 32 │ UserMethodVariable, │
│ 33 ) │
│ ❱ 34 from .higher_order_ops import ( │
│ 35 │ FunctorchHigherOrderVariable, │
│ 36 │ TorchHigherOrderOperatorVariable, │
│ 37 ) │
│ │
│ /home/cug100/anaconda3/envs/hgh-nlp-envs-llm/lib/python3.10/site-packages/torch/_dynamo/variables/higher_order_ops.py:13 in <module> │
│ │
│ 10 import torch._C │
│ 11 import torch.fx │
│ 12 import torch.nn │
│ ❱ 13 import torch.onnx.operators │
│ 14 from torch._dynamo.utils import deepcopy_to_fake_tensor, get_fake_value, get_real_value │
│ 15 from torch._dynamo.variables.base import VariableTracker │
│ 16 from torch._dynamo.variables.builtin import BuiltinVariable │
│ │
│ /home/cug100/anaconda3/envs/hgh-nlp-envs-llm/lib/python3.10/site-packages/torch/onnx/__init__.py:46 in <module> │
│ │
│ 43 │ unregister_custom_op_symbolic, │
│ 44 ) │
│ 45 │
│ ❱ 46 from ._internal.exporter import ( # usort:skip. needs to be last to avoid circular impo │
│ 47 │ DiagnosticOptions, │
│ 48 │ ExportOptions, │
│ 49 │ ONNXProgram, │
│ │
│ /home/cug100/anaconda3/envs/hgh-nlp-envs-llm/lib/python3.10/site-packages/torch/onnx/_internal/exporter/__init__.py:13 in <module> │
│ │
│ 10 ] │
│ 11 │
│ 12 from . import _testing as testing, _verification as verification │
│ ❱ 13 from ._analysis import analyze │
│ 14 from ._compat import export_compat │
│ 15 from ._core import export, exported_program_to_ir │
│ 16 from ._onnx_program import ONNXProgram │
│ │
│ /home/cug100/anaconda3/envs/hgh-nlp-envs-llm/lib/python3.10/site-packages/torch/onnx/_internal/exporter/_analysis.py:14 in <module> │
│ │
│ 11 from typing import TYPE_CHECKING │
│ 12 │
│ 13 import torch │
│ ❱ 14 import torch._export.serde.schema │
│ 15 from torch.export import graph_signature │
│ 16 from torch.onnx._internal.exporter import _dispatching, _registration │
│ 17 │
│ │
│ /home/cug100/anaconda3/envs/hgh-nlp-envs-llm/lib/python3.10/site-packages/torch/_export/__init__.py:40 in <module> │
│ │
│ 37 from torch._subclasses.functional_tensor import FunctionalTensor │
│ 38 from torch._utils_internal import log_export_usage │
│ 39 from torch.export._tree_utils import reorder_kwargs │
│ ❱ 40 from torch.export._unlift import _create_stateful_graph_module │
│ 41 from torch.export.dynamic_shapes import ( │
│ 42 │ _process_constraints, │
│ 43 │ _process_dynamic_shapes, │
│ │
│ /home/cug100/anaconda3/envs/hgh-nlp-envs-llm/lib/python3.10/site-packages/torch/export/_unlift.py:20 in <module> │
│ │
│ 17 ) │
│ 18 │
│ 19 │
│ ❱ 20 @torch._dynamo.disable │
│ 21 def _check_input_constraints_pre_hook(self, *args, **kwargs): │
│ 22 │ flat_args_with_path, received_spec = pytree.tree_flatten_with_path(args) │
│ 23 │
╰────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
AttributeError: partially initialized module 'torch._dynamo' has no attribute 'disable' (most likely due to a circular import)
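The error says `torch._dynamo` was only partially initialized when `torch/export/_unlift.py` tried to read `disable` from it, i.e. the attribute lookup ran while the module's own import was still in progress. A minimal check to see whether the broken import chain reproduces outside the Trainer (plain imports, no project code):

```python
# If these imports succeed and `disable` is present, the torch install is
# healthy and the failure is specific to the training run; if they raise the
# same AttributeError, the installation itself is inconsistent (for example
# stale .pyc files or mixed package versions).
import torch
import torch._dynamo

print(torch.__file__)                     # which torch install is being imported
print(hasattr(torch._dynamo, "disable"))  # True on a healthy install
```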
Expected behavior / 期待表现
When running the example code, the earlier output matches the example:
...'反': 54955 -> 54955
'差': 55342 -> 55342
'萌': 56842 -> 56842
'。': 31155 -> 31155
'': 2 -> 2...
but immediately afterwards the traceback above is raised. I'm not sure what causes it; Python and the required packages were installed as instructed, and the versions match.
Hi, this issue can be closed. After troubleshooting, it was most likely a torch caching problem: uninstalling torch and clearing the cache resolved it!
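For reference, one possible way to carry out that reinstall (a sketch, not the exact commands used; the cu121 index URL is PyTorch's standard wheel index for CUDA 12.1 builds, and conda environments may need conda-side cleanup as well):

```bash
# Uninstall torch, purge pip's cache, then reinstall the pinned CUDA 12.1 build.
pip uninstall -y torch
pip cache purge
pip install torch==2.3.0 --index-url https://download.pytorch.org/whl/cu121
```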