Bye bye env vars, keep everything as configs #34886

Draft · wants to merge 2 commits into main
22 changes: 22 additions & 0 deletions src/transformers/integrations/fsdp.py
@@ -13,6 +13,7 @@
# limitations under the License.
from __future__ import annotations

import os
from typing import TYPE_CHECKING

from ..utils import is_torch_available
@@ -31,3 +32,24 @@ def is_fsdp_managed_module(module: nn.Module) -> bool:
    return isinstance(module, torch.distributed.fsdp.FullyShardedDataParallel) or getattr(
        module, "_is_fsdp_managed_module", False
    )


def enable_cpu_ram_efficient_loading():
    """
    Enable CPU RAM efficient loading of model weights by setting `FSDP_CPU_RAM_EFFICIENT_LOADING` to `"true"`.
    """
    os.environ["FSDP_CPU_RAM_EFFICIENT_LOADING"] = "true"


def disable_cpu_ram_efficient_loading():
    """
    Disable CPU RAM efficient loading of model weights by setting `FSDP_CPU_RAM_EFFICIENT_LOADING` to `"false"`.
    """
    os.environ["FSDP_CPU_RAM_EFFICIENT_LOADING"] = "false"


def set_cpu_ram_efficient_loading(value: bool):
    """
    Enable or disable CPU RAM efficient loading of model weights by setting `FSDP_CPU_RAM_EFFICIENT_LOADING` to the lowercase string form of `value`.
    """
    os.environ["FSDP_CPU_RAM_EFFICIENT_LOADING"] = str(bool(value)).lower()
33 changes: 33 additions & 0 deletions src/transformers/trainer_pt_utils.py
@@ -41,6 +41,7 @@
from .integrations.deepspeed import is_deepspeed_zero3_enabled
from .tokenization_utils_base import BatchEncoding
from .utils import (
    is_accelerate_available,
    is_sagemaker_mp_enabled,
    is_torch_available,
    is_torch_xla_available,
@@ -49,6 +50,10 @@
)


if is_accelerate_available():
    from accelerate.utils import FullyShardedDataParallelPlugin, TorchDynamoPlugin


if is_training_run_on_sagemaker():
    logging.add_handler(StreamHandler(sys.stdout))

@@ -1312,6 +1317,27 @@ class AcceleratorConfig:
" The [`accelerate.utils.GradientAccumulationPlugin`] default is `False`."
},
)
    mixed_precision: Optional[str] = field(
        default=None,
        metadata={
            "help": "The mixed precision policy to use. If not set, the policy will be determined by the `ACCELERATE_MIXED_PRECISION` environment variable. "
            "Should not be passed in through a config file."
        },
    )
    dynamo_plugin: Optional["TorchDynamoPlugin"] = field(  # noqa: F821
        default=None,
        metadata={
            "help": "The dynamo config to use. If not set, the config will be determined by the `ACCELERATE_DYNAMO_CONFIG` environment variable. "
            "Should not be passed in through a config file."
        },
    )
    fsdp_plugin: Optional["FullyShardedDataParallelPlugin"] = field(  # noqa: F821
        default=None,
        metadata={
            "help": "The FSDP config to use. If not set, the config will be determined by environment variables set during `accelerate launch`. "
            "Should not be passed in through a config file."
        },
    )
    use_configured_state: bool = field(
        default=False,
        metadata={
@@ -1333,6 +1359,13 @@ def from_json_file(cls, json_file):
f"The config file at {json_file} had unknown keys ({extra_keys}), please try upgrading your `transformers`"
" version or fix (and potentially remove these keys) from your config file."
)
        # Check for fields that should not be set in a config file
        invalid_fields = ["mixed_precision", "fsdp_plugin", "dynamo_plugin"]
        for invalid_field in invalid_fields:
            if config_dict.get(invalid_field) is not None:
                raise ValueError(
                    f"The `{invalid_field}` field should not be set in a config file. It is determined by the `TrainingArguments`."
                )
        return cls(**config_dict)

    def to_dict(self):
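
Taken together, the new `AcceleratorConfig` fields are meant to be populated programmatically (per the error message above, they are determined by the `TrainingArguments`), while `from_json_file` now rejects them. A minimal sketch of both behaviours, assuming `accelerate` is installed and `AcceleratorConfig` stays importable from `transformers.trainer_pt_utils`; the `inductor` backend and the temporary JSON file are just for illustration:

import json
import tempfile

from accelerate.utils import TorchDynamoPlugin
from transformers.trainer_pt_utils import AcceleratorConfig

# Programmatic construction: the new fields are regular dataclass fields,
# so they can be passed directly as keyword arguments.
config = AcceleratorConfig(
    mixed_precision="bf16",
    dynamo_plugin=TorchDynamoPlugin(backend="inductor"),
)
# `fsdp_plugin` is normally left unset and filled in from the environment
# variables exported by `accelerate launch`.

# Putting any of these fields in a JSON config file is rejected.
with tempfile.NamedTemporaryFile("w", suffix=".json", delete=False) as tmp:
    json.dump({"mixed_precision": "bf16"}, tmp)

try:
    AcceleratorConfig.from_json_file(tmp.name)
except ValueError as err:
    print(err)  # The `mixed_precision` field should not be set in a config file. ...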