diff --git a/src/autotrain/backend.py b/src/autotrain/backend.py index c96654eebb..3828e984ba 100644 --- a/src/autotrain/backend.py +++ b/src/autotrain/backend.py @@ -404,7 +404,8 @@ def __post_init__(self): self.nvcf_token = os.environ.get("NVCF_API_TOKEN") self.instance_map = { - "nvcf-l40": {"backend": "GFN", "id": "67bb8939-c932-429a-a446-8ae898311856"}, + "nvcf-l40": {"id": "67bb8939-c932-429a-a446-8ae898311856"}, + "nvcf-h100x1": {"id": "848348f8-a4e2-4242-bce9-6baa1bd70a66"}, } logger.info("Starting NVCF training") @@ -422,7 +423,7 @@ def _convert_dict_to_object(self, dictionary): return dictionary def _conf_nvcf(self, token, nvcf_type, url, method="POST", payload=None): - logger.info(f"{self.job_name}: {method} - Configuring NVCF {nvcf_type}.") + logger.info(f"{self.job_name}: {method} - Configuring NVCF {nvcf_type}.") headers = {"Content-Type": "application/json", "Authorization": f"Bearer {token}"} try: @@ -472,7 +473,7 @@ def _poll_nvcf(self, url, token, method="get", timeout=86400, interval=30, op="p while time.time() - start_time < timeout: try: headers = {"Content-Type": "application/json", "Authorization": f"Bearer {token}"} - if method == "get": + if method.upper() == "GET": response = requests.get(url, headers=headers) else: raise ValueError(f"Unsupported HTTP method: {method}") @@ -527,4 +528,4 @@ def create(self): nvcf_url_reqpoll = f"{self.nvcf_api}/v2/nvcf/pexec/status/{nvcf_fn_req}" logger.info(f"{self.job_name}: Polling : {nvcf_url_reqpoll}") - self._poll_nvcf(url=nvcf_url_reqpoll, token=self.nvcf_token, method="GET", timeout=1200, interval=20) + self._poll_nvcf(url=nvcf_url_reqpoll, token=self.nvcf_token, method="GET", timeout=172800, interval=20) diff --git a/src/autotrain/project.py b/src/autotrain/project.py index c96b84c57d..e7aac964c3 100644 --- a/src/autotrain/project.py +++ b/src/autotrain/project.py @@ -49,12 +49,8 @@ def __post_init__(self): "DGX 2xA100": "dgx-2a100", "DGX 4xA100": "dgx-4a100", "DGX 8xA100": "dgx-8a100", - "NVCF 1xA100": "nvcf-a100", - "NVCF 8xA100": "nvcf-8a100", - "NVCF 1xA10G": "nvcf-a10g", + "NVCF 1xH100": "nvcf-h100x1", "NVCF 1xL40": "nvcf-l40", - "NVCF 1xL40G": "nvcf-l40g", - "NVCF 1xT10": "nvcf-t10", "Local": "local", "EP US-East-1 1xA10g": "ep-aws-useast1-m", "EP US-East-1 1xA100": "ep-aws-useast1-xl", diff --git a/src/autotrain/templates/index.html b/src/autotrain/templates/index.html index d699470f7b..56edae3751 100644 --- a/src/autotrain/templates/index.html +++ b/src/autotrain/templates/index.html @@ -189,10 +189,8 @@ {% endif %} {% if enable_nvcf == 1 %} - - - + {% endif %} {% endif %}