diff --git a/integrations/nvidia/pyproject.toml b/integrations/nvidia/pyproject.toml
index 504077b4e..f35485e9c 100644
--- a/integrations/nvidia/pyproject.toml
+++ b/integrations/nvidia/pyproject.toml
@@ -42,7 +42,7 @@ root = "../.."
 git_describe_command = 'git describe --tags --match="integrations/nvidia-v[0-9]*"'
 
 [tool.hatch.envs.default]
-dependencies = ["coverage[toml]>=6.5", "pytest", "pytest-rerunfailures", "haystack-pydoc-tools"]
+dependencies = ["coverage[toml]>=6.5", "pytest", "pytest-rerunfailures", "haystack-pydoc-tools", "requests_mock"]
 [tool.hatch.envs.default.scripts]
 test = "pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
 test-cov = "coverage run -m pytest --reruns 3 --reruns-delay 30 -x {args:tests}"
@@ -147,6 +147,8 @@ module = [
     "haystack_integrations.*",
     "pytest.*",
     "numpy.*",
+    "requests_mock.*",
+    "pydantic.*"
 ]
 ignore_missing_imports = true
diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_nim_backend.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_nim_backend.py
deleted file mode 100644
index ee25df7fd..000000000
--- a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/_nim_backend.py
+++ /dev/null
@@ -1,52 +0,0 @@
-from typing import Any, Dict, List, Optional, Tuple
-
-import requests
-from haystack.utils import Secret
-
-from .backend import EmbedderBackend
-
-REQUEST_TIMEOUT = 60
-
-
-class NimBackend(EmbedderBackend):
-    def __init__(
-        self,
-        model: str,
-        api_url: str,
-        api_key: Optional[Secret] = Secret.from_env_var("NVIDIA_API_KEY"),
-        model_kwargs: Optional[Dict[str, Any]] = None,
-    ):
-        headers = {
-            "Content-Type": "application/json",
-            "accept": "application/json",
-        }
-
-        if api_key:
-            headers["authorization"] = f"Bearer {api_key.resolve_value()}"
-
-        self.session = requests.Session()
-        self.session.headers.update(headers)
-
-        self.model = model
-        self.api_url = api_url
-        self.model_kwargs = model_kwargs or {}
-
-    def embed(self, texts: List[str]) -> Tuple[List[List[float]], Dict[str, Any]]:
-        url = f"{self.api_url}/embeddings"
-
-        res = self.session.post(
-            url,
-            json={
-                "model": self.model,
-                "input": texts,
-                **self.model_kwargs,
-            },
-            timeout=REQUEST_TIMEOUT,
-        )
-        res.raise_for_status()
-
-        data = res.json()
-        # Sort the embeddings by index, we don't know whether they're out of order or not
-        embeddings = [e["embedding"] for e in sorted(data["data"], key=lambda e: e["index"])]
-
-        return embeddings, {"usage": data["usage"]}
diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/backend.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/backend.py
deleted file mode 100644
index 09e9b7c80..000000000
--- a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/backend.py
+++ /dev/null
@@ -1,29 +0,0 @@
-from abc import ABC, abstractmethod
-from typing import Any, Dict, List, Optional, Tuple
-
-
-class EmbedderBackend(ABC):
-    def __init__(self, model: str, model_kwargs: Optional[Dict[str, Any]] = None):
-        """
-        Initialize the backend.
-
-        :param model:
-            The name of the model to use.
-        :param model_kwargs:
-            Additional keyword arguments to pass to the model.
-        """
-        self.model_name = model
-        self.model_kwargs = model_kwargs or {}
-
-    @abstractmethod
-    def embed(self, texts: List[str]) -> Tuple[List[List[float]], Dict[str, Any]]:
-        """
-        Invoke the backend and embed the given texts.
-
-        :param texts:
-            Texts to embed.
-        :return:
-            Vector representation of the texts and
-            metadata returned by the service.
-        """
-        pass
diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py
index 4cc805c01..f5d1747b8 100644
--- a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py
+++ b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/document_embedder.py
@@ -1,12 +1,11 @@
+import warnings
 from typing import Any, Dict, List, Optional, Tuple, Union
 
 from haystack import Document, component, default_from_dict, default_to_dict
 from haystack.utils import Secret, deserialize_secrets_inplace
-from haystack_integrations.utils.nvidia import url_validation
+from haystack_integrations.utils.nvidia import NimBackend, is_hosted, url_validation
 from tqdm import tqdm
 
-from ._nim_backend import NimBackend
-from .backend import EmbedderBackend
 from .truncate import EmbeddingTruncateMode
 
 _DEFAULT_API_URL = "https://ai.api.nvidia.com/v1/retrieval/nvidia"
@@ -34,7 +33,7 @@ class NvidiaDocumentEmbedder:
 
     def __init__(
         self,
-        model: str = "NV-Embed-QA",
+        model: Optional[str] = None,
         api_key: Optional[Secret] = Secret.from_env_var("NVIDIA_API_KEY"),
         api_url: str = _DEFAULT_API_URL,
         prefix: str = "",
@@ -50,6 +49,8 @@ def __init__(
 
         :param model:
             Embedding model to use.
+            If no model is specified together with a locally hosted API URL,
+            the first model returned by the /models endpoint is used by default.
         :param api_key:
             API key for the NVIDIA NIM.
         :param api_url:
@@ -87,9 +88,31 @@ def __init__(
             truncate = EmbeddingTruncateMode.from_str(truncate)
         self.truncate = truncate
 
-        self.backend: Optional[EmbedderBackend] = None
+        self.backend: Optional[Any] = None
         self._initialized = False
 
+        if is_hosted(api_url) and not self.model:  # manually set default model
+            self.model = "NV-Embed-QA"
+
+    def default_model(self):
+        """Set the default model when running against a locally hosted NIM."""
+        valid_models = [
+            model.id for model in self.backend.models() if not model.base_model or model.base_model == model.id
+        ]
+        name = next(iter(valid_models), None)
+        if name:
+            warnings.warn(
+                f"Default model is set as: {name}. \n"
+                "Set the `model` parameter to select a specific model. \n"
+                "Use the `available_models` property to list the available models.",
+                UserWarning,
+                stacklevel=2,
+            )
+            self.model = self.backend.model = name
+        else:
+            error_message = "No locally hosted model was found."
+            raise ValueError(error_message)
+
     def warm_up(self):
         """
         Initializes the component.
@@ -109,6 +132,9 @@ def warm_up(self):
 
         self._initialized = True
 
+        if not self.model:
+            self.default_model()
+
     def to_dict(self) -> Dict[str, Any]:
         """
         Serializes the component to a dictionary.
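Reviewer note: a minimal usage sketch of the default-model behavior introduced above, not part of the diff. It assumes a NIM is serving at `http://localhost:8080/v1` (the same hypothetical URL the tests below use); with `model=None` and a non-hosted URL, `warm_up()` queries `/models` and falls back to the first valid entry.

```python
# Sketch, under the assumptions above: against the hosted catalog URL the
# component instead defaults to "NV-Embed-QA" in __init__.
from haystack import Document
from haystack.utils import Secret
from haystack_integrations.components.embedders.nvidia import NvidiaDocumentEmbedder

embedder = NvidiaDocumentEmbedder(
    model=None,                          # triggers default_model() during warm_up()
    api_url="http://localhost:8080/v1",  # local NIM, so is_hosted() is False
    api_key=Secret.from_token("fake-api-key"),
)
embedder.warm_up()  # emits UserWarning: "Default model is set as: <id>"
print(embedder.model)

result = embedder.run(documents=[Document(content="I love cheese")])
print(result["meta"])
```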
diff --git a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py
index e1a8c36dd..1c4a7c5c9 100644
--- a/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py
+++ b/integrations/nvidia/src/haystack_integrations/components/embedders/nvidia/text_embedder.py
@@ -1,11 +1,10 @@
+import warnings
 from typing import Any, Dict, List, Optional, Union
 
 from haystack import component, default_from_dict, default_to_dict
 from haystack.utils import Secret, deserialize_secrets_inplace
-from haystack_integrations.utils.nvidia import url_validation
+from haystack_integrations.utils.nvidia import NimBackend, is_hosted, url_validation
 
-from ._nim_backend import NimBackend
-from .backend import EmbedderBackend
 from .truncate import EmbeddingTruncateMode
 
 _DEFAULT_API_URL = "https://ai.api.nvidia.com/v1/retrieval/nvidia"
@@ -35,7 +34,7 @@ class NvidiaTextEmbedder:
 
     def __init__(
         self,
-        model: str = "NV-Embed-QA",
+        model: Optional[str] = None,
         api_key: Optional[Secret] = Secret.from_env_var("NVIDIA_API_KEY"),
         api_url: str = _DEFAULT_API_URL,
         prefix: str = "",
@@ -47,6 +46,8 @@ def __init__(
 
         :param model:
             Embedding model to use.
+            If no model is specified together with a locally hosted API URL,
+            the first model returned by the /models endpoint is used by default.
         :param api_key:
             API key for the NVIDIA NIM.
         :param api_url:
@@ -71,9 +72,31 @@ def __init__(
             truncate = EmbeddingTruncateMode.from_str(truncate)
         self.truncate = truncate
 
-        self.backend: Optional[EmbedderBackend] = None
+        self.backend: Optional[Any] = None
         self._initialized = False
 
+        if is_hosted(api_url) and not self.model:  # manually set default model
+            self.model = "NV-Embed-QA"
+
+    def default_model(self):
+        """Set the default model when running against a locally hosted NIM."""
+        valid_models = [
+            model.id for model in self.backend.models() if not model.base_model or model.base_model == model.id
+        ]
+        name = next(iter(valid_models), None)
+        if name:
+            warnings.warn(
+                f"Default model is set as: {name}. \n"
+                "Set the `model` parameter to select a specific model. \n"
+                "Use the `available_models` property to list the available models.",
+                UserWarning,
+                stacklevel=2,
+            )
+            self.model = self.backend.model = name
+        else:
+            error_message = "No locally hosted model was found."
+            raise ValueError(error_message)
+
     def warm_up(self):
         """
         Initializes the component.
@@ -93,6 +116,9 @@ def warm_up(self):
 
         self._initialized = True
 
+        if not self.model:
+            self.default_model()
+
     def to_dict(self) -> Dict[str, Any]:
         """
         Serializes the component to a dictionary.
diff --git a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/backend.py b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/backend.py
deleted file mode 100644
index d14199daf..000000000
--- a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/backend.py
+++ /dev/null
@@ -1,29 +0,0 @@
-from abc import ABC, abstractmethod
-from typing import Any, Dict, List, Optional, Tuple
-
-
-class GeneratorBackend(ABC):
-    def __init__(self, model: str, model_kwargs: Optional[Dict[str, Any]] = None):
-        """
-        Initialize the backend.
-
-        :param model:
-            The name of the model to use.
-        :param model_kwargs:
-            Additional keyword arguments to pass to the model.
- """ - self.model_name = model - self.model_kwargs = model_kwargs or {} - - @abstractmethod - def generate(self, prompt: str) -> Tuple[List[str], List[Dict[str, Any]]]: - """ - Invoke the backend and prompt the model. - - :param prompt: - Prompt text. - :return: - Vector representation of the generated texts related - metadata returned by the service. - """ - pass diff --git a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py index f11ef8aaf..a286400ab 100644 --- a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py +++ b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/generator.py @@ -1,14 +1,12 @@ # SPDX-FileCopyrightText: 2024-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 +import warnings from typing import Any, Dict, List, Optional from haystack import component, default_from_dict, default_to_dict from haystack.utils.auth import Secret, deserialize_secrets_inplace -from haystack_integrations.utils.nvidia import url_validation - -from ._nim_backend import NimBackend -from .backend import GeneratorBackend +from haystack_integrations.utils.nvidia import NimBackend, is_hosted, url_validation _DEFAULT_API_URL = "https://integrate.api.nvidia.com/v1" @@ -45,7 +43,7 @@ class NvidiaGenerator: def __init__( self, - model: str, + model: Optional[str] = None, api_url: str = _DEFAULT_API_URL, api_key: Optional[Secret] = Secret.from_env_var("NVIDIA_API_KEY"), model_arguments: Optional[Dict[str, Any]] = None, @@ -55,6 +53,10 @@ def __init__( :param model: Name of the model to use for text generation. + See the [NVIDIA NIMs](https://ai.nvidia.com) + for more information on the supported models. + `Note`: If no specific model along with locally hosted API URL is provided, + the system defaults to the available model found using /models API. Check supported models at [NVIDIA NIM](https://ai.nvidia.com). :param api_key: API key for the NVIDIA NIM. Set it as the `NVIDIA_API_KEY` environment @@ -72,7 +74,28 @@ def __init__( self._api_key = api_key self._model_arguments = model_arguments or {} - self._backend: Optional[GeneratorBackend] = None + self._backend: Optional[Any] = None + + self.is_hosted = is_hosted(api_url) + + def default_model(self): + """Set default model in local NIM mode.""" + valid_models = [ + model.id for model in self._backend.models() if not model.base_model or model.base_model == model.id + ] + name = next(iter(valid_models), None) + if name: + warnings.warn( + f"Default model is set as: {name}. \n" + "Set model using model parameter. \n" + "To get available models use available_models property.", + UserWarning, + stacklevel=2, + ) + self._model = self._backend.model_name = name + else: + error_message = "No locally hosted model was found." + raise ValueError(error_message) def warm_up(self): """ @@ -91,6 +114,9 @@ def warm_up(self): model_kwargs=self._model_arguments, ) + if not self.is_hosted and not self._model: + self.default_model() + def to_dict(self) -> Dict[str, Any]: """ Serializes the component to a dictionary. 
diff --git a/integrations/nvidia/src/haystack_integrations/utils/nvidia/__init__.py b/integrations/nvidia/src/haystack_integrations/utils/nvidia/__init__.py
index 9863e4a38..da301d29d 100644
--- a/integrations/nvidia/src/haystack_integrations/utils/nvidia/__init__.py
+++ b/integrations/nvidia/src/haystack_integrations/utils/nvidia/__init__.py
@@ -1,3 +1,4 @@
-from .utils import url_validation
+from .nim_backend import Model, NimBackend
+from .utils import is_hosted, url_validation
 
-__all__ = ["url_validation"]
+__all__ = ["NimBackend", "Model", "is_hosted", "url_validation"]
diff --git a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/_nim_backend.py b/integrations/nvidia/src/haystack_integrations/utils/nvidia/nim_backend.py
similarity index 61%
rename from integrations/nvidia/src/haystack_integrations/components/generators/nvidia/_nim_backend.py
rename to integrations/nvidia/src/haystack_integrations/utils/nvidia/nim_backend.py
index 5253b3254..f69862f0e 100644
--- a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/_nim_backend.py
+++ b/integrations/nvidia/src/haystack_integrations/utils/nvidia/nim_backend.py
@@ -1,14 +1,29 @@
+from dataclasses import dataclass, field
 from typing import Any, Dict, List, Optional, Tuple
 
 import requests
 from haystack.utils import Secret
 
-from .backend import GeneratorBackend
-
 REQUEST_TIMEOUT = 60
 
 
-class NimBackend(GeneratorBackend):
+@dataclass
+class Model:
+    """
+    Model information.
+
+    id: unique identifier for the model, passed as the model parameter for requests
+    aliases: list of aliases for the model
+    base_model: root model for the model
+    All aliases are deprecated and will trigger a warning when used.
+    """
+
+    id: str
+    aliases: Optional[List[str]] = field(default_factory=list)
+    base_model: Optional[str] = None
+
+
+class NimBackend:
     def __init__(
         self,
         model: str,
@@ -31,6 +46,26 @@ def __init__(
         self.api_url = api_url
         self.model_kwargs = model_kwargs or {}
 
+    def embed(self, texts: List[str]) -> Tuple[List[List[float]], Dict[str, Any]]:
+        url = f"{self.api_url}/embeddings"
+
+        res = self.session.post(
+            url,
+            json={
+                "model": self.model,
+                "input": texts,
+                **self.model_kwargs,
+            },
+            timeout=REQUEST_TIMEOUT,
+        )
+        res.raise_for_status()
+
+        data = res.json()
+        # Sort the embeddings by index; the service may return them out of order.
+        embeddings = [e["embedding"] for e in sorted(data["data"], key=lambda e: e["index"])]
+
+        return embeddings, {"usage": data["usage"]}
+
     def generate(self, prompt: str) -> Tuple[List[str], List[Dict[str, Any]]]:
         # We're using the chat completion endpoint as the NIM API doesn't support
         # the /completions endpoint. So both the non-chat and chat generator will use this.
@@ -78,3 +113,19 @@ def generate(self, prompt: str) -> Tuple[List[str], List[Dict[str, Any]]]:
             meta.append(choice_meta)
 
         return replies, meta
+
+    def models(self) -> List[Model]:
+        url = f"{self.api_url}/models"
+
+        res = self.session.get(
+            url,
+            timeout=REQUEST_TIMEOUT,
+        )
+        res.raise_for_status()
+
+        data = res.json()["data"]
+        models = [Model(element["id"]) for element in data if "id" in element]
+        if not models:
+            msg = f"No hosted models were found at URL '{url}'."
+            raise ValueError(msg)
+        return models
diff --git a/integrations/nvidia/src/haystack_integrations/utils/nvidia/utils.py b/integrations/nvidia/src/haystack_integrations/utils/nvidia/utils.py
index 4f8e14b09..7d4dfc3b4 100644
--- a/integrations/nvidia/src/haystack_integrations/utils/nvidia/utils.py
+++ b/integrations/nvidia/src/haystack_integrations/utils/nvidia/utils.py
@@ -37,3 +37,11 @@ def url_validation(api_url: str, default_api_url: str, allowed_paths: List[str])
 
     base_url = urlunparse((result.scheme, result.netloc, "v1", "", "", ""))
     return base_url
+
+
+def is_hosted(api_url: str) -> bool:
+    """Return True if `api_url` points to an NVIDIA-hosted API endpoint."""
+    return urlparse(api_url).netloc in [
+        "integrate.api.nvidia.com",
+        "ai.api.nvidia.com",
+    ]
diff --git a/integrations/nvidia/tests/__init__.py b/integrations/nvidia/tests/__init__.py
index e873bc332..47611e0b9 100644
--- a/integrations/nvidia/tests/__init__.py
+++ b/integrations/nvidia/tests/__init__.py
@@ -1,3 +1,6 @@
 # SPDX-FileCopyrightText: 2023-present deepset GmbH
 #
 # SPDX-License-Identifier: Apache-2.0
+from .conftest import MockBackend
+
+__all__ = ["MockBackend"]
diff --git a/integrations/nvidia/tests/conftest.py b/integrations/nvidia/tests/conftest.py
new file mode 100644
index 000000000..794c994ff
--- /dev/null
+++ b/integrations/nvidia/tests/conftest.py
@@ -0,0 +1,44 @@
+from typing import Any, Dict, List, Optional, Tuple
+
+import pytest
+from haystack.utils import Secret
+from haystack_integrations.utils.nvidia import Model, NimBackend
+from requests_mock import Mocker
+
+
+class MockBackend(NimBackend):
+    def __init__(self, model: str, api_key: Optional[Secret] = None, model_kwargs: Optional[Dict[str, Any]] = None):
+        api_key = api_key or Secret.from_env_var("NVIDIA_API_KEY")
+        super().__init__(model, api_url="", api_key=api_key, model_kwargs=model_kwargs or {})
+
+    def embed(self, texts):
+        inputs = texts
+        data = [[0.1, 0.2, 0.3] for i in range(len(inputs))]
+        return data, {"usage": {"total_tokens": 4, "prompt_tokens": 4}}
+
+    def models(self):
+        return [Model(id="aa")]
+
+    def generate(self) -> Tuple[List[str], List[Dict[str, Any]]]:
+        return (
+            ["This is a mocked response."],
+            [{"role": "assistant", "usage": {"prompt_tokens": 5, "total_tokens": 10, "completion_tokens": 5}}],
+        )
+
+
+@pytest.fixture
+def mock_local_models(requests_mock: Mocker) -> None:
+    requests_mock.get(
+        "http://localhost:8080/v1/models",
+        json={
+            "data": [
+                {
+                    "id": "model1",
+                    "object": "model",
+                    "created": 1234567890,
+                    "owned_by": "OWNER",
+                    "root": "model1",
+                },
+            ]
+        },
+    )
diff --git a/integrations/nvidia/tests/test_document_embedder.py b/integrations/nvidia/tests/test_document_embedder.py
index 856ae4652..6562a0ea9 100644
--- a/integrations/nvidia/tests/test_document_embedder.py
+++ b/integrations/nvidia/tests/test_document_embedder.py
@@ -4,17 +4,8 @@
 from haystack import Document
 from haystack.utils import Secret
 from haystack_integrations.components.embedders.nvidia import EmbeddingTruncateMode, NvidiaDocumentEmbedder
-from haystack_integrations.components.embedders.nvidia.backend import EmbedderBackend
-
-
-class MockBackend(EmbedderBackend):
-    def __init__(self, model, model_kwargs):
-        super().__init__(model, model_kwargs)
-
-    def embed(self, texts):
-        inputs = texts
-        data = [[0.1, 0.2, 0.3] for i in range(len(inputs))]
-        return data, {"usage": {"total_tokens": 4, "prompt_tokens": 4}}
+from . import MockBackend
 
 
 class TestNvidiaDocumentEmbedder:
@@ -185,14 +176,18 @@ def test_prepare_texts_to_embed_w_suffix(self):
 
     def test_embed_batch(self):
         texts = ["text 1", "text 2", "text 3", "text 4", "text 5"]
-
+        model = "playground_nvolveqa_40k"
+        api_key = Secret.from_token("fake-api-key")
         embedder = NvidiaDocumentEmbedder(
-            "playground_nvolveqa_40k",
-            api_key=Secret.from_token("fake-api-key"),
+            model,
+            api_key=api_key,
         )
 
         embedder.warm_up()
-        embedder.backend = MockBackend("aa", None)
+        embedder.backend = MockBackend(
+            model=model,
+            api_key=api_key,
+        )
 
         embeddings, metadata = embedder._embed_batch(texts_to_embed=texts, batch_size=2)
 
@@ -205,15 +200,55 @@ def test_embed_batch(self):
 
         assert metadata == {"usage": {"prompt_tokens": 3 * 4, "total_tokens": 3 * 4}}
 
-    def test_run(self):
+    @pytest.mark.usefixtures("mock_local_models")
+    def test_run_default_model(self):
         docs = [
             Document(content="I love cheese", meta={"topic": "Cuisine"}),
             Document(content="A transformer is a deep learning architecture", meta={"topic": "ML"}),
         ]
+        api_key = Secret.from_token("fake-api-key")
+
+        embedder = NvidiaDocumentEmbedder(
+            api_key=api_key,
+            model=None,
+            api_url="http://localhost:8080/v1",
+            prefix="prefix ",
+            suffix=" suffix",
+            meta_fields_to_embed=["topic"],
+            embedding_separator=" | ",
+        )
+
+        with pytest.warns(UserWarning) as record:
+            embedder.warm_up()
+        assert len(record) == 1
+        assert "Default model is set as:" in str(record[0].message)
+        assert embedder.model == "model1"
+
+        embedder.backend = MockBackend(model=embedder.model, api_key=api_key)
+
+        result = embedder.run(documents=docs)
+        documents_with_embeddings = result["documents"]
+        metadata = result["meta"]
+
+        assert isinstance(documents_with_embeddings, list)
+        assert len(documents_with_embeddings) == len(docs)
+        for doc in documents_with_embeddings:
+            assert isinstance(doc, Document)
+            assert isinstance(doc.embedding, list)
+            assert len(doc.embedding) == 3
+            assert all(isinstance(x, float) for x in doc.embedding)
+        assert metadata == {"usage": {"prompt_tokens": 4, "total_tokens": 4}}
+
+    def test_run(self):
+        docs = [
+            Document(content="I love cheese", meta={"topic": "Cuisine"}),
+            Document(content="A transformer is a deep learning architecture", meta={"topic": "ML"}),
+        ]
+        api_key = Secret.from_token("fake-api-key")
         model = "playground_nvolveqa_40k"
 
         embedder = NvidiaDocumentEmbedder(
-            api_key=Secret.from_token("fake-api-key"),
+            api_key=api_key,
             model=model,
             prefix="prefix ",
             suffix=" suffix",
@@ -222,7 +257,7 @@ def test_run(self):
         )
 
         embedder.warm_up()
-        embedder.backend = MockBackend("aa", None)
+        embedder.backend = MockBackend(model=model, api_key=api_key)
 
         result = embedder.run(documents=docs)
 
@@ -243,9 +278,10 @@ def test_run_custom_batch_size(self):
             Document(content="I love cheese", meta={"topic": "Cuisine"}),
             Document(content="A transformer is a deep learning architecture", meta={"topic": "ML"}),
         ]
+        api_key = Secret.from_token("fake-api-key")
         model = "playground_nvolveqa_40k"
         embedder = NvidiaDocumentEmbedder(
-            api_key=Secret.from_token("fake-api-key"),
+            api_key=api_key,
             model=model,
             prefix="prefix ",
             suffix=" suffix",
@@ -255,7 +291,7 @@ def test_run_custom_batch_size(self):
         )
 
         embedder.warm_up()
-        embedder.backend = MockBackend("aa", None)
+        embedder.backend = MockBackend(model=model, api_key=api_key)
 
         result = embedder.run(documents=docs)
 
@@ -273,10 +309,12 @@ def test_run_custom_batch_size(self):
         assert metadata == {"usage": {"prompt_tokens": 2 * 4, "total_tokens": 2 * 4}}
 
     def test_run_wrong_input_format(self):
-        embedder = NvidiaDocumentEmbedder("playground_nvolveqa_40k", api_key=Secret.from_token("fake-api-key"))
embedder = NvidiaDocumentEmbedder("playground_nvolveqa_40k", api_key=Secret.from_token("fake-api-key")) + model = "playground_nvolveqa_40k" + api_key = Secret.from_token("fake-api-key") + embedder = NvidiaDocumentEmbedder(model, api_key=api_key) embedder.warm_up() - embedder.backend = MockBackend("aa", None) + embedder.backend = MockBackend(model=model, api_key=api_key) string_input = "text" list_integers_input = [1, 2, 3] @@ -288,10 +326,12 @@ def test_run_wrong_input_format(self): embedder.run(documents=list_integers_input) def test_run_on_empty_list(self): - embedder = NvidiaDocumentEmbedder("playground_nvolveqa_40k", api_key=Secret.from_token("fake-api-key")) + model = "playground_nvolveqa_40k" + api_key = Secret.from_token("fake-api-key") + embedder = NvidiaDocumentEmbedder(model, api_key=api_key) embedder.warm_up() - embedder.backend = MockBackend("aa", None) + embedder.backend = MockBackend(model=model, api_key=api_key) empty_list_input = [] result = embedder.run(documents=empty_list_input) diff --git a/integrations/nvidia/tests/test_generator.py b/integrations/nvidia/tests/test_generator.py index 3ddeebe88..9fff9c2e8 100644 --- a/integrations/nvidia/tests/test_generator.py +++ b/integrations/nvidia/tests/test_generator.py @@ -6,6 +6,35 @@ import pytest from haystack.utils import Secret from haystack_integrations.components.generators.nvidia import NvidiaGenerator +from requests_mock import Mocker + + +@pytest.fixture +def mock_local_chat_completion(requests_mock: Mocker) -> None: + requests_mock.post( + "http://localhost:8080/v1/chat/completions", + json={ + "choices": [ + { + "message": {"content": "Hello!", "role": "system"}, + "usage": {"prompt_tokens": 3, "total_tokens": 5, "completion_tokens": 9}, + "finish_reason": "stop", + "index": 0, + }, + { + "message": {"content": "How are you?", "role": "system"}, + "usage": {"prompt_tokens": 3, "total_tokens": 5, "completion_tokens": 9}, + "finish_reason": "stop", + "index": 1, + }, + ], + "usage": { + "prompt_tokens": 3, + "total_tokens": 5, + "completion_tokens": 9, + }, + }, + ) class TestNvidiaGenerator: @@ -116,6 +145,32 @@ def test_run_integration_with_nim_backend(self): assert result["replies"] assert result["meta"] + @pytest.mark.integration + @pytest.mark.usefixtures("mock_local_models") + @pytest.mark.usefixtures("mock_local_chat_completion") + def test_run_integration_with_default_model_nim_backend(self): + model = None + url = "http://localhost:8080/v1" + generator = NvidiaGenerator( + model=model, + api_url=url, + api_key=None, + model_arguments={ + "temperature": 0.2, + }, + ) + with pytest.warns(UserWarning) as record: + generator.warm_up() + assert len(record) == 1 + assert "Default model is set as:" in str(record[0].message) + assert generator._model == "model1" + assert not generator.is_hosted + + result = generator.run(prompt="What is the answer?") + + assert result["replies"] + assert result["meta"] + @pytest.mark.skipif( not os.environ.get("NVIDIA_API_KEY", None), reason="Export an env var called NVIDIA_API_KEY containing the NVIDIA API key to run this test.", diff --git a/integrations/nvidia/tests/test_text_embedder.py b/integrations/nvidia/tests/test_text_embedder.py index 42d60dee2..7c0a7000d 100644 --- a/integrations/nvidia/tests/test_text_embedder.py +++ b/integrations/nvidia/tests/test_text_embedder.py @@ -3,17 +3,8 @@ import pytest from haystack.utils import Secret from haystack_integrations.components.embedders.nvidia import EmbeddingTruncateMode, NvidiaTextEmbedder -from 
-
-
-class MockBackend(EmbedderBackend):
-    def __init__(self, model, model_kwargs):
-        super().__init__(model, model_kwargs)
-
-    def embed(self, texts):
-        inputs = texts
-        data = [[0.1, 0.2, 0.3] for i in range(len(inputs))]
-        return data, {"usage": {"total_tokens": 4, "prompt_tokens": 4}}
+from . import MockBackend
 
 
 class TestNvidiaTextEmbedder:
@@ -22,7 +13,6 @@ def test_init_default(self, monkeypatch):
         embedder = NvidiaTextEmbedder()
 
         assert embedder.api_key == Secret.from_env_var("NVIDIA_API_KEY")
-        assert embedder.model == "NV-Embed-QA"
         assert embedder.api_url == "https://ai.api.nvidia.com/v1/retrieval/nvidia"
         assert embedder.prefix == ""
         assert embedder.suffix == ""
@@ -106,13 +96,36 @@ def from_dict(self, monkeypatch):
         assert component.suffix == "suffix"
         assert component.truncate == "START"
 
+    @pytest.mark.usefixtures("mock_local_models")
+    def test_run_default_model(self):
+        api_key = Secret.from_token("fake-api-key")
+        embedder = NvidiaTextEmbedder(api_url="http://localhost:8080/v1", api_key=api_key)
+
+        assert embedder.model is None
+
+        with pytest.warns(UserWarning) as record:
+            embedder.warm_up()
+
+        assert len(record) == 1
+        assert "Default model is set as:" in str(record[0].message)
+        assert embedder.model == "model1"
+
+        embedder.backend = MockBackend(model=embedder.model, api_key=api_key)
+
+        result = embedder.run(text="The food was delicious")
+
+        assert len(result["embedding"]) == 3
+        assert all(isinstance(x, float) for x in result["embedding"])
+        assert result["meta"] == {
+            "usage": {"prompt_tokens": 4, "total_tokens": 4},
+        }
+
     def test_run(self):
-        embedder = NvidiaTextEmbedder(
-            "playground_nvolveqa_40k", api_key=Secret.from_token("fake-api-key"), prefix="prefix ", suffix=" suffix"
-        )
+        api_key = Secret.from_token("fake-api-key")
+        embedder = NvidiaTextEmbedder("playground_nvolveqa_40k", api_key=api_key, prefix="prefix ", suffix=" suffix")
 
         embedder.warm_up()
-        embedder.backend = MockBackend("aa", None)
+        embedder.backend = MockBackend(model="playground_nvolveqa_40k", api_key=api_key)
 
         result = embedder.run(text="The food was delicious")
 
@@ -123,9 +136,10 @@ def test_run(self):
         }
 
     def test_run_wrong_input_format(self):
-        embedder = NvidiaTextEmbedder("playground_nvolveqa_40k", api_key=Secret.from_token("fake-api-key"))
+        api_key = Secret.from_token("fake-api-key")
+        embedder = NvidiaTextEmbedder("playground_nvolveqa_40k", api_key=api_key)
 
         embedder.warm_up()
-        embedder.backend = MockBackend("aa", None)
+        embedder.backend = MockBackend(model="playground_nvolveqa_40k", api_key=api_key)
 
         list_integers_input = [1, 2, 3]
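Reviewer note: a minimal sketch of the shared `MockBackend` from `tests/conftest.py`, not part of the diff, mirroring how the updated tests stub out the network-facing `NimBackend`. It assumes you run from the integration root so the `tests` package is importable.

```python
# Sketch, under the assumptions above: the mock returns fixed embeddings,
# fixed usage metadata, and a single model with id "aa".
from haystack.utils import Secret
from tests import MockBackend

backend = MockBackend(model="playground_nvolveqa_40k", api_key=Secret.from_token("fake-api-key"))

embeddings, meta = backend.embed(["text 1", "text 2"])
assert embeddings == [[0.1, 0.2, 0.3], [0.1, 0.2, 0.3]]
assert meta == {"usage": {"total_tokens": 4, "prompt_tokens": 4}}
assert [m.id for m in backend.models()] == ["aa"]
```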