From 211eb14e0e6a375cd491a6667a175ed5ec8f07aa Mon Sep 17 00:00:00 2001
From: Leon Derczynski <leonderczynski@gmail.com>
Date: Wed, 18 Sep 2024 09:44:56 +0200
Subject: [PATCH 1/5] add generator.guardrails doc summary

---
 docs/source/garak.generators.guardrails.rst | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/docs/source/garak.generators.guardrails.rst b/docs/source/garak.generators.guardrails.rst
index 07b68f7ab..55a6d2f32 100644
--- a/docs/source/garak.generators.guardrails.rst
+++ b/docs/source/garak.generators.guardrails.rst
@@ -1,6 +1,24 @@
 garak.generators.guardrails
 ===========================
 
+This is a generator for wrapping a NeMo Guardrails configuration. Using this
+garak generator enables security testing of a Guardrails config.
+
+The ``guardrails`` generator expects a path to a valid Guardrails configuration
+to be passed as its name. For example,
+
+.. code-block::
+
+   garak -m guardrails -n sample_abc/config
+
+This generator requires installation of the `nemoguardrails <https://pypi.org/project/nemoguardrails/>`_
+Python package.
+
+When invoked, garak sends prompts in series to the Guardrails setup using 
+``rails.generate``, and waits for a response. The generator does not support
+parallelisation, so it's recommended to run smaller probes, or set ``generations``
+to a low value, in order to reduce garak run time.
+
 .. automodule:: garak.generators.guardrails
    :members:
    :undoc-members:

From 3e43ad6a78eec98b2853827a0f051dfa1500c9c3 Mon Sep 17 00:00:00 2001
From: Leon Derczynski <leonderczynski@gmail.com>
Date: Wed, 18 Sep 2024 09:58:51 +0200
Subject: [PATCH 2/5] prune unused config value

---
 garak/generators/nvcf.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/garak/generators/nvcf.py b/garak/generators/nvcf.py
index 91355b0be..28bdc0d03 100644
--- a/garak/generators/nvcf.py
+++ b/garak/generators/nvcf.py
@@ -25,7 +25,6 @@ class NvcfChat(Generator):
         "top_p": 0.7,
         "fetch_url_format": "https://api.nvcf.nvidia.com/v2/nvcf/pexec/status/",
         "invoke_url_base": "https://api.nvcf.nvidia.com/v2/nvcf/pexec/functions/",
-        "extra_nvcf_logging": False,
         "timeout": 60,
         "version_id": None,  # string
         "stop_on_404": True,

From 30fb60d3f2642d3f654e162dc5fb93e981a3a957 Mon Sep 17 00:00:00 2001
From: Leon Derczynski <leonderczynski@gmail.com>
Date: Wed, 18 Sep 2024 10:11:36 +0200
Subject: [PATCH 3/5] add docs for nvcf generator

---
 docs/source/garak.generators.nvcf.rst | 92 +++++++++++++++++++++++++++
 1 file changed, 92 insertions(+)

diff --git a/docs/source/garak.generators.nvcf.rst b/docs/source/garak.generators.nvcf.rst
index 975264378..d06ce914f 100644
--- a/docs/source/garak.generators.nvcf.rst
+++ b/docs/source/garak.generators.nvcf.rst
@@ -1,6 +1,98 @@
 garak.generators.nvcf
 =====================
 
+This garak generator is a connector to NVIDIA Cloud Functions. It permits fast
+and flexible generation.
+
+NVCF functions work by sending a request to an invocation endpoint, and then polling
+a status endpoint until the response is received. The cloud function is described
+using a UUID, which is passed to garak as the model_name. API key should be placed in
+environment variable NVCF_API_KEY or set in a garak config. For example:
+
+.. code-block::
+
+   export NVCF_API_KEY="example-api-key-xyz"
+   garak -m nvcf -n 341da0d0-aa68-4c4f-89b5-fc39286de6a1
+
+
+Configuration
+-------------
+
+Configurable values:
+
+* temperature - Temperature for generation. Passed as a value to the endpoint.
+* top_p - Number of tokens to sample. Passed as a value to the endpoint.
+* invoke_url_base - Base URL for the NVCF endpoint (default is for NVIDIA-hosted functions).
+* fetch_url_format - URL to check for request status updates (default is for NVIDIA-hosted functions).
+* timeout - Read timeout for HTTP requests (note, this is network timeout, distinct from inference timeout)
+* version_id - API version id, postpended to endpoint URLs if supplied
+* stop_on_404 - Give up on endpoints returning 404 (i.e. nonexistent ones)
+* extra_params - Dictionary of optional extra values to pass to the endpoint. Default ``{"stream": False}``.
+
+Some NVCF instances require custom parameters, for example a "model" field in the
+request payload. These can be set via ``extra_params`` in the NVCF config. For example,
+this cURL command maps to the following garak YAML:
+
+
+.. code-block::
+
+   curl -s -X POST 'https://api.nvcf.nvidia.com/v2/nvcf/pexec/functions/341da0d0-aa68-4c4f-89b5-fc39286de6a1' \
+   -H 'Content-Type: application/json' \
+   -H 'Authorization: Bearer example-api-key-xyz' \
+   -d '{
+         "messages": [{"role": "user", "content": "How many letters are in the word strawberry?"}],
+         "model": "prefix/obsidianorder/terer-nor",
+         "max_tokens": 1024,
+         "stream": false
+      }'
+
+.. code-block:: yaml
+
+   ---
+   plugins:
+      generators:
+         nvcf:
+            NvcfChat:
+               api_key: example-api-key-xyz
+               max_tokens: 1024
+               extra_params:
+                  stream: false
+                  model: prefix/obsidianorder/terer-nor
+      model_type: nvcf.NvcfChat
+      model_name: 341da0d0-aa68-4c4f-89b5-fc39286de6a1
+
+The ``nvcf`` generator uses the standard garak generator mechanism for 
+``max_tokens``, which is why this value is set at generator-level rather than 
+as a key-value pair in ``extra_params``.
+
+
+Scaling
+-------
+
+The NVCF generator supports parallelisation and it's recommended to use this,
+invoking garak with ``--parallel_attempts`` set to a value higher than one.
+If the NVCF endpoint times out due to insufficient capacity, garak will note this,
+back off, and retry the request later.
+
+.. code-block::
+
+   garak -m nvcf -n 341da0d0-aa68-4c4f-89b5-fc39286de6a1 --parallel_attempts 32
+
+
+Or, as yaml config:
+
+.. code-block:: yaml
+
+   ---
+   system:
+      parallel_attempts: 32
+   plugins:
+      model_type: nvcf.NvcfChat
+      model_name: 341da0d0-aa68-4c4f-89b5-fc39286de6a1
+
+
+
+
 .. automodule:: garak.generators.nvcf
    :members:
    :undoc-members:

From e2fbd3195d782165a29fdd9470b1f23896763708 Mon Sep 17 00:00:00 2001
From: Leon Derczynski <leonderczynski@gmail.com>
Date: Wed, 18 Sep 2024 10:17:36 +0200
Subject: [PATCH 4/5] add nemo generator docs

---
 docs/source/garak.generators.nemo.rst | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/docs/source/garak.generators.nemo.rst b/docs/source/garak.generators.nemo.rst
index 1b9d20eef..0b0059333 100644
--- a/docs/source/garak.generators.nemo.rst
+++ b/docs/source/garak.generators.nemo.rst
@@ -1,6 +1,26 @@
 garak.generators.nemo
 =====================
 
+Wrapper for `nemollm <https://pypi.org/project/nemollm/>`_.
+
+Expects NGC API key in the environment variable ``NGC_API_KEY`` and the 
+organisation ID in environment variable ``ORG_ID``.
+
+Configurable values:
+
+* temperature: 0.9
+* top_p: 1.0
+* top_k: 2
+* repetition_penalty: 1.1 - between 1 and 2 incl., or none
+* beam_search_diversity_rate: 0.0
+* beam_width: 1
+* length_penalty: 1
+* guardrail: None -  (present in API but not implemented in library)
+* api_host: "https://api.llm.ngc.nvidia.com/v1" - endpoint URI
+
+
+
+
 .. automodule:: garak.generators.nemo
    :members:
    :undoc-members:

From 2b9f6826bee47792ed98c8d78cb7397817a0854d Mon Sep 17 00:00:00 2001
From: Leon Derczynski <leonderczynski@gmail.com>
Date: Mon, 23 Sep 2024 13:22:47 +0200
Subject: [PATCH 5/5] rename host/url to uri; fetch to status

---
 docs/source/garak.generators.nemo.rst |  2 +-
 docs/source/garak.generators.nvcf.rst |  8 ++++----
 garak/generators/nemo.py              |  4 ++--
 garak/generators/nvcf.py              | 14 +++++++-------
 tests/generators/test_nvcf.py         |  2 +-
 5 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/docs/source/garak.generators.nemo.rst b/docs/source/garak.generators.nemo.rst
index 0b0059333..d5042d4c3 100644
--- a/docs/source/garak.generators.nemo.rst
+++ b/docs/source/garak.generators.nemo.rst
@@ -16,7 +16,7 @@ Configurable values:
 * beam_width: 1
 * length_penalty: 1
 * guardrail: None -  (present in API but not implemented in library)
-* api_host: "https://api.llm.ngc.nvidia.com/v1" - endpoint URI
+* api_uri: "https://api.llm.ngc.nvidia.com/v1" - endpoint URI
 
 
 
diff --git a/docs/source/garak.generators.nvcf.rst b/docs/source/garak.generators.nvcf.rst
index d06ce914f..1150a65d5 100644
--- a/docs/source/garak.generators.nvcf.rst
+++ b/docs/source/garak.generators.nvcf.rst
@@ -6,8 +6,8 @@ and flexible generation.
 
 NVCF functions work by sending a request to an invocation endpoint, and then polling
 a status endpoint until the response is received. The cloud function is described
-using a UUID, which is passed to garak as the model_name. API key should be placed in
-environment variable NVCF_API_KEY or set in a garak config. For example:
+using a UUID, which is passed to garak as the ``model_name``. API key should be placed in
+environment variable ``NVCF_API_KEY`` or set in a garak config. For example:
 
 .. code-block::
 
@@ -22,8 +22,8 @@ Configurable values:
 
 * temperature - Temperature for generation. Passed as a value to the endpoint.
 * top_p - Number of tokens to sample. Passed as a value to the endpoint.
-* invoke_url_base - Base URL for the NVCF endpoint (default is for NVIDIA-hosted functions).
-* fetch_url_format - URL to check for request status updates (default is for NVIDIA-hosted functions).
+* invoke_uri_base - Base URI for the NVCF endpoint (default is for NVIDIA-hosted functions).
+* status_uri_base - Base URI to check for request status updates (default is for NVIDIA-hosted functions).
 * timeout - Read timeout for HTTP requests (note, this is network timeout, distinct from inference timeout)
 * version_id - API version id, postpended to endpoint URLs if supplied
 * stop_on_404 - Give up on endpoints returning 404 (i.e. nonexistent ones)
diff --git a/garak/generators/nemo.py b/garak/generators/nemo.py
index 1e57e4ed0..383166624 100644
--- a/garak/generators/nemo.py
+++ b/garak/generators/nemo.py
@@ -32,7 +32,7 @@ class NeMoGenerator(Generator):
         "beam_width": 1,
         "length_penalty": 1,
         "guardrail": None,  # NotImplemented in library
-        "api_host": "https://api.llm.ngc.nvidia.com/v1",
+        "api_uri": "https://api.llm.ngc.nvidia.com/v1",
     }
 
     supports_multiple_generations = False
@@ -48,7 +48,7 @@ def __init__(self, name=None, config_root=_config):
         super().__init__(self.name, config_root=config_root)
 
         self.nemo = nemollm.api.NemoLLM(
-            api_host=self.api_host, api_key=self.api_key, org_id=self.org_id
+            api_host=self.api_uri, api_key=self.api_key, org_id=self.org_id
         )
 
         if self.name is None:
diff --git a/garak/generators/nvcf.py b/garak/generators/nvcf.py
index 28bdc0d03..56ed667ad 100644
--- a/garak/generators/nvcf.py
+++ b/garak/generators/nvcf.py
@@ -23,8 +23,8 @@ class NvcfChat(Generator):
     DEFAULT_PARAMS = Generator.DEFAULT_PARAMS | {
         "temperature": 0.2,
         "top_p": 0.7,
-        "fetch_url_format": "https://api.nvcf.nvidia.com/v2/nvcf/pexec/status/",
-        "invoke_url_base": "https://api.nvcf.nvidia.com/v2/nvcf/pexec/functions/",
+        "status_uri_base": "https://api.nvcf.nvidia.com/v2/nvcf/pexec/status/",
+        "invoke_uri_base": "https://api.nvcf.nvidia.com/v2/nvcf/pexec/functions/",
         "timeout": 60,
         "version_id": None,  # string
         "stop_on_404": True,
@@ -49,10 +49,10 @@ def __init__(self, name=None, config_root=_config):
                 "Please specify a function identifier in model name (-n)"
             )
 
-        self.invoke_url = self.invoke_url_base + self.name
+        self.invoke_uri = self.invoke_uri_base + self.name
 
         if self.version_id is not None:
-            self.invoke_url += f"/versions/{self.version_id}"
+            self.invoke_uri += f"/versions/{self.version_id}"
 
         super().__init__(self.name, config_root=config_root)
 
@@ -109,7 +109,7 @@ def _call_model(
 
         request_time = time.time()
         logging.debug("nvcf : payload %s", repr(payload))
-        response = session.post(self.invoke_url, headers=self.headers, json=payload)
+        response = session.post(self.invoke_uri, headers=self.headers, json=payload)
 
         while response.status_code == 202:
             if time.time() > request_time + self.timeout:
@@ -119,8 +119,8 @@ def _call_model(
                 msg = "Got HTTP 202 but no NVCF-REQID was returned"
                 logging.info("nvcf : %s", msg)
                 raise AttributeError(msg)
-            fetch_url = self.fetch_url_format + request_id
-            response = session.get(fetch_url, headers=self.headers)
+            status_uri = self.status_uri_base + request_id
+            response = session.get(status_uri, headers=self.headers)
 
         if 400 <= response.status_code < 600:
             logging.warning("nvcf : returned error code %s", response.status_code)
diff --git a/tests/generators/test_nvcf.py b/tests/generators/test_nvcf.py
index 9232caf81..78c75cfa0 100644
--- a/tests/generators/test_nvcf.py
+++ b/tests/generators/test_nvcf.py
@@ -31,7 +31,7 @@ def test_version_endpoint(klassname):
     _config.plugins.generators["nvcf"][klassname]["api_key"] = "placeholder key"
     _config.plugins.generators["nvcf"][klassname]["version_id"] = version
     g = _plugins.load_plugin(f"generators.nvcf.{klassname}")
-    assert g.invoke_url == f"{g.invoke_url_base}{name}/versions/{version}"
+    assert g.invoke_uri == f"{g.invoke_uri_base}{name}/versions/{version}"
 
 
 @pytest.mark.parametrize("klassname", PLUGINS)