From 211eb14e0e6a375cd491a6667a175ed5ec8f07aa Mon Sep 17 00:00:00 2001 From: Leon Derczynski Date: Wed, 18 Sep 2024 09:44:56 +0200 Subject: [PATCH 1/5] add generator.guardrails doc summary --- docs/source/garak.generators.guardrails.rst | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/docs/source/garak.generators.guardrails.rst b/docs/source/garak.generators.guardrails.rst index 07b68f7ab..55a6d2f32 100644 --- a/docs/source/garak.generators.guardrails.rst +++ b/docs/source/garak.generators.guardrails.rst @@ -1,6 +1,24 @@ garak.generators.guardrails =========================== +This is a generator for wrapping a NeMo Guardrails configuration. Using this +garak generator enables security testing of a Guardrails config. + +The ``guardrails`` generator expects a path to a valid Guardrails configuration +to be passed as its name. For example, + +.. code-block:: + + garak -m guardrails -n sample_abc/config + +This generator requires installation of the `guardrails `_ +Python package. + +When invoked, garak sends prompts in series to the Guardrails setup using +``rails.generate``, and waits for a response. The generator does not support +parallelisation, so it's recommended to run smaller probes, or set ``generations`` +to a low value, in order to reduce garak run time. + .. 
automodule:: garak.generators.guardrails :members: :undoc-members: From 3e43ad6a78eec98b2853827a0f051dfa1500c9c3 Mon Sep 17 00:00:00 2001 From: Leon Derczynski Date: Wed, 18 Sep 2024 09:58:51 +0200 Subject: [PATCH 2/5] prune unused config value --- garak/generators/nvcf.py | 1 - 1 file changed, 1 deletion(-) diff --git a/garak/generators/nvcf.py b/garak/generators/nvcf.py index 91355b0be..28bdc0d03 100644 --- a/garak/generators/nvcf.py +++ b/garak/generators/nvcf.py @@ -25,7 +25,6 @@ class NvcfChat(Generator): "top_p": 0.7, "fetch_url_format": "https://api.nvcf.nvidia.com/v2/nvcf/pexec/status/", "invoke_url_base": "https://api.nvcf.nvidia.com/v2/nvcf/pexec/functions/", - "extra_nvcf_logging": False, "timeout": 60, "version_id": None, # string "stop_on_404": True, From 30fb60d3f2642d3f654e162dc5fb93e981a3a957 Mon Sep 17 00:00:00 2001 From: Leon Derczynski Date: Wed, 18 Sep 2024 10:11:36 +0200 Subject: [PATCH 3/5] add docs for nvcf generator --- docs/source/garak.generators.nvcf.rst | 92 +++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) diff --git a/docs/source/garak.generators.nvcf.rst b/docs/source/garak.generators.nvcf.rst index 975264378..d06ce914f 100644 --- a/docs/source/garak.generators.nvcf.rst +++ b/docs/source/garak.generators.nvcf.rst @@ -1,6 +1,98 @@ garak.generators.nvcf ===================== +This garak generator is a connector to NVIDIA Cloud Functions. It permits fast +and flexible generation. + +NVCF functions work by sending a request to an invocation endpoint, and then polling +a status endpoint until the response is received. The cloud function is described +using a UUID, which is passed to garak as the model_name. API key should be placed in +environment variable NVCF_API_KEY or set in a garak config. For example: + +.. code-block:: + + export NVCF_API_KEY="example-api-key-xyz" + garak -m nvcf -n 341da0d0-aa68-4c4f-89b5-fc39286de6a1 + + +Configuration +------------- + +Configurable values: + +* temperature - Temperature for generation. 
Passed as a value to the endpoint. +* top_p - Number of tokens to sample. Passed as a value to the endpoint. +* invoke_url_base - Base URL for the NVCF endpoint (default is for NVIDIA-hosted functions). +* fetch_url_format - URL to check for request status updates (default is for NVIDIA-hosted functions). +* timeout - Read timeout for HTTP requests (note, this is network timeout, distinct from inference timeout) +* version_id - API version id, postpended to endpoint URLs if supplied +* stop_on_404 - Give up on endpoints returning 404 (i.e. nonexistent ones) +* extra_params - Dictionary of optional extra values to pass to the endpoint. Default ``{"stream": False}``. + +Some NVCF instances require custom parameters, for example a "model" header. These +can be asserted in the NVCF config. For example, this cURL maps to the following +garak YAML: + + +.. code-block:: + + curl -s -X POST 'https://api.nvcf.nvidia.com/v2/nvcf/pexec/functions/341da0d0-aa68-4c4f-89b5-fc39286de6a1' \ + -H 'Content-Type: application/json' \ + -H 'Authorization: Bearer example-api-key-xyz' \ + -d '{ + "messages": [{"role": "user", "content": "How many letters are in the word strawberry?"}], + "model": "prefix/obsidianorder/terer-nor", + "max_tokens": 1024, + "stream": false + }' + +.. code-block:: yaml + + --- + plugins: + generators: + nvcf: + NvcfChat: + api_key: example-api-key-xyz + max_tokens: 1024 + extra_params: + stream: false + model: prefix/obsidianorder/terer-nor + model_type: nvcf.NvcfChat + model_name: 341da0d0-aa68-4c4f-89b5-fc39286de6a1 + +The ``nvcf`` generator uses the standard garak generator mechanism for +``max_tokens``, which is why this value is set at generator-level rather than +as a key-value pair in ``extra_params``. + + +Scaling +------- + +The NVCF generator supports parallelisation and it's recommended to use this, +invoking garak with ``--parallel_attempts`` set to a value higher than one. 
+If the NVCF times out due to insufficient capacity, garak will note this, +back off, and retry the request later. + +.. code-block:: + + garak -m nvcf -n 341da0d0-aa68-4c4f-89b5-fc39286de6a1 --parallel_attempts 32 + + +Or, as yaml config: + +.. code-block:: yaml + + --- + system: + parallel_attempts: 32 + plugins: + model_type: nvcf.NvcfChat + model_name: 341da0d0-aa68-4c4f-89b5-fc39286de6a1 + + + + .. automodule:: garak.generators.nvcf :members: :undoc-members: From e2fbd3195d782165a29fdd9470b1f23896763708 Mon Sep 17 00:00:00 2001 From: Leon Derczynski Date: Wed, 18 Sep 2024 10:17:36 +0200 Subject: [PATCH 4/5] add nemo generator docs --- docs/source/garak.generators.nemo.rst | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/docs/source/garak.generators.nemo.rst b/docs/source/garak.generators.nemo.rst index 1b9d20eef..0b0059333 100644 --- a/docs/source/garak.generators.nemo.rst +++ b/docs/source/garak.generators.nemo.rst @@ -1,6 +1,26 @@ garak.generators.nemo ===================== +Wrapper for `nemollm `_. + +Expects NGC API key in the environment variable ``NGC_API_KEY`` and the +organisation ID in environment variable ``ORG_ID``. + +Configurable values: + +* temperature: 0.9 +* top_p: 1.0 +* top_k: 2 +* repetition_penalty: 1.1 - between 1 and 2 incl., or none +* beam_search_diversity_rate: 0.0 +* beam_width: 1 +* length_penalty: 1 +* guardrail: None - (present in API but not implemented in library) +* api_host: "https://api.llm.ngc.nvidia.com/v1" - endpoint URI + + + + .. 
automodule:: garak.generators.nemo :members: :undoc-members: From 2b9f6826bee47792ed98c8d78cb7397817a0854d Mon Sep 17 00:00:00 2001 From: Leon Derczynski Date: Mon, 23 Sep 2024 13:22:47 +0200 Subject: [PATCH 5/5] rename host/url to uri; fetch to status --- docs/source/garak.generators.nemo.rst | 2 +- docs/source/garak.generators.nvcf.rst | 8 ++++---- garak/generators/nemo.py | 4 ++-- garak/generators/nvcf.py | 14 +++++++------- tests/generators/test_nvcf.py | 2 +- 5 files changed, 15 insertions(+), 15 deletions(-) diff --git a/docs/source/garak.generators.nemo.rst b/docs/source/garak.generators.nemo.rst index 0b0059333..d5042d4c3 100644 --- a/docs/source/garak.generators.nemo.rst +++ b/docs/source/garak.generators.nemo.rst @@ -16,7 +16,7 @@ Configurable values: * beam_width: 1 * length_penalty: 1 * guardrail: None - (present in API but not implemented in library) -* api_host: "https://api.llm.ngc.nvidia.com/v1" - endpoint URI +* api_uri: "https://api.llm.ngc.nvidia.com/v1" - endpoint URI diff --git a/docs/source/garak.generators.nvcf.rst b/docs/source/garak.generators.nvcf.rst index d06ce914f..1150a65d5 100644 --- a/docs/source/garak.generators.nvcf.rst +++ b/docs/source/garak.generators.nvcf.rst @@ -6,8 +6,8 @@ and flexible generation. NVCF functions work by sending a request to an invocation endpoint, and then polling a status endpoint until the response is received. The cloud function is described -using a UUID, which is passed to garak as the model_name. API key should be placed in -environment variable NVCF_API_KEY or set in a garak config. For example: +using a UUID, which is passed to garak as the ``model_name``. API key should be placed in +environment variable ``NVCF_API_KEY`` or set in a garak config. For example: .. code-block:: @@ -22,8 +22,8 @@ Configurable values: * temperature - Temperature for generation. Passed as a value to the endpoint. * top_p - Number of tokens to sample. Passed as a value to the endpoint. 
-* invoke_url_base - Base URL for the NVCF endpoint (default is for NVIDIA-hosted functions). -* fetch_url_format - URL to check for request status updates (default is for NVIDIA-hosted functions). +* invoke_uri_base - Base URL for the NVCF endpoint (default is for NVIDIA-hosted functions). +* status_uri_base - URL to check for request status updates (default is for NVIDIA-hosted functions). * timeout - Read timeout for HTTP requests (note, this is network timeout, distinct from inference timeout) * version_id - API version id, postpended to endpoint URLs if supplied * stop_on_404 - Give up on endpoints returning 404 (i.e. nonexistent ones) diff --git a/garak/generators/nemo.py b/garak/generators/nemo.py index 1e57e4ed0..383166624 100644 --- a/garak/generators/nemo.py +++ b/garak/generators/nemo.py @@ -32,7 +32,7 @@ class NeMoGenerator(Generator): "beam_width": 1, "length_penalty": 1, "guardrail": None, # NotImplemented in library - "api_host": "https://api.llm.ngc.nvidia.com/v1", + "api_uri": "https://api.llm.ngc.nvidia.com/v1", } supports_multiple_generations = False @@ -48,7 +48,7 @@ def __init__(self, name=None, config_root=_config): super().__init__(self.name, config_root=config_root) self.nemo = nemollm.api.NemoLLM( - api_host=self.api_host, api_key=self.api_key, org_id=self.org_id + api_host=self.api_uri, api_key=self.api_key, org_id=self.org_id ) if self.name is None: diff --git a/garak/generators/nvcf.py b/garak/generators/nvcf.py index 28bdc0d03..56ed667ad 100644 --- a/garak/generators/nvcf.py +++ b/garak/generators/nvcf.py @@ -23,8 +23,8 @@ class NvcfChat(Generator): DEFAULT_PARAMS = Generator.DEFAULT_PARAMS | { "temperature": 0.2, "top_p": 0.7, - "fetch_url_format": "https://api.nvcf.nvidia.com/v2/nvcf/pexec/status/", - "invoke_url_base": "https://api.nvcf.nvidia.com/v2/nvcf/pexec/functions/", + "status_uri_base": "https://api.nvcf.nvidia.com/v2/nvcf/pexec/status/", + "invoke_uri_base": "https://api.nvcf.nvidia.com/v2/nvcf/pexec/functions/", "timeout": 
60, "version_id": None, # string "stop_on_404": True, @@ -49,10 +49,10 @@ def __init__(self, name=None, config_root=_config): "Please specify a function identifier in model name (-n)" ) - self.invoke_url = self.invoke_url_base + self.name + self.invoke_uri = self.invoke_uri_base + self.name if self.version_id is not None: - self.invoke_url += f"/versions/{self.version_id}" + self.invoke_uri += f"/versions/{self.version_id}" super().__init__(self.name, config_root=config_root) @@ -109,7 +109,7 @@ def _call_model( request_time = time.time() logging.debug("nvcf : payload %s", repr(payload)) - response = session.post(self.invoke_url, headers=self.headers, json=payload) + response = session.post(self.invoke_uri, headers=self.headers, json=payload) while response.status_code == 202: if time.time() > request_time + self.timeout: @@ -119,8 +119,8 @@ def _call_model( msg = "Got HTTP 202 but no NVCF-REQID was returned" logging.info("nvcf : %s", msg) raise AttributeError(msg) - fetch_url = self.fetch_url_format + request_id - response = session.get(fetch_url, headers=self.headers) + status_uri = self.status_uri_base + request_id + response = session.get(status_uri, headers=self.headers) if 400 <= response.status_code < 600: logging.warning("nvcf : returned error code %s", response.status_code) diff --git a/tests/generators/test_nvcf.py b/tests/generators/test_nvcf.py index 9232caf81..78c75cfa0 100644 --- a/tests/generators/test_nvcf.py +++ b/tests/generators/test_nvcf.py @@ -31,7 +31,7 @@ def test_version_endpoint(klassname): _config.plugins.generators["nvcf"][klassname]["api_key"] = "placeholder key" _config.plugins.generators["nvcf"][klassname]["version_id"] = version g = _plugins.load_plugin(f"generators.nvcf.{klassname}") - assert g.invoke_url == f"{g.invoke_url_base}{name}/versions/{version}" + assert g.invoke_uri == f"{g.invoke_uri_base}{name}/versions/{version}" @pytest.mark.parametrize("klassname", PLUGINS)