From fd6e10e58abe9d8a609a8bf0e5ba0e4c0abe5d15 Mon Sep 17 00:00:00 2001 From: Leon Derczynski Date: Mon, 21 Oct 2024 17:50:27 +0200 Subject: [PATCH 01/10] factor HFCompatible out of generators into resources --- garak/buffs/paraphrase.py | 2 +- garak/detectors/base.py | 3 +- garak/generators/huggingface.py | 107 +------------------------------- 3 files changed, 5 insertions(+), 107 deletions(-) diff --git a/garak/buffs/paraphrase.py b/garak/buffs/paraphrase.py index 935782aac..fc3c3dec2 100644 --- a/garak/buffs/paraphrase.py +++ b/garak/buffs/paraphrase.py @@ -7,8 +7,8 @@ import garak.attempt from garak import _config -from garak.generators.huggingface import HFCompatible from garak.buffs.base import Buff +from garak.resources.api.huggingface import HFCompatible class PegasusT5(Buff): diff --git a/garak/detectors/base.py b/garak/detectors/base.py index 703db8264..82770ba35 100644 --- a/garak/detectors/base.py +++ b/garak/detectors/base.py @@ -12,9 +12,10 @@ from garak import _config from garak.configurable import Configurable -from garak.generators.huggingface import HFCompatible import garak.attempt +from garak.resources.api.huggingface import HFCompatible + class Detector(Configurable): """Base class for objects that define a way of detecting a probe hit / LLM failure""" diff --git a/garak/generators/huggingface.py b/garak/generators/huggingface.py index 702470a6f..81c7742b4 100644 --- a/garak/generators/huggingface.py +++ b/garak/generators/huggingface.py @@ -14,11 +14,9 @@ https://huggingface.co/docs/api-inference/quicktour """ -import inspect import logging -import os import re -from typing import Callable, List, Union +from typing import List, Union import warnings import backoff @@ -28,7 +26,7 @@ from garak import _config from garak.exception import ModelNameMissingError, GarakException from garak.generators.base import Generator - +from garak.resources.api.huggingface import HFCompatible models_to_deprefix = ["gpt2"] @@ -45,107 +43,6 @@ class HFInternalServerError(GarakException): pass -class HFCompatible: - def _set_hf_context_len(self, config): - if hasattr(config, "n_ctx"): - if isinstance(config.n_ctx, int): - self.context_len = config.n_ctx - - def _gather_hf_params(self, hf_constructor: Callable): - """ "Identify arguments that impact huggingface transformers resources and behavior""" - - # this may be a bit too naive as it will pass any parameter valid for the hf_constructor signature - # this falls over when passed some `from_pretrained` methods as the callable model params are not always explicit - params = ( - self.hf_args - if hasattr(self, "hf_args") and isinstance(self.hf_args, dict) - else {} - ) - if params is not None and not "device" in params and hasattr(self, "device"): - # consider setting self.device in all cases or if self.device is not found raise error `_select_hf_device` must be called - params["device"] = self.device - - args = {} - - params_to_process = inspect.signature(hf_constructor).parameters - - if "model" in params_to_process: - args["model"] = self.name - # expand for - params_to_process = {"do_sample": True} | params_to_process - else: - # callable is for a Pretrained class also map standard `pipeline` params - from transformers import pipeline - - params_to_process = ( - {"low_cpu_mem_usage": True} - | params_to_process - | inspect.signature(pipeline).parameters - ) - - for k in params_to_process: - if k == "model": - continue # special case `model` comes from `name` in the generator - if k in params: - val = params[k] - if k == "torch_dtype" and 
hasattr(torch, val): - args[k] = getattr( - torch, val - ) # some model type specific classes do not yet support direct string representation - continue - if ( - k == "device" - and "device_map" in params_to_process - and "device_map" in params - ): - # per transformers convention hold `device_map` before `device` - continue - args[k] = params[k] - - if ( - not "device_map" in args - and "device_map" in params_to_process - and "device" in params_to_process - and "device" in args - ): - del args["device"] - args["device_map"] = self.device - - return args - - def _select_hf_device(self): - """Determine the most efficient device for tensor load, hold any existing `device` already selected""" - import torch.cuda - - selected_device = None - if self.hf_args.get("device", None) is not None: - if isinstance(self.hf_args["device"], int): - # this assumes that indexed only devices selections means `cuda` - if self.hf_args["device"] < 0: - msg = f"device {self.hf_args['device']} requested but CUDA device numbering starts at zero. Use 'device: cpu' to request CPU." - logging.critical(msg) - raise ValueError(msg) - selected_device = torch.device("cuda:" + str(self.hf_args["device"])) - else: - selected_device = torch.device(self.hf_args["device"]) - - if selected_device is None: - selected_device = torch.device( - "cuda" - if torch.cuda.is_available() - else "mps" if torch.backends.mps.is_available() else "cpu" - ) - - if isinstance(selected_device, torch.device) and selected_device.type == "mps": - os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1" - logging.debug("Enabled MPS fallback environment variable") - - logging.debug( - "Using %s, based on torch environment evaluation", selected_device - ) - return selected_device - - class Pipeline(Generator, HFCompatible): """Get text generations from a locally-run Hugging Face pipeline""" From 29b50fe9057f7bfd569612646aa6755d89031543 Mon Sep 17 00:00:00 2001 From: Leon Derczynski Date: Mon, 21 Oct 2024 17:56:09 +0200 Subject: [PATCH 02/10] add hf api module --- garak/resources/api/huggingface.py | 111 +++++++++++++++++++++++++++++ 1 file changed, 111 insertions(+) create mode 100644 garak/resources/api/huggingface.py diff --git a/garak/resources/api/huggingface.py b/garak/resources/api/huggingface.py new file mode 100644 index 000000000..8f2d6f658 --- /dev/null +++ b/garak/resources/api/huggingface.py @@ -0,0 +1,111 @@ +# SPDX-FileCopyrightText: Portions Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0
+
+import inspect
+import logging
+import os
+
+from typing import Callable
+
+
+class HFCompatible:
+
+    def _set_hf_context_len(self, config):
+        if hasattr(config, "n_ctx"):
+            if isinstance(config.n_ctx, int):
+                self.context_len = config.n_ctx
+
+    def _gather_hf_params(self, hf_constructor: Callable):
+        """Identify arguments that impact huggingface transformers resources and behavior"""
+        import torch
+
+        # this may be a bit too naive as it will pass any parameter valid for the hf_constructor signature
+        # this falls over when passed some `from_pretrained` methods as the callable model params are not always explicit
+        params = (
+            self.hf_args
+            if hasattr(self, "hf_args") and isinstance(self.hf_args, dict)
+            else {}
+        )
+        if params is not None and "device" not in params and hasattr(self, "device"):
+            # consider setting self.device in all cases; if self.device is not found, raise an error: `_select_hf_device` must be called first
+            params["device"] = self.device
+
+        args = {}
+
+        params_to_process = inspect.signature(hf_constructor).parameters
+
+        if "model" in params_to_process:
+            args["model"] = self.name
+            # expand with known defaults for `pipeline` constructors
+            params_to_process = {"do_sample": True} | params_to_process
+        else:
+            # callable is for a Pretrained class; also map standard `pipeline` params
+            from transformers import pipeline
+
+            params_to_process = (
+                {"low_cpu_mem_usage": True}
+                | params_to_process
+                | inspect.signature(pipeline).parameters
+            )
+
+        for k in params_to_process:
+            if k == "model":
+                continue  # special case: `model` comes from `name` in the generator
+            if k in params:
+                val = params[k]
+                if k == "torch_dtype" and hasattr(torch, val):
+                    args[k] = getattr(
+                        torch, val
+                    )  # some model-type-specific classes do not yet support direct string representation
+                    continue
+                if (
+                    k == "device"
+                    and "device_map" in params_to_process
+                    and "device_map" in params
+                ):
+                    # per transformers convention, prefer `device_map` over `device`
+                    continue
+                args[k] = params[k]
+
+        if (
+            "device_map" not in args
+            and "device_map" in params_to_process
+            and "device" in params_to_process
+            and "device" in args
+        ):
+            del args["device"]
+            args["device_map"] = self.device
+
+        return args
+
+    def _select_hf_device(self):
+        """Determine the most efficient device for tensor load, honoring any `device` already selected"""
+        import torch
+
+        selected_device = None
+        if self.hf_args.get("device", None) is not None:
+            if isinstance(self.hf_args["device"], int):
+                # this assumes that an integer-only device selection means `cuda`
+                if self.hf_args["device"] < 0:
+                    msg = f"device {self.hf_args['device']} requested but CUDA device numbering starts at zero. Use 'device: cpu' to request CPU."
+ logging.critical(msg) + raise ValueError(msg) + selected_device = torch.device("cuda:" + str(self.hf_args["device"])) + else: + selected_device = torch.device(self.hf_args["device"]) + + if selected_device is None: + selected_device = torch.device( + "cuda" + if torch.cuda.is_available() + else "mps" if torch.backends.mps.is_available() else "cpu" + ) + + if isinstance(selected_device, torch.device) and selected_device.type == "mps": + os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1" + logging.debug("Enabled MPS fallback environment variable") + + logging.debug( + "Using %s, based on torch environment evaluation", selected_device + ) + return selected_device From 117c2c865fd0f89a6a04022b2c3fedaba6baceb7 Mon Sep 17 00:00:00 2001 From: Leon Derczynski Date: Mon, 21 Oct 2024 19:59:32 +0200 Subject: [PATCH 03/10] pegasus should use HFCompat --- garak/buffs/paraphrase.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/garak/buffs/paraphrase.py b/garak/buffs/paraphrase.py index fc3c3dec2..fcbd6e296 100644 --- a/garak/buffs/paraphrase.py +++ b/garak/buffs/paraphrase.py @@ -14,11 +14,14 @@ class PegasusT5(Buff): """Paraphrasing buff using Pegasus model""" + DEFAULT_PARAMS = Buff.DEFAULT_PARAMS | { + "para_model_name": "garak-llm/pegasus_paraphrase", + "hf_args": {"device": "cpu", "torch_dtype": "float32"}, + } bcp47 = "en" doc_uri = "https://huggingface.co/tuner007/pegasus_paraphrase" def __init__(self, config_root=_config) -> None: - self.para_model_name = "garak-llm/pegasus_paraphrase" # https://huggingface.co/tuner007/pegasus_paraphrase self.max_length = 60 self.temperature = 1.5 self.num_return_sequences = 6 From 4861a5f8ef810a1bc79da14943bf8b971c3e6d22 Mon Sep 17 00:00:00 2001 From: Leon Derczynski Date: Mon, 21 Oct 2024 19:59:43 +0200 Subject: [PATCH 04/10] docstring for HFCompat --- garak/resources/api/huggingface.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/garak/resources/api/huggingface.py b/garak/resources/api/huggingface.py index 8f2d6f658..6af14a834 100644 --- a/garak/resources/api/huggingface.py +++ b/garak/resources/api/huggingface.py @@ -10,6 +10,9 @@ class HFCompatible: + """Mixin class providing private utility methods for using Huggingface + transformers within garak""" + def _set_hf_context_len(self, config): if hasattr(config, "n_ctx"): if isinstance(config.n_ctx, int): From 0409b64455a1aedc5d9961b6b640779c35d3b72a Mon Sep 17 00:00:00 2001 From: Leon Derczynski Date: Tue, 22 Oct 2024 11:19:11 +0200 Subject: [PATCH 05/10] buffs.paraphrase.PegasusT5 should consume HFCompat --- garak/buffs/paraphrase.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/garak/buffs/paraphrase.py b/garak/buffs/paraphrase.py index fcbd6e296..0aef61a57 100644 --- a/garak/buffs/paraphrase.py +++ b/garak/buffs/paraphrase.py @@ -11,35 +11,34 @@ from garak.resources.api.huggingface import HFCompatible -class PegasusT5(Buff): +class PegasusT5(Buff, HFCompatible): """Paraphrasing buff using Pegasus model""" DEFAULT_PARAMS = Buff.DEFAULT_PARAMS | { "para_model_name": "garak-llm/pegasus_paraphrase", "hf_args": {"device": "cpu", "torch_dtype": "float32"}, + "max_length": 60, + "temperature": 1.5, } bcp47 = "en" doc_uri = "https://huggingface.co/tuner007/pegasus_paraphrase" def __init__(self, config_root=_config) -> None: - self.max_length = 60 - self.temperature = 1.5 self.num_return_sequences = 6 self.num_beams = self.num_return_sequences - self.torch_device = None self.tokenizer = None self.para_model = None 
super().__init__(config_root=config_root) def _load_model(self): - import torch from transformers import PegasusForConditionalGeneration, PegasusTokenizer - self.torch_device = "cuda" if torch.cuda.is_available() else "cpu" + self.device = self._select_hf_device() + self.tokenizer = PegasusTokenizer.from_pretrained(self.para_model_name) self.para_model = PegasusForConditionalGeneration.from_pretrained( self.para_model_name - ).to(self.torch_device) + ).to(self.device) def _get_response(self, input_text): if self.para_model is None: @@ -51,7 +50,7 @@ def _get_response(self, input_text): padding="longest", max_length=self.max_length, return_tensors="pt", - ).to(self.torch_device) + ).to(self.device) translated = self.para_model.generate( **batch, max_length=self.max_length, From f605aeb44721ad0dcf515fcd3a261968a9b70ce3 Mon Sep 17 00:00:00 2001 From: Leon Derczynski Date: Tue, 22 Oct 2024 11:28:21 +0200 Subject: [PATCH 06/10] catch regression where paraphraser loads but returns empty --- garak/buffs/paraphrase.py | 1 - 1 file changed, 1 deletion(-) diff --git a/garak/buffs/paraphrase.py b/garak/buffs/paraphrase.py index 0aef61a57..6b6aaf4b1 100644 --- a/garak/buffs/paraphrase.py +++ b/garak/buffs/paraphrase.py @@ -91,7 +91,6 @@ def __init__(self, config_root=_config) -> None: self.no_repeat_ngram_size = 2 # self.temperature = 0.7 self.max_length = 128 - self.device = None self.tokenizer = None self.para_model = None super().__init__(config_root=config_root) From 66365bfa1e8d109f577dfb333150873f7be529f8 Mon Sep 17 00:00:00 2001 From: Leon Derczynski Date: Tue, 22 Oct 2024 14:13:19 +0200 Subject: [PATCH 07/10] add paraphrase buff tests --- tests/buffs/test_buffs_paraphrase.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 tests/buffs/test_buffs_paraphrase.py diff --git a/tests/buffs/test_buffs_paraphrase.py b/tests/buffs/test_buffs_paraphrase.py new file mode 100644 index 000000000..8eb00e521 --- /dev/null +++ b/tests/buffs/test_buffs_paraphrase.py @@ -0,0 +1,24 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +import pytest + +from garak import _plugins + +BUFFS = [ + classname + for (classname, active) in _plugins.enumerate_plugins("buffs") + if classname.startswith("buffs.paraphrase.") +] + + +@pytest.mark.parametrize("klassname", BUFFS) +def test_buff_results(klassname): + b = _plugins.load_plugin(klassname) + b._load_model() + paraphrases = b._get_response("The rain in Spain falls mainly in the plains.") + assert len(paraphrases) > 0, "paraphrase buffs must return paraphrases" + assert len(paraphrases) == len( + set(paraphrases) + ), "Paraphrases should not have dupes" + assert not any([i == "" for i in paraphrases]), "No paraphrase may be empty" From 21d15facd8cbe31c0607b6e1c1d7c4cd161d8024 Mon Sep 17 00:00:00 2001 From: Leon Derczynski Date: Tue, 22 Oct 2024 16:15:54 +0200 Subject: [PATCH 08/10] rm torch_dtype --- garak/buffs/paraphrase.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/garak/buffs/paraphrase.py b/garak/buffs/paraphrase.py index 6b6aaf4b1..78e00da64 100644 --- a/garak/buffs/paraphrase.py +++ b/garak/buffs/paraphrase.py @@ -16,7 +16,7 @@ class PegasusT5(Buff, HFCompatible): DEFAULT_PARAMS = Buff.DEFAULT_PARAMS | { "para_model_name": "garak-llm/pegasus_paraphrase", - "hf_args": {"device": "cpu", "torch_dtype": "float32"}, + "hf_args": {"device": "cpu"}, # torch_dtype doesn't have standard support in Pegasus "max_length": 60, "temperature": 1.5, } From caafa4f6a7d1a5fe9bab4e73c188e6e1a5e09da5 Mon Sep 17 00:00:00 2001 From: Leon Derczynski Date: Tue, 22 Oct 2024 17:21:30 +0200 Subject: [PATCH 09/10] move to using model_kwargs --- garak/buffs/paraphrase.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/garak/buffs/paraphrase.py b/garak/buffs/paraphrase.py index 78e00da64..6a67fc4f9 100644 --- a/garak/buffs/paraphrase.py +++ b/garak/buffs/paraphrase.py @@ -34,11 +34,13 @@ def _load_model(self): from transformers import PegasusForConditionalGeneration, PegasusTokenizer self.device = self._select_hf_device() - - self.tokenizer = PegasusTokenizer.from_pretrained(self.para_model_name) + model_kwargs = self._gather_hf_params( + hf_constructor=PegasusForConditionalGeneration.from_pretrained + ) # will defer to device_map if device map was `auto` may not match self.device self.para_model = PegasusForConditionalGeneration.from_pretrained( - self.para_model_name + self.para_model_name, **model_kwargs ).to(self.device) + self.tokenizer = PegasusTokenizer.from_pretrained(self.para_model_name, **model_kwargs) def _get_response(self, input_text): if self.para_model is None: From d0511dd1ca351fb931b9dfa31bdb8530885a08b9 Mon Sep 17 00:00:00 2001 From: Leon Derczynski Date: Thu, 24 Oct 2024 11:15:08 +0200 Subject: [PATCH 10/10] unplug model_kwargs from pegasus setup --- garak/buffs/paraphrase.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/garak/buffs/paraphrase.py b/garak/buffs/paraphrase.py index 6a67fc4f9..42d1a8a62 100644 --- a/garak/buffs/paraphrase.py +++ b/garak/buffs/paraphrase.py @@ -16,7 +16,9 @@ class PegasusT5(Buff, HFCompatible): DEFAULT_PARAMS = Buff.DEFAULT_PARAMS | { "para_model_name": "garak-llm/pegasus_paraphrase", - "hf_args": {"device": "cpu"}, # torch_dtype doesn't have standard support in Pegasus + "hf_args": { + "device": "cpu" + }, # torch_dtype doesn't have standard support in Pegasus "max_length": 60, "temperature": 1.5, } @@ -34,13 +36,10 @@ def _load_model(self): from transformers import PegasusForConditionalGeneration, PegasusTokenizer self.device 
= self._select_hf_device() - model_kwargs = self._gather_hf_params( - hf_constructor=PegasusForConditionalGeneration.from_pretrained - ) # will defer to device_map if device map was `auto` may not match self.device self.para_model = PegasusForConditionalGeneration.from_pretrained( - self.para_model_name, **model_kwargs + self.para_model_name ).to(self.device) - self.tokenizer = PegasusTokenizer.from_pretrained(self.para_model_name, **model_kwargs) + self.tokenizer = PegasusTokenizer.from_pretrained(self.para_model_name) def _get_response(self, input_text): if self.para_model is None:
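
---

Reviewer note: below is a minimal sketch, not part of any patch in this series, of how a plugin is expected to consume the relocated HFCompatible mixin once patch 10 lands. The class name DemoParaphraseBuff is hypothetical; the mixin calls and parameters mirror buffs.paraphrase.PegasusT5 as modified above.

    # hypothetical example only -- not included in this series
    from garak import _config
    from garak.buffs.base import Buff
    from garak.resources.api.huggingface import HFCompatible


    class DemoParaphraseBuff(Buff, HFCompatible):
        """Illustrative buff showing the post-refactor HFCompatible call pattern"""

        DEFAULT_PARAMS = Buff.DEFAULT_PARAMS | {
            "para_model_name": "garak-llm/pegasus_paraphrase",  # any seq2seq HF model
            "hf_args": {"device": "cpu"},  # read by _select_hf_device()
        }
        bcp47 = "en"

        def __init__(self, config_root=_config) -> None:
            self.para_model = None
            super().__init__(config_root=config_root)

        def _load_model(self):
            from transformers import PegasusForConditionalGeneration

            # resolve cuda / mps / cpu from hf_args plus the local torch environment
            self.device = self._select_hf_device()
            self.para_model = PegasusForConditionalGeneration.from_pretrained(
                self.para_model_name
            ).to(self.device)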