architecture: factor HFCompatible out (#954)
`HFCompatible` previously lived in `garak.generators.huggingface` and was
imported by base classes such as `garak.detectors.base`, tying slow-to-import
HF-specific dependencies to base class loading. This PR moves `HFCompatible`
into its own module, `garak.resources.api.huggingface`, so that base classes
load quickly.
leondz authored Oct 24, 2024
2 parents 9aadcfb + d0511dd commit d3634f0
Showing 5 changed files with 156 additions and 118 deletions.
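
In practice, downstream plugins now pull the mixin from the lightweight resources package instead of `garak.generators.huggingface`. A minimal sketch of the new pattern (the buff class below is hypothetical and not part of this diff):

from garak.buffs.base import Buff
from garak.resources.api.huggingface import HFCompatible  # new, fast-to-import location


class ExampleBuff(Buff, HFCompatible):
    """Hypothetical buff: torch/transformers load only when the model is needed."""

    DEFAULT_PARAMS = Buff.DEFAULT_PARAMS | {"hf_args": {"device": "cpu"}}

    def _load_model(self):
        # _select_hf_device() comes from the mixin and picks cuda, mps, or cpu,
        # honoring any device requested via hf_args
        self.device = self._select_hf_device()
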
26 changes: 14 additions & 12 deletions garak/buffs/paraphrase.py
@@ -7,36 +7,39 @@

import garak.attempt
from garak import _config
from garak.generators.huggingface import HFCompatible
from garak.buffs.base import Buff
from garak.resources.api.huggingface import HFCompatible


class PegasusT5(Buff):
class PegasusT5(Buff, HFCompatible):
"""Paraphrasing buff using Pegasus model"""

DEFAULT_PARAMS = Buff.DEFAULT_PARAMS | {
"para_model_name": "garak-llm/pegasus_paraphrase",
"hf_args": {
"device": "cpu"
}, # torch_dtype doesn't have standard support in Pegasus
"max_length": 60,
"temperature": 1.5,
}
bcp47 = "en"
doc_uri = "https://huggingface.co/tuner007/pegasus_paraphrase"

def __init__(self, config_root=_config) -> None:
self.para_model_name = "garak-llm/pegasus_paraphrase" # https://huggingface.co/tuner007/pegasus_paraphrase
self.max_length = 60
self.temperature = 1.5
self.num_return_sequences = 6
self.num_beams = self.num_return_sequences
self.torch_device = None
self.tokenizer = None
self.para_model = None
super().__init__(config_root=config_root)

def _load_model(self):
import torch
from transformers import PegasusForConditionalGeneration, PegasusTokenizer

self.torch_device = "cuda" if torch.cuda.is_available() else "cpu"
self.tokenizer = PegasusTokenizer.from_pretrained(self.para_model_name)
self.device = self._select_hf_device()
self.para_model = PegasusForConditionalGeneration.from_pretrained(
self.para_model_name
).to(self.torch_device)
).to(self.device)
self.tokenizer = PegasusTokenizer.from_pretrained(self.para_model_name)

def _get_response(self, input_text):
if self.para_model is None:
@@ -48,7 +51,7 @@ def _get_response(self, input_text):
padding="longest",
max_length=self.max_length,
return_tensors="pt",
).to(self.torch_device)
).to(self.device)
translated = self.para_model.generate(
**batch,
max_length=self.max_length,
@@ -89,7 +92,6 @@ def __init__(self, config_root=_config) -> None:
self.no_repeat_ngram_size = 2
# self.temperature = 0.7
self.max_length = 128
self.device = None
self.tokenizer = None
self.para_model = None
super().__init__(config_root=config_root)
3 changes: 2 additions & 1 deletion garak/detectors/base.py
@@ -12,9 +12,10 @@

from garak import _config
from garak.configurable import Configurable
from garak.generators.huggingface import HFCompatible
import garak.attempt

from garak.resources.api.huggingface import HFCompatible


class Detector(Configurable):
"""Base class for objects that define a way of detecting a probe hit / LLM failure"""
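
The practical effect of this one-line import swap is that loading detector base classes no longer drags `torch`/`transformers` into the process at import time: the new module imports only standard-library modules at the top level and defers torch/transformers imports to its methods. An illustrative check, not part of the PR:

import sys

import garak.detectors.base  # noqa: F401  # pulls in HFCompatible via garak.resources.api.huggingface

print("transformers" in sys.modules)  # expected: False after this refactor
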
107 changes: 2 additions & 105 deletions garak/generators/huggingface.py
@@ -14,11 +14,9 @@
https://huggingface.co/docs/api-inference/quicktour
"""

import inspect
import logging
import os
import re
from typing import Callable, List, Union
from typing import List, Union
import warnings

import backoff
@@ -28,7 +26,7 @@
from garak import _config
from garak.exception import ModelNameMissingError, GarakException
from garak.generators.base import Generator

from garak.resources.api.huggingface import HFCompatible

models_to_deprefix = ["gpt2"]

@@ -45,107 +43,6 @@ class HFInternalServerError(GarakException):
pass


class HFCompatible:
def _set_hf_context_len(self, config):
if hasattr(config, "n_ctx"):
if isinstance(config.n_ctx, int):
self.context_len = config.n_ctx

def _gather_hf_params(self, hf_constructor: Callable):
""" "Identify arguments that impact huggingface transformers resources and behavior"""

# this may be a bit too naive as it will pass any parameter valid for the hf_constructor signature
# this falls over when passed some `from_pretrained` methods as the callable model params are not always explicit
params = (
self.hf_args
if hasattr(self, "hf_args") and isinstance(self.hf_args, dict)
else {}
)
if params is not None and not "device" in params and hasattr(self, "device"):
# consider setting self.device in all cases or if self.device is not found raise error `_select_hf_device` must be called
params["device"] = self.device

args = {}

params_to_process = inspect.signature(hf_constructor).parameters

if "model" in params_to_process:
args["model"] = self.name
# expand for
params_to_process = {"do_sample": True} | params_to_process
else:
# callable is for a Pretrained class also map standard `pipeline` params
from transformers import pipeline

params_to_process = (
{"low_cpu_mem_usage": True}
| params_to_process
| inspect.signature(pipeline).parameters
)

for k in params_to_process:
if k == "model":
continue # special case `model` comes from `name` in the generator
if k in params:
val = params[k]
if k == "torch_dtype" and hasattr(torch, val):
args[k] = getattr(
torch, val
) # some model type specific classes do not yet support direct string representation
continue
if (
k == "device"
and "device_map" in params_to_process
and "device_map" in params
):
# per transformers convention hold `device_map` before `device`
continue
args[k] = params[k]

if (
not "device_map" in args
and "device_map" in params_to_process
and "device" in params_to_process
and "device" in args
):
del args["device"]
args["device_map"] = self.device

return args

def _select_hf_device(self):
"""Determine the most efficient device for tensor load, hold any existing `device` already selected"""
import torch.cuda

selected_device = None
if self.hf_args.get("device", None) is not None:
if isinstance(self.hf_args["device"], int):
# this assumes that an integer-only device selection means `cuda`
if self.hf_args["device"] < 0:
msg = f"device {self.hf_args['device']} requested but CUDA device numbering starts at zero. Use 'device: cpu' to request CPU."
logging.critical(msg)
raise ValueError(msg)
selected_device = torch.device("cuda:" + str(self.hf_args["device"]))
else:
selected_device = torch.device(self.hf_args["device"])

if selected_device is None:
selected_device = torch.device(
"cuda"
if torch.cuda.is_available()
else "mps" if torch.backends.mps.is_available() else "cpu"
)

if isinstance(selected_device, torch.device) and selected_device.type == "mps":
os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
logging.debug("Enabled MPS fallback environment variable")

logging.debug(
"Using %s, based on torch environment evaluation", selected_device
)
return selected_device


class Pipeline(Generator, HFCompatible):
"""Get text generations from a locally-run Hugging Face pipeline"""

114 changes: 114 additions & 0 deletions garak/resources/api/huggingface.py
@@ -0,0 +1,114 @@
# SPDX-FileCopyrightText: Portions Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

import inspect
import logging
import os

from typing import Callable


class HFCompatible:

"""Mixin class providing private utility methods for using Huggingface
transformers within garak"""

def _set_hf_context_len(self, config):
if hasattr(config, "n_ctx"):
if isinstance(config.n_ctx, int):
self.context_len = config.n_ctx

def _gather_hf_params(self, hf_constructor: Callable):
""" "Identify arguments that impact huggingface transformers resources and behavior"""
import torch

# this may be a bit too naive as it will pass any parameter valid for the hf_constructor signature
# this falls over when passed some `from_pretrained` methods as the callable model params are not always explicit
params = (
self.hf_args
if hasattr(self, "hf_args") and isinstance(self.hf_args, dict)
else {}
)
if params is not None and not "device" in params and hasattr(self, "device"):
# consider setting self.device in all cases or if self.device is not found raise error `_select_hf_device` must be called
params["device"] = self.device

args = {}

params_to_process = inspect.signature(hf_constructor).parameters

if "model" in params_to_process:
args["model"] = self.name
# expand for
params_to_process = {"do_sample": True} | params_to_process
else:
# callable is for a Pretrained class also map standard `pipeline` params
from transformers import pipeline

params_to_process = (
{"low_cpu_mem_usage": True}
| params_to_process
| inspect.signature(pipeline).parameters
)

for k in params_to_process:
if k == "model":
continue # special case `model` comes from `name` in the generator
if k in params:
val = params[k]
if k == "torch_dtype" and hasattr(torch, val):
args[k] = getattr(
torch, val
) # some model type specific classes do not yet support direct string representation
continue
if (
k == "device"
and "device_map" in params_to_process
and "device_map" in params
):
# per transformers convention hold `device_map` before `device`
continue
args[k] = params[k]

if (
not "device_map" in args
and "device_map" in params_to_process
and "device" in params_to_process
and "device" in args
):
del args["device"]
args["device_map"] = self.device

return args

def _select_hf_device(self):
"""Determine the most efficient device for tensor load, hold any existing `device` already selected"""
import torch

selected_device = None
if self.hf_args.get("device", None) is not None:
if isinstance(self.hf_args["device"], int):
# this assumes that an integer-only device selection means `cuda`
if self.hf_args["device"] < 0:
msg = f"device {self.hf_args['device']} requested but CUDA device numbering starts at zero. Use 'device: cpu' to request CPU."
logging.critical(msg)
raise ValueError(msg)
selected_device = torch.device("cuda:" + str(self.hf_args["device"]))
else:
selected_device = torch.device(self.hf_args["device"])

if selected_device is None:
selected_device = torch.device(
"cuda"
if torch.cuda.is_available()
else "mps" if torch.backends.mps.is_available() else "cpu"
)

if isinstance(selected_device, torch.device) and selected_device.type == "mps":
os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
logging.debug("Enabled MPS fallback environment variable")

logging.debug(
"Using %s, based on torch environment evaluation", selected_device
)
return selected_device
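
For orientation, a hedged sketch of how a host class is expected to exercise the two main helpers. The class name and attribute values are illustrative; the mixin itself only assumes `self.name`, `self.hf_args`, and (once `_select_hf_device()` has run) `self.device`:

from transformers import pipeline

from garak.resources.api.huggingface import HFCompatible


class DemoHolder(HFCompatible):
    name = "gpt2"                # mapped onto the constructor's `model` argument
    hf_args = {"device": "cpu"}  # user-facing HF options, filtered against the constructor signature


holder = DemoHolder()
holder.device = holder._select_hf_device()   # honors the explicit request: torch.device("cpu")
kwargs = holder._gather_hf_params(pipeline)  # keeps only kwargs that `pipeline` accepts
# kwargs now resembles {"model": "gpt2", "device_map": device(type="cpu")}
generator = pipeline("text-generation", **kwargs)  # downloads gpt2 on first use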
24 changes: 24 additions & 0 deletions tests/buffs/test_buffs_paraphrase.py
@@ -0,0 +1,24 @@
# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

import pytest

from garak import _plugins

BUFFS = [
classname
for (classname, active) in _plugins.enumerate_plugins("buffs")
if classname.startswith("buffs.paraphrase.")
]


@pytest.mark.parametrize("klassname", BUFFS)
def test_buff_results(klassname):
b = _plugins.load_plugin(klassname)
b._load_model()
paraphrases = b._get_response("The rain in Spain falls mainly in the plains.")
assert len(paraphrases) > 0, "paraphrase buffs must return paraphrases"
assert len(paraphrases) == len(
set(paraphrases)
), "Paraphrases should not have dupes"
assert not any([i == "" for i in paraphrases]), "No paraphrase may be empty"
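
A hedged note on running it: this test loads the real paraphrase models from the Hugging Face Hub, so the first run downloads weights; assuming a standard pytest setup it can be run in isolation with `python -m pytest tests/buffs/test_buffs_paraphrase.py`.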
