Skip to content

Commit

Permalink
Feature: enable configuration updates (#1026)
Browse files Browse the repository at this point in the history
  • Loading branch information
jmartin-tech committed Dec 3, 2024
2 parents add9e1d + 426f52e commit 9b33870
Show file tree
Hide file tree
Showing 13 changed files with 442 additions and 63 deletions.
178 changes: 117 additions & 61 deletions garak/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,45 @@

"""Flow for invoking garak from the command line"""

command_options = "list_detectors list_probes list_generators list_buffs list_config plugin_info interactive report version".split()
command_options = "list_detectors list_probes list_generators list_buffs list_config plugin_info interactive report version fix".split()


def parse_cli_plugin_config(plugin_type, args):
    """Load the JSON options supplied on the CLI for one plugin type.

    Two attributes are looked up on ``args``: ``<plugin_type>_options``
    (inline JSON text) and ``<plugin_type>_option_file`` (path to a JSON
    file). Inline JSON takes precedence when both are supplied. An attribute
    that is missing or set to ``None`` counts as "not supplied".

    Args:
        plugin_type: singular plugin type name used as the attribute prefix,
            e.g. ``"probe"`` or ``"generator"``.
        args: parsed CLI arguments; any object exposing the options as
            attributes (e.g. ``argparse.Namespace``).

    Returns:
        The decoded JSON value, or ``None`` when neither option was supplied
        or when the inline JSON text could not be parsed.

    Raises:
        FileNotFoundError: the option-file path does not point to a file.
        json.JSONDecodeError: the option file does not contain valid JSON.
    """
    import json
    import logging
    import os

    opts_arg = f"{plugin_type}_options"
    opts_file = f"{plugin_type}_option_file"

    inline_json = getattr(args, opts_arg, None)
    if inline_json is not None:
        try:
            return json.loads(inline_json)
        except json.JSONDecodeError as e:
            # Inline JSON is best effort: a parse failure is logged and the
            # option is ignored; there is no fallback to the option file.
            logging.warning("Failed to parse JSON %s: %s", opts_arg, e.args[0])
            return None

    file_arg = getattr(args, opts_file, None)
    if file_arg is None:
        return None

    if not os.path.isfile(file_arg):
        # Report the offending path, not the name of the CLI option.
        raise FileNotFoundError(f"Path provided is not a file: {file_arg}")
    with open(file_arg, encoding="utf-8") as f:
        options_json = f.read().strip()
    try:
        return json.loads(options_json)
    except json.JSONDecodeError as e:
        logging.warning("Failed to parse JSON %s: %s", opts_file, e.args[0])
        # A broken option file is fatal; bare raise keeps the traceback.
        raise


def main(arguments=None) -> None:
"""Main entry point for garak runs invoked from the CLI"""
import datetime

from garak import __description__
from garak import _config
from garak import _config, _plugins
from garak.exception import GarakException

_config.transient.starttime = datetime.datetime.now()
Expand All @@ -38,6 +68,7 @@ def main(arguments=None) -> None:
prog="python -m garak",
description="LLM safety & security scanning tool",
epilog="See https://github.com/NVIDIA/garak",
allow_abbrev=False,
)

## SYSTEM
Expand Down Expand Up @@ -108,7 +139,7 @@ def main(arguments=None) -> None:
)

## PLUGINS
# generator
# generators
parser.add_argument(
"--model_type",
"-m",
Expand All @@ -122,18 +153,6 @@ def main(arguments=None) -> None:
default=None,
help="name of the model, e.g. 'timdettmers/guanaco-33b-merged'",
)
generator_args = parser.add_mutually_exclusive_group()
generator_args.add_argument(
"--generator_option_file",
"-G",
type=str,
help="path to JSON file containing options to pass to generator",
)
generator_args.add_argument(
"--generator_options",
type=str,
help="options to pass to the generator",
)
# probes
parser.add_argument(
"--probes",
Expand All @@ -148,18 +167,6 @@ def main(arguments=None) -> None:
type=str,
help="only include probes with a tag that starts with this value (e.g. owasp:llm01)",
)
probe_args = parser.add_mutually_exclusive_group()
probe_args.add_argument(
"--probe_option_file",
"-P",
type=str,
help="path to JSON file containing options to pass to probes",
)
probe_args.add_argument(
"--probe_options",
type=str,
help="options to pass to probes, formatted as a JSON dict",
)
# detectors
parser.add_argument(
"--detectors",
Expand All @@ -181,7 +188,23 @@ def main(arguments=None) -> None:
default=_config.plugins.buff_spec,
help="list of buffs to use. Default is none",
)

# file or json based config options
plugin_types = sorted(
zip([type.lower() for type in _plugins.PLUGIN_CLASSES], _plugins.PLUGIN_TYPES)
)
for plugin_type, _ in plugin_types:
probe_args = parser.add_mutually_exclusive_group()
probe_args.add_argument(
f"--{plugin_type}_option_file",
f"-{plugin_type[0].upper()}",
type=str,
help=f"path to JSON file containing options to pass to {plugin_type}",
)
probe_args.add_argument(
f"--{plugin_type}_options",
type=str,
help=f"options to pass to {plugin_type}, formatted as a JSON dict",
)
## REPORTING
parser.add_argument(
"--taxonomy",
Expand Down Expand Up @@ -247,6 +270,12 @@ def main(arguments=None) -> None:
help="Launch garak in interactive.py mode",
)

parser.add_argument(
"--fix",
action="store_true",
help="Update provided configuration with fixer migrations; requires one of --config / --*_option_file, / --*_options",
)

## EXPERIMENTAL FEATURES
if _config.system.enable_experimental:
# place parser argument defs for experimental features here
Expand Down Expand Up @@ -350,44 +379,17 @@ def main(arguments=None) -> None:
# startup
import sys
import json
import os

import garak.evaluators

try:
plugin_types = ["probe", "generator"]
has_config_file_or_json = False
# do a special thing for CLI probe options, generator options
for plugin_type in plugin_types:
opts_arg = f"{plugin_type}_options"
opts_file = f"{plugin_type}_option_file"
opts_cli_config = None
if opts_arg in args or opts_file in args:
if opts_arg in args:
opts_argv = getattr(args, opts_arg)
try:
opts_cli_config = json.loads(opts_argv)
except json.JSONDecodeError as e:
logging.warning(
"Failed to parse JSON %s: %s", opts_arg, e.args[0]
)

elif opts_file in args:
file_arg = getattr(args, opts_file)
if not os.path.isfile(file_arg):
raise FileNotFoundError(
f"Path provided is not a file: {opts_file}"
)
with open(file_arg, encoding="utf-8") as f:
options_json = f.read().strip()
try:
opts_cli_config = json.loads(options_json)
except json.decoder.JSONDecodeError as e:
logging.warning(
"Failed to parse JSON %s: %s", opts_file, {e.args[0]}
)
raise e

config_plugin_type = getattr(_config.plugins, f"{plugin_type}s")
for plugin_type, plugin_plural in plugin_types:
opts_cli_config = parse_cli_plugin_config(plugin_type, args)
if opts_cli_config is not None:
has_config_file_or_json = True
config_plugin_type = getattr(_config.plugins, plugin_plural)

config_plugin_type = _config._combine_into(
opts_cli_config, config_plugin_type
Expand Down Expand Up @@ -429,6 +431,60 @@ def main(arguments=None) -> None:
print("cli args:\n ", args)
command.list_config()

elif args.fix:
from garak.resources import fixer
import json
import yaml

# process all possible configuration entries
# should this restrict the config updates to a single fixable value?
# for example allowed commands:
# --fix --config filename.yaml
# --fix --generator_option_file filename.json
# --fix --generator_options json
#
# disallowed commands:
# --fix --config filename.yaml --generator_option_file filename.json
# --fix --generator_option_file filename.json --probe_option_file filename.json
#
# already unsupported as only one is held:
# --fix --generator_option_file filename.json --generator_options json_data
#
# How should this handle garak.site.yaml? Only if --fix was provided and no other options offered?
# For now process all files registered as part of the config
has_changes = False
if has_config_file_or_json:
for plugin_type, plugin_plural in plugin_types:
# cli plugins options stub out only a "plugins" sub key
plugin_cli_config = parse_cli_plugin_config(plugin_type, args)
if plugin_cli_config is not None:
cli_config = {
"plugins": {f"{plugin_plural}": plugin_cli_config}
}
migrated_config = fixer.migrate(cli_config)
if cli_config != migrated_config:
has_changes = True
msg = f"Updated '{plugin_type}' configuration: \n"
msg += json.dumps(
migrated_config["plugins"][plugin_plural], indent=2
) # pretty print the config in json
print(msg)
else:
# check if garak.site.yaml needs to be fixed up?
for filename in _config.config_files:
with open(filename, encoding="UTF-8") as file:
cli_config = yaml.safe_load(file)
migrated_config = fixer.migrate(cli_config)
if cli_config != migrated_config:
has_changes = True
msg = f"Updated {filename}: \n"
msg += yaml.dump(migrated_config)
print(msg)
# should this add support for --*_spec entries passed on cli?
if has_changes:
exit(1) # exit with error code to denote changes
else:
print("No revisions applied. Please verify options provided for `--fix`")
elif args.report:
from garak.report import Report

Expand Down
3 changes: 3 additions & 0 deletions garak/probes/av_spam_scanning.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ class EICAR(Probe):
code. This probe test if models will provide output that should be stopped
by a virus scanner. Only applicable to models expected to do antivirus scanning."""

aliases = ["knownbadsignatures.EICAR"]
bcp47 = "*"
doc_uri = "https://en.wikipedia.org/wiki/EICAR_test_file"
recommended_detector = [
Expand All @@ -65,6 +66,7 @@ class GTUBE(Probe):
Does the model generate content that should be stopped by a spam filter? If there's
a spam filter in place, this should catch the GTUBE signature."""

aliases = ["knownbadsignatures.GTUBE"]
bcp47 = "*"
doc_uri = "https://en.wikipedia.org/wiki/GTUBE"
recommended_detector = [
Expand Down Expand Up @@ -92,6 +94,7 @@ class GTphish(Probe):
A phishing scanner should catch the GTphish code and hopefully block this content.
If there's no scanner in place, the GTphish code is more likely to come through."""

aliases = ["knownbadsignatures.GTphish"]
bcp47 = "*"
doc_uri = "https://www.cisco.com/c/en/us/support/docs/security/email-security-appliance/213465-testing-outbreak-filter-url-rewriting.html"
recommended_detector = [
Expand Down
1 change: 1 addition & 0 deletions garak/probes/continuation.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,5 +88,6 @@ class ContinueSlursReclaimedSlursMini(ContinueSlursReclaimedSlurs, Probe):
checks to see if the model will complete the slur. It uses a subset of 80
slurs & contexts that test LLMs have been likely to provide completions for."""

aliases = ["continuation.ContinueSlursReclaimedSlurs80"]
active = True
source_resource_filename = "slurprompts_mini.jsonl"
2 changes: 2 additions & 0 deletions garak/probes/divergence.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ class Repeat(Probe):
continue the repetition in its output, which can degrade to seemingly replaying
training data."""

aliases = ["replay.Repeat"]
bcp47 = "*"
primary_detector = "divergence.RepeatDiverges"
extended_detectors = ["mitigation.MitigationBypass"]
Expand Down Expand Up @@ -88,6 +89,7 @@ class RepeatExtended(Repeat):
training data."""

active = False
aliases = ["replay.RepeatExtended"]
repeat_word_list = [
"company",
"one",
Expand Down
2 changes: 2 additions & 0 deletions garak/probes/suffix.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ class GCGCached(Probe):
An automatic jailbreaking probe, using gradient descent. This implementation
uses a pre-computed cached set of GCG prompts."""

aliases = ["gcg.GCGCached"]
bcp47 = "en"
recommended_detector = ["mitigation.MitigationBypass"]
tags = [
Expand Down Expand Up @@ -52,6 +53,7 @@ class GCG(Probe):
An automatic jailbreaking probe, using gradient descent."""

aliases = ["gcg.GCG"]
bcp47 = "en"
recommended_detector = ["mitigation.MitigationBypass"]
tags = [
Expand Down
15 changes: 15 additions & 0 deletions garak/resources/fixer/20240628_gcg_rename.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# SPDX-FileCopyrightText: Portions Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

from garak.resources.fixer import Migration
from garak.resources.fixer import _plugin


class RenameGCG(Migration):
    """Configuration migration for the probe family rename ``gcg`` -> ``suffix``."""

    # Declared static because the signature takes no ``self``; without the
    # decorator, calling ``apply`` on an instance would pass the instance as
    # ``config_dict``. Class-level calls behave exactly as before.
    @staticmethod
    def apply(config_dict: dict) -> dict:
        """Rename probe family gcg -> suffix"""

        # Delegates to ``_plugin.rename`` with the config path that holds the
        # probe entries and the old/new family names.
        path = ["plugins", "probes"]
        old = "gcg"
        new = "suffix"
        return _plugin.rename(config_dict, path, old, new)
15 changes: 15 additions & 0 deletions garak/resources/fixer/20240801_continuation_rename.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# SPDX-FileCopyrightText: Portions Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

from garak.resources.fixer import Migration
from garak.resources.fixer import _plugin


class RenameContinuation(Migration):
    """Configuration migration for the continuation probe class rename
    ``ContinueSlursReclaimedSlurs80`` -> ``ContinueSlursReclaimedSlursMini``."""

    # Declared static because the signature takes no ``self``; without the
    # decorator, calling ``apply`` on an instance would pass the instance as
    # ``config_dict``. Class-level calls behave exactly as before.
    @staticmethod
    def apply(config_dict: dict) -> dict:
        """Rename continuation probe class 80 -> Mini"""

        # The path goes one level deeper than the family renames: the entry
        # being renamed is a class name under the ``continuation`` family.
        path = ["plugins", "probes", "continuation"]
        old = "ContinueSlursReclaimedSlurs80"
        new = "ContinueSlursReclaimedSlursMini"
        return _plugin.rename(config_dict, path, old, new)
15 changes: 15 additions & 0 deletions garak/resources/fixer/20240822_knownbadsignatures_rename.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# SPDX-FileCopyrightText: Portions Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

from garak.resources.fixer import Migration
from garak.resources.fixer import _plugin


class RenameKnownbadsignatures(Migration):
    """Configuration migration for the probe family rename
    ``knownbadsignatures`` -> ``av_spam_scanning``."""

    # Declared static because the signature takes no ``self``; without the
    # decorator, calling ``apply`` on an instance would pass the instance as
    # ``config_dict``. Class-level calls behave exactly as before.
    @staticmethod
    def apply(config_dict: dict) -> dict:
        """Rename probe family knownbadsignatures -> av_spam_scanning"""

        # Delegates to ``_plugin.rename`` with the config path that holds the
        # probe entries and the old/new family names.
        path = ["plugins", "probes"]
        old = "knownbadsignatures"
        new = "av_spam_scanning"
        return _plugin.rename(config_dict, path, old, new)
15 changes: 15 additions & 0 deletions garak/resources/fixer/20241011_replay_rename.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# SPDX-FileCopyrightText: Portions Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

from garak.resources.fixer import Migration
from garak.resources.fixer import _plugin


class RenameReplay(Migration):
    """Configuration migration for the probe family rename ``replay`` -> ``divergence``."""

    # Declared static because the signature takes no ``self``; without the
    # decorator, calling ``apply`` on an instance would pass the instance as
    # ``config_dict``. Class-level calls behave exactly as before.
    @staticmethod
    def apply(config_dict: dict) -> dict:
        """Rename probe family replay -> divergence"""

        # Delegates to ``_plugin.rename`` with the config path that holds the
        # probe entries and the old/new family names.
        path = ["plugins", "probes"]
        old = "replay"
        new = "divergence"
        return _plugin.rename(config_dict, path, old, new)
Loading

0 comments on commit 9b33870

Please sign in to comment.