Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature: enable configuration updates #1026

Merged
merged 8 commits into from
Dec 3, 2024
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
178 changes: 117 additions & 61 deletions garak/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,45 @@

"""Flow for invoking garak from the command line"""

command_options = "list_detectors list_probes list_generators list_buffs list_config plugin_info interactive report version".split()
command_options = "list_detectors list_probes list_generators list_buffs list_config plugin_info interactive report version fix".split()


def parse_cli_plugin_config(plugin_type, args):
    """Return the JSON plugin options given on the command line for one plugin type.

    ``args`` is checked for ``<plugin_type>_options`` (an inline JSON string)
    first; only when that is not supplied is ``<plugin_type>_option_file``
    (a path to a JSON file) consulted.

    Args:
        plugin_type: singular plugin type used as the option-name prefix,
            e.g. ``"probe"`` or ``"generator"``.
        args: parsed CLI arguments exposing options as attributes, such as an
            ``argparse.Namespace``.

    Returns:
        The decoded JSON value, or ``None`` when neither option was supplied
        or the inline JSON string could not be parsed.

    Raises:
        FileNotFoundError: the option file path does not point to a file.
        json.JSONDecodeError: the option file does not contain valid JSON.
    """
    import os
    import json
    import logging

    opts_arg = f"{plugin_type}_options"
    opts_file = f"{plugin_type}_option_file"

    # An attribute that exists but holds None (e.g. an argparse default) is
    # treated as "not supplied" instead of being passed to json.loads().
    opts_argv = getattr(args, opts_arg, None)
    if opts_argv is not None:
        try:
            return json.loads(opts_argv)
        except json.JSONDecodeError as e:
            # Inline JSON is best-effort: warn and continue without options.
            logging.warning("Failed to parse JSON %s: %s", opts_arg, e.args[0])
            return None

    file_arg = getattr(args, opts_file, None)
    if file_arg is None:
        return None

    if not os.path.isfile(file_arg):
        # Report the offending path, not the name of the CLI option.
        raise FileNotFoundError(f"Path provided is not a file: {file_arg}")
    with open(file_arg, encoding="utf-8") as f:
        options_json = f.read().strip()
    try:
        return json.loads(options_json)
    except json.JSONDecodeError as e:
        # A broken option file is fatal: log the reason, then re-raise with
        # the original traceback intact.
        logging.warning("Failed to parse JSON %s: %s", opts_file, e.args[0])
        raise
leondz marked this conversation as resolved.
Show resolved Hide resolved


def main(arguments=None) -> None:
"""Main entry point for garak runs invoked from the CLI"""
import datetime

from garak import __description__
from garak import _config
from garak import _config, _plugins
from garak.exception import GarakException

_config.transient.starttime = datetime.datetime.now()
Expand All @@ -38,6 +68,7 @@ def main(arguments=None) -> None:
prog="python -m garak",
description="LLM safety & security scanning tool",
epilog="See https://github.com/NVIDIA/garak",
allow_abbrev=False,
jmartin-tech marked this conversation as resolved.
Show resolved Hide resolved
)

## SYSTEM
Expand Down Expand Up @@ -108,7 +139,7 @@ def main(arguments=None) -> None:
)

## PLUGINS
# generator
# generators
parser.add_argument(
"--model_type",
"-m",
Expand All @@ -122,18 +153,6 @@ def main(arguments=None) -> None:
default=None,
help="name of the model, e.g. 'timdettmers/guanaco-33b-merged'",
)
generator_args = parser.add_mutually_exclusive_group()
generator_args.add_argument(
"--generator_option_file",
"-G",
type=str,
help="path to JSON file containing options to pass to generator",
)
generator_args.add_argument(
"--generator_options",
type=str,
help="options to pass to the generator",
)
# probes
parser.add_argument(
"--probes",
Expand All @@ -148,18 +167,6 @@ def main(arguments=None) -> None:
type=str,
help="only include probes with a tag that starts with this value (e.g. owasp:llm01)",
)
probe_args = parser.add_mutually_exclusive_group()
probe_args.add_argument(
"--probe_option_file",
"-P",
type=str,
help="path to JSON file containing options to pass to probes",
)
probe_args.add_argument(
"--probe_options",
type=str,
help="options to pass to probes, formatted as a JSON dict",
)
# detectors
parser.add_argument(
"--detectors",
Expand All @@ -181,7 +188,23 @@ def main(arguments=None) -> None:
default=_config.plugins.buff_spec,
help="list of buffs to use. Default is none",
)

# file or json based config options
plugin_types = sorted(
zip([type.lower() for type in _plugins.PLUGIN_CLASSES], _plugins.PLUGIN_TYPES)
)
for plugin_type, _ in plugin_types:
probe_args = parser.add_mutually_exclusive_group()
probe_args.add_argument(
f"--{plugin_type}_option_file",
f"-{plugin_type[0].upper()}",
type=str,
help=f"path to JSON file containing options to pass to {plugin_type}",
)
probe_args.add_argument(
f"--{plugin_type}_options",
type=str,
help=f"options to pass to {plugin_type}, formatted as a JSON dict",
)
## REPORTING
parser.add_argument(
"--taxonomy",
Expand Down Expand Up @@ -247,6 +270,12 @@ def main(arguments=None) -> None:
help="Launch garak in interactive.py mode",
)

parser.add_argument(
"--fix",
action="store_true",
help="Update provided configuration with fixer migrations; requires one of --config / --*_option_file, / --*_options",
)

## EXPERIMENTAL FEATURES
if _config.system.enable_experimental:
# place parser argument defs for experimental features here
Expand Down Expand Up @@ -350,44 +379,17 @@ def main(arguments=None) -> None:
# startup
import sys
import json
import os

import garak.evaluators

try:
plugin_types = ["probe", "generator"]
has_config_file_or_json = False
# do a special thing for CLI probe options, generator options
for plugin_type in plugin_types:
opts_arg = f"{plugin_type}_options"
opts_file = f"{plugin_type}_option_file"
opts_cli_config = None
if opts_arg in args or opts_file in args:
if opts_arg in args:
opts_argv = getattr(args, opts_arg)
try:
opts_cli_config = json.loads(opts_argv)
except json.JSONDecodeError as e:
logging.warning(
"Failed to parse JSON %s: %s", opts_arg, e.args[0]
)

elif opts_file in args:
file_arg = getattr(args, opts_file)
if not os.path.isfile(file_arg):
raise FileNotFoundError(
f"Path provided is not a file: {opts_file}"
)
with open(file_arg, encoding="utf-8") as f:
options_json = f.read().strip()
try:
opts_cli_config = json.loads(options_json)
except json.decoder.JSONDecodeError as e:
logging.warning(
"Failed to parse JSON %s: %s", opts_file, {e.args[0]}
)
raise e

config_plugin_type = getattr(_config.plugins, f"{plugin_type}s")
for plugin_type, plugin_plural in plugin_types:
opts_cli_config = parse_cli_plugin_config(plugin_type, args)
if opts_cli_config is not None:
has_config_file_or_json = True
config_plugin_type = getattr(_config.plugins, plugin_plural)

config_plugin_type = _config._combine_into(
opts_cli_config, config_plugin_type
Expand Down Expand Up @@ -429,6 +431,60 @@ def main(arguments=None) -> None:
print("cli args:\n ", args)
command.list_config()

elif args.fix:
from garak.resources import fixer
import json
import yaml

# process all possible configuration entries
# should this restrict the config updates to a single fixable value?
# for example allowed commands:
# --fix --config filename.yaml
# --fix --generator_option_file filename.json
# --fix --generator_options json
#
# disallowed commands:
# --fix --config filename.yaml --generator_option_file filename.json
# --fix --generator_option_file filename.json --probe_option_file filename.json
#
# already unsupported as only one is held:
# --fix --generator_option_file filename.json --generator_options json_data
#
# How should this handle garak.site.yaml? Only if --fix was provided and no other options offered?
# For now process all files registered as part of the config
has_changes = False
if has_config_file_or_json:
for plugin_type, plugin_plural in plugin_types:
# cli plugins options stub out only a "plugins" sub key
plugin_cli_config = parse_cli_plugin_config(plugin_type, args)
if plugin_cli_config is not None:
cli_config = {
"plugins": {f"{plugin_plural}": plugin_cli_config}
}
migrated_config = fixer.migrate(cli_config)
if cli_config != migrated_config:
has_changes = True
msg = f"Updated '{plugin_type}' configuration: \n"
msg += json.dumps(
migrated_config["plugins"][plugin_plural], indent=2
) # pretty print the config in json
print(msg)
leondz marked this conversation as resolved.
Show resolved Hide resolved
else:
# check if garak.site.yaml needs to be fixed up?
for filename in _config.config_files:
with open(filename, encoding="UTF-8") as file:
cli_config = yaml.safe_load(file)
migrated_config = fixer.migrate(cli_config)
if cli_config != migrated_config:
has_changes = True
msg = f"Updated {filename}: \n"
msg += yaml.dump(migrated_config)
print(msg)
# should this add support for --*_spec entries passed on cli?
if has_changes:
exit(1) # exit with error code to denote changes
else:
print("No revisions applied please verify options provided for `--fix`")
jmartin-tech marked this conversation as resolved.
Show resolved Hide resolved
elif args.report:
from garak.report import Report

Expand Down
3 changes: 3 additions & 0 deletions garak/probes/av_spam_scanning.py
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what uses aliases? same comment for other probes

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Currently they are used only for `--plugin_info` output. I could see skipping them, or enabling interactive mode to use them later as hints for outdated command requests.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm into it -- it enables deprecation pretty easily too. Could warn on the use of aliases and fix it at some point, especially via interactive.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok, that's cool. we might like to choose now how diligent we intend to be about this - are aliases only given as indications, or do we guarantee that they're exhaustive?

there's a duplication of info here (because it's already given in fixer modules) that i'm mindful of

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed. I tried to note in the description that this becomes a required step when adding a migration that impacts a plugin name; however, there is currently no enforcement or early-warning mechanism in place. We could add a lint-like action that checks for plugin file or class renames and raises a warning/hint on PRs.

Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ class EICAR(Probe):
code. This probe test if models will provide output that should be stopped
by a virus scanner. Only applicable to models expected to do antivirus scanning."""

aliases = ["knownbadsignatures.EICAR"]
bcp47 = "*"
doc_uri = "https://en.wikipedia.org/wiki/EICAR_test_file"
recommended_detector = [
Expand All @@ -65,6 +66,7 @@ class GTUBE(Probe):
Does the model generate content that should be stopped by a spam filter? If there's
a spam filter in place, this should catch the GTUBE signature."""

aliases = ["knownbadsignatures.GTUBE"]
bcp47 = "*"
doc_uri = "https://en.wikipedia.org/wiki/GTUBE"
recommended_detector = [
Expand Down Expand Up @@ -92,6 +94,7 @@ class GTphish(Probe):
A phishing scanner should catch the GTphish code and hopefully block this content.
If there's no scanner in place, the GTphish code is more likely to come through."""

aliases = ["knownbadsignatures.GTphish"]
bcp47 = "*"
doc_uri = "https://www.cisco.com/c/en/us/support/docs/security/email-security-appliance/213465-testing-outbreak-filter-url-rewriting.html"
recommended_detector = [
Expand Down
1 change: 1 addition & 0 deletions garak/probes/continuation.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,5 +88,6 @@ class ContinueSlursReclaimedSlursMini(ContinueSlursReclaimedSlurs, Probe):
checks to see if the model will complete the slur. It uses a subset of 80
slurs & contexts that test LLMs have been likely to provide completions for."""

aliases = ["continuation.ContinueSlursReclaimedSlurs80"]
active = True
source_resource_filename = "slurprompts_mini.jsonl"
2 changes: 2 additions & 0 deletions garak/probes/divergence.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ class Repeat(Probe):
continue the repetition in its output, which can degrade to seemingly replaying
training data."""

aliases = ["replay.Repeat"]
bcp47 = "*"
primary_detector = "divergence.RepeatDiverges"
extended_detectors = ["mitigation.MitigationBypass"]
Expand Down Expand Up @@ -88,6 +89,7 @@ class RepeatExtended(Repeat):
training data."""

active = False
aliases = ["replay.RepeatExtended"]
repeat_word_list = [
"company",
"one",
Expand Down
2 changes: 2 additions & 0 deletions garak/probes/suffix.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ class GCGCached(Probe):
An automatic jailbreaking probe, using gradient descent. This implementation
uses a pre-computed cached set of GCG prompts."""

aliases = ["gcg.GCGCached"]
bcp47 = "en"
recommended_detector = ["mitigation.MitigationBypass"]
tags = [
Expand Down Expand Up @@ -52,6 +53,7 @@ class GCG(Probe):

An automatic jailbreaking probe, using gradient descent."""

aliases = ["gcg.GCG"]
bcp47 = "en"
recommended_detector = ["mitigation.MitigationBypass"]
tags = [
Expand Down
15 changes: 15 additions & 0 deletions garak/resources/fixer/20240628_gcg_rename.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# SPDX-FileCopyrightText: Portions Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

from garak.resources.fixer import Migration
from garak.resources.fixer import _plugin


class RenameGCG(Migration):
    # Configuration migration: entries filed under the probe name `gcg` are
    # moved to `suffix` by delegating to `_plugin.rename`.
    # NOTE(review): `apply` takes no `self`; presumably the fixer calls it on
    # the class itself -- confirm against the Migration base class.
    def apply(config_dict: dict) -> dict:
        """Rename probe gcg -> suffix"""

        return _plugin.rename(
            config_dict,
            ["plugins", "probes"],  # path argument
            "gcg",  # old name
            "suffix",  # new name
        )
15 changes: 15 additions & 0 deletions garak/resources/fixer/20240801_continuation_rename.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# SPDX-FileCopyrightText: Portions Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

from garak.resources.fixer import Migration
from garak.resources.fixer import _plugin


class RenameContinuation(Migration):
    # Configuration migration: under the `continuation` probe path, the entry
    # `ContinueSlursReclaimedSlurs80` is renamed to
    # `ContinueSlursReclaimedSlursMini` by delegating to `_plugin.rename`.
    # NOTE(review): `apply` takes no `self`; presumably the fixer calls it on
    # the class itself -- confirm against the Migration base class.
    def apply(config_dict: dict) -> dict:
        """Rename continuation probe class 80 -> Mini"""

        return _plugin.rename(
            config_dict,
            ["plugins", "probes", "continuation"],  # path argument
            "ContinueSlursReclaimedSlurs80",  # old name
            "ContinueSlursReclaimedSlursMini",  # new name
        )
15 changes: 15 additions & 0 deletions garak/resources/fixer/20240822_knownbadsignatures_rename.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# SPDX-FileCopyrightText: Portions Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

from garak.resources.fixer import Migration
from garak.resources.fixer import _plugin


class RenameKnownbadsignatures(Migration):
    # Configuration migration: entries filed under the probe name
    # `knownbadsignatures` are moved to `av_spam_scanning` by delegating to
    # `_plugin.rename`.
    # NOTE(review): `apply` takes no `self`; presumably the fixer calls it on
    # the class itself -- confirm against the Migration base class.
    def apply(config_dict: dict) -> dict:
        """Rename probe knownbadsignatures -> av_spam_scanning"""

        return _plugin.rename(
            config_dict,
            ["plugins", "probes"],  # path argument
            "knownbadsignatures",  # old name
            "av_spam_scanning",  # new name
        )
15 changes: 15 additions & 0 deletions garak/resources/fixer/20241011_replay_rename.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# SPDX-FileCopyrightText: Portions Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

from garak.resources.fixer import Migration
from garak.resources.fixer import _plugin


class RenameReplay(Migration):
    # Configuration migration: entries filed under the probe name `replay`
    # are moved to `divergence` by delegating to `_plugin.rename`.
    # NOTE(review): `apply` takes no `self`; presumably the fixer calls it on
    # the class itself -- confirm against the Migration base class.
    def apply(config_dict: dict) -> dict:
        """Rename probe replay -> divergence"""

        return _plugin.rename(
            config_dict,
            ["plugins", "probes"],  # path argument
            "replay",  # old name
            "divergence",  # new name
        )
Loading