diff --git a/README.md b/README.md index 3f99c9e..89da01d 100644 --- a/README.md +++ b/README.md @@ -69,12 +69,13 @@ Not required: - `BOT_DEV_IDS`: list of user ids of bot devs. You may want to include `BOT_VIP_IDS` here. - `BOT_CONTROL_CHANNEL_IDS`: list of channels where control commands are accepted. - `BOT_PRIVATE_CHANNEL_ID`: single channel where private Stampy status updates are sent +- `BOT_ERROR_CHANNEL_ID`: (defaults to private channel) low-level error tracebacks from Python. With this variable they can be shunted to a separate channel. - `CODA_API_TOKEN`: token to access Coda. Without it, modules `Questions` and `QuestionSetter` will not be available and `StampyControls` will have limited functionality. - `BOT_REBOOT`: how Stampy reboots himself. Unset, he only quits, expecting an external `while true` loop (like in `runstampy`/Dockerfile). Set to `exec` he will try to relaunch himself from his own CLI arguments. - `STOP_ON_ERROR`: Dockerfile/`runstampy` only, unset `BOT_REBOOT` only. If defined, will only restart Stampy when he gets told to reboot, returning exit code 42. Any other exit code will cause the script to just stop. -- `BE_SHY`: Stamp won't respond when the message isn't specifically to him. +- `BE_SHY`: Stampy never responds when the message isn't specifically to him. - `CHANNEL_WHITELIST`: channels Stampy is allowed to respond to messages in -- `IS_ROB_SERVER`: If defined, Rob Miles server-specific stuff is enabled. Servers other than Rob Miles Discord Server and Stampy Test Server should not enable it, Otherwise some errors are likely to occur. +- `IS_ROB_SERVER`: If defined, Rob Miles server-specific stuff is enabled. This is a convenience option for the Rob Miles sysadmins. Servers other than Rob Miles Discord Server and Stampy Test Server should not enable it, otherwise your custom config won't be read. 
Specific modules (excluding LLM stuff): @@ -93,6 +94,7 @@ LLM stuff: - `GPT4_WHITELIST_ROLE_IDS`: if the above is unset, Stampy responds with GPT4 only for users with these roles. - `USE_HELICONE`: if set, GPT prompts call the helicone API rather than OpenAI. - `LLM_PROMPT`: What prompt is the language model being fed? This describes the personality and behavior of the bot. +- `DISABLE_PROMPT_MODERATION`: don't check safety of prompts for LLM ## Docker diff --git a/api/openai.py b/api/openai.py index af3d57e..79db41e 100644 --- a/api/openai.py +++ b/api/openai.py @@ -8,7 +8,8 @@ gpt4_whitelist_role_ids, bot_vip_ids, paid_service_all_channels, - use_helicone + use_helicone, + disable_prompt_moderation ) from structlog import get_logger from servicemodules.serviceConstants import Services, openai_channel_ids @@ -22,7 +23,16 @@ from helicone import openai_proxy as openai else: import openai + from openai import Moderation import discord +import requests +import json # moderation response dump + +CURL_REQUEST: bool # helicone breaks some moderation attribute of openai module +if use_helicone: + CURL_REQUEST = True +else: + CURL_REQUEST = False openai.api_key = openai_api_key start_sequence = "\nA:" @@ -46,78 +56,85 @@ def is_channel_allowed(self, message: ServiceMessage) -> bool: return True else: return False - def cf_risk_level(self, prompt): - """Ask the openai content filter if the prompt is risky - Returns: - 0 - The text is safe. - 1 - This text is sensitive. - 2 - This text is unsafe. 
+ def is_text_risky(self, text: str) -> bool: + """Ask the openai moderation endpoint if the text is risky - See https://beta.openai.com/docs/engines/content-filter for details""" + See https://platform.openai.com/docs/guides/moderation/quickstart for details""" - try: - response = openai.Completion.create( - engine="content-filter-alpha", - prompt="<|endoftext|>" + prompt + "\n--\nLabel:", - temperature=0, - max_tokens=1, - top_p=0, - logprobs=10, - ) - except openai.error.AuthenticationError as e: - self.log.error(self.class_name, error="OpenAI Authentication Failed") - loop = asyncio.get_running_loop() - loop.create_task(utils.log_error(f"OpenAI Authenication Failed")) - loop.create_task(utils.log_exception(e)) - return 2 - except openai.error.RateLimitError as e: - self.log.warning(self.class_name, error="OpenAI Rate Limit Exceeded") - loop = asyncio.get_running_loop() - loop.create_task(utils.log_error(f"OpenAI Rate Limit Exceeded")) - loop.create_task(utils.log_exception(e)) - return 2 - - output_label = response["choices"][0]["text"] - - # This is the probability at which we evaluate that a "2" is likely real - # vs. should be discarded as a false positive - toxic_threshold = -0.355 - - if output_label == "2": - # If the model returns "2", return its confidence in 2 or other output-labels - logprobs = response["choices"][0]["logprobs"]["top_logprobs"][0] - - # If the model is not sufficiently confident in "2", - # choose the most probable of "0" or "1" - # Guaranteed to have a confidence for 2 since this was the selected token. 
- if logprobs["2"] < toxic_threshold: - logprob_0 = logprobs.get("0", None) - logprob_1 = logprobs.get("1", None) - - # If both "0" and "1" have probabilities, set the output label - # to whichever is most probable - if logprob_0 is not None and logprob_1 is not None: - if logprob_0 >= logprob_1: - output_label = "0" - else: - output_label = "1" - # If only one of them is found, set output label to that one - elif logprob_0 is not None: - output_label = "0" - elif logprob_1 is not None: - output_label = "1" - - # If neither "0" or "1" are available, stick with "2" - # by leaving output_label unchanged. - - # if the most probable token is none of "0", "1", or "2" - # this should be set as unsafe - if output_label not in ["0", "1", "2"]: - output_label = "2" - - self.log.info(self.class_name, msg=f"Prompt is risk level {output_label}") - - return int(output_label) + allowed_categories = frozenset({"violence"}) # Can be triggered by some AI safety terms + + if disable_prompt_moderation: + return False + + if CURL_REQUEST: + try: + http_response = requests.post( + 'https://api.openai.com/v1/moderations', + headers={ + "Content-Type": "application/json", + "Authorization": f"Bearer {openai_api_key}" + }, + json={ + "input": text + } + ) + except Exception as e: + self.log.error(self.class_name, error="Error in Requests module trying to moderate content") + loop = asyncio.get_running_loop() + loop.create_task(utils.log_error(f"Error in Requests module trying to moderate content")) + loop.create_task(utils.log_exception(e)) + return True + if http_response.status_code == 401: + self.log.error(self.class_name, error="OpenAI Authentication Failed") + loop = asyncio.get_running_loop() + loop.create_task(utils.log_error(f"OpenAI Authentication Failed")) + loop.create_task(utils.log_error(http_response.text)) + return True + elif http_response.status_code == 429: + self.log.warning(self.class_name, error="OpenAI Rate Limit Exceeded") + loop = asyncio.get_running_loop() + 
loop.create_task(utils.log_error(f"OpenAI Rate Limit Exceeded")) + loop.create_task(utils.log_error(http_response.text)) + return True + elif http_response.status_code != 200: + self.log.warning(self.class_name, error=f"Possible issue with the OpenAI API. Status: {http_response.status_code}, Content: {http_response.text}") + loop = asyncio.get_running_loop() + loop.create_task(utils.log_error(f"Possible issue with the OpenAI API. Status: {http_response.status_code}, Content: {http_response.text}")) + return True + response = http_response.json() + else: + try: + response = Moderation.create(input=text) + except openai.error.AuthenticationError as e: + self.log.error(self.class_name, error="OpenAI Authentication Failed") + loop = asyncio.get_running_loop() + loop.create_task(utils.log_error(f"OpenAI Authentication Failed")) + loop.create_task(utils.log_exception(e)) + return True + except openai.error.RateLimitError as e: + self.log.warning(self.class_name, error="OpenAI Rate Limit Exceeded") + loop = asyncio.get_running_loop() + loop.create_task(utils.log_error(f"OpenAI Rate Limit Exceeded")) + loop.create_task(utils.log_exception(e)) + return True + + flagged: bool = response["results"][0]["flagged"] + + all_morals: frozenset[str] = frozenset({"sexual", "hate", "harassment", "self-harm", "sexual/minors", "hate/threatening", "violence/graphic", "self-harm/intent", "self-harm/instructions", "harassment/threatening", "violence"}) + violated_categories = set() + + if flagged: + for moral in all_morals - allowed_categories: + if response["results"][0]["categories"][moral]: + violated_categories.add(moral) + + if len(violated_categories) > 0: + self.log.warning(self.class_name, msg=f"Text violated these unwanted categories: {violated_categories}") + self.log.debug(self.class_name, msg=f"OpenAI moderation response: {json.dumps(response)}") + return True + else: + self.log.info(self.class_name, msg=f"Checked with content filter, it says the text looks clean") + return False def get_engine(self, message: 
ServiceMessage) -> OpenAIEngines: """Pick the appropriate engine to respond to a message with""" @@ -131,8 +148,8 @@ def get_engine(self, message: ServiceMessage) -> OpenAIEngines: return OpenAIEngines.GPT_3_5_TURBO def get_response(self, engine: OpenAIEngines, prompt: str, logit_bias: dict[int, int]) -> str: - if self.cf_risk_level(prompt) > 1: - self.log.info(self.class_name, msg="OpenAI's GPT-3 content filter thought the prompt was risky") + if self.is_text_risky(prompt): + self.log.info(self.class_name, msg="The content filter thought the prompt was risky") return "" try: diff --git a/config.py b/config.py index 00bfad9..73b4fec 100644 --- a/config.py +++ b/config.py @@ -126,30 +126,21 @@ def getenv_unique_set(var_name: str, default: T = frozenset()) -> Union[frozense else: enabled_modules = enabled_modules_var -# user-configured from dotenv +# SEE README: ENVIRONMENT VARIABLES discord_guild: str -# Factoid.py factoid_database_path: str -# VIPs have full access + special permissions bot_vip_ids: frozenset -# devs have less but can do maintainence like reboot bot_dev_roles: frozenset bot_dev_ids: frozenset -# control channel is where maintainence commands are issued bot_control_channel_ids: frozenset -# private channel is where stampy logging gets printed -bot_private_channel_id: Optional[str] -# NOTE: Rob's invite/member management functions, not ported yet +bot_private_channel_id: str +bot_error_channel_id: str member_role_id: Optional[str] -# bot_reboot is how stampy reboots himself valid_bot_reboot_options = Literal["exec", False] bot_reboot: valid_bot_reboot_options -# GPT STUFF paid_service_all_channels: bool -# if above is false, where can paid services be used? paid_service_channel_ids: frozenset paid_service_for_all: bool -# if above is false, who gets to use paid services? 
paid_service_whitelist_role_ids: frozenset gpt4: bool gpt4_for_all: bool @@ -158,6 +149,7 @@ def getenv_unique_set(var_name: str, default: T = frozenset()) -> Union[frozense llm_prompt: str be_shy: bool channel_whitelist: Optional[frozenset[str]] +disable_prompt_moderation: bool is_rob_server = getenv_bool("IS_ROB_SERVER") if is_rob_server: @@ -215,34 +207,31 @@ def getenv_unique_set(var_name: str, default: T = frozenset()) -> Union[frozense llm_prompt = getenv("LLM_PROMPT", default=stampy_default_prompt) be_shy = getenv_bool("BE_SHY") channel_whitelist = None + bot_error_channel_id = { + "production": "1017527224540344380", + "development": "1017531179664150608" + }[ENVIRONMENT_TYPE] + disable_prompt_moderation = False else: - # user-configured from dotenv + # SEE README: ENVIRONMENT VARIABLES discord_guild = getenv("DISCORD_GUILD") - # Factoid.py factoid_database_path = getenv( "FACTOID_DATABASE_PATH", default="./database/Factoids.db" ) - # VIPs have full access + special permissions bot_vip_ids = getenv_unique_set("BOT_VIP_IDS", frozenset()) - # devs have less but can do maintainence like reboot bot_dev_roles = getenv_unique_set("BOT_DEV_ROLES", frozenset()) bot_dev_ids = getenv_unique_set("BOT_DEV_IDS", frozenset()) - # control channel is where maintainence commands are issued bot_control_channel_ids = getenv_unique_set("BOT_CONTROL_CHANNEL_IDS", frozenset()) - # private channel is where stampy logging gets printed - bot_private_channel_id = getenv("BOT_PRIVATE_CHANNEL_ID", default=None) + bot_private_channel_id = getenv("BOT_PRIVATE_CHANNEL_ID") + bot_error_channel_id = getenv("BOT_ERROR_CHANNEL_ID", bot_private_channel_id) # NOTE: Rob's invite/member management functions, not ported yet member_role_id = getenv("MEMBER_ROLE_ID", default=None) - # bot_reboot is how stampy reboots himself bot_reboot = cast(valid_bot_reboot_options, getenv("BOT_REBOOT", default=False)) - # GPT STUFF paid_service_all_channels = getenv_bool("PAID_SERVICE_ALL_CHANNELS") - # if above 
is false, where can paid services be used? paid_service_channel_ids = getenv_unique_set( "PAID_SERVICE_CHANNEL_IDS", frozenset() ) paid_service_for_all = getenv_bool("PAID_SERVICE_FOR_ALL") - # if above is false, who gets to use paid services? paid_service_whitelist_role_ids = getenv_unique_set( "PAID_SERVICE_ROLE_IDS", frozenset() ) @@ -253,6 +242,7 @@ def getenv_unique_set(var_name: str, default: T = frozenset()) -> Union[frozense llm_prompt = getenv("LLM_PROMPT", default=stampy_default_prompt) be_shy = getenv_bool("BE_SHY") channel_whitelist = getenv_unique_set("CHANNEL_WHITELIST", None) + disable_prompt_moderation = getenv_bool("DISABLE_PROMPT_MODERATION") discord_token: str = getenv("DISCORD_TOKEN") database_path: str = getenv("DATABASE_PATH") diff --git a/modules/chatgpt.py b/modules/chatgpt.py index cfd6ea2..73df6b4 100644 --- a/modules/chatgpt.py +++ b/modules/chatgpt.py @@ -141,6 +141,12 @@ async def chatgpt_chat(self, message: ServiceMessage) -> Response: im = default_italics_mark if self.openai.is_channel_allowed(message): + if self.openai.is_text_risky(message.clean_content): + return Response( + confidence=0, + text="", + why="GPT-3's content filter thought the prompt was risky", + ) self.log.info( self.class_name, msg=f"sending chat prompt to chatgpt, engine {engine} ({engine.description})", diff --git a/modules/gpt3module.py b/modules/gpt3module.py index a90e3c5..357009c 100644 --- a/modules/gpt3module.py +++ b/modules/gpt3module.py @@ -223,7 +223,7 @@ async def gpt3_question(self, message: ServiceMessage) -> Response: self.log.info(self.class_name, status="Asking GPT-3") prompt = self.start_prompt + text + start_sequence - if self.openai.cf_risk_level(prompt) > 1: + if self.openai.is_text_risky(text): return Response( confidence=0, text="", diff --git a/utilities/utilities.py b/utilities/utilities.py index 0c50509..86c1e69 100644 --- a/utilities/utilities.py +++ b/utilities/utilities.py @@ -39,12 +39,12 @@ bot_vip_ids, paid_service_for_all, 
paid_service_whitelist_role_ids, - be_shy + be_shy, + bot_error_channel_id ) from database.database import Database from servicemodules.discordConstants import ( wiki_feed_channel_id, - stampy_error_log_channel_id, ) from servicemodules.serviceConstants import Services from utilities.discordutils import DiscordUser, user_has_role @@ -291,7 +291,7 @@ async def log_exception( def error_channel(self) -> discord.channel.TextChannel: return cast( discord.channel.TextChannel, - self.client.get_channel(int(stampy_error_log_channel_id)), + self.client.get_channel(int(bot_error_channel_id)), ) async def log_error(self, error_message: str) -> None: