From b9d570d16ee26f397d25b836302cb4d81526e10b Mon Sep 17 00:00:00 2001
From: psouranis
Date: Tue, 26 Sep 2023 10:24:48 +0300
Subject: [PATCH] Adding optimization by prompting

---
 tools/optimization_by_prompting.py | 384 +++++++++++++++++++++++++++++
 1 file changed, 384 insertions(+)
 create mode 100644 tools/optimization_by_prompting.py

diff --git a/tools/optimization_by_prompting.py b/tools/optimization_by_prompting.py
new file mode 100644
index 00000000..c3d37199
--- /dev/null
+++ b/tools/optimization_by_prompting.py
@@ -0,0 +1,384 @@
+"""A script that implements the optimization by prompting (OPRO) methodology."""
+
+import json
+import os
+import re
+from concurrent.futures import Future, ThreadPoolExecutor
+from io import StringIO
+from typing import Any, Dict, Generator, List, Optional, Tuple
+
+import requests
+from bs4 import BeautifulSoup
+from googleapiclient.discovery import build
+
+import openai
+import pandas as pd
+from langchain.chains import LLMChain
+from langchain.llms import OpenAI
+from langchain.prompts import PromptTemplate
+from sklearn.metrics import roc_auc_score
+
+# A few labelled examples used to backtest each candidate prompt
+EXAMPLES = """query;event
+"Will Apple release iphone 15 by 1 October 2023?";1
+"Will the newly elected ceremonial president of Singapore face any political scandals by 13 September 2023?";0
+"Will Russia Invade Ukraine in 2022";1
+"Will Finland and Sweden apply to join NATO in 2023?";1
+"Will Charles become King in 2022?";1
+"""
+
+NUM_URLS_EXTRACT = 5
+DEFAULT_OPENAI_SETTINGS = {
+    "max_tokens": 500,
+    "temperature": 0.8,
+}
+ALLOWED_TOOLS = [
+    "deepmind-optimization-strong",
+    "deepmind-optimization",
+]
+TOOL_TO_ENGINE = {
+    "deepmind-optimization-strong": "gpt-4",
+    "deepmind-optimization": "gpt-3.5-turbo",
+}
+
+PREDICTION_PROMPT_INSTRUCTIONS = """
+You are an LLM inside a multi-agent system that takes in a prompt of a user requesting a probability estimation
+for a given event. You are provided with an input under the label "USER_PROMPT". You must follow the instructions
+under the label "INSTRUCTIONS". You must provide your response in the format specified under "OUTPUT_FORMAT".
+
+INSTRUCTIONS
+* Read the input under the label "USER_PROMPT" delimited by three backticks.
+* The "USER_PROMPT" specifies an event.
+* The event will only have two possible outcomes: either the event will happen or the event will not happen.
+* If the event has more than two possible outcomes, you must ignore the rest of the instructions and output the response "Error".
+* You must provide a probability estimation of the event happening, based on your training data.
+* You are provided an itemized list of information under the label "ADDITIONAL_INFORMATION" delimited by three backticks.
+* You can use any item in "ADDITIONAL_INFORMATION" in addition to your training data.
+* If an item in "ADDITIONAL_INFORMATION" is not relevant, you must ignore that item for the estimation.
+* You must provide your response in the format specified under "OUTPUT_FORMAT".
+* Do not include any other contents in your response.
+"""
+
+
+PREDICTION_PROMPT_FORMAT = """
+USER_PROMPT:
+```
+{user_prompt}
+```
+
+ADDITIONAL_INFORMATION:
+```
+{additional_information}
+```
+
+OUTPUT_FORMAT
+* Your output response must be only a single JSON object to be parsed by Python's "json.loads()".
+* The JSON must contain four fields: "p_yes", "p_no", "confidence", and "info_utility".
+* Each item in the JSON must have a value between 0 and 1.
+ - "p_yes": Estimated probability that the event in the "USER_PROMPT" occurs.
+ - "p_no": Estimated probability that the event in the "USER_PROMPT" does not occur.
+ - "confidence": A value between 0 and 1 indicating the confidence in the prediction. 0 indicates lowest
+   confidence value; 1 maximum confidence value.
+ - "info_utility": Utility of the information provided in "ADDITIONAL_INFORMATION" to help you make the prediction.
+   0 indicates lowest utility; 1 maximum utility.
+* The sum of "p_yes" and "p_no" must equal 1.
+* Output only the JSON object. Do not include any other contents in your response."""
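+
+# For reference, a well-formed reply to the prediction prompt is a bare JSON object
+# such as the following (the values are made up for illustration):
+#
+#   {"p_yes": 0.65, "p_no": 0.35, "confidence": 0.8, "info_utility": 0.4}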
+
+URL_QUERY_PROMPT = """
+You are an LLM inside a multi-agent system that takes in a prompt of a user requesting a probability estimation
+for a given event. You are provided with an input under the label "USER_PROMPT". You must follow the instructions
+under the label "INSTRUCTIONS". You must provide your response in the format specified under "OUTPUT_FORMAT".
+
+INSTRUCTIONS
+* Read the input under the label "USER_PROMPT" delimited by three backticks.
+* The "USER_PROMPT" specifies an event.
+* The event will only have two possible outcomes: either the event will happen or the event will not happen.
+* If the event has more than two possible outcomes, you must ignore the rest of the instructions and output the response "Error".
+* You must provide your response in the format specified under "OUTPUT_FORMAT".
+* Do not include any other contents in your response.
+
+USER_PROMPT:
+```
+{user_prompt}
+```
+
+OUTPUT_FORMAT
+* Your output response must be only a single JSON object to be parsed by Python's "json.loads()".
+* The JSON must contain one field: "queries".
+ - "queries": An array of strings of size between 1 and 5. Each string must be a search engine query that can help obtain relevant information to estimate
+   the probability that the event in "USER_PROMPT" occurs. Each query must target different information; queries should not overlap
+   or lead to the same set of results.
+* Output only the JSON object. Do not include any other contents in your response.
+"""
+
+TEMPLATE_INSTRUCTOR = """You are an advanced reasoning agent that suggests to a bot ways to predict world events very accurately.
+You are given the following:
+(1) The previous instructions.
+(2) A metric score that evaluates the previous instructions given to the bot. The best metric score is 1.
+
+You are asked to refine the instructions in order to reach the best score.
+Think through the steps one by one.
+
+Example format:
+INSTRUCTIONS: previous instructions here
+METRIC SCORE: score between 0 and 1 here
+
+INSTRUCTIONS: {instructions}
+METRIC SCORE: {score}
+NEW INSTRUCTIONS:"""
+
+PROMPT_INSTRUCTOR = PromptTemplate(
+    input_variables=["instructions", "score"], template=TEMPLATE_INSTRUCTOR
+)
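+
+# Each optimizer step renders the meta-prompt above with the current best instructions
+# and their backtest score, e.g. (the score shown is made up for illustration):
+#
+#   INSTRUCTIONS: <current best instruction text>
+#   METRIC SCORE: 0.75
+#   NEW INSTRUCTIONS:
+#
+# The model's completion is then used as the candidate instructions for the next round.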
+
+
+def evaluate_prompt(prompt, df, llm):
+    """Run the candidate prompt over the backtest examples and collect "p_yes" values."""
+    chain = LLMChain(llm=llm, prompt=prompt)
+    probas = []
+
+    for row in df.itertuples():
+        pred_chain = chain.run({"user_prompt": row.query, "additional_information": ""})
+        try:
+            dictionary_match = float(json.loads(pred_chain)["p_yes"])
+        except (json.JSONDecodeError, KeyError, TypeError):
+            # Fall back to extracting the first JSON-looking object from the reply
+            dictionary_match = float(
+                json.loads(re.search(r"\{.*\}", pred_chain, re.DOTALL).group(0))["p_yes"]
+            )
+        probas.append(dictionary_match)
+
+    return probas
+
+
+def calculate_score(df, answer_key="event", prob_key="probability"):
+    """Score a prompt by the ROC AUC of its predicted probabilities against the labels."""
+    return roc_auc_score(df[answer_key], df[prob_key])
+
+
+def create_new_instructions(llm, instructions, score):
+    """Ask the instructor model for refined instructions given the current score."""
+    chain = LLMChain(llm=llm, prompt=PROMPT_INSTRUCTOR)
+    evaluations = chain.run({"instructions": instructions, "score": score})
+    return evaluations
+
+
+def prompt_engineer(init_instructions, instructions_format, iterations=3, model_name="gpt-3.5-turbo"):
+    """Iteratively refine the instructions, keeping the highest-scoring template."""
+    llm = OpenAI(model_name=model_name)
+    score_template = {"template": init_instructions, "score": 0.0}
+
+    df = pd.read_csv(StringIO(EXAMPLES), sep=";")
+    template = init_instructions
+
+    for _ in range(iterations):
+        prompt = PromptTemplate(
+            input_variables=["user_prompt", "additional_information"],
+            template=template + instructions_format,
+        )
+
+        df["probability"] = evaluate_prompt(prompt=prompt, llm=llm, df=df)
+
+        score = calculate_score(df)
+        print(f"Score: {score}\n")
+        if score > score_template["score"]:
+            print(f"Best template score: {score}\nTemplate: {template}\n")
+            score_template["template"] = template
+            score_template["score"] = score
+        template = create_new_instructions(
+            llm=llm,
+            instructions=score_template["template"],
+            score=score_template["score"],
+        )
+
+    return score_template["template"]
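+
+
+# Minimal usage sketch for the optimization loop (assumes OPENAI_API_KEY is set in the
+# environment; the iteration count is illustrative):
+#
+#   best_instructions = prompt_engineer(
+#       PREDICTION_PROMPT_INSTRUCTIONS, PREDICTION_PROMPT_FORMAT, iterations=3
+#   )
+#   print(best_instructions)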
+
+
+def search_google(query: str, api_key: str, engine: str, num: int = 3) -> List[str]:
+    """Search Google via the Custom Search API and return the result links."""
+    service = build("customsearch", "v1", developerKey=api_key)
+    search = (
+        service.cse()
+        .list(
+            q=query,
+            cx=engine,
+            num=num,
+        )
+        .execute()
+    )
+    return [result["link"] for result in search["items"]]
+
+
+def get_urls_from_queries(queries: List[str], api_key: str, engine: str) -> List[str]:
+    """Get URLs from search engine queries"""
+    results = []
+    for query in queries:
+        for url in search_google(
+            query=query,
+            api_key=api_key,
+            engine=engine,
+            num=3,  # Number of returned results
+        ):
+            results.append(url)
+    unique_results = list(set(results))
+    return unique_results
+
+
+def extract_text(
+    html: str,
+    num_words: int = 300,  # TODO: summarize using GPT instead of truncating
+) -> str:
+    """Extract text from a single HTML document"""
+    soup = BeautifulSoup(html, "html.parser")
+    for script in soup(["script", "style"]):
+        script.extract()
+    text = soup.get_text()
+    lines = (line.strip() for line in text.splitlines())
+    chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
+    text = "\n".join(chunk for chunk in chunks if chunk)
+    # Truncate by words, matching the parameter name
+    return " ".join(text.split()[:num_words])
+
+
+def process_in_batches(
+    urls: List[str], window: int = 5, timeout: int = 10
+) -> Generator[List[Tuple[Future, str]], None, None]:
+    """Iterate over the URLs in batches, fetching each batch concurrently."""
+    with ThreadPoolExecutor() as executor:
+        for i in range(0, len(urls), window):
+            batch = urls[i : i + window]
+            futures = [
+                (executor.submit(requests.get, url, timeout=timeout), url)
+                for url in batch
+            ]
+            yield futures
+
+
+def extract_texts(urls: List[str], num_words: int = 300) -> List[str]:
+    """Extract texts from URLs"""
+    max_allowed = 5
+    extracted_texts = []
+    count = 0
+    stop = False
+    for batch in process_in_batches(urls=urls):
+        for future, url in batch:
+            try:
+                result = future.result()
+                if result.status_code != 200:
+                    continue
+                extracted_texts.append(extract_text(html=result.text, num_words=num_words))
+                count += 1
+                if count >= max_allowed:
+                    stop = True
+                    break
+            except requests.exceptions.ReadTimeout:
+                print(f"Request timed out: {url}.")
+            except Exception as e:
+                print(f"An error occurred: {e}")
+        if stop:
+            break
+    return extracted_texts
+
+
+def fetch_additional_information(
+    prompt: str,
+    engine: str,
+    temperature: float,
+    max_tokens: int,
+    google_api_key: str,
+    google_engine: str,
+) -> str:
+    """Fetch additional information."""
+    url_query_prompt = URL_QUERY_PROMPT.format(user_prompt=prompt)
+    moderation_result = openai.Moderation.create(input=url_query_prompt)
+    if moderation_result["results"][0]["flagged"]:
+        return ""
+    messages = [
+        {"role": "system", "content": "You are a helpful assistant."},
+        {"role": "user", "content": url_query_prompt},
+    ]
+
+    response = openai.ChatCompletion.create(
+        model=engine,
+        messages=messages,
+        temperature=temperature,
+        max_tokens=max_tokens,
+        n=1,
+        timeout=90,
+        request_timeout=90,
+        stop=None,
+    )
+    json_data = json.loads(response.choices[0].message.content)
+    urls = get_urls_from_queries(
+        json_data["queries"],
+        api_key=google_api_key,
+        engine=google_engine,
+    )
+    texts = extract_texts(urls)
+    return "\n".join(["- " + text for text in texts])
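+
+
+# fetch_additional_information expects the model to answer URL_QUERY_PROMPT with a bare
+# JSON object like the one below (the queries are made up for illustration); each query
+# is sent to Google Custom Search and the text of the linked pages is scraped:
+#
+#   {"queries": ["iPhone 15 release date", "Apple September 2023 event"]}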
+
+
+def run(**kwargs) -> Tuple[str, Optional[Dict[str, Any]]]:
+    """Run the task"""
+    tool = kwargs["tool"]
+    prompt = kwargs["prompt"]
+    improve_instructions = kwargs["improve_instructions"]
+    max_tokens = kwargs.get("max_tokens", DEFAULT_OPENAI_SETTINGS["max_tokens"])
+    temperature = kwargs.get("temperature", DEFAULT_OPENAI_SETTINGS["temperature"])
+
+    openai.api_key = kwargs["api_keys"]["openai"]
+    if tool not in ALLOWED_TOOLS:
+        raise ValueError(f"Tool {tool} is not supported.")
+
+    engine = TOOL_TO_ENGINE[tool]
+    # Only fetch web context when Google API credentials are supplied
+    google_api_key = kwargs["api_keys"].get("google_api_key")
+    google_engine = kwargs["api_keys"].get("google_engine_id")
+    additional_information = (
+        fetch_additional_information(
+            prompt=prompt,
+            engine=engine,
+            temperature=temperature,
+            max_tokens=max_tokens,
+            google_api_key=google_api_key,
+            google_engine=google_engine,
+        )
+        if google_api_key and google_engine
+        else ""
+    )
+
+    instructions = (
+        prompt_engineer(PREDICTION_PROMPT_INSTRUCTIONS, PREDICTION_PROMPT_FORMAT)
+        if improve_instructions
+        else PREDICTION_PROMPT_INSTRUCTIONS
+    )
+    instructions += PREDICTION_PROMPT_FORMAT
+
+    prediction_prompt = instructions.format(
+        user_prompt=prompt, additional_information=additional_information
+    )
+
+    moderation_result = openai.Moderation.create(input=prediction_prompt)
+    if moderation_result["results"][0]["flagged"]:
+        return "Moderation flagged the prompt as in violation of terms.", None
+    messages = [
+        {"role": "system", "content": "You are a helpful assistant."},
+        {"role": "user", "content": prediction_prompt},
+    ]
+
+    response = openai.ChatCompletion.create(
+        model=engine,
+        messages=messages,
+        temperature=temperature,
+        max_tokens=max_tokens,
+        n=1,
+        timeout=150,
+        request_timeout=150,
+        stop=None,
+    )
+    return response.choices[0].message.content, None
+
+
+if __name__ == "__main__":
+    os.environ["OPENAI_API_KEY"] = "your_openai_api_key"
+    api_keys = {"openai": "your_openai_api_key"}
+
+    func_args = {
+        "api_keys": api_keys,
+        "tool": "deepmind-optimization",
+        "prompt": "Will AI take over the world in the next year?",
+        "improve_instructions": True,
+    }
+
+    response = run(**func_args)
+    print(response)