diff --git a/packages/packages.json b/packages/packages.json
index 80a28942..b1bd751c 100644
--- a/packages/packages.json
+++ b/packages/packages.json
@@ -31,6 +31,7 @@
     "custom/victorpolisetty/dalle_request/0.1.0": "bafybeieqqtd6gtlry7vheix54nj3ok4cag3uy47yoxlufhi6y3u5i6doti",
     "custom/jhehemann/prediction_sentence_embeddings/0.1.0": "bafybeifyyb2wpa77tl7a7fs3fabns45llivhgccbnrpupubojmq2fwe4si",
     "custom/gnosis/ofv_market_resolver/0.1.0": "bafybeigapoti2ysukapphspjawktkb4qkeltlollt4d2z4u7mrddk3u3rq",
+    "custom/valory/superforcaster/0.1.0": "bafybeibub6ulgkyyefw5zaab2tawyvjxbhcdddx7emfqrhlzkifb4dxox4",
     "custom/valory/tee_openai_request/0.1.0": "bafybeictmezaorzxelsy4dztbxh5n2343zio3rk6vo7wc5lptxlobhdnku",
     "custom/dvilela/corcel_request/0.1.0": "bafybeicjrix2wg23cm2j437isokkzd26uloqezlx3cg2pu7rf7mfwg3p6e",
     "custom/dvilela/gemini_prediction/0.1.0": "bafybeigvwflupxzbjgmaxcxml5vkez3obl4fjo6bxzhquq56urnviq32u4",
diff --git a/packages/valory/customs/superforcaster/__init__.py b/packages/valory/customs/superforcaster/__init__.py
new file mode 100644
index 00000000..03175811
--- /dev/null
+++ b/packages/valory/customs/superforcaster/__init__.py
@@ -0,0 +1,20 @@
+# -*- coding: utf-8 -*-
+# ------------------------------------------------------------------------------
+#
+# Copyright 2023-2024 Valory AG
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# ------------------------------------------------------------------------------
+
+"""This module contains the Superforcaster tool."""
diff --git a/packages/valory/customs/superforcaster/component.yaml b/packages/valory/customs/superforcaster/component.yaml
new file mode 100644
index 00000000..3044f002
--- /dev/null
+++ b/packages/valory/customs/superforcaster/component.yaml
@@ -0,0 +1,22 @@
+name: superforcaster
+author: valory
+version: 0.1.0
+type: custom
+description: A tool that runs the superforcaster prompt.
+license: Apache-2.0
+aea_version: '>=1.0.0, <2.0.0'
+fingerprint:
+  __init__.py: bafybeifvbuxt54l5jsxextf6ru5yvimkfdg4gfkcsnmyvsyqcsgclg7vey
+  superforcaster.py: bafybeify3b3s5mimghq3d3pr4yoldbfo2giliebhfj7eh6w6sucfkk6iku
+fingerprint_ignore_patterns: []
+entry_point: superforcaster.py
+callable: run
+params:
+  default_model: gpt-4o-2024-08-06
+dependencies:
+  openai:
+    version: ==1.30.2
+  tiktoken:
+    version: ==0.7.0
+  requests:
+    version: ==2.28.1
diff --git a/packages/valory/customs/superforcaster/superforcaster.py b/packages/valory/customs/superforcaster/superforcaster.py
new file mode 100644
index 00000000..90db409f
--- /dev/null
+++ b/packages/valory/customs/superforcaster/superforcaster.py
@@ -0,0 +1,363 @@
+# -*- coding: utf-8 -*-
+# ------------------------------------------------------------------------------
+#
+# Copyright 2023-2024 Valory AG
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# ------------------------------------------------------------------------------
+"""Contains the job definitions."""
+import functools
+import json
+import time
+from datetime import date
+from typing import Any, Callable, Dict, List, Optional, Tuple, Union
+
+import openai
+import requests
+from openai import OpenAI
+from tiktoken import encoding_for_model
+
+
+client: Optional[OpenAI] = None
+MechResponse = Tuple[str, Optional[str], Optional[Dict[str, Any]], Any, Any]
+
+
+def with_key_rotation(func: Callable):
+    """Decorate a tool so it retries with rotated API keys on rate limits."""
+
+    @functools.wraps(func)
+    def wrapper(*args, **kwargs) -> MechResponse:
+        # this is expected to be a KeyChain object,
+        # although it is not explicitly typed as such
+        api_keys = kwargs["api_keys"]
+        retries_left: Dict[str, int] = api_keys.max_retries()
+
+        def execute() -> MechResponse:
+            """Retry the function with a new key."""
+            try:
+                result = func(*args, **kwargs)
+                return result + (api_keys,)
+            except openai.RateLimitError as e:
+                # try with a new key again
+                if retries_left["openai"] <= 0 and retries_left["openrouter"] <= 0:
+                    raise e
+                retries_left["openai"] -= 1
+                retries_left["openrouter"] -= 1
+                api_keys.rotate("openai")
+                api_keys.rotate("openrouter")
+                return execute()
+            except Exception as e:
+                return str(e), "", None, None, api_keys
+
+        mech_response = execute()
+        return mech_response
+
+    return wrapper
+
+
+class OpenAIClientManager:
+    """Client context manager for OpenAI."""
+
+    def __init__(self, api_key: str):
+        self.api_key = api_key
+
+    def __enter__(self) -> OpenAI:
+        global client
+        if client is None:
+            client = OpenAIClient(api_key=self.api_key)
+        return client
+
+    def __exit__(self, exc_type, exc_value, traceback) -> None:
+        global client
+        if client is not None:
+            client.client.close()
+            client = None
+
+
+class Usage:
+    """Usage class."""
+
+    def __init__(self, prompt_tokens=None, completion_tokens=None):
+        self.prompt_tokens = prompt_tokens
+        self.completion_tokens = completion_tokens
+
+
+class OpenAIResponse:
+    """Response class."""
+
+    def __init__(self, content: Optional[str] = None, usage: Optional[Usage] = None):
+        self.content = content
+        # fall back to an empty Usage only when none is provided
+        self.usage = usage if usage is not None else Usage()
+
+
+class OpenAIClient:
+    """Thin wrapper around the OpenAI chat completions client."""
+
+    def __init__(self, api_key: str):
+        self.api_key = api_key
+        self.client = openai.OpenAI(api_key=self.api_key)
+
+    def completions(
+        self,
+        model: str,
+        messages: Optional[List] = None,
+        timeout: Optional[Union[float, int]] = None,
+        temperature: Optional[float] = None,
+        top_p: Optional[float] = None,
+        n: Optional[int] = None,
+        stop=None,
+        max_tokens: Optional[int] = None,
+    ):
+        """Create a chat completion, honouring the caller's parameters."""
+        response_provider = self.client.chat.completions.create(
+            model=model,
+            messages=messages or [],
+            temperature=temperature,
+            top_p=top_p,
+            max_tokens=max_tokens,
+            n=n,
+            timeout=timeout,
+            stop=stop,
+        )
+        response = OpenAIResponse()
+        response.content = response_provider.choices[0].message.content
+        response.usage.prompt_tokens = response_provider.usage.prompt_tokens
+        response.usage.completion_tokens = response_provider.usage.completion_tokens
+        return response
+
+
+def count_tokens(text: str, model: str) -> int:
+    """Count the number of tokens in a text."""
+    enc = encoding_for_model(model)
+    return len(enc.encode(text))
+
+
+DEFAULT_OPENAI_SETTINGS = {
+    "max_tokens": 500,
+    "limit_max_tokens": 4096,
+    "temperature": 0,
+}
+DEFAULT_OPENAI_MODEL = "gpt-4o-2024-08-06"
+ALLOWED_TOOLS = ["superforcaster"]
+ALLOWED_MODELS = [DEFAULT_OPENAI_MODEL]
+COMPLETION_RETRIES = 3
+COMPLETION_DELAY = 2
+
+
+PREDICTION_PROMPT = """
+You are an advanced AI system which has been fine-tuned to provide calibrated probabilistic
+forecasts under uncertainty, with your performance evaluated according to the Brier score. When
+forecasting, do not treat 0.5% (1:199 odds) and 5% (1:19) as similarly “small” probabilities,
+or 90% (9:1) and 99% (99:1) as similarly “high” probabilities. As the odds show, they are
+markedly different, so output your probabilities accordingly.
+
+Question:
+{question}
+
+Today's date: {today}
+Your pretraining knowledge cutoff: October 2023
+
+We have retrieved the following information for this question:
+{sources}
+
+Recall the question you are forecasting:
+{question}
+
+Instructions:
+1. Compress key factual information from the sources, as well as useful background information
+which may not be in the sources, into a list of core factual points to reference. Aim for
+information which is specific, relevant, and covers the core considerations you'll use to make
+your forecast. For this step, do not draw any conclusions about how a fact will influence your
+answer or forecast. Place this section of your response in tags.
+
+2. Provide a few reasons why the answer might be no. Rate the strength of each reason on a
+scale of 1-10. Use tags.
+
+3. Provide a few reasons why the answer might be yes. Rate the strength of each reason on a
+scale of 1-10. Use tags.
+
+4. Aggregate your considerations. Do not summarize or repeat previous points; instead,
+investigate how the competing factors and mechanisms interact and weigh against each other.
+Factorize your thinking across (exhaustive, mutually exclusive) cases if and only if it would be
+beneficial to your reasoning. We have detected that you overestimate world conflict, drama,
+violence, and crises due to news' negativity bias, which doesn't necessarily represent overall
+trends or base rates. Similarly, we have also detected that you overestimate dramatic, shocking,
+or emotionally charged news due to news' sensationalism bias. Therefore adjust for news'
+negativity bias and sensationalism bias by considering reasons why your provided sources
+might be biased or exaggerated. Think like a superforecaster. Use tags
+for this section of your response.
+
+5. Output an initial probability (prediction) as a single number between 0 and 1 given steps 1-4.
+Use tags.
+
+6. Reflect on your answer, performing sanity checks and mentioning any additional knowledge
+or background information which may be relevant. Check for over/underconfidence, improper
+treatment of conjunctive or disjunctive conditions (only if applicable), and other forecasting
+biases when reviewing your reasoning. Consider priors/base rates, and the extent to which
+case-specific information justifies the deviation between your tentative forecast and the prior.
+Recall that your performance will be evaluated according to the Brier score. Be precise with tail
+probabilities. Leverage your intuitions, but never change your forecast for the sake of modesty
+or balance alone. Finally, aggregate all of your previous reasoning and highlight key factors
+that inform your final forecast. Use tags for this portion of your response.
+
+7. Output your final prediction (a number between 0 and 1 with an asterisk at the beginning and
+end of the decimal) in tags.
+
+
+OUTPUT_FORMAT
+* Your output response must be only a single JSON object to be parsed by Python's "json.loads()".
+* The JSON must contain four fields: "p_yes", "p_no", "confidence", and "info_utility".
+* Each item in the JSON must have a value between 0 and 1.
+  - "p_yes": Estimated probability that the event in the "Question" occurs.
+  - "p_no": Estimated probability that the event in the "Question" does not occur.
+  - "confidence": A value between 0 and 1 indicating the confidence in the prediction. 0 indicates lowest
+    confidence value; 1 maximum confidence value.
+  - "info_utility": Utility of the information provided in "sources" to help you make the prediction.
+    0 indicates lowest utility; 1 maximum utility.
+* The sum of "p_yes" and "p_no" must equal 1.
+* Output only the JSON object. Do not include any other contents in your response.
+* This is incorrect: "```json{{\n \"p_yes\": 0.2,\n \"p_no\": 0.8,\n \"confidence\": 0.7,\n \"info_utility\": 0.5\n}}```"
+* This is incorrect: ```json"{{\n \"p_yes\": 0.2,\n \"p_no\": 0.8,\n \"confidence\": 0.7,\n \"info_utility\": 0.5\n}}"```
+* This is correct: "{{\n \"p_yes\": 0.2,\n \"p_no\": 0.8,\n \"confidence\": 0.7,\n \"info_utility\": 0.5\n}}"
+"""
+
+
+def generate_prediction_with_retry(
+    model: str,
+    messages: List[Dict[str, str]],
+    temperature: float,
+    max_tokens: int,
+    retries: int = COMPLETION_RETRIES,
+    delay: int = COMPLETION_DELAY,
+    counter_callback: Optional[Callable] = None,
+):
+    """Attempt to generate a prediction with retries on failure."""
+    attempt = 0
+    while attempt < retries:
+        try:
+            response = client.completions(
+                model=model,
+                messages=messages,
+                temperature=temperature,
+                max_tokens=max_tokens,
+                n=1,
+                timeout=90,
+                stop=None,
+            )
+
+            if counter_callback is not None:
+                counter_callback(
+                    input_tokens=response.usage.prompt_tokens,
+                    output_tokens=response.usage.completion_tokens,
+                    model=model,
+                    token_counter=count_tokens,
+                )
+
+            return response.content, counter_callback
+        except Exception as e:
+            print(f"Attempt {attempt + 1} failed with error: {e}")
+            time.sleep(delay)
+            attempt += 1
+    raise Exception("Failed to generate prediction after retries")
+
+
+def fetch_additional_sources(question, serper_api_key):
+    """Fetch Google search results for the question via the Serper API."""
+    url = "https://google.serper.dev/search"
+    payload = json.dumps({"q": question})
+    headers = {
+        "X-API-KEY": serper_api_key,
+        "Content-Type": "application/json",
+    }
+
+    # bound the request so a slow Serper response cannot hang the tool
+    response = requests.post(url, headers=headers, data=payload, timeout=30)
+
+    return response
+
+
+def format_sources_data(organic_data, misc_data):
+    """Format organic and "People Also Ask" results into a sources block."""
+    sources = ""
+
+    if len(organic_data) > 0:
+        print("Adding organic data...")
+
+        sources = "Organic Results:\n"
+
+        for item in organic_data:
+            sources += f"""{item['position']}. **Title:** {item["title"]}
+            - **Link:** [{item["link"]}]({item["link"]})
+            - **Snippet:** {item["snippet"]}
+            """
+
+    if len(misc_data) > 0:
+        print("Adding misc data...")
+
+        sources += "People Also Ask:\n"
+
+        for counter, item in enumerate(misc_data, start=1):
+            sources += f"""{counter}. **Question:** {item["question"]}
+            - **Link:** [{item["link"]}]({item["link"]})
+            - **Snippet:** {item["snippet"]}
+            """
+
+    return sources
+
+
+@with_key_rotation
+def run(**kwargs) -> Tuple[str, Optional[str], Optional[Dict[str, Any]], Any]:
+    """Run the task."""
+    openai_api_key = kwargs["api_keys"]["openai"]
+    serper_api_key = kwargs["api_keys"]["serperapi"]
+    with OpenAIClientManager(openai_api_key):
+        max_tokens = kwargs.get("max_tokens", DEFAULT_OPENAI_SETTINGS["max_tokens"])
+        temperature = kwargs.get("temperature", DEFAULT_OPENAI_SETTINGS["temperature"])
+        prompt = kwargs["prompt"]
+        tool = kwargs["tool"]
+        # fall back to the default model when none is supplied
+        engine = kwargs.get("model") or DEFAULT_OPENAI_MODEL
+        counter_callback = kwargs.get("counter_callback", None)
+        if tool not in ALLOWED_TOOLS:
+            raise ValueError(f"Tool {tool} is not supported.")
+
+        today = date.today()
+        d = today.strftime("%d/%m/%Y")
+
+        print("Fetching additional sources...")
+        serper_response = fetch_additional_sources(prompt, serper_api_key)
+        sources_data = serper_response.json()
+        # choose top 5 results
+        organic_data = sources_data.get("organic", [])[:5]
+        misc_data = sources_data.get("peopleAlsoAsk", [])
+        print("Formatting sources...")
+        sources = format_sources_data(organic_data, misc_data)
+
+        print("Updating prompt...")
+        prediction_prompt = PREDICTION_PROMPT.format(
+            question=prompt, today=d, sources=sources
+        )
+
+        messages = [
+            {"role": "system", "content": "You are a helpful assistant."},
+            {"role": "user", "content": prediction_prompt},
+        ]
+        print("Getting prompt response...")
+        extracted_block, counter_callback = generate_prediction_with_retry(
+            model=engine,
+            messages=messages,
+            temperature=temperature,
+            max_tokens=max_tokens,
+            retries=COMPLETION_RETRIES,
+            delay=COMPLETION_DELAY,
+            counter_callback=counter_callback,
+        )
+
+        return extracted_block, prediction_prompt, None, counter_callback
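A minimal invocation sketch for the new `run` entry point, assuming a hypothetical stand-in for the mech service's KeyChain object. `DummyKeyChain` below is illustrative only; the real object ships with the mech service and must expose `max_retries()`, `rotate()`, and mapping-style key access, which is what `with_key_rotation` and `run` rely on:

# Hypothetical usage sketch; DummyKeyChain is NOT the real KeyChain class,
# it only mimics the interface that with_key_rotation and run consume.
from typing import Dict

from packages.valory.customs.superforcaster.superforcaster import run


class DummyKeyChain(dict):
    """Illustrative stand-in for the mech service's KeyChain object."""

    def max_retries(self) -> Dict[str, int]:
        # one retry per provider; the real object derives this from its key lists
        return {"openai": 1, "openrouter": 1}

    def rotate(self, service: str) -> None:
        # the real object swaps in the next stored key; nothing to rotate here
        return None


keys = DummyKeyChain(openai="sk-...", serperapi="serper-key")  # placeholder keys
content, prediction_prompt, metadata, counter_callback, keys = run(
    prompt="Will the ECB cut rates before 2025-07-01?",  # example market question
    tool="superforcaster",
    model="gpt-4o-2024-08-06",
    api_keys=keys,
)
print(content)  # JSON string with p_yes, p_no, confidence, info_utility

Note the shape of the result: the undecorated `run` returns a 4-tuple, and `with_key_rotation` appends the (possibly rotated) key chain, so callers receive a 5-tuple. On `openai.RateLimitError` the decorator rotates both provider keys and retries until the retry budget is exhausted; any other exception is returned as the first tuple element rather than raised.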