From f2f195bc1563f21b320ab35c5f10a29315f2ee1f Mon Sep 17 00:00:00 2001 From: melih-unsal Date: Wed, 27 Sep 2023 20:26:56 +0300 Subject: [PATCH] finalizer module refined, doc_load refined --- demogpt/chains/prompts/combine_v2.py | 37 ++++++++-- demogpt/chains/prompts/task_list/doc_load.py | 67 +++++++------------ .../chains/prompts/task_list/ui_input_file.py | 1 + demogpt/chains/task_chains.py | 33 ++++++--- demogpt/controllers.py | 12 ++-- demogpt/utils.py | 56 +++++++++------- pyproject.toml | 2 +- 7 files changed, 119 insertions(+), 89 deletions(-) diff --git a/demogpt/chains/prompts/combine_v2.py b/demogpt/chains/prompts/combine_v2.py index 25d3007..8d9ebe7 100644 --- a/demogpt/chains/prompts/combine_v2.py +++ b/demogpt/chains/prompts/combine_v2.py @@ -1,15 +1,17 @@ system_template = """ -Regenerate the code by only combining the input parts into st.form. +Regenerate the code by combining all the user input parts into st.form. It is really important not to change other parts. Copy all the function definitions and library imports as is and don't modify or replace them. Combine input-related parts under the st.form. If a function needs an input from user via st.text_input, put it between st.form and st.form_submit_button so that the state is preserved. -Show the result when the form is submitted. +Show the result when the form is submitted under the if submit_button: statement. Keep in mind that don't miss any function definition. Don't forget to add those functions with their original definitions as is {function_names} + +Always put "if submit_button:" inside of st.form block """ human_template = """ @@ -17,6 +19,13 @@ DRAFT CODE 1: # all imports +openai_api_key = st.sidebar.text_input( + "OpenAI API Key", + placeholder="sk-...", + value=os.getenv("OPENAI_API_KEY", ""), + type="password", +) + def foo1(): result = "res" return result @@ -43,27 +52,41 @@ def foo2(half_story,user_choice): FINAL CODE 1: # all imports +# all functions + +openai_api_key = st.sidebar.text_input( + "OpenAI API Key", + placeholder="sk-...", + value=os.getenv("OPENAI_API_KEY", ""), + type="password", +) + def foo1(): result = "res" return result -half_story = foo1() - -if half_story: - st.write(half_story) - def foo2(half_story,user_choice): result = half_story + user_choice return result with st.form(key='story_game'): + # take all user inputs text_input = st.text_input(label='Enter some text') user_choice = st.selectbox("What would you like to do next?", ["Choice1", "Choice2"]) submit_button = st.form_submit_button(label='Submit Story') + # run functions if submit button is pressed if submit_button: + half_story = foo1() + if half_story: + st.write(half_story) if text_input and user_choice : continued_story = foo2(text_input,user_choice) + else: + continued_story = "" + if continued_story: st.markdown(continued_story) + else: # if not submitted yet, we need to initizalize continued_story to get rid of name error + continued_story = "" ############################################################# diff --git a/demogpt/chains/prompts/task_list/doc_load.py b/demogpt/chains/prompts/task_list/doc_load.py index d317f5d..688109f 100644 --- a/demogpt/chains/prompts/task_list/doc_load.py +++ b/demogpt/chains/prompts/task_list/doc_load.py @@ -1,64 +1,49 @@ loaders = """ For Local TXT file: -from langchain.document_loaders import TextLoader -loader = TextLoader() +TextLoader ################################ For Web Page: -from langchain.document_loaders import WebBaseLoader -loader = WebBaseLoader("") +WebBaseLoader ################################ For Online PDF: -from langchain.document_loaders import OnlinePDFLoader -loader = OnlinePDFLoader("") +OnlinePDFLoader ################################ For Local PDF: -from langchain.document_loaders import UnstructuredPDFLoader -loader = UnstructuredPDFLoader( - , mode="elements", strategy="fast" - ) +UnstructuredPDFLoader ################################ For Power Point: -from langchain.document_loaders import UnstructuredPowerPointLoader -loader = UnstructuredPowerPointLoader( - , mode="elements", strategy="fast" - ) +UnstructuredPowerPointLoader ################################ For CSV: -from langchain.document_loaders.csv_loader import UnstructuredCSVLoader -loader = UnstructuredCSVLoader(, mode="elements") +UnstructuredCSVLoader ################################ For Excel: -from langchain.document_loaders.excel import UnstructuredExcelLoader -loader = UnstructuredExcelLoader(, mode="elements") +UnstructuredExcelLoader """ +loader_dict = { + "txt" : "TextLoader", + "web_page" : "WebBaseLoader", + "online_pdf" : "OnlinePDFLoader", + "pdf" :"UnstructuredPDFLoader", + "powerpoint" : "UnstructuredPowerPointLoader", + "csv" : "UnstructuredCSVLoader", + "excel" :"UnstructuredExcelLoader" + } + system_template = f""" -These are the Loader classes that you should select. -Select the loader according to the input type unless the input type is ambiguous. +Based on the provided context in 'Previous Code', choose the most appropriate loader. + +These are your loader options: + {loaders} """ human_template = """ -Write a loader function using langchain.document_loaders -to load the document for the argument name, variable and instruction -below like in the below format: - -### -def {function_name}({argument}): - loader = Loader(path) # Select the appropriate Loader - docs = loader.load() - return docs - -if {argument}: - {variable} = {function_name}({argument}) -else: - {variable} = '' -### - -While using the loader, don't change "mode" and "strategy" arguments, they need to be constant as stated. -If there are no such arguments, ignore it. - -Instruction:{instruction} +Use the information from 'Previous Code' to determine the loader from one of the 7 loader options. +Don't write any explanation but directly say the loader option -Document Loader Code: +Instruction: {instruction} +Previous Code: {code_snippets} +Loader Option: """ diff --git a/demogpt/chains/prompts/task_list/ui_input_file.py b/demogpt/chains/prompts/task_list/ui_input_file.py index f538b44..536efa1 100644 --- a/demogpt/chains/prompts/task_list/ui_input_file.py +++ b/demogpt/chains/prompts/task_list/ui_input_file.py @@ -16,6 +16,7 @@ You will basically use file_uploader and get file path from it but nothing else. Do not loose the file path and check if the file is uploaded. Otherwise, assign empty string to "{variable}" Don't read the file, only get the file path +In the st.file_uploader, change type parameter compatible with the type of the expected file such as pdf, csv, ... """ human_template = """ diff --git a/demogpt/chains/task_chains.py b/demogpt/chains/task_chains.py index b9037d1..72a73c4 100644 --- a/demogpt/chains/task_chains.py +++ b/demogpt/chains/task_chains.py @@ -93,7 +93,7 @@ def pathToContent(cls, task, code_snippets): return utils.refine(code) @classmethod - def promptTemplate(cls, task, code_snippets): + def promptTemplate(cls, task): inputs = task["input_key"] instruction = task["description"] @@ -101,8 +101,7 @@ def promptTemplate(cls, task, code_snippets): system_template=prompts.prompt_template.system_template, human_template=prompts.prompt_template.human_template, instruction=instruction, - inputs=inputs, - code_snippets=code_snippets, + inputs=inputs ) res = res[res.find("{") : res.rfind("}") + 1] return json.loads(res) @@ -175,16 +174,34 @@ def docLoad(cls, task, code_snippets): variable = task["output_key"] function_name = task["task_name"] - code = cls.getChain( + loader = cls.getChain( system_template=prompts.doc_load.system_template, human_template=prompts.doc_load.human_template, instruction=instruction, - argument=argument, - variable=variable, - function_name=function_name, code_snippets=code_snippets, ) - return utils.refine(code) + + + if loader in ["TextLoader", "WebBaseLoader", "OnlinePDFLoader"]: + loader_line = f'loader = {loader}({argument})' + elif loader in ["UnstructuredPDFLoader", "UnstructuredPowerPointLoader"]: + loader_line = f'loader = {loader}({argument}, mode="elements", strategy="fast")' + elif loader in ["UnstructuredCSVLoader", "UnstructuredExcelLoader"]: + loader_line = f'loader = {loader}({argument}, mode="elements")' + else: + loader_line = f'loader = TextLoader({argument})' + + code = f""" +def {function_name}({argument}): + {loader_line} + docs = loader.load() + return docs +if {argument}: + {variable} = {function_name}({argument}) +else: + {variable} = '' + """ + return code @classmethod def stringToDoc(cls, task, code_snippets): diff --git a/demogpt/controllers.py b/demogpt/controllers.py index 57a4dca..eed9df3 100644 --- a/demogpt/controllers.py +++ b/demogpt/controllers.py @@ -49,7 +49,6 @@ def checkDTypes(tasks): feedback += f""" {name} expects all inputs as {reference_input} or none but the data type of {input_key} is {input_data_type} not {reference_input}. Please find another way.\n """ - print("1:",) else: for res, data_type in zip(input_key, input_data_type): if data_type != reference_input: @@ -75,10 +74,8 @@ def checkDTypes(tasks): return {"feedback": feedback, "valid": valid} -def checkPromptTemplates(templates, task): - human_template = templates["template"] - system_template = templates["system_template"] - templates = human_template + system_template +def checkPromptTemplates(templates, task, additional_inputs=[]): + templates = " ".join(list(templates.values())) inputs = task["input_key"] if inputs == "none": inputs = [] @@ -87,8 +84,9 @@ def checkPromptTemplates(templates, task): if inputs.startswith("["): inputs = inputs[1:-1] inputs = [var.strip() for var in inputs.split(",")] + template_inputs = inputs + additional_inputs feedback = "" - for input_key in inputs: + for input_key in template_inputs: if f"{{{input_key}}}" not in templates: feedback += f"'{{{input_key}}}' is not included in any of the templates. You must add '{{{input_key}}}' inside of at least one of the templates.\n" @@ -97,7 +95,7 @@ def checkPromptTemplates(templates, task): matches = set(re.findall(r"\{([^}]+)\}", templates)) for match in matches: - if match not in inputs: + if match not in template_inputs: feedback += f"'{{{match}}}' cannot be included nowhere in the templates. You must remove '{{{match}}}'.\n" valid = len(feedback) == 0 diff --git a/demogpt/utils.py b/demogpt/utils.py index 1721f60..99a241a 100644 --- a/demogpt/utils.py +++ b/demogpt/utils.py @@ -21,7 +21,35 @@ def getFunctionNames(code): pattern = r"def (\w+)\(.*\):" return re.findall(pattern, code) - +def getGenericPromptTemplateCode(task, iters): + res = "" + is_valid = False + task_type = task["task_type"] + prompt_func = TaskChains.promptTemplate if task_type == "prompt_template" else TaskChains.chat + finalizer_func = getPromptChatTemplateCode if task_type == "prompt_template" else getChatCode + additional_inputs = [] + if task_type == "chat": + additional_inputs.append("chat_history") + res = prompt_func(task=task) + templates = {key:res.get(key) for key in res if "template" in key} + function_name = res.get("function_name") + variety = res.get("variety") + index = 0 + while not is_valid: + check = checkPromptTemplates(templates, task, additional_inputs) + is_valid = check["valid"] + feedback = check["feedback"] + if not is_valid: + res = TaskChains.promptTemplateRefiner(res, feedback) + else: + break + index += 1 + if index == iters: + break + res["function_name"] = function_name + res["variety"] = variety + return finalizer_func(res, task) + def getCodeSnippet(task, code_snippets, iters=10): task = refineKeyTypeCompatiblity(task) task_type = task["task_type"] @@ -30,27 +58,8 @@ def getCodeSnippet(task, code_snippets, iters=10): code = TaskChains.uiInputText(task=task, code_snippets=code_snippets) elif task_type == "ui_output_text": code = TaskChains.uiOutputText(task=task, code_snippets=code_snippets) - elif task_type == "prompt_template": - res = "" - is_valid = False - res = TaskChains.promptTemplate(task=task, code_snippets=code_snippets) - function_name = res.get("function_name") - variety = res.get("variety") - index = 0 - while not is_valid: - check = checkPromptTemplates(res, task) - is_valid = check["valid"] - feedback = check["feedback"] - if not is_valid: - res = TaskChains.promptTemplateRefiner(res, feedback) - else: - break - index += 1 - if index == iters: - break - res["function_name"] = function_name - res["variety"] = variety - code = getPromptChatTemplateCode(res, task) + elif task_type in ["prompt_template", "chat"]: + code = getGenericPromptTemplateCode(task, iters=iters) elif task_type == "path_to_content": code = TaskChains.pathToContent(task=task, code_snippets=code_snippets) elif task_type == "doc_to_string": @@ -63,9 +72,6 @@ def getCodeSnippet(task, code_snippets, iters=10): code = TaskChains.docLoad(task=task, code_snippets=code_snippets) elif task_type == "doc_summarizer": code = TaskChains.summarize(task=task, code_snippets=code_snippets) - elif task_type == "chat": - template = TaskChains.chat(task=task) - code = getChatCode(template=template, task=task) elif task_type == "ui_input_chat": code = getChatInputCode(TaskChains.uiInputChat(task=task)) elif task_type == "ui_output_chat": diff --git a/pyproject.toml b/pyproject.toml index 71bccd1..6ab2f4b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api" [tool.poetry] name = "demogpt" -version = "1.2.6.3" +version = "1.2.6.4" description = "Auto Gen-AI App Generator with the Power of Llama 2" authors = ["Melih Unsal "] license = "MIT"