From 9865be00c861c4be1bbc5a206ab7af1b25456245 Mon Sep 17 00:00:00 2001 From: BaimoQilin Date: Sun, 5 May 2024 20:56:37 +0800 Subject: [PATCH 1/4] feat: Ignore _config.yaml _config.yaml is for personal use. --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index ade67b3..3116f2f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ +__pycache__ generated/* logs/* test.py -__pycache__ \ No newline at end of file +_config.yaml \ No newline at end of file From df200dd8f765c8d219d520198bafac57621df6f9 Mon Sep 17 00:00:00 2001 From: BaimoQilin Date: Sun, 5 May 2024 20:56:55 +0800 Subject: [PATCH 2/4] feat: Add configs for advanced mode --- config.yaml | 87 +++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 84 insertions(+), 3 deletions(-) diff --git a/config.yaml b/config.yaml index 0b60e19..86755d0 100644 --- a/config.yaml +++ b/config.yaml @@ -1,12 +1,34 @@ +########## EDIT REQUIRED ########## + # GPT SETTINGS # -# EDIT REQUIRED # Get your api key from openai. Remember google/bing is always your best friend. # Model names: gpt-4-turbo-preview, gpt-3.5-turbo, etc. # Recommend -> gpt-4-turbo-preview, which codes more accurately and is less likely to write bugs, but is more expensive. -API_KEY: "" +API_KEY: "" # Free API Key with GPT-4 access: https://github.com/CubeGPT/.github/discussions/1 BASE_URL: "https://api.openai.com/v1/chat/completions" -GENERATE_MODEL: "gpt-4-turbo-2024-04-09" # Don't use gpt-4, because this model is longer supports json modes. + +GENERATE_MODEL: "gpt-4-turbo-preview" # Don't use gpt-4, because this model no longer supports JSON mode. + + +# ADVANCED MODE # +# This mode is experimental, but we highly recommend enabling it for better performance. 
+ADVANCED_MODE: True +IMAGE_GENERATION_MODEL: "dall-e-3" +IMAGE_SIZE: "1024x1024" +VISION_MODEL: "gpt-4-vision-preview" + +# Note: If you are using the free API key above, you can't use the advanced mode since it doesn't support dall-e-3 and gpt-4-vision-preview models. +USE_DIFFERENT_APIKEY_FOR_DALLE_MODEL: False +DALLE_API_KEY: "" +DALLE_BASE_URL: "https://api.openai.com/v1/chat/completions" + +USE_DIFFERENT_APIKEY_FOR_VISION_MODEL: False +VISION_API_KEY: "" +VISION_BASE_URL: "https://api.openai.com/v1/chat/completions" + + +########## EDIT OPTIONAL ########## # PROMPT SETTINGS # # If you don't know what it is, please don't touch it. Be sure to backup before editing. @@ -48,6 +70,65 @@ SYS_GEN: | USR_GEN: | %DESCRIPTION% +## Advanced Mode ## + +### Programme ### +BTR_DESC_SYS_GEN: | + You are a minecraft schematic designer. Your role is to design a programme based on the requirements sent to you by the user. + For example, + User input: "A cafe." + Response: "A small cafe with a modern design, red roof and brown door, big windows. Inside, there's two tables and a bar." + +BTR_DESC_USR_GEN: | + %DESCRIPTION% + +### Image Tag Generation ### +IMG_TAG_SYS_GEN: | + You work for a minecraft schematic company and you need to use AI to generate the design image based on designer's architectural programme. Please respond with the tags you'd like to use for the image generation. + Never respond with anything else. + Example response: "A minecraft building with a modern design, red roof and brown door, big windows." + +IMG_TAG_USR_GEN: | + Designer's programme: %PROGRAMME% + +### Structure Generation (Advanced with gpt-4-vision) ### +SYS_GEN_ADV: | + You are a minecraft structure builder bot. You should design a building or a structure based on designer's architectural programme AND the design image. 
+ Response in json like this: + { + "materials": [ + "A: \"minecraft:air\"", + "S: \"minecraft:stone\"" + ], + "structures": [ + { + "floor": 0, + "structure": "SSSSSSSS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSSSSSSSS" + }, + { + "floor": 1, + "structure": "SSGGGGSS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSSSSSSSS" + }, + { + "floor": 2, + "structure": "SSGGGGSS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSSSSSSSS" + }, + { + "floor": 3, + "structure": "SSSSSSSS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSSSSSSSS" + }, + { + "floor": 4, + "structure": "SSSSSSSS\nSSSSSSSS\nSSSSSSSS\nSSSSSSSS\nSSSSSSSS\nSSSSSSSS\nSSSSSSSS\nSSSSSSSS\nSSSSSSSS\nSSSSSSSS\n" + } + ] + } + Never response anything else. Do not design a building which is too large (more than 10 floors). Never use markdown format. Use \n for line feed. + +USR_GEN_ADV: | + %DESCRIPTION% + The image is attached below. 
+ # Developer Settings # DEBUG_MODE: True VERSION_NUMBER: "Alpha-1.0" #NEVER EDIT THIS IF YOU DON'T KNOW WHAT ARE YOU DOING \ No newline at end of file From 8de8816ead388371f672cf8136e4f1a320d57878 Mon Sep 17 00:00:00 2001 From: BaimoQilin Date: Sun, 5 May 2024 20:58:19 +0800 Subject: [PATCH 3/4] feat!: Add advanced mode support --- console.py | 24 ++++++++++++++++- core.py | 76 +++++++++++++++++++++++++++++++++++++++++++++++------- ui.py | 28 +++++++++++++++++++- 3 files changed, 116 insertions(+), 12 deletions(-) diff --git a/console.py b/console.py index 4f7baa2..781d167 100644 --- a/console.py +++ b/console.py @@ -37,6 +37,24 @@ def generate_plugin(description): return schem +def get_schematic_advanced(description): + print("(Advanced Mode) Generating programme...") + programme = core.askgpt(config.BTR_DESC_SYS_GEN, config.BTR_DESC_USR_GEN.replace("%DESCRIPTION%", description), config.GENERATE_MODEL, disable_json_mode=True) + + print("(Advanced Mode) Generating image tag...") + image_tag = core.askgpt(config.IMG_TAG_SYS_GEN, config.IMG_TAG_USR_GEN.replace("%PROGRAMME%", programme), config.GENERATE_MODEL, disable_json_mode=True) + + print("(Advanced Mode) Generating image...") + tag = image_tag + ", minecraft)" + image_url = core.ask_dall_e(tag) + + print("(Advanced Mode) Generating schematic...") + response = core.askgpt(config.SYS_GEN_ADV, config.USR_GEN_ADV.replace("%DESCRIPTION%", description), config.VISION_MODEL, image_url=image_url) + + schem = core.text_to_schem(response) + + return schem + if __name__ == "__main__": core.initialize() @@ -54,7 +72,11 @@ def generate_plugin(description): print("Generating...") - schem = generate_plugin(description) + if config.ADVANCED_MODE: + print("Advanced mode is enabled. 
Generating a schematic with advanced features.") + schem = get_schematic_advanced(description) + else: + schem = generate_plugin(description) logger(f"console: Saving {name}.schem to generated/ folder.") version_tag = core.input_version_to_mcs_tag(version) diff --git a/core.py b/core.py index 4ce1c10..45c2910 100644 --- a/core.py +++ b/core.py @@ -2,6 +2,9 @@ import mcschematic import sys import json +import requests +import base64 +import uuid from log_writer import logger import config @@ -31,24 +34,46 @@ def askgpt(system_prompt: str, user_prompt: str, model_name: str): Returns: str: The response from ChatGPT. """ - client = OpenAI(api_key=config.API_KEY, base_url=config.BASE_URL) + if image_url is not None and config.USE_DIFFERENT_APIKEY_FOR_VISION_MODEL: + logger("Using different API key for vision model.") + client = OpenAI(api_key=config.VISION_API_KEY, base_url=config.VISION_BASE_URL) + else: + client = OpenAI(api_key=config.API_KEY, base_url=config.BASE_URL) + logger("Initialized the OpenAI client.") # Define the messages for the conversation - messages = [ - {"role": "system", "content": system_prompt}, - {"role": "user", "content": user_prompt} - ] + if image_url is not None: + messages = [ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": [ + {"type": "text", "text": user_prompt}, + {"type": "image_url", "image_url": {"url": image_url}} + ] + } + ] + else: + messages = [ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": user_prompt} + ] + logger(f"askgpt: system {system_prompt}") logger(f"askgpt: user {user_prompt}") # Create a chat completion - response = client.chat.completions.create( - model=model_name, - response_format={"type": "json_object"}, - messages=messages - ) + if disable_json_mode: + response = client.chat.completions.create( + model=model_name, + messages=messages + ) + else: + response = client.chat.completions.create( + model=model_name, + response_format={"type": "json_object"}, 
+ messages=messages + ) logger(f"askgpt: response {response}") @@ -57,6 +82,37 @@ def askgpt(system_prompt: str, user_prompt: str, model_name: str): logger(f"askgpt: extracted reply {assistant_reply}") return assistant_reply +def ask_dall_e(description: str): + """ + Generates a design image using the DALL-E API. + + Args: + description (str): The prompt or description for generating the image. + + Returns: + str: The URL of the generated image. + """ + if config.USE_DIFFERENT_APIKEY_FOR_DALLE_MODEL: + client = OpenAI(api_key=config.DALLE_API_KEY, base_url=config.DALLE_BASE_URL) + else: + client = OpenAI(api_key=config.API_KEY, base_url=config.BASE_URL) + + logger("ask_dall_e: Generating design image using DALL-E API.") + + response = client.images.generate( + model=config.IMAGE_GENERATION_MODEL, + prompt=description, + size=config.IMAGE_SIZE, + quality="standard", + n=1, + ) + + image_url = response.data[0].url + + logger(f"ask_dall_e: Generated image URL {image_url}") + + return image_url + def text_to_schem(text: str): """ Converts a JSON string to a Minecraft schematic. 
diff --git a/ui.py b/ui.py index 239e4ef..84c67f1 100644 --- a/ui.py +++ b/ui.py @@ -29,6 +29,24 @@ def get_schematic(description): return schem +def get_schematic_advanced(description): + print("(Advanced Mode) Generating programme...") + programme = core.askgpt(config.BTR_DESC_SYS_GEN, config.BTR_DESC_USR_GEN.replace("%DESCRIPTION%", description), config.GENERATE_MODEL, disable_json_mode=True) + + print("(Advanced Mode) Generating image tag...") + image_tag = core.askgpt(config.IMG_TAG_SYS_GEN, config.IMG_TAG_USR_GEN.replace("%PROGRAMME%", programme), config.GENERATE_MODEL, disable_json_mode=True) + + print("(Advanced Mode) Generating image...") + tag = image_tag + ", minecraft)" + image_url = core.ask_dall_e(tag) + + print("(Advanced Mode) Generating schematic...") + response = core.askgpt(config.SYS_GEN_ADV, config.USR_GEN_ADV.replace("%DESCRIPTION%", description), config.VISION_MODEL, image_url=image_url) + + schem = core.text_to_schem(response) + + return schem + def generate_schematic(): """ Generates a schematic file based on user input. @@ -42,6 +60,11 @@ def generate_schematic(): """ generate_button.config(state=tk.DISABLED, text="Generating...") + if config.ADVANCED_MODE: + msgbox.showwarning("Warning", "You are using advanced mode. This mode will generate schematic with higher quality, but it may take longer to generate.") + + msgbox.showinfo("Info", "It is expected to take 30 seconds to 5 minutes. The programme may \"not responding\", this is normal, just be patient. DO NOT CLOSE THE PROGRAM. 
Click the button below to start generating.") + version = version_entry.get() name = name_entry.get() description = description_entry.get() @@ -50,7 +73,10 @@ def generate_schematic(): logger(f"console: input name {name}") logger(f"console: input description {description}") - schem = get_schematic(description) + if config.ADVANCED_MODE: + schem = get_schematic_advanced(description) + else: + schem = get_schematic(description) logger(f"console: Saving {name}.schem to generated/ folder.") version_tag = core.input_version_to_mcs_tag(version) From 21f9c136a2a8855ebe879457808aceaa3297c5d3 Mon Sep 17 00:00:00 2001 From: BaimoQilin Date: Sun, 5 May 2024 20:58:53 +0800 Subject: [PATCH 4/4] Fix: no-json-in-text error while using advanced mode --- core.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/core.py b/core.py index 45c2910..84f2bb9 100644 --- a/core.py +++ b/core.py @@ -23,13 +23,15 @@ def initialize(): """ logger(f"Launch. Software version {config.VERSION_NUMBER}, platform {sys.platform}") -def askgpt(system_prompt: str, user_prompt: str, model_name: str): +def askgpt(system_prompt: str, user_prompt: str, model_name: str, disable_json_mode: bool = False, image_url: str = None): """ Interacts with ChatGPT using the specified prompts. Args: system_prompt (str): The system prompt. user_prompt (str): The user prompt. + model_name (str): The model name to use. + disable_json_mode (bool): Whether to disable JSON mode. Returns: str: The response from ChatGPT.