Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat!: Add advanced mode support #5

Merged
merged 4 commits into from
May 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
__pycache__
generated/*

logs/*
test.py
__pycache__
_config.yaml
87 changes: 84 additions & 3 deletions config.yaml
Original file line number Diff line number Diff line change
@@ -1,12 +1,34 @@
########## EDIT REQUIRED ##########

# GPT SETTINGS #
# EDIT REQUIRED
# Get your api key from openai. Remember google/bing is always your best friend.
# Model names: gpt-4-turbo-preview, gpt-3.5-turbo, etc.
# Recommend -> gpt-4-turbo-preview, which codes more accurately and is less likely to write bugs, but is more expensive.

API_KEY: ""
API_KEY: "" # Free API Key with GPT-4 access: https://github.com/CubeGPT/.github/discussions/1
BASE_URL: "https://api.openai.com/v1/chat/completions"
GENERATE_MODEL: "gpt-4-turbo-2024-04-09" # Don't use gpt-4, because that model does not support JSON mode.

GENERATE_MODEL: "gpt-4-turbo-preview" # Don't use gpt-4, because that model does not support JSON mode.


# ADVANCED MODE #
# This mode is experimental, but we highly recommend enabling it for better performance.
ADVANCED_MODE: True
IMAGE_GENERATION_MODEL: "dall-e-3"
IMAGE_SIZE: "1024x1024"
VISION_MODEL: "gpt-4-vision-preview"

# Note: If you are using the free API key above, you can't use the advanced mode since it doesn't support dall-e-3 and gpt-4-vision-preview models.
USE_DIFFERENT_APIKEY_FOR_DALLE_MODEL: False
DALLE_API_KEY: ""
DALLE_BASE_URL: "https://api.openai.com/v1/chat/completions"

USE_DIFFERENT_APIKEY_FOR_VISION_MODEL: False
VISION_API_KEY: ""
VISION_BASE_URL: "https://api.openai.com/v1/chat/completions"


########## EDIT OPTIONAL ##########

# PROMPT SETTINGS #
# If you don't know what it is, please don't touch it. Be sure to backup before editing.
Expand Down Expand Up @@ -48,6 +70,65 @@ SYS_GEN: |
USR_GEN: |
%DESCRIPTION%

## Advanced Mode ##

### Programme ###
BTR_DESC_SYS_GEN: |
You are an minecraft schematic designer. Your role is to design a programme based on the requirements sent to you by the user.
For exmaple,
User input: "A cafe."
Response: "A small cafe with a modern design, red roof and brown door, big windows. Inside, there's two tables and a bar."

BTR_DESC_USR_GEN: |
%DESCRIPTION%

### Image Tag Generation ###
IMG_TAG_SYS_GEN: |
You work for a minecraft schematic company and you need to use AI to generate the design image based on designer's architectural programme. Please response the tags you'd like to use for the image generation.
Never response anything else.
Example resposne: "A minecraft building with a modern design, red roof and brown door, big windows."

IMG_TAG_USR_GEN: |
Designer's programme: %PROGRAMME%

### Structure Generation (Advanced with gpt-4-vision) ###
SYS_GEN_ADV: |
You are a minecraft structure builder bot. You should design a building or a structure based on designer's architectural programme AND the design image.
Response in json like this:
{
"materials": [
"A: \"minecraft:air\"",
"S: \"minecraft:stone\""
],
"structures": [
{
"floor": 0,
"structure": "SSSSSSSS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSSSSSSSS"
},
{
"floor": 1,
"structure": "SSGGGGSS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSSSSSSSS"
},
{
"floor": 2,
"structure": "SSGGGGSS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSSSSSSSS"
},
{
"floor": 3,
"structure": "SSSSSSSS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSSSSSSSS"
},
{
"floor": 4,
"structure": "SSSSSSSS\nSSSSSSSS\nSSSSSSSS\nSSSSSSSS\nSSSSSSSS\nSSSSSSSS\nSSSSSSSS\nSSSSSSSS\nSSSSSSSS\nSSSSSSSS\n"
}
]
}
Never response anything else. Do not design a building which is too large (more than 10 floors). Never use markdown format. Use \n for line feed.

USR_GEN_ADV: |
%DESCRIPTION%
The image is attached below.

# Developer Settings #
DEBUG_MODE: True
VERSION_NUMBER: "Alpha-1.0" # NEVER EDIT THIS IF YOU DON'T KNOW WHAT YOU ARE DOING
24 changes: 23 additions & 1 deletion console.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,24 @@ def generate_plugin(description):

return schem

def get_schematic_advanced(description):
    """
    Generates a schematic through the advanced (image-assisted) pipeline.

    The description is expanded into a design programme, the programme is
    turned into an image prompt, an image is generated from that prompt, and
    the vision model is then asked for the structure JSON with the image
    attached.

    Args:
        description (str): The user's description of the structure.

    Returns:
        The value returned by core.text_to_schem for the model's response.
    """
    print("(Advanced Mode) Generating programme...")
    programme = core.askgpt(
        config.BTR_DESC_SYS_GEN,
        config.BTR_DESC_USR_GEN.replace("%DESCRIPTION%", description),
        config.GENERATE_MODEL,
        disable_json_mode=True,
    )

    print("(Advanced Mode) Generating image tag...")
    image_tag = core.askgpt(
        config.IMG_TAG_SYS_GEN,
        config.IMG_TAG_USR_GEN.replace("%PROGRAMME%", programme),
        config.GENERATE_MODEL,
        disable_json_mode=True,
    )

    print("(Advanced Mode) Generating image...")
    # Append a plain keyword so the image prompt stays Minecraft-themed;
    # the suffix must not contain unbalanced punctuation.
    tag = image_tag + ", minecraft"
    image_url = core.ask_dall_e(tag)

    print("(Advanced Mode) Generating schematic...")
    # NOTE(review): SYS_GEN_ADV refers to the designer's programme, but the
    # user prompt is filled with the raw description - confirm whether
    # `programme` should be sent here instead.
    response = core.askgpt(
        config.SYS_GEN_ADV,
        config.USR_GEN_ADV.replace("%DESCRIPTION%", description),
        config.VISION_MODEL,
        image_url=image_url,
    )

    return core.text_to_schem(response)

if __name__ == "__main__":
core.initialize()

Expand All @@ -54,7 +72,11 @@ def generate_plugin(description):

print("Generating...")

schem = generate_plugin(description)
if config.ADVANCED_MODE:
print("Advanced mode is enabled. Generating a schematic with advanced features.")
schem = get_schematic_advanced(description)
else:
schem = generate_plugin(description)

logger(f"console: Saving {name}.schem to generated/ folder.")
version_tag = core.input_version_to_mcs_tag(version)
Expand Down
80 changes: 69 additions & 11 deletions core.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
import mcschematic
import sys
import json
import requests
import base64
import uuid

from log_writer import logger
import config
Expand All @@ -20,35 +23,59 @@ def initialize():
"""
logger(f"Launch. Software version {config.VERSION_NUMBER}, platform {sys.platform}")

def askgpt(system_prompt: str, user_prompt: str, model_name: str):
def askgpt(system_prompt: str, user_prompt: str, model_name: str, disable_json_mode: bool = False, image_url: str = None):
"""
Interacts with ChatGPT using the specified prompts.

Args:
system_prompt (str): The system prompt.
user_prompt (str): The user prompt.
model_name (str): The model name to use.
disable_json_mode (bool): Whether to disable JSON mode.

Returns:
str: The response from ChatGPT.
"""
client = OpenAI(api_key=config.API_KEY, base_url=config.BASE_URL)
if image_url is not None and config.USE_DIFFERENT_APIKEY_FOR_VISION_MODEL:
logger("Using different API key for vision model.")
client = OpenAI(api_key=config.VISION_API_KEY, base_url=config.VISION_BASE_URL)
else:
client = OpenAI(api_key=config.API_KEY, base_url=config.BASE_URL)

logger("Initialized the OpenAI client.")

# Define the messages for the conversation
messages = [
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_prompt}
]
if image_url is not None:
messages = [
{"role": "system", "content": system_prompt},
{"role": "user", "content": [
{"type": "text", "text": user_prompt},
{"type": "image_url", "image_url": {"url": image_url}}
]
}
]
else:
messages = [
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_prompt}
]


logger(f"askgpt: system {system_prompt}")
logger(f"askgpt: user {user_prompt}")

# Create a chat completion
response = client.chat.completions.create(
model=model_name,
response_format={"type": "json_object"},
messages=messages
)
if disable_json_mode:
response = client.chat.completions.create(
model=model_name,
messages=messages
)
else:
response = client.chat.completions.create(
model=model_name,
response_format={"type": "json_object"},
messages=messages
)

logger(f"askgpt: response {response}")

Expand All @@ -57,6 +84,37 @@ def askgpt(system_prompt: str, user_prompt: str, model_name: str):
logger(f"askgpt: extracted reply {assistant_reply}")
return assistant_reply

def ask_dall_e(description: str):
    """
    Requests a single design image from the image generation endpoint.

    Credentials come from the DALLE_* settings when
    config.USE_DIFFERENT_APIKEY_FOR_DALLE_MODEL is truthy, otherwise from the
    general API_KEY / BASE_URL settings.

    Args:
        description (str): The prompt used to generate the image.

    Returns:
        str: The URL of the first generated image.
    """
    # NOTE(review): the OpenAI client is expected to append endpoint paths to
    # base_url - confirm the configured URLs are API roots, not full endpoints.
    dedicated = config.USE_DIFFERENT_APIKEY_FOR_DALLE_MODEL
    key = config.DALLE_API_KEY if dedicated else config.API_KEY
    url = config.DALLE_BASE_URL if dedicated else config.BASE_URL
    client = OpenAI(api_key=key, base_url=url)

    logger("ask_dall_e: Generating design image using DALL-E API.")

    request = {
        "model": config.IMAGE_GENERATION_MODEL,
        "prompt": description,
        "size": config.IMAGE_SIZE,
        "quality": "standard",
        "n": 1,
    }
    result = client.images.generate(**request)

    # Only one image is requested, so the first entry is the result.
    image_url = result.data[0].url
    logger(f"ask_dall_e: Generated image URL {image_url}")
    return image_url

def text_to_schem(text: str):
"""
Converts a JSON string to a Minecraft schematic.
Expand Down
28 changes: 27 additions & 1 deletion ui.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,24 @@ def get_schematic(description):

return schem

def get_schematic_advanced(description):
    """
    Generates a schematic through the advanced (image-assisted) pipeline.

    The description is expanded into a design programme, the programme is
    turned into an image prompt, an image is generated from that prompt, and
    the vision model is then asked for the structure JSON with the image
    attached.

    Args:
        description (str): The user's description of the structure.

    Returns:
        The value returned by core.text_to_schem for the model's response.
    """
    print("(Advanced Mode) Generating programme...")
    programme = core.askgpt(
        config.BTR_DESC_SYS_GEN,
        config.BTR_DESC_USR_GEN.replace("%DESCRIPTION%", description),
        config.GENERATE_MODEL,
        disable_json_mode=True,
    )

    print("(Advanced Mode) Generating image tag...")
    image_tag = core.askgpt(
        config.IMG_TAG_SYS_GEN,
        config.IMG_TAG_USR_GEN.replace("%PROGRAMME%", programme),
        config.GENERATE_MODEL,
        disable_json_mode=True,
    )

    print("(Advanced Mode) Generating image...")
    # Append a plain keyword so the image prompt stays Minecraft-themed;
    # the suffix must not contain unbalanced punctuation.
    tag = image_tag + ", minecraft"
    image_url = core.ask_dall_e(tag)

    print("(Advanced Mode) Generating schematic...")
    # NOTE(review): SYS_GEN_ADV refers to the designer's programme, but the
    # user prompt is filled with the raw description - confirm whether
    # `programme` should be sent here instead.
    response = core.askgpt(
        config.SYS_GEN_ADV,
        config.USR_GEN_ADV.replace("%DESCRIPTION%", description),
        config.VISION_MODEL,
        image_url=image_url,
    )

    return core.text_to_schem(response)

def generate_schematic():
"""
Generates a schematic file based on user input.
Expand All @@ -42,6 +60,11 @@ def generate_schematic():
"""
generate_button.config(state=tk.DISABLED, text="Generating...")

if config.ADVANCED_MODE:
msgbox.showwarning("Warning", "You are using advanced mode. This mode will generate schematic with higher quality, but it may take longer to generate.")

msgbox.showinfo("Info", "It is expected to take 30 seconds to 5 minutes. The programme may \"not responding\", this is normal, just be patient. DO NOT CLOSE THE PROGRAM. Click the button below to start generating.")

version = version_entry.get()
name = name_entry.get()
description = description_entry.get()
Expand All @@ -50,7 +73,10 @@ def generate_schematic():
logger(f"console: input name {name}")
logger(f"console: input description {description}")

schem = get_schematic(description)
if config.ADVANCED_MODE:
schem = get_schematic_advanced(description)
else:
schem = get_schematic(description)

logger(f"console: Saving {name}.schem to generated/ folder.")
version_tag = core.input_version_to_mcs_tag(version)
Expand Down