Skip to content

Commit

Permalink
Merge pull request #5 from Zhou-Shilin/main
Browse files Browse the repository at this point in the history
feat!: Add advanced mode support
  • Loading branch information
Zhou-Shilin authored May 5, 2024
2 parents 9b1973c + 21f9c13 commit ae9d7bd
Show file tree
Hide file tree
Showing 5 changed files with 205 additions and 17 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
__pycache__
generated/*

logs/*
test.py
__pycache__
_config.yaml
87 changes: 84 additions & 3 deletions config.yaml
Original file line number Diff line number Diff line change
@@ -1,12 +1,34 @@
########## EDIT REQUIRED ##########

# GPT SETTINGS #
# EDIT REQUIRED
# Get your api key from openai. Remember google/bing is always your best friend.
# Model names: gpt-4-turbo-preview, gpt-3.5-turbo, etc.
# Recommend -> gpt-4-turbo-preview, which codes more accurately and is less likely to write bugs, but is more expensive.

API_KEY: ""
API_KEY: "" # Free API Key with GPT-4 access: https://github.com/CubeGPT/.github/discussions/1
BASE_URL: "https://api.openai.com/v1/chat/completions"
GENERATE_MODEL: "gpt-4-turbo-2024-04-09" # Don't use gpt-4, because that model does not support JSON mode.

GENERATE_MODEL: "gpt-4-turbo-preview" # Don't use gpt-4, because that model does not support JSON mode.


# ADVANCED MODE #
# This mode is experimental, but we highly recommend enabling it for better performance.
ADVANCED_MODE: True
IMAGE_GENERATION_MODEL: "dall-e-3"
IMAGE_SIZE: "1024x1024"
VISION_MODEL: "gpt-4-vision-preview"

# Note: If you are using the free API key above, you can't use the advanced mode since it doesn't support dall-e-3 and gpt-4-vision-preview models.
USE_DIFFERENT_APIKEY_FOR_DALLE_MODEL: False
DALLE_API_KEY: ""
DALLE_BASE_URL: "https://api.openai.com/v1/chat/completions"

USE_DIFFERENT_APIKEY_FOR_VISION_MODEL: False
VISION_API_KEY: ""
VISION_BASE_URL: "https://api.openai.com/v1/chat/completions"


########## EDIT OPTIONAL ##########

# PROMPT SETTINGS #
# If you don't know what it is, please don't touch it. Be sure to backup before editing.
Expand Down Expand Up @@ -48,6 +70,65 @@ SYS_GEN: |
USR_GEN: |
%DESCRIPTION%
## Advanced Mode ##

### Programme ###
BTR_DESC_SYS_GEN: |
You are an minecraft schematic designer. Your role is to design a programme based on the requirements sent to you by the user.
For exmaple,
User input: "A cafe."
Response: "A small cafe with a modern design, red roof and brown door, big windows. Inside, there's two tables and a bar."
BTR_DESC_USR_GEN: |
%DESCRIPTION%
### Image Tag Generation ###
IMG_TAG_SYS_GEN: |
You work for a minecraft schematic company and you need to use AI to generate the design image based on designer's architectural programme. Please response the tags you'd like to use for the image generation.
Never response anything else.
Example resposne: "A minecraft building with a modern design, red roof and brown door, big windows."
IMG_TAG_USR_GEN: |
Designer's programme: %PROGRAMME%
### Structure Generation (Advanced with gpt-4-vision) ###
SYS_GEN_ADV: |
You are a minecraft structure builder bot. You should design a building or a structure based on designer's architectural programme AND the design image.
Response in json like this:
{
"materials": [
"A: \"minecraft:air\"",
"S: \"minecraft:stone\""
],
"structures": [
{
"floor": 0,
"structure": "SSSSSSSS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSSSSSSSS"
},
{
"floor": 1,
"structure": "SSGGGGSS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSSSSSSSS"
},
{
"floor": 2,
"structure": "SSGGGGSS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSSSSSSSS"
},
{
"floor": 3,
"structure": "SSSSSSSS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSAAAAAAS\nSSSSSSSS"
},
{
"floor": 4,
"structure": "SSSSSSSS\nSSSSSSSS\nSSSSSSSS\nSSSSSSSS\nSSSSSSSS\nSSSSSSSS\nSSSSSSSS\nSSSSSSSS\nSSSSSSSS\nSSSSSSSS\n"
}
]
}
Never response anything else. Do not design a building which is too large (more than 10 floors). Never use markdown format. Use \n for line feed.
USR_GEN_ADV: |
%DESCRIPTION%
The image is attached below.
# Developer Settings #
DEBUG_MODE: True
VERSION_NUMBER: "Alpha-1.0" # NEVER EDIT THIS IF YOU DON'T KNOW WHAT YOU ARE DOING
24 changes: 23 additions & 1 deletion console.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,24 @@ def generate_plugin(description):

return schem

def get_schematic_advanced(description):
    """
    Generates a schematic using the multi-step advanced-mode pipeline.

    The description is first expanded into an architectural programme, the
    programme is turned into image tags, a design image is generated from
    those tags, and finally the vision model is asked for the structure JSON,
    which is converted to a schematic.

    Args:
        description (str): The user's description of the structure.

    Returns:
        The schematic produced by core.text_to_schem from the model response.
    """
    print("(Advanced Mode) Generating programme...")
    programme = core.askgpt(
        config.BTR_DESC_SYS_GEN,
        config.BTR_DESC_USR_GEN.replace("%DESCRIPTION%", description),
        config.GENERATE_MODEL,
        disable_json_mode=True,
    )

    print("(Advanced Mode) Generating image tag...")
    image_tag = core.askgpt(
        config.IMG_TAG_SYS_GEN,
        config.IMG_TAG_USR_GEN.replace("%PROGRAMME%", programme),
        config.GENERATE_MODEL,
        disable_json_mode=True,
    )

    print("(Advanced Mode) Generating image...")
    # Append a "minecraft" keyword to steer the image style. The suffix used
    # to end with a stray ")" that leaked into the image prompt.
    tag = image_tag + ", minecraft"
    image_url = core.ask_dall_e(tag)

    print("(Advanced Mode) Generating schematic...")
    # NOTE(review): the vision prompt is filled with the raw description, not
    # the generated programme -- confirm this is intended.
    response = core.askgpt(
        config.SYS_GEN_ADV,
        config.USR_GEN_ADV.replace("%DESCRIPTION%", description),
        config.VISION_MODEL,
        image_url=image_url,
    )

    schem = core.text_to_schem(response)

    return schem

if __name__ == "__main__":
core.initialize()

Expand All @@ -54,7 +72,11 @@ def generate_plugin(description):

print("Generating...")

schem = generate_plugin(description)
if config.ADVANCED_MODE:
print("Advanced mode is enabled. Generating a schematic with advanced features.")
schem = get_schematic_advanced(description)
else:
schem = generate_plugin(description)

logger(f"console: Saving {name}.schem to generated/ folder.")
version_tag = core.input_version_to_mcs_tag(version)
Expand Down
80 changes: 69 additions & 11 deletions core.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
import mcschematic
import sys
import json
import requests
import base64
import uuid

from log_writer import logger
import config
Expand All @@ -20,35 +23,59 @@ def initialize():
"""
logger(f"Launch. Software version {config.VERSION_NUMBER}, platform {sys.platform}")

def askgpt(system_prompt: str, user_prompt: str, model_name: str):
def askgpt(system_prompt: str, user_prompt: str, model_name: str, disable_json_mode: bool = False, image_url: str = None):
"""
Interacts with ChatGPT using the specified prompts.
Args:
system_prompt (str): The system prompt.
user_prompt (str): The user prompt.
model_name (str): The model name to use.
disable_json_mode (bool): Whether to disable JSON mode.
Returns:
str: The response from ChatGPT.
"""
client = OpenAI(api_key=config.API_KEY, base_url=config.BASE_URL)
if image_url is not None and config.USE_DIFFERENT_APIKEY_FOR_VISION_MODEL:
logger("Using different API key for vision model.")
client = OpenAI(api_key=config.VISION_API_KEY, base_url=config.VISION_BASE_URL)
else:
client = OpenAI(api_key=config.API_KEY, base_url=config.BASE_URL)

logger("Initialized the OpenAI client.")

# Define the messages for the conversation
messages = [
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_prompt}
]
if image_url is not None:
messages = [
{"role": "system", "content": system_prompt},
{"role": "user", "content": [
{"type": "text", "text": user_prompt},
{"type": "image_url", "image_url": {"url": image_url}}
]
}
]
else:
messages = [
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_prompt}
]


logger(f"askgpt: system {system_prompt}")
logger(f"askgpt: user {user_prompt}")

# Create a chat completion
response = client.chat.completions.create(
model=model_name,
response_format={"type": "json_object"},
messages=messages
)
if disable_json_mode:
response = client.chat.completions.create(
model=model_name,
messages=messages
)
else:
response = client.chat.completions.create(
model=model_name,
response_format={"type": "json_object"},
messages=messages
)

logger(f"askgpt: response {response}")

Expand All @@ -57,6 +84,37 @@ def askgpt(system_prompt: str, user_prompt: str, model_name: str):
logger(f"askgpt: extracted reply {assistant_reply}")
return assistant_reply

def ask_dall_e(description: str):
    """
    Requests a single design image from the configured image-generation model.

    Args:
        description (str): The prompt text describing the image to generate.

    Returns:
        str: The URL of the generated image.
    """
    # Use the dedicated DALL-E credentials when enabled, otherwise fall back
    # to the general API key and base URL.
    if config.USE_DIFFERENT_APIKEY_FOR_DALLE_MODEL:
        api_key, base_url = config.DALLE_API_KEY, config.DALLE_BASE_URL
    else:
        api_key, base_url = config.API_KEY, config.BASE_URL
    dalle_client = OpenAI(api_key=api_key, base_url=base_url)

    logger("ask_dall_e: Generating design image using DALL-E API.")

    generation_params = {
        "model": config.IMAGE_GENERATION_MODEL,
        "prompt": description,
        "size": config.IMAGE_SIZE,
        "quality": "standard",
        "n": 1,
    }
    result = dalle_client.images.generate(**generation_params)

    # Exactly one image is requested (n=1), so take the first entry.
    image_url = result.data[0].url
    logger(f"ask_dall_e: Generated image URL {image_url}")
    return image_url

def text_to_schem(text: str):
"""
Converts a JSON string to a Minecraft schematic.
Expand Down
28 changes: 27 additions & 1 deletion ui.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,24 @@ def get_schematic(description):

return schem

def get_schematic_advanced(description):
    """
    Generates a schematic using the multi-step advanced-mode pipeline.

    The description is first expanded into an architectural programme, the
    programme is turned into image tags, a design image is generated from
    those tags, and finally the vision model is asked for the structure JSON,
    which is converted to a schematic.

    Args:
        description (str): The user's description of the structure.

    Returns:
        The schematic produced by core.text_to_schem from the model response.
    """
    print("(Advanced Mode) Generating programme...")
    programme = core.askgpt(
        config.BTR_DESC_SYS_GEN,
        config.BTR_DESC_USR_GEN.replace("%DESCRIPTION%", description),
        config.GENERATE_MODEL,
        disable_json_mode=True,
    )

    print("(Advanced Mode) Generating image tag...")
    image_tag = core.askgpt(
        config.IMG_TAG_SYS_GEN,
        config.IMG_TAG_USR_GEN.replace("%PROGRAMME%", programme),
        config.GENERATE_MODEL,
        disable_json_mode=True,
    )

    print("(Advanced Mode) Generating image...")
    # Append a "minecraft" keyword to steer the image style. The suffix used
    # to end with a stray ")" that leaked into the image prompt.
    tag = image_tag + ", minecraft"
    image_url = core.ask_dall_e(tag)

    print("(Advanced Mode) Generating schematic...")
    # NOTE(review): the vision prompt is filled with the raw description, not
    # the generated programme -- confirm this is intended.
    response = core.askgpt(
        config.SYS_GEN_ADV,
        config.USR_GEN_ADV.replace("%DESCRIPTION%", description),
        config.VISION_MODEL,
        image_url=image_url,
    )

    schem = core.text_to_schem(response)

    return schem

def generate_schematic():
"""
Generates a schematic file based on user input.
Expand All @@ -42,6 +60,11 @@ def generate_schematic():
"""
generate_button.config(state=tk.DISABLED, text="Generating...")

if config.ADVANCED_MODE:
msgbox.showwarning("Warning", "You are using advanced mode. This mode will generate schematic with higher quality, but it may take longer to generate.")

msgbox.showinfo("Info", "It is expected to take 30 seconds to 5 minutes. The programme may \"not responding\", this is normal, just be patient. DO NOT CLOSE THE PROGRAM. Click the button below to start generating.")

version = version_entry.get()
name = name_entry.get()
description = description_entry.get()
Expand All @@ -50,7 +73,10 @@ def generate_schematic():
logger(f"console: input name {name}")
logger(f"console: input description {description}")

schem = get_schematic(description)
if config.ADVANCED_MODE:
schem = get_schematic_advanced(description)
else:
schem = get_schematic(description)

logger(f"console: Saving {name}.schem to generated/ folder.")
version_tag = core.input_version_to_mcs_tag(version)
Expand Down

0 comments on commit ae9d7bd

Please sign in to comment.