From 291fa8396e66ee76aed3c9f666d80afd07743330 Mon Sep 17 00:00:00 2001 From: Eric Hennenfent Date: Fri, 3 May 2024 02:04:25 -0700 Subject: [PATCH 1/9] Add prompt manager (untested) --- live_illustrate/__main__.py | 36 +++++-------------------- live_illustrate/prompts.py | 20 ++++++++++++++ live_illustrate/prompts/image_extra.txt | 1 + live_illustrate/prompts/summary.txt | 5 ++++ live_illustrate/render.py | 7 +++-- live_illustrate/summarize.py | 10 +++---- 6 files changed, 38 insertions(+), 41 deletions(-) create mode 100644 live_illustrate/prompts.py create mode 100644 live_illustrate/prompts/image_extra.txt create mode 100644 live_illustrate/prompts/summary.txt diff --git a/live_illustrate/__main__.py b/live_illustrate/__main__.py index 6bab016..11f1f88 100644 --- a/live_illustrate/__main__.py +++ b/live_illustrate/__main__.py @@ -66,45 +66,21 @@ def get_args() -> argparse.Namespace: choices=["1792x1024", "1024x1792", "1024x1024", "512x512", "256x256"], ) parser.add_argument( - "--image_quality", - default="standard", - help="How fancy of an image to render", - choices=["standard", "hd"], + "--image_quality", default="standard", help="How fancy of an image to render", choices=["standard", "hd"] ) parser.add_argument( - "--image_style", - default="vivid", - help="How stylized of an image to render", - choices=["vivid", "natural"], - ) - parser.add_argument( - "--server_host", - default="0.0.0.0", - help="Address to bind web server", - ) - parser.add_argument( - "--server_port", - default=8080, - type=int, - help="Port to serve HTML viewer on", - ) - parser.add_argument( - "--open", - action="store_true", - help="Automatically open a browser tab for the rendered images", + "--image_style", default="vivid", help="How stylized of an image to render", choices=["vivid", "natural"] ) + parser.add_argument("--server_host", default="0.0.0.0", help="Address to bind web server") + parser.add_argument("--server_port", default=8080, type=int, help="Port to serve HTML viewer 
on") + parser.add_argument("--open", action="store_true", help="Automatically open a browser tab for the rendered images") parser.add_argument( "--persistence_of_memory", default=0.2, # Expressed as a fraction of the total buffered transcription type=float, help="How much of the previous transcription to retain after generating each summary. 0 - 1.0", ) - parser.add_argument( - "-v", - "--verbose", - action="count", - default=0, - ) + parser.add_argument("-v", "--verbose", action="count", default=0) return parser.parse_args() diff --git a/live_illustrate/prompts.py b/live_illustrate/prompts.py new file mode 100644 index 0000000..1c36875 --- /dev/null +++ b/live_illustrate/prompts.py @@ -0,0 +1,20 @@ +from pathlib import Path +import typing as t + +PROMPTS_FOLDER = Path(__file__).parent.joinpath("prompts") +IMAGE_EXTENSION = PROMPTS_FOLDER.joinpath("image_extra.txt") +SUMMARY = PROMPTS_FOLDER.joinpath("summary.txt") + + +class PromptManager: + def __init__(self): + self.cached: t.Dict[Path, str] = {} + self.last_modified = {} + + def get_prompt(self, path: Path) -> str: + last_modified = path.stat().st_mtime + if self.last_modified.get(path) != last_modified: + with open(path, "r") as f: + self.cached[path] = f.read() + self.last_modified[path] = last_modified + return self.cached[path] diff --git a/live_illustrate/prompts/image_extra.txt b/live_illustrate/prompts/image_extra.txt new file mode 100644 index 0000000..2a4801d --- /dev/null +++ b/live_illustrate/prompts/image_extra.txt @@ -0,0 +1 @@ +digital painting, fantasy art diff --git a/live_illustrate/prompts/summary.txt b/live_illustrate/prompts/summary.txt new file mode 100644 index 0000000..68de012 --- /dev/null +++ b/live_illustrate/prompts/summary.txt @@ -0,0 +1,5 @@ +You are a helpful assistant that describes scenes to an artist who wants to draw them. +You will be given several lines of dialogue that contain details about the physical surroundings of the characters. 
+Your job is to summarize the details of the scene in a bulleted list containing 4-7 bullet points. +If there is more than one scene described by the dialog, summarize only the most recent one. +Remember to be concise and not include details that cannot be seen. diff --git a/live_illustrate/render.py b/live_illustrate/render.py index 536419b..5fe5079 100644 --- a/live_illustrate/render.py +++ b/live_illustrate/render.py @@ -3,11 +3,9 @@ from openai import OpenAI +from .prompts import PromptManager, IMAGE_EXTENSION from .util import AsyncThread, Image, Summary -# Prompt engineering level 1,000,000 -EXTRA: t.List[str] = ["digital painting, fantasy art"] - class ImageRenderer(AsyncThread): def __init__(self, model: str, image_size: str, image_quality: str, image_style: str) -> None: @@ -17,13 +15,14 @@ def __init__(self, model: str, image_size: str, image_quality: str, image_style: self.size: str = image_size self.image_quality: str = image_quality self.image_style: str = image_style + self.prompt_manager = PromptManager() def work(self, summary: Summary) -> Image | None: """Sends the text to Dall-e, spits out an image URL""" start = datetime.now() rendered = self.openai_client.images.generate( model=self.model, - prompt="\n".join((summary.summary, *EXTRA)), + prompt=summary.summary + "\n" + self.prompt_manager.get_prompt(IMAGE_EXTENSION), size=self.size, # type: ignore[arg-type] quality=self.image_quality, # type: ignore[arg-type] style=self.image_style, # type: ignore[arg-type] diff --git a/live_illustrate/summarize.py b/live_illustrate/summarize.py index 58ad350..137ff99 100644 --- a/live_illustrate/summarize.py +++ b/live_illustrate/summarize.py @@ -2,20 +2,16 @@ from openai import OpenAI +from .prompts import PromptManager, SUMMARY from .util import AsyncThread, Summary, Transcription, num_tokens_from_string -SYSTEM_PROMPT = "You are a helpful assistant that describes scenes to an artist who wants to draw them. 
\ -You will be given several lines of dialogue that contain details about the physical surroundings of the characters. \ -Your job is to summarize the details of the scene in a bulleted list containing 4-7 bullet points. \ -If there is more than one scene described by the dialog, summarize only the most recent one. \ -Remember to be concise and not include details that cannot be seen." # Not so good about this last bit, eh? - class TextSummarizer(AsyncThread): def __init__(self, model: str) -> None: super().__init__("TextSummarizer") self.openai_client: OpenAI = OpenAI() self.model: str = model + self.prompt_manager = PromptManager() def work(self, transcription: Transcription) -> Summary | None: """Sends the big buffer of provided text to ChatGPT, returns bullets describing the setting""" @@ -28,7 +24,7 @@ def work(self, transcription: Transcription) -> Summary | None: response = self.openai_client.chat.completions.create( model=self.model, messages=[ - {"role": "system", "content": SYSTEM_PROMPT}, + {"role": "system", "content": self.prompt_manager.get_prompt(SUMMARY)}, {"role": "user", "content": text}, ], ) From 745ddbcaf6cec35e73fa4472a7a6e80775a624dc Mon Sep 17 00:00:00 2001 From: Eric Hennenfent Date: Fri, 3 May 2024 02:09:26 -0700 Subject: [PATCH 2/9] isort i figured out how to run it on windows --- live_illustrate/prompts.py | 2 +- live_illustrate/render.py | 2 +- live_illustrate/summarize.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/live_illustrate/prompts.py b/live_illustrate/prompts.py index 1c36875..8aa6deb 100644 --- a/live_illustrate/prompts.py +++ b/live_illustrate/prompts.py @@ -1,5 +1,5 @@ -from pathlib import Path import typing as t +from pathlib import Path PROMPTS_FOLDER = Path(__file__).parent.joinpath("prompts") IMAGE_EXTENSION = PROMPTS_FOLDER.joinpath("image_extra.txt") diff --git a/live_illustrate/render.py b/live_illustrate/render.py index 5fe5079..ccba01b 100644 --- a/live_illustrate/render.py +++ 
b/live_illustrate/render.py @@ -3,7 +3,7 @@ from openai import OpenAI -from .prompts import PromptManager, IMAGE_EXTENSION +from .prompts import IMAGE_EXTENSION, PromptManager from .util import AsyncThread, Image, Summary diff --git a/live_illustrate/summarize.py b/live_illustrate/summarize.py index 137ff99..d751f08 100644 --- a/live_illustrate/summarize.py +++ b/live_illustrate/summarize.py @@ -2,7 +2,7 @@ from openai import OpenAI -from .prompts import PromptManager, SUMMARY +from .prompts import SUMMARY, PromptManager from .util import AsyncThread, Summary, Transcription, num_tokens_from_string From 6a4ef8d2e384d29d1b930257a3113a15b0c1e192 Mon Sep 17 00:00:00 2001 From: Eric Hennenfent Date: Fri, 3 May 2024 02:14:03 -0700 Subject: [PATCH 3/9] Ruff --- .github/workflows/linting.yml | 2 +- live_illustrate/render.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 62af4ea..e2f7aea 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -31,6 +31,6 @@ jobs: - name: Lint formatting run: black --check . - name: Lint semantics - run: ruff . + run: ruff check . - name: Lint types run: mypy . diff --git a/live_illustrate/render.py b/live_illustrate/render.py index ccba01b..f17308e 100644 --- a/live_illustrate/render.py +++ b/live_illustrate/render.py @@ -1,4 +1,3 @@ -import typing as t from datetime import datetime from openai import OpenAI From 73298358f9b4cc16215dbed4fe1d3318e9de7af2 Mon Sep 17 00:00:00 2001 From: Eric Hennenfent Date: Fri, 3 May 2024 02:47:45 -0700 Subject: [PATCH 4/9] Much more detailed prompt Maybe an example will help? 
--- live_illustrate/prompts/summary.txt | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/live_illustrate/prompts/summary.txt b/live_illustrate/prompts/summary.txt index 68de012..5bc16da 100644 --- a/live_illustrate/prompts/summary.txt +++ b/live_illustrate/prompts/summary.txt @@ -1,5 +1,19 @@ -You are a helpful assistant that describes scenes to an artist who wants to draw them. -You will be given several lines of dialogue that contain details about the physical surroundings of the characters. -Your job is to summarize the details of the scene in a bulleted list containing 4-7 bullet points. -If there is more than one scene described by the dialog, summarize only the most recent one. -Remember to be concise and not include details that cannot be seen. +You are a skilled illustrator who draws pictures from a tabletop role playing game. +You will receive lines of dialogue that will include (in part) details about the physical surroundings and appearance of the characters. +In one to two sentences, describe an illustration of the current setting. + +For example, given the following dialog (between quotes): +"Luca, what are you doing right now? +I'm still exploring this dungeon. +Great. You come around a corner and enter a wide room. It's too dark to see what's inside. +Shouldn't elves have darkvision? +Yes... something's making it still be too dark. +Can I sneak in? +Uh your armor's too loud. +Okay, I'll light a torch and then roll to investigate. And that's a four. +You see a rune painted in a dark red liquid on the wall, but don't seem to recognize it." + +You might say: "An armor-clad elf holding a torch peers into a dark dungeon room. There's a strange rune painted on the wall." + +If there is more than one scene described by the dialog, try to focus on the most recent one. +Remember to use clear language and to only include details that can be seen. 
From 360fbb6abb9472af78be334c60f09ee9842ff51a Mon Sep 17 00:00:00 2001 From: Eric Hennenfent Date: Fri, 3 May 2024 23:35:31 -0700 Subject: [PATCH 5/9] Add a manual entry mode for debugging --- live_illustrate/__main__.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/live_illustrate/__main__.py b/live_illustrate/__main__.py index 11f1f88..743f3b8 100644 --- a/live_illustrate/__main__.py +++ b/live_illustrate/__main__.py @@ -80,6 +80,11 @@ def get_args() -> argparse.Namespace: type=float, help="How much of the previous transcription to retain after generating each summary. 0 - 1.0", ) + parser.add_argument( + "--oneshot", + type=argparse.FileType("r"), + help="Read transcription lines from a text file and render. Useful for testing.", + ) parser.add_argument("-v", "--verbose", action="count", default=0) return parser.parse_args() @@ -93,8 +98,11 @@ def main() -> None: logging.getLogger("requests").setLevel(logging.INFO if args.verbose > 0 else logging.WARNING) logging.getLogger("werkzeug").setLevel(logging.INFO if args.verbose > 0 else logging.WARNING) # flask - # create each of our thread objects with the apppropriate command line args - transcriber = AudioTranscriber(model=args.audio_model, phrase_timeout=args.wait_minutes * args.phrase_timeout) + # We don't test transcription in oneshot mode + if not (is_oneshot := args.oneshot is not None): + transcriber = AudioTranscriber(model=args.audio_model, phrase_timeout=args.wait_minutes * args.phrase_timeout) + + # Create each of our thread objects with the appropriate command line args buffer = TextBuffer( wait_minutes=args.wait_minutes, max_context=args.max_context, persistence=args.persistence_of_memory ) @@ -127,7 +135,8 @@ def on_image_rendered(image: Image | None) -> None: session_data.save_image(image) # start each thread with the appropriate callback - Thread(target=transcriber.start, args=(on_text_transcribed,), daemon=True).start() + if not is_oneshot: +
Thread(target=transcriber.start, args=(on_text_transcribed,), daemon=True).start() Thread(target=summarizer.start, args=(on_summary_generated,), daemon=True).start() Thread(target=renderer.start, args=(on_image_rendered,), daemon=True).start() @@ -145,6 +154,12 @@ def open_browser() -> None: Thread(target=lambda: open_browser).start() + if is_oneshot: + # Read all the lines from the file, pretend we transcribed them + for line in args.oneshot: # type: ignore + # This will still dump things in the data directory. No sense short circuiting the testing. + on_text_transcribed(Transcription(line.strip())) + # flask feels like it probably has a good ctrl+c handler, so we'll make this one the main thread server.start() From 89b77770d506e0a21b8965192c4e7945eeea5514 Mon Sep 17 00:00:00 2001 From: Eric Hennenfent Date: Fri, 3 May 2024 23:40:44 -0700 Subject: [PATCH 6/9] Add a data directory override argument --- live_illustrate/__main__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/live_illustrate/__main__.py b/live_illustrate/__main__.py index 743f3b8..9a96753 100644 --- a/live_illustrate/__main__.py +++ b/live_illustrate/__main__.py @@ -85,6 +85,7 @@ def get_args() -> argparse.Namespace: type=argparse.FileType("r"), help="Read transcription lines from a text file and render. 
Useful for testing.", ) + parser.add_argument("--data_dir", type=str, default=str(DEFAULT_DATA_DIR), help="Directory to save session data") parser.add_argument("-v", "--verbose", action="count", default=0) return parser.parse_args() @@ -117,7 +118,7 @@ def main() -> None: host=args.server_host, port=args.server_port, default_image=f"https://placehold.co/{args.image_size}/png" ) - with SessionData(DEFAULT_DATA_DIR, echo=True) as session_data: + with SessionData(Path(args.data_dir), echo=True) as session_data: # wire up some callbacks to save the intermediate data and forward it along def on_text_transcribed(transcription: Transcription) -> None: if is_transcription_interesting(transcription): From 443bce20eb67da16ec2850d713f0651ef6821810 Mon Sep 17 00:00:00 2001 From: Eric Hennenfent Date: Fri, 3 May 2024 23:55:16 -0700 Subject: [PATCH 7/9] Tiniest prompt tweak "one to two" vs "two to three" sentences makes a huge difference, for future reference. --- live_illustrate/prompts/summary.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/live_illustrate/prompts/summary.txt b/live_illustrate/prompts/summary.txt index 5bc16da..78a8605 100644 --- a/live_illustrate/prompts/summary.txt +++ b/live_illustrate/prompts/summary.txt @@ -13,7 +13,7 @@ Uh your armor's too loud. Okay, I'll light a torch and then roll to investigate. And that's a four. You see a rune painted in a dark red liquid on the wall, but don't seem to recognize it." -You might say: "An armor-clad elf holding a torch peers into a dark dungeon room. There's a strange rune painted on the wall." +You might say: "An armor-clad elf holding a torch peers into a dark dungeon room. There's a strange red rune painted on the wall." If there is more than one scene described by the dialog, try to focus on the most recent one. Remember to use clear language and to only include details that can be seen. 
From 185ab6e77e1adc62fb9d01f4894df2726d2bb655 Mon Sep 17 00:00:00 2001 From: Eric Hennenfent Date: Fri, 3 May 2024 23:58:32 -0700 Subject: [PATCH 8/9] Maybe don't use an actual character name Random elf sounding name so we don't bias it towards one character accidentally --- live_illustrate/prompts/summary.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/live_illustrate/prompts/summary.txt b/live_illustrate/prompts/summary.txt index 78a8605..ac7067e 100644 --- a/live_illustrate/prompts/summary.txt +++ b/live_illustrate/prompts/summary.txt @@ -3,7 +3,7 @@ You will receive lines of dialogue that will include (in part) details about the In one to two sentences, describe an illustration of the current setting. For example, given the following dialog (between quotes): -"Luca, what are you doing right now? +"Kyran, what are you doing right now? I'm still exploring this dungeon. Great. You come around a corner and enter a wide room. It's too dark to see what's inside. Shouldn't elves have darkvision? From 128c53289e4f5fd0d30452bd95db2e44a5f41235 Mon Sep 17 00:00:00 2001 From: Eric Hennenfent Date: Sat, 4 May 2024 00:02:37 -0700 Subject: [PATCH 9/9] More dynamic example https://community.openai.com/t/dalle3-prompt-tips-and-tricks-thread/498040 --- live_illustrate/prompts/summary.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/live_illustrate/prompts/summary.txt b/live_illustrate/prompts/summary.txt index ac7067e..adab1f5 100644 --- a/live_illustrate/prompts/summary.txt +++ b/live_illustrate/prompts/summary.txt @@ -13,7 +13,7 @@ Uh your armor's too loud. Okay, I'll light a torch and then roll to investigate. And that's a four. You see a rune painted in a dark red liquid on the wall, but don't seem to recognize it." -You might say: "An armor-clad elf holding a torch peers into a dark dungeon room. There's a strange red rune painted on the wall." +You might say: "An armor-clad elf holding a torch peers into a dark dungeon room. 
A strange red rune painted on the wall catches his eye." If there is more than one scene described by the dialog, try to focus on the most recent one. Remember to use clear language and to only include details that can be seen.