From 291fa8396e66ee76aed3c9f666d80afd07743330 Mon Sep 17 00:00:00 2001
From: Eric Hennenfent <ecapstone@gmail.com>
Date: Fri, 3 May 2024 02:04:25 -0700
Subject: [PATCH 1/9] Add prompt manager (untested)

---
 live_illustrate/__main__.py             | 36 +++++--------------------
 live_illustrate/prompts.py              | 20 ++++++++++++++
 live_illustrate/prompts/image_extra.txt |  1 +
 live_illustrate/prompts/summary.txt     |  5 ++++
 live_illustrate/render.py               |  7 +++--
 live_illustrate/summarize.py            | 10 +++----
 6 files changed, 38 insertions(+), 41 deletions(-)
 create mode 100644 live_illustrate/prompts.py
 create mode 100644 live_illustrate/prompts/image_extra.txt
 create mode 100644 live_illustrate/prompts/summary.txt

diff --git a/live_illustrate/__main__.py b/live_illustrate/__main__.py
index 6bab016..11f1f88 100644
--- a/live_illustrate/__main__.py
+++ b/live_illustrate/__main__.py
@@ -66,45 +66,21 @@ def get_args() -> argparse.Namespace:
         choices=["1792x1024", "1024x1792", "1024x1024", "512x512", "256x256"],
     )
     parser.add_argument(
-        "--image_quality",
-        default="standard",
-        help="How fancy of an image to render",
-        choices=["standard", "hd"],
+        "--image_quality", default="standard", help="How fancy of an image to render", choices=["standard", "hd"]
     )
     parser.add_argument(
-        "--image_style",
-        default="vivid",
-        help="How stylized of an image to render",
-        choices=["vivid", "natural"],
-    )
-    parser.add_argument(
-        "--server_host",
-        default="0.0.0.0",
-        help="Address to bind web server",
-    )
-    parser.add_argument(
-        "--server_port",
-        default=8080,
-        type=int,
-        help="Port to serve HTML viewer on",
-    )
-    parser.add_argument(
-        "--open",
-        action="store_true",
-        help="Automatically open a browser tab for the rendered images",
+        "--image_style", default="vivid", help="How stylized of an image to render", choices=["vivid", "natural"]
     )
+    parser.add_argument("--server_host", default="0.0.0.0", help="Address to bind web server")
+    parser.add_argument("--server_port", default=8080, type=int, help="Port to serve HTML viewer on")
+    parser.add_argument("--open", action="store_true", help="Automatically open a browser tab for the rendered images")
     parser.add_argument(
         "--persistence_of_memory",
         default=0.2,  # Expressed as a fraction of the total buffered transcription
         type=float,
         help="How much of the previous transcription to retain after generating each summary. 0 - 1.0",
     )
-    parser.add_argument(
-        "-v",
-        "--verbose",
-        action="count",
-        default=0,
-    )
+    parser.add_argument("-v", "--verbose", action="count", default=0)
     return parser.parse_args()
 
 
diff --git a/live_illustrate/prompts.py b/live_illustrate/prompts.py
new file mode 100644
index 0000000..1c36875
--- /dev/null
+++ b/live_illustrate/prompts.py
@@ -0,0 +1,20 @@
+from pathlib import Path
+import typing as t
+
+PROMPTS_FOLDER = Path(__file__).parent.joinpath("prompts")
+IMAGE_EXTENSION = PROMPTS_FOLDER.joinpath("image_extra.txt")
+SUMMARY = PROMPTS_FOLDER.joinpath("summary.txt")
+
+
+class PromptManager:
+    def __init__(self):
+        self.cached: t.Dict[Path, str] = {}
+        self.last_modified = {}
+
+    def get_prompt(self, path: Path) -> str:
+        last_modified = path.stat().st_mtime
+        if self.last_modified.get(path) != last_modified:
+            with open(path, "r") as f:
+                self.cached[path] = f.read()
+            self.last_modified[path] = last_modified
+        return self.cached[path]
diff --git a/live_illustrate/prompts/image_extra.txt b/live_illustrate/prompts/image_extra.txt
new file mode 100644
index 0000000..2a4801d
--- /dev/null
+++ b/live_illustrate/prompts/image_extra.txt
@@ -0,0 +1 @@
+digital painting, fantasy art
diff --git a/live_illustrate/prompts/summary.txt b/live_illustrate/prompts/summary.txt
new file mode 100644
index 0000000..68de012
--- /dev/null
+++ b/live_illustrate/prompts/summary.txt
@@ -0,0 +1,5 @@
+You are a helpful assistant that describes scenes to an artist who wants to draw them. 
+You will be given several lines of dialogue that contain details about the physical surroundings of the characters. 
+Your job is to summarize the details of the scene in a bulleted list containing 4-7 bullet points. 
+If there is more than one scene described by the dialog, summarize only the most recent one. 
+Remember to be concise and not include details that cannot be seen.
diff --git a/live_illustrate/render.py b/live_illustrate/render.py
index 536419b..5fe5079 100644
--- a/live_illustrate/render.py
+++ b/live_illustrate/render.py
@@ -3,11 +3,9 @@
 
 from openai import OpenAI
 
+from .prompts import PromptManager, IMAGE_EXTENSION
 from .util import AsyncThread, Image, Summary
 
-# Prompt engineering level 1,000,000
-EXTRA: t.List[str] = ["digital painting, fantasy art"]
-
 
 class ImageRenderer(AsyncThread):
     def __init__(self, model: str, image_size: str, image_quality: str, image_style: str) -> None:
@@ -17,13 +15,14 @@ def __init__(self, model: str, image_size: str, image_quality: str, image_style:
         self.size: str = image_size
         self.image_quality: str = image_quality
         self.image_style: str = image_style
+        self.prompt_manager = PromptManager()
 
     def work(self, summary: Summary) -> Image | None:
         """Sends the text to Dall-e, spits out an image URL"""
         start = datetime.now()
         rendered = self.openai_client.images.generate(
             model=self.model,
-            prompt="\n".join((summary.summary, *EXTRA)),
+            prompt=summary.summary + "\n" + self.prompt_manager.get_prompt(IMAGE_EXTENSION),
             size=self.size,  # type: ignore[arg-type]
             quality=self.image_quality,  # type: ignore[arg-type]
             style=self.image_style,  # type: ignore[arg-type]
diff --git a/live_illustrate/summarize.py b/live_illustrate/summarize.py
index 58ad350..137ff99 100644
--- a/live_illustrate/summarize.py
+++ b/live_illustrate/summarize.py
@@ -2,20 +2,16 @@
 
 from openai import OpenAI
 
+from .prompts import PromptManager, SUMMARY
 from .util import AsyncThread, Summary, Transcription, num_tokens_from_string
 
-SYSTEM_PROMPT = "You are a helpful assistant that describes scenes to an artist who wants to draw them. \
-You will be given several lines of dialogue that contain details about the physical surroundings of the characters. \
-Your job is to summarize the details of the scene in a bulleted list containing 4-7 bullet points. \
-If there is more than one scene described by the dialog, summarize only the most recent one. \
-Remember to be concise and not include details that cannot be seen."  # Not so good about this last bit, eh?
-
 
 class TextSummarizer(AsyncThread):
     def __init__(self, model: str) -> None:
         super().__init__("TextSummarizer")
         self.openai_client: OpenAI = OpenAI()
         self.model: str = model
+        self.prompt_manager = PromptManager()
 
     def work(self, transcription: Transcription) -> Summary | None:
         """Sends the big buffer of provided text to ChatGPT, returns bullets describing the setting"""
@@ -28,7 +24,7 @@ def work(self, transcription: Transcription) -> Summary | None:
         response = self.openai_client.chat.completions.create(
             model=self.model,
             messages=[
-                {"role": "system", "content": SYSTEM_PROMPT},
+                {"role": "system", "content": self.prompt_manager.get_prompt(SUMMARY)},
                 {"role": "user", "content": text},
             ],
         )

From 745ddbcaf6cec35e73fa4472a7a6e80775a624dc Mon Sep 17 00:00:00 2001
From: Eric Hennenfent <ecapstone@gmail.com>
Date: Fri, 3 May 2024 02:09:26 -0700
Subject: [PATCH 2/9] isort

I figured out how to run it on Windows
---
 live_illustrate/prompts.py   | 2 +-
 live_illustrate/render.py    | 2 +-
 live_illustrate/summarize.py | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/live_illustrate/prompts.py b/live_illustrate/prompts.py
index 1c36875..8aa6deb 100644
--- a/live_illustrate/prompts.py
+++ b/live_illustrate/prompts.py
@@ -1,5 +1,5 @@
-from pathlib import Path
 import typing as t
+from pathlib import Path
 
 PROMPTS_FOLDER = Path(__file__).parent.joinpath("prompts")
 IMAGE_EXTENSION = PROMPTS_FOLDER.joinpath("image_extra.txt")
diff --git a/live_illustrate/render.py b/live_illustrate/render.py
index 5fe5079..ccba01b 100644
--- a/live_illustrate/render.py
+++ b/live_illustrate/render.py
@@ -3,7 +3,7 @@
 
 from openai import OpenAI
 
-from .prompts import PromptManager, IMAGE_EXTENSION
+from .prompts import IMAGE_EXTENSION, PromptManager
 from .util import AsyncThread, Image, Summary
 
 
diff --git a/live_illustrate/summarize.py b/live_illustrate/summarize.py
index 137ff99..d751f08 100644
--- a/live_illustrate/summarize.py
+++ b/live_illustrate/summarize.py
@@ -2,7 +2,7 @@
 
 from openai import OpenAI
 
-from .prompts import PromptManager, SUMMARY
+from .prompts import SUMMARY, PromptManager
 from .util import AsyncThread, Summary, Transcription, num_tokens_from_string
 
 

From 6a4ef8d2e384d29d1b930257a3113a15b0c1e192 Mon Sep 17 00:00:00 2001
From: Eric Hennenfent <ecapstone@gmail.com>
Date: Fri, 3 May 2024 02:14:03 -0700
Subject: [PATCH 3/9] Ruff

---
 .github/workflows/linting.yml | 2 +-
 live_illustrate/render.py     | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml
index 62af4ea..e2f7aea 100644
--- a/.github/workflows/linting.yml
+++ b/.github/workflows/linting.yml
@@ -31,6 +31,6 @@ jobs:
     - name: Lint formatting
       run: black --check .
     - name: Lint semantics
-      run: ruff .
+      run: ruff check .
     - name: Lint types
       run: mypy .
diff --git a/live_illustrate/render.py b/live_illustrate/render.py
index ccba01b..f17308e 100644
--- a/live_illustrate/render.py
+++ b/live_illustrate/render.py
@@ -1,4 +1,3 @@
-import typing as t
 from datetime import datetime
 
 from openai import OpenAI

From 73298358f9b4cc16215dbed4fe1d3318e9de7af2 Mon Sep 17 00:00:00 2001
From: Eric Hennenfent <ecapstone@gmail.com>
Date: Fri, 3 May 2024 02:47:45 -0700
Subject: [PATCH 4/9] Much more detailed prompt

Maybe an example will help?
---
 live_illustrate/prompts/summary.txt | 24 +++++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/live_illustrate/prompts/summary.txt b/live_illustrate/prompts/summary.txt
index 68de012..5bc16da 100644
--- a/live_illustrate/prompts/summary.txt
+++ b/live_illustrate/prompts/summary.txt
@@ -1,5 +1,19 @@
-You are a helpful assistant that describes scenes to an artist who wants to draw them. 
-You will be given several lines of dialogue that contain details about the physical surroundings of the characters. 
-Your job is to summarize the details of the scene in a bulleted list containing 4-7 bullet points. 
-If there is more than one scene described by the dialog, summarize only the most recent one. 
-Remember to be concise and not include details that cannot be seen.
+You are a skilled illustrator who draws pictures from a tabletop role playing game.
+You will receive lines of dialogue that will include (in part) details about the physical surroundings and appearance of the characters.
+In one to two sentences, describe an illustration of the current setting. 
+
+For example, given the following dialog (between quotes):
+"Luca, what are you doing right now? 
+I'm still exploring this dungeon. 
+Great. You come around a corner and enter a wide room. It's too dark to see what's inside. 
+Shouldn't elves have darkvision? 
+Yes... something's making it still be too dark.
+Can I sneak in? 
+Uh your armor's too loud. 
+Okay, I'll light a torch and then roll to investigate. And that's a four. 
+You see a rune painted in a dark red liquid on the wall, but don't seem to recognize it."
+
+You might say: "An armor-clad elf holding a torch peers into a dark dungeon room. There's a strange rune painted on the wall." 
+
+If there is more than one scene described by the dialog, try to focus on the most recent one. 
+Remember to use clear language and to only include details that can be seen. 

From 360fbb6abb9472af78be334c60f09ee9842ff51a Mon Sep 17 00:00:00 2001
From: Eric Hennenfent <ecapstone@gmail.com>
Date: Fri, 3 May 2024 23:35:31 -0700
Subject: [PATCH 5/9] Add a manual entry mode for debugging

---
 live_illustrate/__main__.py | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/live_illustrate/__main__.py b/live_illustrate/__main__.py
index 11f1f88..743f3b8 100644
--- a/live_illustrate/__main__.py
+++ b/live_illustrate/__main__.py
@@ -80,6 +80,11 @@ def get_args() -> argparse.Namespace:
         type=float,
         help="How much of the previous transcription to retain after generating each summary. 0 - 1.0",
     )
+    parser.add_argument(
+        "--oneshot",
+        type=argparse.FileType("r"),
+        help="Read transcription lines from a text file and render. Useful for testing.",
+    )
     parser.add_argument("-v", "--verbose", action="count", default=0)
     return parser.parse_args()
 
@@ -93,8 +98,11 @@ def main() -> None:
     logging.getLogger("requests").setLevel(logging.INFO if args.verbose > 0 else logging.WARNING)
     logging.getLogger("werkzeug").setLevel(logging.INFO if args.verbose > 0 else logging.WARNING)  # flask
 
-    # create each of our thread objects with the apppropriate command line args
-    transcriber = AudioTranscriber(model=args.audio_model, phrase_timeout=args.wait_minutes * args.phrase_timeout)
+    # We don't test transcription in oneshot mode
+    if not (is_oneshot := args.oneshot is not None):
+        transcriber = AudioTranscriber(model=args.audio_model, phrase_timeout=args.wait_minutes * args.phrase_timeout)
+
+    # Create each of our thread objects with the appropriate command line args
     buffer = TextBuffer(
         wait_minutes=args.wait_minutes, max_context=args.max_context, persistence=args.persistence_of_memory
     )
@@ -127,7 +135,8 @@ def on_image_rendered(image: Image | None) -> None:
                 session_data.save_image(image)
 
         # start each thread with the appropriate callback
-        Thread(target=transcriber.start, args=(on_text_transcribed,), daemon=True).start()
+        if not is_oneshot:
+            Thread(target=transcriber.start, args=(on_text_transcribed,), daemon=True).start()
         Thread(target=summarizer.start, args=(on_summary_generated,), daemon=True).start()
         Thread(target=renderer.start, args=(on_image_rendered,), daemon=True).start()
 
@@ -145,6 +154,12 @@ def open_browser() -> None:
 
             Thread(target=lambda: open_browser).start()
 
+        if is_oneshot:
+            # Read all the lines from the file, pretend we transcribed them
+            for line in args.oneshot:  # type: ignore
+                # This will still dump things in the data directory. No sense short circuiting the testing.
+                on_text_transcribed(Transcription(line.strip()))
+
         # flask feels like it probably has a good ctrl+c handler, so we'll make this one the main thread
         server.start()
 

From 89b77770d506e0a21b8965192c4e7945eeea5514 Mon Sep 17 00:00:00 2001
From: Eric Hennenfent <ecapstone@gmail.com>
Date: Fri, 3 May 2024 23:40:44 -0700
Subject: [PATCH 6/9] Add a data directory override argument

---
 live_illustrate/__main__.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/live_illustrate/__main__.py b/live_illustrate/__main__.py
index 743f3b8..9a96753 100644
--- a/live_illustrate/__main__.py
+++ b/live_illustrate/__main__.py
@@ -85,6 +85,7 @@ def get_args() -> argparse.Namespace:
         type=argparse.FileType("r"),
         help="Read transcription lines from a text file and render. Useful for testing.",
     )
+    parser.add_argument("--data_dir", type=str, default=str(DEFAULT_DATA_DIR), help="Directory to save session data")
     parser.add_argument("-v", "--verbose", action="count", default=0)
     return parser.parse_args()
 
@@ -117,7 +118,7 @@ def main() -> None:
         host=args.server_host, port=args.server_port, default_image=f"https://placehold.co/{args.image_size}/png"
     )
 
-    with SessionData(DEFAULT_DATA_DIR, echo=True) as session_data:
+    with SessionData(Path(args.data_dir), echo=True) as session_data:
         # wire up some callbacks to save the intermediate data and forward it along
         def on_text_transcribed(transcription: Transcription) -> None:
             if is_transcription_interesting(transcription):

From 443bce20eb67da16ec2850d713f0651ef6821810 Mon Sep 17 00:00:00 2001
From: Eric Hennenfent <ecapstone@gmail.com>
Date: Fri, 3 May 2024 23:55:16 -0700
Subject: [PATCH 7/9] Tiniest prompt tweak

"one to two" vs "two to three" sentences makes a huge difference, for future reference.
---
 live_illustrate/prompts/summary.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/live_illustrate/prompts/summary.txt b/live_illustrate/prompts/summary.txt
index 5bc16da..78a8605 100644
--- a/live_illustrate/prompts/summary.txt
+++ b/live_illustrate/prompts/summary.txt
@@ -13,7 +13,7 @@ Uh your armor's too loud.
 Okay, I'll light a torch and then roll to investigate. And that's a four. 
 You see a rune painted in a dark red liquid on the wall, but don't seem to recognize it."
 
-You might say: "An armor-clad elf holding a torch peers into a dark dungeon room. There's a strange rune painted on the wall." 
+You might say: "An armor-clad elf holding a torch peers into a dark dungeon room. There's a strange red rune painted on the wall." 
 
 If there is more than one scene described by the dialog, try to focus on the most recent one. 
 Remember to use clear language and to only include details that can be seen. 

From 185ab6e77e1adc62fb9d01f4894df2726d2bb655 Mon Sep 17 00:00:00 2001
From: Eric Hennenfent <ecapstone@gmail.com>
Date: Fri, 3 May 2024 23:58:32 -0700
Subject: [PATCH 8/9] Maybe don't use an actual character name

Random elf sounding name so we don't bias it towards one character accidentally
---
 live_illustrate/prompts/summary.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/live_illustrate/prompts/summary.txt b/live_illustrate/prompts/summary.txt
index 78a8605..ac7067e 100644
--- a/live_illustrate/prompts/summary.txt
+++ b/live_illustrate/prompts/summary.txt
@@ -3,7 +3,7 @@ You will receive lines of dialogue that will include (in part) details about the
 In one to two sentences, describe an illustration of the current setting. 
 
 For example, given the following dialog (between quotes):
-"Luca, what are you doing right now? 
+"Kyran, what are you doing right now? 
 I'm still exploring this dungeon. 
 Great. You come around a corner and enter a wide room. It's too dark to see what's inside. 
 Shouldn't elves have darkvision? 

From 128c53289e4f5fd0d30452bd95db2e44a5f41235 Mon Sep 17 00:00:00 2001
From: Eric Hennenfent <ecapstone@gmail.com>
Date: Sat, 4 May 2024 00:02:37 -0700
Subject: [PATCH 9/9] More dynamic example

https://community.openai.com/t/dalle3-prompt-tips-and-tricks-thread/498040
---
 live_illustrate/prompts/summary.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/live_illustrate/prompts/summary.txt b/live_illustrate/prompts/summary.txt
index ac7067e..adab1f5 100644
--- a/live_illustrate/prompts/summary.txt
+++ b/live_illustrate/prompts/summary.txt
@@ -13,7 +13,7 @@ Uh your armor's too loud.
 Okay, I'll light a torch and then roll to investigate. And that's a four. 
 You see a rune painted in a dark red liquid on the wall, but don't seem to recognize it."
 
-You might say: "An armor-clad elf holding a torch peers into a dark dungeon room. There's a strange red rune painted on the wall." 
+You might say: "An armor-clad elf holding a torch peers into a dark dungeon room. A strange red rune painted on the wall catches his eye." 
 
 If there is more than one scene described by the dialog, try to focus on the most recent one. 
 Remember to use clear language and to only include details that can be seen.