diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index feb20ee..62af4ea 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -27,9 +27,9 @@ jobs: sudo apt install portaudio19-dev pip install .[dev] - name: Lint imports - run: isort . + run: isort --check . - name: Lint formatting - run: black . + run: black --check . - name: Lint semantics run: ruff . - name: Lint types diff --git a/live_illustrate/__main__.py b/live_illustrate/__main__.py index 7ebabd4..6bab016 100644 --- a/live_illustrate/__main__.py +++ b/live_illustrate/__main__.py @@ -34,6 +34,13 @@ def get_args() -> argparse.Namespace: type=float, help="How frequently to summarize the conversation and generate an image", ) + parser.add_argument( + "--phrase_timeout", + default=0.75, + type=float, + help="Period of time after which to force transcription, even without a pause. " + "Specified as a fraction of wait_minutes", + ) parser.add_argument( "--max_context", default=2000, # very roughly ten minutes or so? @@ -111,7 +118,7 @@ def main() -> None: logging.getLogger("werkzeug").setLevel(logging.INFO if args.verbose > 0 else logging.WARNING) # flask # create each of our thread objects with the apppropriate command line args - transcriber = AudioTranscriber(model=args.audio_model) + transcriber = AudioTranscriber(model=args.audio_model, phrase_timeout=args.wait_minutes * args.phrase_timeout) buffer = TextBuffer( wait_minutes=args.wait_minutes, max_context=args.max_context, persistence=args.persistence_of_memory ) diff --git a/live_illustrate/summarize.py b/live_illustrate/summarize.py index c7c48c5..58ad350 100644 --- a/live_illustrate/summarize.py +++ b/live_illustrate/summarize.py @@ -35,9 +35,11 @@ def work(self, transcription: Transcription) -> Summary | None: self.logger.info("Summarized %d tokens in %s", token_count, datetime.now() - start) if response.choices: return [ - Summary.from_transcription(transcription, content.strip()) - if (content := choice.message.content) - else None + ( + Summary.from_transcription(transcription, content.strip()) + if (content := choice.message.content) + else None + ) for choice in response.choices ][-1] return None diff --git a/live_illustrate/transcribe.py b/live_illustrate/transcribe.py index 454a8e1..a7d5932 100644 --- a/live_illustrate/transcribe.py +++ b/live_illustrate/transcribe.py @@ -11,12 +11,13 @@ class AudioTranscriber(AsyncThread): - def __init__(self, model: str) -> None: + def __init__(self, model: str, phrase_timeout: float) -> None: super().__init__("AudioTranscriber") self.recorder = sr.Recognizer() self.source = sr.Microphone(sample_rate=SAMPLE_RATE) self.model = model + self.phrase_timeout = int(phrase_timeout * 60) self.recorder.dynamic_energy_threshold = DYNAMIC_ENERGY_THRESHOLD @@ -29,6 +30,6 @@ def start(self, callback: t.Callable[[str], None]) -> None: self.recorder.adjust_for_ambient_noise(self.source) # This creates a separate thread for the audio recording, # but it's non-blocking, so we just let it live here - self.recorder.listen_in_background(self.source, self.send) + self.recorder.listen_in_background(self.source, self.send, phrase_time_limit=self.phrase_timeout) super().start(callback) diff --git a/pyproject.toml b/pyproject.toml index 5df4891..6356807 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,7 +36,7 @@ dependencies = [ [project.optional-dependencies] # Optional dev = [ - "black", + "black>=24.0,<25.0", "isort", "mypy", "ruff",