Skip to content

Commit

Permalink
Merge pull request #151 from OpenVoiceOS/release-0.2.4a1
Browse files Browse the repository at this point in the history
Release 0.2.4a1
  • Loading branch information
JarbasAl authored Oct 21, 2024
2 parents 3534215 + 1bfaf8d commit c1f7d55
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 20 deletions.
10 changes: 7 additions & 3 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
# Changelog

## [0.2.3a1](https://github.com/OpenVoiceOS/ovos-dinkum-listener/tree/0.2.3a1) (2024-10-19)
## [0.2.4a1](https://github.com/OpenVoiceOS/ovos-dinkum-listener/tree/0.2.4a1) (2024-10-21)

[Full Changelog](https://github.com/OpenVoiceOS/ovos-dinkum-listener/compare/0.2.2...0.2.3a1)
[Full Changelog](https://github.com/OpenVoiceOS/ovos-dinkum-listener/compare/0.2.3...0.2.4a1)

**Closed issues:**

- Empty utterance error [\#147](https://github.com/OpenVoiceOS/ovos-dinkum-listener/issues/147)

**Merged pull requests:**

- fix:save utterances [\#148](https://github.com/OpenVoiceOS/ovos-dinkum-listener/pull/148) ([JarbasAl](https://github.com/JarbasAl))
- fix:handle empty string transcriptions [\#150](https://github.com/OpenVoiceOS/ovos-dinkum-listener/pull/150) ([JarbasAl](https://github.com/JarbasAl))



Expand Down
40 changes: 29 additions & 11 deletions ovos_dinkum_listener/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -659,20 +659,38 @@ def _record_end_signal(self):
)
self.bus.emit(Message("recognizer_loop:record_end"))

def _stt_text(self, transcripts: List[Tuple[str, float]],
stt_context: dict):
# Report utterance to intent service
if transcripts:
utts = [u[0] for u in transcripts] # filter confidence
def __normtranscripts(self, transcripts: List[Tuple[str, float]]) -> List[str]:
    """Normalize raw STT transcripts and drop empty or hallucinated ones.

    Each transcript text has surrounding spaces and quote characters
    removed. Texts that end up empty are discarded. When the
    ``filter_hallucinations`` setting is enabled (the default), texts that
    case-insensitively match an entry of ``hallucination_list`` are
    discarded as well and reported in a debug log line.

    Args:
        transcripts: ``(text, confidence)`` pairs from the STT step; only
            the text is used. ``None`` or an empty list yields ``[]``.

    Returns:
        The cleaned utterance texts that survived filtering, in their
        original order.
    """
    # unfortunately common enough when using whisper to deserve a setting;
    # mainly happens on silent audio, not as a mistranscription
    default_hallucinations = [
        "thanks for watching!",
        "thank you for watching!",
        "so",
        "beep!",
        # "Thank you"  # this one can also be valid!!
    ]
    if self.config.get("filter_hallucinations", True):
        configured = self.config.get("hallucination_list", default_hallucinations)
        if configured is None:
            # an explicit null in the config is treated like an unset key
            configured = default_hallucinations
    else:
        configured = []
    # Candidates are lower-cased before the lookup, so the configured
    # entries must be lower-cased too or mixed-case entries never match.
    hallucinations = {h.lower() for h in configured if isinstance(h, str)}

    strip_chars = " \"'"
    kept: List[str] = []
    dropped: List[str] = []
    # `or []` keeps a missing STT result from raising on iteration.
    for entry in transcripts or []:
        text = (entry[0] or "").strip(strip_chars)
        if not text:
            continue
        if text.lower() in hallucinations:
            dropped.append(text)
        else:
            kept.append(text)
    if dropped:
        LOG.debug(f"Filtered hallucinations: {dropped}")
    return kept

def _stt_text(self, transcripts: List[Tuple[str, float]], stt_context: dict):
utts = self.__normtranscripts(transcripts)
LOG.debug(f"STT: {utts}")
if utts:
lang = stt_context.get("lang") or Configuration().get("lang", "en-us")
LOG.debug(f"STT: {utts}")
payload = {"utterances": utts,
"lang": lang}
payload = {"utterances": utts, "lang": lang}
self.bus.emit(Message("recognizer_loop:utterance", payload, stt_context))
elif self.voice_loop.listen_mode == ListeningMode.CONTINUOUS:
LOG.debug("ignoring transcription failure")
else:
self.bus.emit(Message("recognizer_loop:speech.recognition.unknown", context=stt_context))
if self.voice_loop.listen_mode != ListeningMode.CONTINUOUS:
LOG.error("Empty transcription, either recorded silence or STT failed!")
self.bus.emit(Message("recognizer_loop:speech.recognition.unknown", context=stt_context))
else:
LOG.debug("Ignoring empty transcription in continuous listening mode")

def _save_stt(self, audio_bytes, stt_meta, save_path=None):
LOG.info("Saving Utterance Recording")
Expand Down
4 changes: 2 additions & 2 deletions ovos_dinkum_listener/version.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# START_VERSION_BLOCK
VERSION_MAJOR = 0
VERSION_MINOR = 2
VERSION_BUILD = 3
VERSION_ALPHA = 0
VERSION_BUILD = 4
VERSION_ALPHA = 1
# END_VERSION_BLOCK
6 changes: 2 additions & 4 deletions ovos_dinkum_listener/voice_loop/voice_loop.py
Original file line number Diff line number Diff line change
Expand Up @@ -781,12 +781,10 @@ def _after_cmd(self, chunk: bytes):
self._vad_remove_silence()

utts, stt_context = self._get_tx(stt_context)

LOG.info(f"Raw transcription: {utts}")
if utts:
LOG.debug(f"transformers metadata: {stt_context}")
LOG.info(f"transcribed: {utts}")
else:
LOG.info("nothing transcribed")

# Voice command has finished recording
if self.stt_audio_callback is not None:
self.stt_audio_callback(self.stt_audio_bytes, stt_context)
Expand Down

0 comments on commit c1f7d55

Please sign in to comment.