-
Notifications
You must be signed in to change notification settings - Fork 1
/
speech_to_text.py
117 lines (93 loc) · 3.91 KB
/
speech_to_text.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
# -*- coding: utf-8 -*-
from os import path
import speech_recognition as sr
import tweaks
import common_functions
import sys
import pickle
__author__ = "Romain Claret"
__maintainer__ = "Romain Claret"
__copyright__ = "Copyright 2015, Romain Claret "
__credits__ = ["Romain Claret"]
# Copyright (C) Romain Claret, All Rights Reserved
# Unauthorized copying of this file, via any medium is strictly prohibited
__license__ = "Proprietary and confidential"
__version__ = "1.0.0"
__email__ = "[email protected]"
__status__ = "Prototype" # Prototype, Development, Production
__date__ = "01.11.2015"
"""@package speech_to_text
Use of the speech_recognition package (pip) for the Google Speech-to-Text
speech_recognition also works with Wit.ai, IBM Speech to Text, and AT&T Speech to Text.
Can be found at: https://pypi.python.org/pypi/SpeechRecognition
"""
def transcriptAudioFile(pathToFile, language):
    """
    Transcribe a single audio file in a given language with Google speech recognition.

    :param pathToFile: path to the audio file, resolved against the folder of this script
    :param language: language used in the audio file, forwarded to the recognizer
    :return: the recognizer result (its form depends on tweaks.showAllResults),
        or None when the audio could not be understood or the request failed
    """
    # obtain full path to file in the same folder as this script
    wavFile = path.join(path.dirname(path.realpath(__file__)), pathToFile)
    recognizer = sr.Recognizer()
    with sr.WavFile(wavFile) as source:
        audio = recognizer.record(source)  # read the entire WAV file
    try:
        # for private API key 'recognizer.recognize_google(audio, key="GOOGLE_SPEECH_RECOGNITION_API_KEY")'
        return recognizer.recognize_google(audio, language=language, show_all=tweaks.showAllResults)
    except sr.UnknownValueError:
        # Nothing intelligible in this audio: expected for silent or noisy
        # chunks, so the caller simply gets no text for it.
        return None
    except sr.RequestError as error:
        # The request itself failed. Keep the best-effort contract (return
        # None) but report it, otherwise an outage looks like silent audio.
        sys.stderr.write("\nspeech_to_text: recognition request failed for "
                         + str(pathToFile) + ": " + str(error) + "\n")
        return None
def transcriptProcessedFiles(folderName):
    """
    Transcribe all audio files present in a folder and fuse the results.
    To be combined with the audio_splitter.

    When tweaks.use_serialized_audio is set and a pickled transcript named
    after the folder already exists, that transcript is returned instead of
    transcribing again. When tweaks.serialize_audio is set, the freshly built
    transcript is pickled under the same name for later reuse.

    :param folderName: name of the folder inside tweaks.outputDirectory
    :return: transcript of the split audio files, joined with single spaces
        (an empty string when nothing could be transcribed)
    """
    pathToFolder = tweaks.outputDirectory + folderName
    files = common_functions.getListFolders(pathToFolder)
    language = common_functions.getLanguage(folderName)
    print("speech_to_text: " + folderName + " in " + language)

    pathToFolder_serialized = tweaks.serialized_folder + tweaks.serialized_audio_folder
    check_serialized_files = common_functions.getListFolders(pathToFolder_serialized)
    serializedPath = pathToFolder_serialized + folderName

    sys.stdout.write("Speech-to-Text in progress: 0%.")
    sys.stdout.flush()

    # The cache lookup does not depend on the individual audio files, so it is
    # done once up front instead of on every loop iteration.
    if tweaks.use_serialized_audio and folderName in check_serialized_files:
        # NOTE(review): pickle.load can execute arbitrary code; only safe as
        # long as the serialized folder is written by this program alone.
        with open(serializedPath, 'rb') as load_file:
            transcriptText = pickle.load(load_file)
        print("Load Serialized.100%.Done")
        return transcriptText

    transcripts = []
    totalFiles = len(files)
    for done, fileName in enumerate(files, 1):
        try:
            text = transcriptAudioFile(pathToFolder + "/" + fileName, language)
            if text is not None:
                transcripts.append(str(text))
        except Exception as error:
            # Best effort: one unreadable chunk must not abort the whole
            # transcript, but it is reported instead of silently reusing the
            # previous (or still undefined) chunk text.
            sys.stderr.write("\nspeech_to_text: skipped " + str(fileName)
                             + ": " + str(error) + "\n")
        # Percentage derived from the file index: reaches exactly 100 on the
        # last file and keeps moving even with more than 100 files.
        sys.stdout.write(str((100 * done) // totalFiles) + "%.")
        sys.stdout.flush()
    print("..Done")

    transcriptText = " ".join(transcripts)

    # Nothing is cached for an empty folder, so a later run with real audio
    # is not short-circuited by an empty serialized transcript.
    if tweaks.serialize_audio and totalFiles:
        with open(serializedPath, 'wb') as output:
            pickle.dump(transcriptText, output, -1)  # use of highest protocol available
        print(folderName + " is now serialized.")

    return transcriptText
if __name__ == "__main__":
    # Executed only when this file is run directly.
    # It will:
    #   transcribe the first sub-folder listed in tweaks.outputDirectory,
    #   print the transcription.
    folders = common_functions.getListFolders(tweaks.outputDirectory)
    if not folders:
        # Exit with a readable message (and a non-zero status) instead of an
        # IndexError traceback when there is nothing to transcribe.
        sys.exit("speech_to_text: no folder to transcribe in " + tweaks.outputDirectory)
    text = transcriptProcessedFiles(folders[0])
    print(text)