'''
DISCLAIMER:
---------------------------------
In any case, all binaries, configuration code, templates and snippets of this solution are of "work in progress" character.
This also applies to GitHub "Release" versions.
Neither Simon Nagel nor Autodesk represents that these samples are reliable, accurate, complete, or otherwise valid.
Accordingly, those configuration samples are provided "as is" with no warranty of any kind, and you use the applications at your own risk.

Scripted by Rutvik Bhatt and Simon Nagel. Supported by Marcus Fritzen.

Make sure to install pyaudio and speech_recognition.
Just paste the script into the Script Editor of VRED and press Run.
Press V to enable the voice recognition mode.
Start speaking immediately after pressing V.
Pause after you finish speaking and wait for your audio to be processed.
Once you see the output in the terminal, start speaking again.
Press B to manually disable voice recognition.
'''
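# Installation note (an assumption, not part of the original template): both
# dependencies are on PyPI under the names "PyAudio" and "SpeechRecognition",
# e.g. "pip install PyAudio SpeechRecognition". VRED ships its own Python
# interpreter, so the packages must be installed into the interpreter that
# VRED actually uses, not just into the system Python.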
import time
import PySide2.QtGui
import pyaudio
import speech_recognition as sr
from inspect import signature
from datetime import datetime
QVector3D = PySide2.QtGui.QVector3D
r = sr.Recognizer()
m = sr.Microphone()
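# Calibrate the recognizer's energy threshold once against the current
# background noise (speech_recognition samples the source for about one
# second by default), so the background listener can later tell speech
# from silence.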
with m as source:
    r.adjust_for_ambient_noise(source)
#----------------------------------------------------------Keyword function registration---------------------------------------------------------------------------------
keywordFunctions = {}
def VoiceRecogControlEnable():
    global VRCKeyFlag
    global stop_listening
    r = sr.Recognizer()
    m = sr.Microphone()
    voiceRecogAnno_audio_stop()
    if VRCKeyFlag == False:
        def callback(recognizer, audio):
            global sentence
            global voice_data
            global voice_dataR
            global split_voice_data
            try:
                print("Please wait, your audio is being processed\n")
                # English is the default language; change 'en-US' to 'de-DE' for German, 'fr-FR' for French, etc.
                voice_data = recognizer.recognize_google(audio, language='en-US')
                voice_dataR = voice_data.replace(" ", "")
                print("YOU SAID: " + voice_data)
                split_voice_data = [word.lower() for word in voice_data.split()]
                print(split_voice_data)
                # Check the recognized words for a registered keyword
                for word in split_voice_data:
                    if word in keywordFunctions:
                        print("Found matching keyword ", word)
                        nextWordIndex = split_voice_data.index(word) + 1
                        remainingWords = split_voice_data[nextWordIndex:]
                        # Test if we have enough remaining words to pass as arguments to the function
                        expectedArgs = keywordFunctions[word][0]
                        if len(remainingWords) > expectedArgs:
                            # More words than arguments: join the surplus trailing words into the last argument
                            newRemainingWords = remainingWords[0:expectedArgs-1]
                            print(newRemainingWords)
                            joinedStr = ' '.join(remainingWords[expectedArgs-1:])
                            print(joinedStr)
                            newRemainingWords.append(joinedStr)
                            print(newRemainingWords)
                            keywordFunctions[word][1](*newRemainingWords)
                            break
                        elif len(remainingWords) == expectedArgs:
                            # Take exactly the expected number of words after the keyword and pass them to the function
                            keywordFunctions[word][1](*remainingWords[0:expectedArgs])
                            break
                        else:
                            print("Detected too few spoken arguments")
                            print("Expected ", expectedArgs)
                            print("Got ", len(remainingWords))
            except sr.UnknownValueError:
                print("Google Speech Recognition could not understand audio")
            except sr.RequestError as e:
                print("Could not request results from Google Speech Recognition service; {0}".format(e))
        print("You pressed 'V'\n Enabled Voice Recognition")
        stop_listening = r.listen_in_background(m, callback)
        VRCKeyFlag = True
def VoiceRecogControl_audio_stop():
    global VRCKeyFlag
    try:
        if VRCKeyFlag == True:
            print("You pressed 'B'\n Stopped Voice Recognition")
            stop_listening(wait_for_stop=False)
            VRCKeyFlag = False
    except:
        print("Please enable the Voice Recognition first")
def voiceRecogAnno_audio_stop():
    global vrAnnoFlag
    try:
        if vrAnnoFlag == True:
            print("You said 'Stop' or pressed 'S'. Stopping Annotation\n Press 'A' again to create a new annotation")
            stop_listening(wait_for_stop=False)
            vrAnnoFlag = False
    except:
        print("Please enable Voice Annotation if possible")
def registerKeywordFunction(keywords, function):
    global keywordFunctions
    sig = signature(function)
    args = len(sig.parameters)
    print("Registered function with " + str(args) + " parameters")
    for keyword in keywords:
        keywordFunctions[keyword] = (args, function)
#--------------------------------------------------------------Register and Add custom functions here----------------------------------------------------------------------------
'''
Register custom functions here.
Define the keywords first. It can make sense to add similar-sounding words to the list,
e.g. "move" or "roof" will both call the function myMove.
The words spoken after the keyword become the arguments of the function.
The number of arguments is detected automatically from the function signature.
e.g. myMove requires two arguments: direction and valueStr.
If more words are spoken than the function requires, the surplus trailing words are joined into the last argument.
e.g. selVar requires one argument that can consist of multiple words (variant set names tend to be more than one word).
'''
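# A minimal sketch of a custom keyword function (hypothetical, not part of the
# original template): with the registration line below uncommented, saying e.g.
# "lift 200" moves the selected node 200 mm up. It only uses VRED calls that
# already appear in this script (getSelectedNode, getTransformNodeTranslation,
# setTransformNodeTranslation).
def liftNode(valueStr):
    node = getSelectedNode()
    pos = getTransformNodeTranslation(node, 0)
    try:
        setTransformNodeTranslation(node, pos.x(), pos.y(), pos.z() + float(valueStr), False)
        print("\n The node '" + node.getName() + "' was lifted by " + str(valueStr) + " mm.\n")
    except ValueError:
        print("Please provide a number after 'lift', e.g. 'lift 200'")
#registerKeywordFunction(["lift"], liftNode)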
def mainKeywords():
    registerKeywordFunction(["move", "roof"], myMove)
    registerKeywordFunction(["select"], ourSelectFunction)
    # "radiants" and "Marion" catch common mishearings of "variant"
    registerKeywordFunction(["variant", "radiants", "Marion"], selVar)
    registerKeywordFunction(["rotate"], ourRotateFunction)
# Custom functions can be defined here
def myMove(direction, valueStr):
    node = getSelectedNode()
    pos = getTransformNodeTranslation(node, 0)
    try:
        # "app" catches a common mishearing of "up"
        if direction.lower() == "up" or direction.lower() == "app":
            setTransformNodeTranslation(node, pos.x(), pos.y(), pos.z() + float(valueStr), False)
            print("\n The node '" + node.getName() + "' was moved up by " + str(valueStr) + " mm.\n")
        elif direction.lower() == "down":
            setTransformNodeTranslation(node, pos.x(), pos.y(), pos.z() - float(valueStr), False)
            print("\n The node '" + node.getName() + "' was moved down by " + str(valueStr) + " mm.\n")
        elif direction.lower() == "left":
            setTransformNodeTranslation(node, pos.x(), pos.y() - float(valueStr), pos.z(), False)
            print("\n The node '" + node.getName() + "' was moved left by " + str(valueStr) + " mm.\n")
        elif direction.lower() == "right":
            setTransformNodeTranslation(node, pos.x(), pos.y() + float(valueStr), pos.z(), False)
            print("\n The node '" + node.getName() + "' was moved right by " + str(valueStr) + " mm.\n")
        else:
            print("\n Please provide two valid arguments after 'Move', e.g. 'Move up 500' or 'Move left 300'")
    except:
        print("Please provide a number after the direction, e.g. 'Move up 500'")
def ourRotateFunction(ourArgument):
    try:
        node = getSelectedNode()
        rot = getTransformNodeRotation(node)
        setTransformNodeRotation(node, rot.x(), rot.y(), float(ourArgument))
        print("\n The node '" + node.getName() + "' was rotated by " + str(ourArgument) + " degrees.\n")
    except:
        print("\n Please provide a number after the keyword, e.g. 'Rotate 180'")
def ourSelectFunction(ourArgument):
    # Capitalize with title() so that e.g. "select car" matches a node named "Car"
    ourArgumentCap = ourArgument.title()
    selectNode(ourArgumentCap)
    print("\n The node '" + str(ourArgument) + "' was selected.\n")
def selVar(ourArgument):
    global voice_data
    global voice_dataR
    global split_voice_data
    allVars = getVariantSets()
    variantFound = False
    # Match each variant set name against the spoken text, with and without spaces
    for variant in allVars:
        variantInVoiceData = variant.lower() in voice_data.lower()
        variantInSplitVoiceData = variant.lower() in split_voice_data
        variantInRVoiceData = variant.lower() in voice_dataR.lower()
        if variantInVoiceData or variantInSplitVoiceData or variantInRVoiceData:
            selectVariantSet(variant)
            variantFound = True
    if variantFound == False:
        print("\n Could not find variant set '" + str(ourArgument) + "'")
    else:
        print("\n The variant set '" + str(ourArgument) + "' was executed.\n")
mainKeywords()
key2 = vrKey(Key_B)
key2.connect(VoiceRecogControl_audio_stop)
VRCKeyFlag = False
key = vrKey(Key_V)
key.connect(VoiceRecogControlEnable)
vrLogInfo("Welcome to Voice Recognition!\nPress V to enable the voice recognition mode\nImmediately start speaking after pressing V \nTake a pause after finish speaking and wait for your audio to process\nOnce you see the output in the terminal start speaking again \nPress B to manually disable voice recognition")
#--------------------------------------------------------------------------Defining Annotations----------------------------------------------------------------------
class RenderActionAnnotation(vrAEBase):
    def __init__(self):
        vrAEBase.__init__(self)
        self.addLoop()
    def loop(self):
        annos_VoiceRecog = vrAnnotationService.getAnnotations()
        syncCollabAnnoMaterials()

annos_VoiceRecog = vrAnnotationService.getAnnotations()
sentence_ = ""
current_Anno_VoiceRecog = []
current_Anno_VoiceRecog.extend(annos_VoiceRecog)
'''
The intersection method flag defines where a new annotation is placed in the scene.
By default it is set to "RenderWindow", which means the annotation is placed at the hit point under the center of the render window.
Set it to "MousePointer" to place the annotation at the position of your mouse pointer instead.
'''
intersectionMethod = "RenderWindow"
def voiceRecogAnnotationEnable():
    global current_Anno_VoiceRecog
    global vrAnnoFlag
    global stop_listening
    global sentence
    VoiceRecogControl_audio_stop()
    if vrAnnoFlag == False:
        if intersectionMethod == "RenderWindow":
            intersection_VoiceAnno = getSceneIntersection(-1, int(getRenderWindowWidth(-1)/2), int(getRenderWindowHeight(-1)/2))
        elif intersectionMethod == "MousePointer":
            mousePos_VoiceAnno = getMousePosition(-1)
            intersection_VoiceAnno = getSceneIntersection(-1, mousePos_VoiceAnno[0], mousePos_VoiceAnno[1])
        interPos_VoiceAnno = intersection_VoiceAnno[1]
        annoNew = createAnnotation("My New Annotation")
        annoNew.setPosition(interPos_VoiceAnno)
        annos_VoiceRecog = vrAnnotationService.getAnnotations()
        r = sr.Recognizer()
        m = sr.Microphone()
        if len(current_Anno_VoiceRecog) != len(annos_VoiceRecog):
            if vrAnnoFlag == False:
                print("You pressed 'A'\n Enabled Voice Annotation")
                def callback(recognizer, audio):
                    global sentence
                    print("Please wait, your audio is being processed")
                    try:
                        # English is the default language; use 'fr-FR' for French, 'de-DE' for German, etc.
                        # Recognize once and reuse the result instead of sending the audio twice.
                        voice_data = recognizer.recognize_google(audio, language='en-US')
                        print("YOU SAID: " + voice_data)
                        sentence = sentence + " " + voice_data
                        anno_VoiceRecog = annos_VoiceRecog[-1]
                        localUser = vrSessionService.getUser()
                        localUserID = vrdSessionUser.getUserId(localUser)
                        UserName = vrdSessionUser.getUserName(localUser)
                        now = datetime.now()
                        dt_string = now.strftime("%d/%m/%Y %H:%M:%S")
                        if "stop" in voice_data:
                            voiceRecogAnno_audio_stop()
                        elif "remove" in voice_data:
                            sentence = ""
                        anno_VoiceRecog.setText("User: " + UserName + "\nTime and Date: " + dt_string + "\nNote: " + sentence)
                    except sr.UnknownValueError:
                        print("Google Speech Recognition could not understand audio")
                    except sr.RequestError as e:
                        print("Could not request results from Google Speech Recognition service; {0}".format(e))
                stop_listening = r.listen_in_background(m, callback)
                vrAnnoFlag = True
                sentence = ""
        annos_VoiceRecog = vrAnnotationService.getAnnotations()
        del current_Anno_VoiceRecog[:]
        current_Anno_VoiceRecog.extend(annos_VoiceRecog)
# Redefinitions for the annotation part of the template; these override the
# earlier definitions of the same names.
def voiceRecogAnno_audio_stop():
    global vrAnnoFlag
    if vrAnnoFlag == True:
        print("You said 'Stop' or pressed 'S'. Stopping Annotation\n Press 'A' again to create a new annotation")
        stop_listening(wait_for_stop=False)
        vrAnnoFlag = False

def VoiceRecogControl_audio_stop():
    global VRCKeyFlag
    try:
        if VRCKeyFlag == True:
            print("You pressed 'B'\n Stopped Voice Recognition")
            stop_listening(wait_for_stop=False)
            VRCKeyFlag = False
    except:
        print("Please enable the Voice Recognition first")
def syncCollabAnnoMaterials():
    allUsers = vrSessionService.getUsers()
    amountAllUsers = len(allUsers)
    localUser = vrSessionService.getUser()
    localUserID = vrdSessionUser.getUserId(localUser)
    localUserColor = vrdSessionUser.getUserColor(localUser)
    vrAnnotationService.setDefaultLineColor(localUserColor)
    if vrSessionService.isConnected() != 1:
        # Change to a folder name on your PC
        foldername = "c:/temp/"
        vrAnnotationService.saveAnnotations(vrAnnotationService.getAnnotations(), foldername + "mVoiceAnno2data.xml")
key3 = vrKey(Key_S)
key3.connect(voiceRecogAnno_audio_stop)
renderAnno = RenderActionAnnotation()
vrAnnoFlag = False
key4 = vrKey(Key_A)
key4.connect(voiceRecogAnnotationEnable)
vrLogInfo("Welcome to Voice Annotation!\nPress A to enable the voice Annotation mode\nImmediately start speaking after pressing A \nTake a pause after finish speaking and wait for your audio to process\nOnce you see the output in the terminal start speaking again to change the current annotation\nSay 'stop' to stop working on current annotation and press A again to create a new annotation\nSay 'remove' to erase your current annotation\nPress S to manually disable voice recognition")