working on the recognition function

SkyDocs · Dec 3, 2021 · 0a98f33 · 0a98f33
1 parent b0bd2b4
commit 0a98f33
Show file tree

Hide file tree

Showing 4 changed files with 175 additions and 94 deletions.
diff --git a/cli/predict.py b/cli/predict.py
@@ -26,54 +26,54 @@
 BATCH_SIZE = 128
 SCALE = 0.5
 
-print("\033[31m[*]\033[0m Get Ready!")
-
-time.sleep(5)
-
-""" Taking the voice input """
-
-chunk = 1024  # Record in chunks of 1024 samples
-sample_format = pyaudio.paInt16  # 16 bits per sample
-channels = 2
-fs = 16000  # Record at 16000 samples per second
-seconds = 3
-filename = "predict.wav"
-
-p = pyaudio.PyAudio()  # Create an interface to PortAudio
-
-# print("-------------------------------------------------------------------------------------------")
-print("\033[31m[*]\033[0m Recording")
-
-stream = p.open(format=sample_format,
-				channels=channels,
-				rate=fs,
-				frames_per_buffer=chunk,
-				input=True)
-
-frames = []  # Initialize array to store frames
-
-# Store data in chunks for 1 seconds
-for i in range(0, int(fs / chunk * seconds)):
-	data = stream.read(chunk)
-	frames.append(data)
-
-# Stop and close the stream
-stream.stop_stream()
-stream.close()
-# Terminate the PortAudio interface
-p.terminate()
-
-print("\033[31m[*]\033[0m Finished recording")
-# print("-------------------------------------------------------------------------------------------")
-# Save the recorded data as a WAV file
-wf = wave.open(filename, 'wb')
-wf.setnchannels(channels)
-wf.setsampwidth(p.get_sample_size(sample_format))
-wf.setframerate(fs)
-wf.writeframes(b''.join(frames))
-wf.close()
-
-print("\033[31m[*]\033[0m Processing")
+# print("\033[31m[*]\033[0m Get Ready!")
+
+# time.sleep(5)
+
+# """ Taking the voice input """
+
+# chunk = 1024  # Record in chunks of 1024 samples
+# sample_format = pyaudio.paInt16  # 16 bits per sample
+# channels = 2
+# fs = 16000  # Record at 16000 samples per second
+# seconds = 3
+# filename = "predict.wav"
+
+# p = pyaudio.PyAudio()  # Create an interface to PortAudio
+
+# # print("-------------------------------------------------------------------------------------------")
+# print("\033[31m[*]\033[0m Recording")
+
+# stream = p.open(format=sample_format,
+# 				channels=channels,
+# 				rate=fs,
+# 				frames_per_buffer=chunk,
+# 				input=True)
+
+# frames = []  # Initialize array to store frames
+
+# # Store data in chunks for 1 seconds
+# for i in range(0, int(fs / chunk * seconds)):
+# 	data = stream.read(chunk)
+# 	frames.append(data)
+
+# # Stop and close the stream
+# stream.stop_stream()
+# stream.close()
+# # Terminate the PortAudio interface
+# p.terminate()
+
+# print("\033[31m[*]\033[0m Finished recording")
+# # print("-------------------------------------------------------------------------------------------")
+# # Save the recorded data as a WAV file
+# wf = wave.open(filename, 'wb')
+# wf.setnchannels(channels)
+# wf.setsampwidth(p.get_sample_size(sample_format))
+# wf.setframerate(fs)
+# wf.writeframes(b''.join(frames))
+# wf.close()
+
+# print("\033[31m[*]\033[0m Processing")
 """Pre-processing Noise"""
 
 # If folder noise, does not exist, create it, otherwise do nothing
@@ -234,27 +234,33 @@ def predict(path, labels):
 			if y_pred[index] == 0:
 				print("\033[31m[*]\033[0m Voice Assistant triggered")
 				print("\033[31m[*]\033[0m Welcome user Harshit!")
-				user.user = 1
-				time.sleep(2)
-				import voice_assistant_cli
+				# user.user = 1
+				return 1
+				# time.sleep(2)
+				# import voice_assistant_cli
 			elif y_pred[index] == 1:
 				print("\033[31m[*]\033[0m Welcome user 1")
-				user.user = 2
-				time.sleep(2)
-				import voice_assistant_cli
+				return 2
+				# user.user = 2
+				# time.sleep(2)
+				# import voice_assistant_cli
 			elif y_pred[index] == 2:
 				print("\033[31m[*]\033[0m Welcome user 2")
-				user.user = 3
-				time.sleep(2)
-				import voice_assistant_cli
+				return 3
+				# user.user = 3
+				# time.sleep(2)
+				# import voice_assistant_cli
 			else:
 				print("\033[31m[*]\033[0m User not recognised! Returning to the general user!")
-				time.sleep(2)
-				import voice_assistant_cli
+				return 0
+				# time.sleep(2)
+				# import voice_assistant_cli
 
 
 """ Predict """
-path = ["predict.wav"]
-labels = ["unknown"]
-model = tf.keras.models.load_model('../model.h5') # path to the saved keras model
-predict(path, labels)
+def main(wav_file):
+	"""Run speaker recognition on one WAV file and return predict()'s result.
+
+	wav_file is wrapped in a single-element list and handed to predict()
+	together with the placeholder label "unknown".
+	"""
+	# predict() is called without a model argument, so it presumably reads the
+	# module-level `model` that this script used to assign at import time.
+	# Keep the loaded model global so that lookup still succeeds.
+	# NOTE(review): confirm against predict()'s body.
+	global model
+	model = tf.keras.models.load_model('../model.h5') # path to the saved keras model
+	return predict([wav_file], ["unknown"])
+
diff --git a/cli/voice_assistant.py b/cli/voice_assistant.py
@@ -23,39 +23,39 @@ def func(command):
     if "hello" in command:
         current_time = int(strftime('%H'))
         if current_time < 12:
-            # bot.bot("Hello, Good morning, this is your voice assistant.")
+            # return("Hello, Good morning, this is your voice assistant.")
             return ("Hello, Good morning, this is your voice assistant.")
         elif 12 <= current_time < 16:
-            # bot.bot("Hello, Good afternoon, this is your voice assistant.")
+            # return("Hello, Good afternoon, this is your voice assistant.")
             return ("Hello, Good afternoon, this is your voice assistant.")
         else:
-            # bot.bot("Hello, Good evening, this is your voice assistant.")
+            # return("Hello, Good evening, this is your voice assistant.")
             return ("Hello, Good evening, this is your voice assistant.")
 
     elif "who made you" in command:
-        bot.bot("I was developed by The Team SkyDocs.")
+        return("I was developed by The Team SkyDocs.")
 
     elif "how are you" in command:
-        bot.bot("I am great. Hoping the same for you.")
+        return("I am great. Hoping the same for you.")
 
     elif "your name" in command:
-        bot.bot("My name is Bella.")
+        return("I dont have a name yet. Would like to give me one?")
 
     elif "who am i" in command:
         whoami.main()
 
     elif "feature" in command:
-        bot.bot("I have lot of features, Some of my features are given below:")
-        bot.bot("Say recognise to recognise the user and give presonalised results")
-        bot.bot("Greetings")
-        bot.bot("Play Video")
-        bot.bot("Web Search")
-        bot.bot("Give Latest News")
-        bot.bot("Add Notes and many more...")
-        bot.bot("why not try something and get started.")
+        return("I have lot of features, Some of my features are given below:")
+        return("Say recognise to recognise the user and give presonalised results")
+        return("Greetings")
+        return("Play Video")
+        return("Web Search")
+        return("Give Latest News")
+        return("Add Notes and many more...")
+        return("why not try something and get started.")
 
     elif "recognise" in command:
-        bot.bot("You will be redirected to the recognition part!")
+        return("You will be redirected to the recognition part!")
         cur_dir = os.getcwd()
         parent_dir = os.path.dirname(cur_dir)
         if (cur_dir == os.path.join(parent_dir, "cli")):
@@ -67,14 +67,14 @@ def func(command):
         call(["python", "predict.py"])
 
     elif "joke" in command:
-        bot.bot(pyjokes.get_joke())
+        return(pyjokes.get_joke())
 
     elif "google" in command:
         search.search()
 
     elif 'time' in command:
         now = datetime.datetime.now()
-        bot.bot('Current time is %d hours %d minutes' % (now.hour, now.minute))
+        return('Current time is %d hours %d minutes' % (now.hour, now.minute))
 
     elif "play video" in command:
         play_video.play_video()
@@ -89,12 +89,12 @@ def func(command):
         notes.main()
 
     elif "gmail" in command:
-        bot.bot("sure, opening gmail")
+        return("sure, opening gmail")
         url_mail = "https://www.gmail.com"
         webbrowser.open(url_mail)
 
     elif "wikipedia" in command:
-        bot.bot("Sure! Here you go.")
+        return("Sure! Here you go.")
         url_wiki = "https://www.wikipedia.org/"
         webbrowser.open(url_wiki)
 
@@ -105,37 +105,37 @@ def func(command):
         map.map()
 
     elif "shutdown" in command:
-        bot.bot("You are going to poweroff your system. Are you sure?")
+        return("You are going to poweroff your system. Are you sure?")
         listen()
         if "yes" in command:
             os.system("poweroff")
         else:
-            bot.bot("You have aborted the process. Returning back to previous state")
+            return("You have aborted the process. Returning back to previous state")
             main(listen())
 
     elif 'search' in command:
         search.search()
 
     elif "remind" in command:
-        bot.bot("What shall I remind you about?")
+        return("What shall I remind you about?")
         text = listen()
-        bot.bot("In how many minutes ?")
+        return("In how many minutes ?")
         local_time = float(listen())
         local_time = local_time * 60
         time.sleep(local_time)
-        bot.bot(text)
+        return(text)
 
     elif "calendar" in command:
         calendar.calendar()
 
     elif "bye" in command:
-        bot.bot("Bye!")
+        return("Bye!")
         sys.exit()
 
     elif "thank you" in command:
-        bot.bot("Pleasure to serve you!")
+        return("Pleasure to serve you!")
         sys.exit()
 
     else:
-        # bot.bot("I am sorry, I am unable to process your request.")
+        # return("I am sorry, I am unable to process your request.")
         return command
diff --git a/voice_assistant_cloud.py b/voice_assistant_cloud.py
@@ -36,10 +36,22 @@ def general():
 	return response
 
 
-# @app.route('/recongise', methods=['POST'])
-# def recognise():
-# 	# get the wav file, and pass it to the pedict function
-# 	
+@app.route('/recognise', methods=['POST'])
+@app.route('/recongise', methods=['POST'])  # original (misspelled) path kept for existing callers
+def recognise():
+	"""Identify the speaker in a base64-encoded WAV sent as JSON.
+
+	Expects a JSON body with a "user_audio" key holding the base64 text of
+	the recording. Responds with JSON {"user_id": <value returned by
+	predict.main>}, or with a 400 error when "user_audio" is missing.
+	"""
+	import base64
+	import os
+	import tempfile
+
+	from cli import predict
+
+	data_ret = request.get_json(silent=True)
+	if not isinstance(data_ret, dict) or not data_ret.get("user_audio"):
+		return jsonify({"error": "missing user_audio"}), 400
+
+	wav_bytes = base64.b64decode(data_ret["user_audio"])
+
+	# predict.main() puts its argument into a list of file paths, so the
+	# decoded audio has to exist on disk rather than be passed as raw bytes.
+	fd, wav_path = tempfile.mkstemp(suffix=".wav")
+	try:
+		with os.fdopen(fd, "wb") as wav_out:
+			wav_out.write(wav_bytes)
+		user_id = predict.main(wav_path)
+	finally:
+		# Always remove the temporary recording, even if prediction fails
+		os.remove(wav_path)
+
+	return jsonify({"user_id": user_id})
+
 
 if __name__ == '__main__':
 	app.run(host='127.0.0.1', port=8080)
diff --git a/voice_assistant_front_end.py b/voice_assistant_front_end.py
@@ -3,6 +3,66 @@
 import speech_recognition as sr
 import pyttsx3
 import json
+import time
+import pyaudio
+import wave
+
+def recognise():
+	"""Record a short voice sample and ask the recognition service who spoke.
+
+	Records `seconds` seconds of audio through PyAudio, saves them to
+	"predict.wav", posts the base64-encoded file to the recognition endpoint
+	and greets the user through bot().
+
+	Returns the user id reported by the service; 0 (also used when the
+	request fails) stands for the general user.
+	"""
+	import base64  # local import: only this function needs it
+
+	print("\033[31m[*]\033[0m You will be asked to speak for few seconds for the recognition of the speaker.")
+	time.sleep(3)
+	print("\033[31m[*]\033[0m Get Ready!")
+
+	# Taking the voice input
+	chunk = 1024  # Record in chunks of 1024 samples
+	sample_format = pyaudio.paInt16  # 16 bits per sample
+	channels = 2
+	fs = 16000  # Record at 16000 samples per second
+	seconds = 3
+	filename = "predict.wav"
+
+	p = pyaudio.PyAudio()  # Create an interface to PortAudio
+	try:
+		sample_width = p.get_sample_size(sample_format)
+
+		print("\033[31m[*]\033[0m Recording")
+
+		stream = p.open(format=sample_format,
+						channels=channels,
+						rate=fs,
+						frames_per_buffer=chunk,
+						input=True)
+		try:
+			# Read `seconds` seconds of audio, one chunk at a time
+			frames = [stream.read(chunk) for _ in range(int(fs / chunk * seconds))]
+		finally:
+			# Stop and close the stream even if reading failed
+			stream.stop_stream()
+			stream.close()
+	finally:
+		# Terminate the PortAudio interface
+		p.terminate()
+
+	print("\033[31m[*]\033[0m Finished recording")
+
+	# Save the recorded data as a WAV file
+	with wave.open(filename, 'wb') as wf:
+		wf.setnchannels(channels)
+		wf.setsampwidth(sample_width)
+		wf.setframerate(fs)
+		wf.writeframes(b''.join(frames))
+
+	# JSON cannot carry raw bytes, so send the file content as base64 text
+	with open(filename, 'rb') as recorded:
+		wav_file = base64.b64encode(recorded.read()).decode('ascii')
+
+	# Port and path match the service in voice_assistant_cloud.py, which
+	# listens on 8080 and registers the route with the spelling "recongise".
+	url = "http://127.0.0.1:8080/recongise"
+	try:
+		response = requests.post(url, json={"user_audio": wav_file}, timeout=60)
+		response.raise_for_status()
+		user_id = response.json().get("user_id")
+	except (requests.RequestException, ValueError) as err:
+		# Fall back to the general user instead of crashing the assistant
+		print("\033[31m[*]\033[0m Recognition request failed: %s" % err)
+		user_id = 0
+
+	if not user_id:
+		bot("Welcome back general user")
+	else:
+		bot("Welcome back user %s" % user_id)
+	return user_id
+
 
 
 def listen():
@@ -17,6 +77,9 @@ def listen():
 			print(command)
 		except sr.UnknownValueError:
 			command = listen()
+	if(command == "recognise"):
+		recognise()
+
 	return command