diff --git a/cli/predict.py b/cli/predict.py index 97f458c..81de7af 100644 --- a/cli/predict.py +++ b/cli/predict.py @@ -26,54 +26,54 @@ BATCH_SIZE = 128 SCALE = 0.5 -print("\033[31m[*]\033[0m Get Ready!") - -time.sleep(5) - -""" Taking the voice input """ - -chunk = 1024 # Record in chunks of 1024 samples -sample_format = pyaudio.paInt16 # 16 bits per sample -channels = 2 -fs = 16000 # Record at 16000 samples per second -seconds = 3 -filename = "predict.wav" - -p = pyaudio.PyAudio() # Create an interface to PortAudio - -# print("-------------------------------------------------------------------------------------------") -print("\033[31m[*]\033[0m Recording") - -stream = p.open(format=sample_format, - channels=channels, - rate=fs, - frames_per_buffer=chunk, - input=True) - -frames = [] # Initialize array to store frames - -# Store data in chunks for 1 seconds -for i in range(0, int(fs / chunk * seconds)): - data = stream.read(chunk) - frames.append(data) - -# Stop and close the stream -stream.stop_stream() -stream.close() -# Terminate the PortAudio interface -p.terminate() - -print("\033[31m[*]\033[0m Finished recording") -# print("-------------------------------------------------------------------------------------------") -# Save the recorded data as a WAV file -wf = wave.open(filename, 'wb') -wf.setnchannels(channels) -wf.setsampwidth(p.get_sample_size(sample_format)) -wf.setframerate(fs) -wf.writeframes(b''.join(frames)) -wf.close() - -print("\033[31m[*]\033[0m Processing") +# print("\033[31m[*]\033[0m Get Ready!") + +# time.sleep(5) + +# """ Taking the voice input """ + +# chunk = 1024 # Record in chunks of 1024 samples +# sample_format = pyaudio.paInt16 # 16 bits per sample +# channels = 2 +# fs = 16000 # Record at 16000 samples per second +# seconds = 3 +# filename = "predict.wav" + +# p = pyaudio.PyAudio() # Create an interface to PortAudio + +# # print("-------------------------------------------------------------------------------------------") +# 
print("\033[31m[*]\033[0m Recording") + +# stream = p.open(format=sample_format, +# channels=channels, +# rate=fs, +# frames_per_buffer=chunk, +# input=True) + +# frames = [] # Initialize array to store frames + +# # Store data in chunks for 1 seconds +# for i in range(0, int(fs / chunk * seconds)): +# data = stream.read(chunk) +# frames.append(data) + +# # Stop and close the stream +# stream.stop_stream() +# stream.close() +# # Terminate the PortAudio interface +# p.terminate() + +# print("\033[31m[*]\033[0m Finished recording") +# # print("-------------------------------------------------------------------------------------------") +# # Save the recorded data as a WAV file +# wf = wave.open(filename, 'wb') +# wf.setnchannels(channels) +# wf.setsampwidth(p.get_sample_size(sample_format)) +# wf.setframerate(fs) +# wf.writeframes(b''.join(frames)) +# wf.close() + +# print("\033[31m[*]\033[0m Processing") """Pre-processing Noise""" # If folder noise, does not exist, create it, otherwise do nothing @@ -234,27 +234,33 @@ def predict(path, labels): if y_pred[index] == 0: print("\033[31m[*]\033[0m Voice Assistant triggered") print("\033[31m[*]\033[0m Welcome user Harshit!") - user.user = 1 - time.sleep(2) - import voice_assistant_cli + # user.user = 1 + return 1 + # time.sleep(2) + # import voice_assistant_cli elif y_pred[index] == 1: print("\033[31m[*]\033[0m Welcome user 1") - user.user = 2 - time.sleep(2) - import voice_assistant_cli + return 2 + # user.user = 2 + # time.sleep(2) + # import voice_assistant_cli elif y_pred[index] == 2: print("\033[31m[*]\033[0m Welcome user 2") - user.user = 3 - time.sleep(2) - import voice_assistant_cli + return 3 + # user.user = 3 + # time.sleep(2) + # import voice_assistant_cli else: print("\033[31m[*]\033[0m User not recognised! 
Returning to the general user!") - time.sleep(2) - import voice_assistant_cli + return 0 + # time.sleep(2) + # import voice_assistant_cli """ Predict """ -path = ["predict.wav"] -labels = ["unknown"] -model = tf.keras.models.load_model('../model.h5') # path to the saved keras model -predict(path, labels) \ No newline at end of file +def main(wav_file): + path = [wav_file] + labels = ["unknown"] + model = tf.keras.models.load_model('../model.h5') # path to the saved keras model + return predict(path, labels) + \ No newline at end of file diff --git a/cli/voice_assistant.py b/cli/voice_assistant.py index 462da4e..b71f208 100755 --- a/cli/voice_assistant.py +++ b/cli/voice_assistant.py @@ -23,39 +23,39 @@ def func(command): if "hello" in command: current_time = int(strftime('%H')) if current_time < 12: - # bot.bot("Hello, Good morning, this is your voice assistant.") + # return("Hello, Good morning, this is your voice assistant.") return ("Hello, Good morning, this is your voice assistant.") elif 12 <= current_time < 16: - # bot.bot("Hello, Good afternoon, this is your voice assistant.") + # return("Hello, Good afternoon, this is your voice assistant.") return ("Hello, Good afternoon, this is your voice assistant.") else: - # bot.bot("Hello, Good evening, this is your voice assistant.") + # return("Hello, Good evening, this is your voice assistant.") return ("Hello, Good evening, this is your voice assistant.") elif "who made you" in command: - bot.bot("I was developed by The Team SkyDocs.") + return("I was developed by The Team SkyDocs.") elif "how are you" in command: - bot.bot("I am great. Hoping the same for you.") + return("I am great. Hoping the same for you.") elif "your name" in command: - bot.bot("My name is Bella.") + return("I dont have a name yet. 
Would like to give me one?") elif "who am i" in command: whoami.main() elif "feature" in command: - bot.bot("I have lot of features, Some of my features are given below:") - bot.bot("Say recognise to recognise the user and give presonalised results") - bot.bot("Greetings") - bot.bot("Play Video") - bot.bot("Web Search") - bot.bot("Give Latest News") - bot.bot("Add Notes and many more...") - bot.bot("why not try something and get started.") + return("I have lot of features, Some of my features are given below:") + return("Say recognise to recognise the user and give presonalised results") + return("Greetings") + return("Play Video") + return("Web Search") + return("Give Latest News") + return("Add Notes and many more...") + return("why not try something and get started.") elif "recognise" in command: - bot.bot("You will be redirected to the recognition part!") + return("You will be redirected to the recognition part!") cur_dir = os.getcwd() parent_dir = os.path.dirname(cur_dir) if (cur_dir == os.path.join(parent_dir, "cli")): @@ -67,14 +67,14 @@ def func(command): call(["python", "predict.py"]) elif "joke" in command: - bot.bot(pyjokes.get_joke()) + return(pyjokes.get_joke()) elif "google" in command: search.search() elif 'time' in command: now = datetime.datetime.now() - bot.bot('Current time is %d hours %d minutes' % (now.hour, now.minute)) + return('Current time is %d hours %d minutes' % (now.hour, now.minute)) elif "play video" in command: play_video.play_video() @@ -89,12 +89,12 @@ def func(command): notes.main() elif "gmail" in command: - bot.bot("sure, opening gmail") + return("sure, opening gmail") url_mail = "https://www.gmail.com" webbrowser.open(url_mail) elif "wikipedia" in command: - bot.bot("Sure! Here you go.") + return("Sure! Here you go.") url_wiki = "https://www.wikipedia.org/" webbrowser.open(url_wiki) @@ -105,37 +105,37 @@ def func(command): map.map() elif "shutdown" in command: - bot.bot("You are going to poweroff your system. 
Are you sure?")
+        return("You are going to poweroff your system. Are you sure?")
         listen()
         if "yes" in command:
             os.system("poweroff")
         else:
-            bot.bot("You have aborted the process. Returning back to previous state")
+            return("You have aborted the process. Returning back to previous state")
             main(listen())
 
     elif 'search' in command:
         search.search()
 
     elif "remind" in command:
-        bot.bot("What shall I remind you about?")
+        return("What shall I remind you about?")
         text = listen()
-        bot.bot("In how many minutes ?")
+        return("In how many minutes ?")
         local_time = float(listen())
         local_time = local_time * 60
         time.sleep(local_time)
-        bot.bot(text)
+        return(text)
 
     elif "calendar" in command:
         calendar.calendar()
 
     elif "bye" in command:
-        bot.bot("Bye!")
+        return("Bye!")
         sys.exit()
 
     elif "thank you" in command:
-        bot.bot("Pleasure to serve you!")
+        return("Pleasure to serve you!")
         sys.exit()
 
     else:
-        # bot.bot("I am sorry, I am unable to process your request.")
+        # return("I am sorry, I am unable to process your request.")
         return command
diff --git a/voice_assistant_cloud.py b/voice_assistant_cloud.py
index 2b16d8f..158543f 100644
--- a/voice_assistant_cloud.py
+++ b/voice_assistant_cloud.py
@@ -36,10 +36,22 @@ def general():
     return response
 
-# @app.route('/recongise', methods=['POST'])
-# def recognise():
-#     # get the wav file, and pass it to the pedict function
-#
+@app.route('/recognise', methods=['POST'])
+def recognise():
+
+    data_ret = request.get_json()
+    wav_file = data_ret["user_audio"]
+    # get the wav file, and pass it to the predict function
+    wav_file = base64.b64decode(wav_file)
+
+    from cli import predict
+    user_id = predict.main(wav_file)
+    user_id = {
+        "user_id": user_id
+    }
+    user_id = jsonify(user_id)
+    return user_id
+
 if __name__ == '__main__':
     app.run(host='127.0.0.1', port=8080)
\ No newline at end of file
diff --git a/voice_assistant_front_end.py b/voice_assistant_front_end.py
index 9f4f82e..58dfa4b 100644
--- a/voice_assistant_front_end.py
+++ 
b/voice_assistant_front_end.py
@@ -3,6 +3,68 @@
 import speech_recognition as sr
 import pyttsx3
 import json
+import time
+import pyaudio
+import wave
+import base64
+import requests
+
+def recognise():
+    print("\033[31m[*]\033[0m You will be asked to speak for few seconds for the recognition of the speaker.")
+    time.sleep(3)
+    print("\033[31m[*]\033[0m Get Ready!")
+
+    """ Taking the voice input """
+
+    chunk = 1024 # Record in chunks of 1024 samples
+    sample_format = pyaudio.paInt16 # 16 bits per sample
+    channels = 2
+    fs = 16000 # Record at 16000 samples per second
+    seconds = 3
+    filename = "predict.wav"
+
+    p = pyaudio.PyAudio() # Create an interface to PortAudio
+
+    # print("-------------------------------------------------------------------------------------------")
+    print("\033[31m[*]\033[0m Recording")
+
+    stream = p.open(format=sample_format,
+                    channels=channels,
+                    rate=fs,
+                    frames_per_buffer=chunk,
+                    input=True)
+
+    frames = [] # Initialize array to store frames
+
+    # Store data in chunks for 1 seconds
+    for i in range(0, int(fs / chunk * seconds)):
+        data = stream.read(chunk)
+        frames.append(data)
+
+    # Stop and close the stream
+    stream.stop_stream()
+    stream.close()
+    # Terminate the PortAudio interface
+    p.terminate()
+
+    print("\033[31m[*]\033[0m Finished recording")
+    # print("-------------------------------------------------------------------------------------------")
+    # Save the recorded data as a WAV file
+    wf = wave.open(filename, 'wb')
+    wf.setnchannels(channels)
+    wf.setsampwidth(p.get_sample_size(sample_format))
+    wf.setframerate(fs)
+    wf.writeframes(b''.join(frames))
+    wf.close()
+
+    wav_file = base64.b64encode(open(filename, "rb").read()).decode()
+    url = "http://127.0.0.1:8080/recognise"
+    user_id = requests.post(url, json = {"user_audio": wav_file}).json()["user_id"]
+    if(user_id == 0):
+        bot("Welcome back general user")
+    else:
+        bot("Welcome back user " + str(user_id))
+
 
 def listen():
@@ -17,6 +79,9 @@ def listen():
         print(command)
     except sr.UnknownValueError:
         command = listen()
+    if(command == 
"recognise"): + recognise() + return command