Skip to content

Commit

Permalink
working on the recognition function
Browse files Browse the repository at this point in the history
  • Loading branch information
HarshitRuwali committed Dec 3, 2021
1 parent b0bd2b4 commit 0a98f33
Show file tree
Hide file tree
Showing 4 changed files with 175 additions and 94 deletions.
132 changes: 69 additions & 63 deletions cli/predict.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,54 +26,54 @@
BATCH_SIZE = 128
SCALE = 0.5

print("\033[31m[*]\033[0m Get Ready!")

time.sleep(5)

""" Taking the voice input """

chunk = 1024 # Record in chunks of 1024 samples
sample_format = pyaudio.paInt16 # 16 bits per sample
channels = 2
fs = 16000 # Record at 16000 samples per second
seconds = 3
filename = "predict.wav"

p = pyaudio.PyAudio() # Create an interface to PortAudio

# print("-------------------------------------------------------------------------------------------")
print("\033[31m[*]\033[0m Recording")

stream = p.open(format=sample_format,
channels=channels,
rate=fs,
frames_per_buffer=chunk,
input=True)

frames = [] # Initialize array to store frames

# Store data in chunks for 3 seconds
for i in range(0, int(fs / chunk * seconds)):
data = stream.read(chunk)
frames.append(data)

# Stop and close the stream
stream.stop_stream()
stream.close()
# Terminate the PortAudio interface
p.terminate()

print("\033[31m[*]\033[0m Finished recording")
# print("-------------------------------------------------------------------------------------------")
# Save the recorded data as a WAV file
wf = wave.open(filename, 'wb')
wf.setnchannels(channels)
wf.setsampwidth(p.get_sample_size(sample_format))
wf.setframerate(fs)
wf.writeframes(b''.join(frames))
wf.close()

print("\033[31m[*]\033[0m Processing")
# print("\033[31m[*]\033[0m Get Ready!")

# time.sleep(5)

# """ Taking the voice input """

# chunk = 1024 # Record in chunks of 1024 samples
# sample_format = pyaudio.paInt16 # 16 bits per sample
# channels = 2
# fs = 16000 # Record at 16000 samples per second
# seconds = 3
# filename = "predict.wav"

# p = pyaudio.PyAudio() # Create an interface to PortAudio

# # print("-------------------------------------------------------------------------------------------")
# print("\033[31m[*]\033[0m Recording")

# stream = p.open(format=sample_format,
# channels=channels,
# rate=fs,
# frames_per_buffer=chunk,
# input=True)

# frames = [] # Initialize array to store frames

# # Store data in chunks for 3 seconds
# for i in range(0, int(fs / chunk * seconds)):
# data = stream.read(chunk)
# frames.append(data)

# # Stop and close the stream
# stream.stop_stream()
# stream.close()
# # Terminate the PortAudio interface
# p.terminate()

# print("\033[31m[*]\033[0m Finished recording")
# # print("-------------------------------------------------------------------------------------------")
# # Save the recorded data as a WAV file
# wf = wave.open(filename, 'wb')
# wf.setnchannels(channels)
# wf.setsampwidth(p.get_sample_size(sample_format))
# wf.setframerate(fs)
# wf.writeframes(b''.join(frames))
# wf.close()

# print("\033[31m[*]\033[0m Processing")
"""Pre-processing Noise"""

# If folder noise, does not exist, create it, otherwise do nothing
Expand Down Expand Up @@ -234,27 +234,33 @@ def predict(path, labels):
if y_pred[index] == 0:
print("\033[31m[*]\033[0m Voice Assistant triggered")
print("\033[31m[*]\033[0m Welcome user Harshit!")
user.user = 1
time.sleep(2)
import voice_assistant_cli
# user.user = 1
return 1
# time.sleep(2)
# import voice_assistant_cli
elif y_pred[index] == 1:
print("\033[31m[*]\033[0m Welcome user 1")
user.user = 2
time.sleep(2)
import voice_assistant_cli
return 2
# user.user = 2
# time.sleep(2)
# import voice_assistant_cli
elif y_pred[index] == 2:
print("\033[31m[*]\033[0m Welcome user 2")
user.user = 3
time.sleep(2)
import voice_assistant_cli
return 3
# user.user = 3
# time.sleep(2)
# import voice_assistant_cli
else:
print("\033[31m[*]\033[0m User not recognised! Returning to the general user!")
time.sleep(2)
import voice_assistant_cli
return 0
# time.sleep(2)
# import voice_assistant_cli


""" Predict """
path = ["predict.wav"]
labels = ["unknown"]
model = tf.keras.models.load_model('../model.h5') # path to the saved keras model
predict(path, labels)
def main(wav_file):
    """Run speaker recognition on one WAV file and return the user id.

    Args:
        wav_file: Path to the WAV recording to classify. It is wrapped in a
            single-element list and handed to ``predict`` as the list of
            audio paths.

    Returns:
        Whatever ``predict`` returns for the recording; the visible branches
        of ``predict`` return 1, 2 or 3 for a recognised speaker and 0 when
        the speaker is not recognised.
    """
    # ``predict`` only receives (path, labels), so the loaded model has to be
    # published at module level for it to be reachable from there; keeping it
    # in a function local would simply discard it when ``main`` returns.
    # NOTE(review): presumably ``predict`` reads the module-level ``model`` -
    # confirm against its full body.
    global model
    # NOTE(review): the path is relative to the current working directory of
    # the calling process, not to this file - confirm for non-CLI callers.
    model = tf.keras.models.load_model('../model.h5')  # path to the saved keras model

    path = [wav_file]
    labels = ["unknown"]
    return predict(path, labels)

54 changes: 27 additions & 27 deletions cli/voice_assistant.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,39 +23,39 @@ def func(command):
if "hello" in command:
current_time = int(strftime('%H'))
if current_time < 12:
# bot.bot("Hello, Good morning, this is your voice assistant.")
# return("Hello, Good morning, this is your voice assistant.")
return ("Hello, Good morning, this is your voice assistant.")
elif 12 <= current_time < 16:
# bot.bot("Hello, Good afternoon, this is your voice assistant.")
# return("Hello, Good afternoon, this is your voice assistant.")
return ("Hello, Good afternoon, this is your voice assistant.")
else:
# bot.bot("Hello, Good evening, this is your voice assistant.")
# return("Hello, Good evening, this is your voice assistant.")
return ("Hello, Good evening, this is your voice assistant.")

elif "who made you" in command:
bot.bot("I was developed by The Team SkyDocs.")
return("I was developed by The Team SkyDocs.")

elif "how are you" in command:
bot.bot("I am great. Hoping the same for you.")
return("I am great. Hoping the same for you.")

elif "your name" in command:
bot.bot("My name is Bella.")
return("I dont have a name yet. Would like to give me one?")

elif "who am i" in command:
whoami.main()

elif "feature" in command:
bot.bot("I have lot of features, Some of my features are given below:")
bot.bot("Say recognise to recognise the user and give presonalised results")
bot.bot("Greetings")
bot.bot("Play Video")
bot.bot("Web Search")
bot.bot("Give Latest News")
bot.bot("Add Notes and many more...")
bot.bot("why not try something and get started.")
return("I have lot of features, Some of my features are given below:")
return("Say recognise to recognise the user and give presonalised results")
return("Greetings")
return("Play Video")
return("Web Search")
return("Give Latest News")
return("Add Notes and many more...")
return("why not try something and get started.")

elif "recognise" in command:
bot.bot("You will be redirected to the recognition part!")
return("You will be redirected to the recognition part!")
cur_dir = os.getcwd()
parent_dir = os.path.dirname(cur_dir)
if (cur_dir == os.path.join(parent_dir, "cli")):
Expand All @@ -67,14 +67,14 @@ def func(command):
call(["python", "predict.py"])

elif "joke" in command:
bot.bot(pyjokes.get_joke())
return(pyjokes.get_joke())

elif "google" in command:
search.search()

elif 'time' in command:
now = datetime.datetime.now()
bot.bot('Current time is %d hours %d minutes' % (now.hour, now.minute))
return('Current time is %d hours %d minutes' % (now.hour, now.minute))

elif "play video" in command:
play_video.play_video()
Expand All @@ -89,12 +89,12 @@ def func(command):
notes.main()

elif "gmail" in command:
bot.bot("sure, opening gmail")
return("sure, opening gmail")
url_mail = "https://www.gmail.com"
webbrowser.open(url_mail)

elif "wikipedia" in command:
bot.bot("Sure! Here you go.")
return("Sure! Here you go.")
url_wiki = "https://www.wikipedia.org/"
webbrowser.open(url_wiki)

Expand All @@ -105,37 +105,37 @@ def func(command):
map.map()

elif "shutdown" in command:
bot.bot("You are going to poweroff your system. Are you sure?")
return("You are going to poweroff your system. Are you sure?")
listen()
if "yes" in command:
os.system("poweroff")
else:
bot.bot("You have aborted the process. Returning back to previous state")
return("You have aborted the process. Returning back to previous state")
main(listen())

elif 'search' in command:
search.search()

elif "remind" in command:
bot.bot("What shall I remind you about?")
return("What shall I remind you about?")
text = listen()
bot.bot("In how many minutes ?")
return("In how many minutes ?")
local_time = float(listen())
local_time = local_time * 60
time.sleep(local_time)
bot.bot(text)
return(text)

elif "calendar" in command:
calendar.calendar()

elif "bye" in command:
bot.bot("Bye!")
return("Bye!")
sys.exit()

elif "thank you" in command:
bot.bot("Pleasure to serve you!")
return("Pleasure to serve you!")
sys.exit()

else:
# bot.bot("I am sorry, I am unable to process your request.")
# return("I am sorry, I am unable to process your request.")
return command
20 changes: 16 additions & 4 deletions voice_assistant_cloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,22 @@ def general():
return response


# @app.route('/recongise', methods=['POST'])
# def recognise():
# # get the wav file, and pass it to the predict function
#
@app.route('/recognise', methods=['POST'])
@app.route('/recongise', methods=['POST'])  # misspelt legacy path, kept so existing callers do not break
def recognise():
    """Identify the speaker of a base64-encoded WAV recording.

    Expects a JSON body of the form ``{"user_audio": "<base64 WAV data>"}``.
    The audio is decoded, written to a temporary ``.wav`` file and the path of
    that file is passed to ``cli.predict.main``, which wraps its argument as a
    file path.

    Returns:
        A JSON response ``{"user_id": <id>}`` on success, or a JSON error
        message with HTTP status 400 when ``user_audio`` is missing or is not
        valid base64.
    """
    import base64
    import binascii
    import os
    import tempfile

    # Imported lazily, as in the original, so the model code is only pulled in
    # when a recognition request actually arrives.
    from cli import predict

    data_ret = request.get_json(silent=True) or {}
    encoded_audio = data_ret.get("user_audio")
    if not encoded_audio:
        return jsonify({"error": "missing 'user_audio' field"}), 400

    try:
        wav_bytes = base64.b64decode(encoded_audio)
    except (binascii.Error, TypeError, ValueError):
        return jsonify({"error": "'user_audio' is not valid base64"}), 400

    # predict.main treats its argument as a path to a WAV file, so the decoded
    # bytes must be materialised on disk before they can be classified.
    fd, wav_path = tempfile.mkstemp(suffix=".wav")
    try:
        with os.fdopen(fd, "wb") as wav_handle:
            wav_handle.write(wav_bytes)
        user_id = predict.main(wav_path)
    finally:
        # Always remove the temporary recording, even if prediction fails.
        os.remove(wav_path)

    return jsonify({"user_id": user_id})


if __name__ == '__main__':
app.run(host='127.0.0.1', port=8080)
63 changes: 63 additions & 0 deletions voice_assistant_front_end.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,66 @@
import speech_recognition as sr
import pyttsx3
import json
import time
import pyaudio
import wave

def recognise():
    """Record a short voice sample and ask the server who is speaking.

    Records 3 seconds of 16 kHz, 2-channel, 16-bit audio from the default
    input device, saves it as ``predict.wav``, sends the file base64-encoded
    to the recognition endpoint and greets the user according to the id the
    server returns.

    Returns:
        The ``user_id`` value from the server's JSON reply (0 is treated as
        the general, unrecognised user).

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    import base64

    print("\033[31m[*]\033[0m You will be asked to speak for few seconds for the recognition of the speaker.")
    time.sleep(3)
    print("\033[31m[*]\033[0m Get Ready!")

    # --- Taking the voice input ---
    chunk = 1024  # Record in chunks of 1024 samples
    sample_format = pyaudio.paInt16  # 16 bits per sample
    channels = 2
    fs = 16000  # Record at 16000 samples per second
    seconds = 3
    filename = "predict.wav"

    p = pyaudio.PyAudio()  # Create an interface to PortAudio
    try:
        sample_width = p.get_sample_size(sample_format)

        print("\033[31m[*]\033[0m Recording")

        stream = p.open(format=sample_format,
                        channels=channels,
                        rate=fs,
                        frames_per_buffer=chunk,
                        input=True)
        try:
            # Read enough chunks to cover `seconds` seconds of audio.
            frames = [stream.read(chunk)
                      for _ in range(int(fs / chunk * seconds))]
        finally:
            # Stop and close the stream even if a read fails.
            stream.stop_stream()
            stream.close()
    finally:
        # Terminate the PortAudio interface
        p.terminate()

    print("\033[31m[*]\033[0m Finished recording")

    # Save the recorded data as a WAV file
    with wave.open(filename, 'wb') as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(sample_width)
        wf.setframerate(fs)
        wf.writeframes(b''.join(frames))

    # Encode the file's bytes; decode to str because JSON cannot carry bytes.
    with open(filename, "rb") as wav_handle:
        wav_file = base64.b64encode(wav_handle.read()).decode("ascii")

    # NOTE(review): host, port and path must match what the recognition
    # server actually registers - confirm against voice_assistant_cloud.py.
    url = "http://127.0.0.1:8080/recognise"
    response = requests.post(url, json={"user_audio": wav_file})
    response.raise_for_status()
    user_id = response.json()["user_id"]

    if user_id == 0:
        bot("Welcome back general user")
    else:
        bot("Welcome back user %s" % user_id)

    return user_id



def listen():
Expand All @@ -17,6 +77,9 @@ def listen():
print(command)
except sr.UnknownValueError:
command = listen()
if(command == "recognise"):
recognise()

return command


Expand Down

0 comments on commit 0a98f33

Please sign in to comment.