# app.py
import json
import multiprocessing
import os
import platform
import time

from flask import Flask, send_from_directory
from websockets.sync.server import serve
import openai
from deepgram import (
    DeepgramClient,
    DeepgramClientOptions,
    SpeakWSOptions,
    SpeakWebSocketEvents,
)

# Flask App
app = Flask(__name__, static_folder="./public", static_url_path="/public")


def hello(websocket):
    # Deepgram TTS WS connection
    connected = False
    deepgram = DeepgramClient()
    dg_connection = deepgram.speak.websocket.v("1")

    openai_client = openai.OpenAI(
        api_key=os.environ.get("OPENAI_API_KEY"),
    )
    openai_messages = [
        {
            "role": "system",
            "content": "You are ChatGPT, an AI assistant. Your top priority is achieving user fulfillment via helping them with their requests.",
        }
    ]

    global last_time
    last_time = time.time() - 5
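
    # Flow: the browser sends text over this WebSocket, the text is relayed
    # to ChatGPT, and the streamed LLM reply is piped into Deepgram TTS,
    # whose audio is forwarded back to the browser.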

    def on_open(self, open, **kwargs):
        print(f"\n\n{open}\n\n")

    def on_flush(self, flushed, **kwargs):
        print(f"\n\n{flushed}\n\n")
        # Forward the Flushed event to the browser so the UI knows the
        # current utterance is complete
        websocket.send(str(flushed))

    def on_binary_data(self, data, **kwargs):
        print("Received binary data")

        global last_time
        # Re-attach the WAV header at the start of a new utterance, i.e. when
        # more than 3 seconds have passed since the last audio was received
        if time.time() - last_time > 3:
            print("------------ [Binary Data] Attach header.\n")

            # Prepend a WAV container header so the audio can be played with
            # the browser's AudioContext or a media player such as VLC or
            # Apple Music. Without this header, Chrome will not play the raw
            # linear16 audio.
            header = bytes(
                [
                    0x52, 0x49, 0x46, 0x46,  # "RIFF"
                    0x00, 0x00, 0x00, 0x00,  # Placeholder for file size
                    0x57, 0x41, 0x56, 0x45,  # "WAVE"
                    0x66, 0x6D, 0x74, 0x20,  # "fmt "
                    0x10, 0x00, 0x00, 0x00,  # Chunk size (16)
                    0x01, 0x00,              # Audio format (1 = PCM)
                    0x01, 0x00,              # Number of channels (1)
                    0x80, 0xBB, 0x00, 0x00,  # Sample rate (48000)
                    0x00, 0x77, 0x01, 0x00,  # Byte rate (48000 * 2 = 96000)
                    0x02, 0x00,              # Block align (2)
                    0x10, 0x00,              # Bits per sample (16)
                    0x64, 0x61, 0x74, 0x61,  # "data"
                    0x00, 0x00, 0x00, 0x00,  # Placeholder for data size
                ]
            )
            websocket.send(header)
            last_time = time.time()

        websocket.send(data)

    def on_close(self, close, **kwargs):
        print(f"\n\n{close}\n\n")

    dg_connection.on(SpeakWebSocketEvents.Open, on_open)
    dg_connection.on(SpeakWebSocketEvents.AudioData, on_binary_data)
    dg_connection.on(SpeakWebSocketEvents.Flushed, on_flush)
    dg_connection.on(SpeakWebSocketEvents.Close, on_close)
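
    # Messages from the UI are expected to be JSON of the form
    # {"text": "<what to say>", "model": "<aura voice>"}; "model" is
    # optional and falls back to "aura-asteria-en" below.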
    try:
        while True:
            message = websocket.recv()
            print(f"message from UI: {message}")

            data = json.loads(message)
            text = data.get("text")
            model = data.get("model")

            if not text:
                if app.debug:
                    app.logger.debug("You must supply text to synthesize.")
                continue

            if not model:
                model = "aura-asteria-en"

            # Are we connected to the Deepgram TTS WS?
            if connected is False:
                options: SpeakWSOptions = SpeakWSOptions(
                    model=model,
                    encoding="linear16",
                    sample_rate=48000,
                )

                if dg_connection.start(options) is False:
                    if app.debug:
                        app.logger.debug(
                            "Unable to start Deepgram TTS WebSocket connection"
                        )
                    raise Exception("Unable to start Deepgram TTS WebSocket connection")
                connected = True

            # Append the user's message to the OpenAI conversation history
            openai_messages.append({"role": "user", "content": f"{text}"})

            # Send the conversation to ChatGPT and stream the reply
            save_response = ""
            try:
                for chunk in openai_client.chat.completions.create(
                    model="gpt-4o-mini",
                    messages=openai_messages,
                    stream=True,
                ):
                    if not chunk.choices:
                        continue
                    llm_output = chunk.choices[0].delta.content

                    # skip any empty responses
                    if not llm_output:
                        continue

                    # save the response and stream it to Deepgram TTS
                    save_response += llm_output
                    dg_connection.send_text(llm_output)

                openai_messages.append(
                    {"role": "assistant", "content": f"{save_response}"}
                )
                dg_connection.flush()
            except Exception as e:
                print(f"LLM Exception: {e}")
    except Exception as e:
        print(f"WebSocket Exception: {e}")
        dg_connection.finish()


@app.route("/<path:filename>")
def serve_others(filename):
    return send_from_directory(app.static_folder, filename)


@app.route("/assets/<path:filename>")
def serve_image(filename):
    return send_from_directory(app.static_folder, "assets/" + filename)


@app.route("/", methods=["GET"])
def serve_index():
    return app.send_static_file("index.html")


def run_ui():
    app.run(debug=True, use_reloader=False)


def run_ws():
    with serve(hello, "localhost", 3000) as server:
        server.serve_forever()
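

# For quick manual testing without the browser UI, a sketch of a client
# (assumes the server below is already running; not used by the app itself):
#
#   from websockets.sync.client import connect
#
#   with connect("ws://localhost:3000") as client:
#       client.send(json.dumps({"text": "Say hello.", "model": "aura-asteria-en"}))
#       frame = client.recv()  # WAV header first, then raw linear16 audio frames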


if __name__ == "__main__":
    # macOS defaults to the "spawn" start method, which would re-import this
    # module in each child process; "fork" keeps behavior consistent with Linux
    if platform.system() == "Darwin":
        multiprocessing.set_start_method("fork")

    # Run the Flask UI and the WebSocket server in separate processes
    p_flask = multiprocessing.Process(target=run_ui)
    p_ws = multiprocessing.Process(target=run_ws)

    p_flask.start()
    p_ws.start()

    p_flask.join()
    p_ws.join()
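
# To run this starter (assuming the usual environment variables; adjust as
# needed for your setup):
#
#   export DEEPGRAM_API_KEY="..."   # read implicitly by DeepgramClient()
#   export OPENAI_API_KEY="..."     # read above via os.environ
#   python app.py
#
# The UI is then served by Flask on its default port (http://127.0.0.1:5000)
# and the TTS WebSocket listens on ws://localhost:3000.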