-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
update ai demo & libs See merge request maix_sw/k230_canmv!256
- Loading branch information
Showing
13 changed files
with
345 additions
and
32 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
from libs.PipeLine import ScopedTiming | ||
from libs.AIBase import AIBase | ||
from libs.AI2D import Ai2d | ||
from media.pyaudio import * # 音频模块 | ||
from media.media import * # 软件抽象模块,主要封装媒体数据链路以及媒体缓冲区 | ||
import media.wave as wave # wav音频处理模块 | ||
import nncase_runtime as nn # nncase运行模块,封装了kpu(kmodel推理)和ai2d(图片预处理加速)操作 | ||
import ulab.numpy as np # 类似python numpy操作,但也会有一些接口不同 | ||
import aidemo # aidemo模块,封装ai demo相关前处理、后处理等操作 | ||
import time # 时间统计 | ||
import struct # 字节字符转换模块 | ||
import gc # 垃圾回收模块 | ||
import os,sys # 操作系统接口模块 | ||
|
||
# 自定义关键词唤醒类,继承自AIBase基类 | ||
class KWSApp(AIBase): | ||
def __init__(self, kmodel_path, threshold, debug_mode=0): | ||
super().__init__(kmodel_path) # 调用基类的构造函数 | ||
self.kmodel_path = kmodel_path # 模型文件路径 | ||
self.threshold=threshold | ||
self.debug_mode = debug_mode # 是否开启调试模式 | ||
self.cache_np = np.zeros((1, 256, 105), dtype=np.float) | ||
|
||
# 自定义预处理,返回模型输入tensor列表 | ||
def preprocess(self,pcm_data): | ||
pcm_data_list=[] | ||
# 获取音频流数据 | ||
for i in range(0, len(pcm_data), 2): | ||
# 每两个字节组织成一个有符号整数,然后将其转换为浮点数,即为一次采样的数据,加入到当前一帧(0.3s)的数据列表中 | ||
int_pcm_data = struct.unpack("<h", pcm_data[i:i+2])[0] | ||
float_pcm_data = float(int_pcm_data) | ||
pcm_data_list.append(float_pcm_data) | ||
# 将pcm数据处理为模型输入的特征向量 | ||
mp_feats = aidemo.kws_preprocess(fp, pcm_data_list)[0] | ||
mp_feats_np = np.array(mp_feats).reshape((1, 30, 40)) | ||
audio_input_tensor = nn.from_numpy(mp_feats_np) | ||
cache_input_tensor = nn.from_numpy(self.cache_np) | ||
return [audio_input_tensor,cache_input_tensor] | ||
|
||
# 自定义当前任务的后处理,results是模型输出array列表 | ||
def postprocess(self, results): | ||
with ScopedTiming("postprocess", self.debug_mode > 0): | ||
logits_np = results[0] | ||
self.cache_np= results[1] | ||
max_logits = np.max(logits_np, axis=1)[0] | ||
max_p = np.max(max_logits) | ||
idx = np.argmax(max_logits) | ||
# 如果分数大于阈值,且idx==1(即包含唤醒词),播放回复音频 | ||
if max_p > self.threshold and idx == 1: | ||
return 1 | ||
else: | ||
return 0 | ||
|
||
|
||
if __name__ == "__main__": | ||
os.exitpoint(os.EXITPOINT_ENABLE) | ||
nn.shrink_memory_pool() | ||
# 设置模型路径和其他参数 | ||
kmodel_path = "/sdcard/app/tests/kmodel/kws.kmodel" | ||
# 其它参数 | ||
THRESH = 0.5 # 检测阈值 | ||
SAMPLE_RATE = 16000 # 采样率16000Hz,即每秒采样16000次 | ||
CHANNELS = 1 # 通道数 1为单声道,2为立体声 | ||
FORMAT = paInt16 # 音频输入输出格式 paInt16 | ||
CHUNK = int(0.3 * 16000) # 每次读取音频数据的帧数,设置为0.3s的帧数16000*0.3=4800 | ||
reply_wav_file = "/sdcard/app/tests/utils/wozai.wav" # kws唤醒词回复音频路径 | ||
|
||
# 初始化音频预处理接口 | ||
fp = aidemo.kws_fp_create() | ||
# 初始化音频流 | ||
p = PyAudio() | ||
p.initialize(CHUNK) | ||
MediaManager.init() #vb buffer初始化 | ||
# 用于采集实时音频数据 | ||
input_stream = p.open(format=FORMAT,channels=CHANNELS,rate=SAMPLE_RATE,input=True,frames_per_buffer=CHUNK) | ||
# 用于播放回复音频 | ||
output_stream = p.open(format=FORMAT,channels=CHANNELS,rate=SAMPLE_RATE,output=True,frames_per_buffer=CHUNK) | ||
# 初始化自定义关键词唤醒实例 | ||
kws = KWSApp(kmodel_path,threshold=THRESH,debug_mode=0) | ||
|
||
try: | ||
while True: | ||
os.exitpoint() # 检查是否有退出信号 | ||
with ScopedTiming("total",1): | ||
pcm_data=input_stream.read() | ||
res=kws.run(pcm_data) | ||
if res: | ||
print("====Detected XiaonanXiaonan!====") | ||
wf = wave.open(reply_wav_file, "rb") | ||
wav_data = wf.read_frames(CHUNK) | ||
while wav_data: | ||
output_stream.write(wav_data) | ||
wav_data = wf.read_frames(CHUNK) | ||
time.sleep(1) # 时间缓冲,用于播放回复声音 | ||
wf.close() | ||
else: | ||
print("Deactivated!") | ||
gc.collect() # 垃圾回收 | ||
except Exception as e: | ||
sys.print_exception(e) # 打印异常信息 | ||
finally: | ||
input_stream.stop_stream() | ||
output_stream.stop_stream() | ||
input_stream.close() | ||
output_stream.close() | ||
p.terminate() | ||
MediaManager.deinit() #释放vb buffer | ||
aidemo.kws_fp_destroy(fp) | ||
kws.deinit() # 反初始化 | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.