123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273 |
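"""
Capture recordings through the iFLYTEK presentation pen.

Qt warning recorded by the original author (kept verbatim for reference):
    QObject::connect: Cannot queue arguments of type 'QTextCursor'
    (Make sure 'QTextCursor' is registered using qRegisterMetaType().)
"""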
- import os
- import struct
- import sys
- from functools import partial
- from pathlib import Path
- import json
- from PyQt5 import uic, QtCore
- from PyQt5.QtCore import QUrl
- from PyQt5.QtGui import QIcon, QDesktopServices
- from PyQt5.QtWidgets import QMainWindow, QApplication
- import mspeech_ui_thr as mspeech
- from libs import lib_opus, lib_aiui, lib_player
- from util import constants
- from BusinessHadler import BusinessHadler
# Module-wide business handler; stays None until create_socket_handler()
# instantiates it.
socketHadler = None
# When True, the speech engine is registered with "opus-wb" instead of "raw"
# and the Opus encoder is initialised when the window is shown.
OPUS_FLAG = False
# When True, each transcription result is also spoken through the TTS player.
TTS_FLAG = False
class MainUI(QMainWindow):
    """Controller window of the voice test tool.

    Connects the speech worker thread (``mspeech.Mspeech``), the AIUI engine
    (``lib_aiui.AiuiManager``), the optional Opus encoder and the streaming
    TTS player, and dispatches the messages they emit.

    ``show()`` is overridden: it loads ``ui/form.ui`` into ``self.ui`` and
    shows that form instead of this ``QMainWindow`` itself.
    """

    def __init__(self):
        super().__init__()
        self._run_flag = False  # True while a recognition session is running
        self.tts_player = None
        self._tts_play_end = False
        # Both are created later by show(). They are defined up front so that
        # a slot firing before show() sees None rather than a missing
        # attribute.
        self.ui = None
        self._ist = None
        self.init_thr()

    def init_thr(self):
        """Create the speech worker thread, wire its signal and start it."""
        self._mspeech = mspeech.Mspeech()
        self._mspeech.daemon = True
        # Connect before starting so that nothing emitted during start-up
        # is lost.
        self._mspeech.sign_thread_send.connect(self._proc)
        self._mspeech.start()

    def _proc(self, result):
        """Handle one message emitted by the worker thread.

        Args:
            result: Mapping with a ``"code"`` entry and, for audio messages,
                a ``"data"`` entry.

        Any exception is printed and swallowed so that one bad message cannot
        take down the slot.
        """
        try:
            code = result["code"]
            if code == constants.MSPEECH_AIUI_SEND_DATA:
                # Audio frames are not printed. The write is deferred to the
                # next event-loop iteration.
                QtCore.QTimer.singleShot(
                    0, partial(self._ist.audio_write, result["data"])
                )
                return
            print("UI_main_proc_real", code)
            if code == constants.MSPEECH_AIUI_RESET_DICTATION:
                self._start_record()
            elif code == constants.MSPEECH_AIUI_STOP_WS:
                self._stop_record()
        except Exception as e:  # slot boundary: report and keep running
            print(e)

    def _load_ui(self):
        """Load ``ui/form.ui``, set its title and icon, and show it."""
        self.ui = uic.loadUi("ui/form.ui")
        self.ui.setWindowTitle("测试工具")
        self.ui.setWindowIcon(QIcon(str(Path.cwd().joinpath("images", "logo.ico"))))
        # Button wiring is currently disabled:
        # self.ui.btn_filedialog.clicked.connect(self._open_dir)
        # self.ui.btn_start.clicked.connect(self._start_record)
        # self.ui.btn_stop.clicked.connect(self._stop_record)
        self.ui.show()

    def _open_dir(self):
        """Open the directory returned by ``gen_home_doc_path()``.

        ``QUrl.fromLocalFile`` builds a proper ``file://`` URL on every
        platform, so no shell command is assembled from the path and Windows
        drive letters are not mistaken for a URL scheme.
        """
        QDesktopServices.openUrl(QUrl.fromLocalFile(self.gen_home_doc_path()))

    def _logger(self, text):
        """Append ``text`` to the form's text browser once the form is loaded."""
        if self.ui is not None:
            self.ui.textBrowser.append(text)

    def gen_home_doc_path(self):
        """Return the ``VoiceAssistant`` data directory, creating it if needed.

        On Windows the base is the folder reported by ``SHGetFolderPathW`` for
        id 5; elsewhere it is derived from the ``HOME`` environment variable.

        Returns:
            The directory path as a string.
        """
        dir_name = "VoiceAssistant"
        if sys.platform == "win32":
            import ctypes.wintypes

            path_id = 5  # 5: Documents, 0: Desktop
            buf = ctypes.create_unicode_buffer(ctypes.wintypes.MAX_PATH)
            ctypes.windll.shell32.SHGetFolderPathW(None, path_id, None, 0, buf)
            # An empty buffer means the lookup failed; fall back to the home
            # directory instead of silently using a relative path.
            base = buf.value or os.path.expanduser("~")
        else:
            home = os.environ.get(self.get_home_env()) or os.path.expanduser("~")
            # NOTE(review): dir_name is joined here and again below, so the
            # result is <home>/VoiceAssistant/VoiceAssistant. Kept as-is so
            # existing data stays reachable - confirm whether it is intended.
            base = os.path.join(home, dir_name)
        dir_path = os.path.join(base, dir_name)
        os.makedirs(dir_path, exist_ok=True)
        return dir_path

    @staticmethod
    def get_home_env():
        """Return the name of the environment variable holding the user base dir."""
        return 'APPDATA' if sys.platform == "win32" else 'HOME'

    def _init_ist(self):
        """Create the AIUI manager and route its ``trigger`` signal to ``_proc_aiui``."""
        self._ist = lib_aiui.AiuiManager()
        self._ist.trigger.connect(self._proc_aiui)
        self._logger("ISR引擎初始化成功!")

    def _init_opus(self):
        """Create the Opus encoder state and derive the per-frame sizes.

        Both sizes are computed for a 0.02 s frame at ``constants.SAMPLE_RATE``.
        """
        self.opus = lib_opus.Opus()
        self.enc = self.opus.create_state(
            constants.SAMPLE_RATE,
            constants.CHANNEL_NUM,
            constants.OPUS_APPLICATION_VOIP,
        )
        self.opus.encoder_ctl(self.enc, lib_opus.set_signal, lib_opus.OPUS_SIGNAL_VOICE)
        self.frame_size = int(constants.SAMPLE_RATE * 0.02)
        self.pcm_length = int(constants.SAMPLE_RATE / 8 * 16 * 1 * 0.02)
        self._logger("OPUS始化成功!")

    def _tts_player_callback(self, msg):
        """React to status strings reported by the TTS player.

        ``"--end--"`` quits the current player; ``"--stop--"`` clears
        ``_tts_play_end``.
        """
        print(msg)
        if msg == "--end--":
            if self.tts_player is not None:
                self.tts_player.quit()
        elif msg == "--stop--":
            self._tts_play_end = False

    def tts_play(self, text, vcn=0, speed_mode=0):
        """Speak ``text``, or resume the current player if it is paused.

        Args:
            text: Text to synthesise.
            vcn: Voice selector forwarded to the player.
            speed_mode: Speed selector forwarded to the player.
        """
        # The result is unused while the duration notification stays disabled.
        # The call itself is kept in case the helper validates its input.
        # TODO confirm and remove.
        lib_player.calculate_duration(text, speed_mode)

        # A paused player is resumed; the new text is not played in that case.
        if self.tts_player and self.tts_player.is_paused():
            self.tts_player.resume()
            return

        self.tts_player = lib_player.TTSSteamPlayer(self._tts_player_callback)
        self.tts_player.play({"text": text, "vcn": vcn, "speed": speed_mode})

    def tts_pause(self):
        """Pause the current TTS player, if any."""
        if self.tts_player:
            self.tts_player.pause()

    def tts_stop(self):
        """Stop and quit the current TTS player, if any."""
        if self.tts_player:
            self.tts_player.stop()
            self.tts_player.quit()

    def _start_record(self):
        """Start a recognition session unless one is already running."""
        print("开始录音")
        if not self._run_flag:
            self._run_flag = True
            self._start_ist()

    def _stop_record(self):
        """Mark the session as stopped and tell the engine to stop."""
        print("结束录音")
        self._run_flag = False
        self._start_ist(stop=True)

    def _proc_aiui(self, msg):
        """Dispatch one message emitted by the AIUI manager.

        Args:
            msg: Mapping with ``"code"`` and ``"data"`` entries.
        """
        code = msg.get("code")
        data = msg.get("data")
        if code == constants.AITEST_AIUI_START:
            # Engine started.
            if data:
                self._logger("IST引擎启动成功!")
        elif code == constants.AITEST_AIUI_ERROR:
            # Engine error.
            self._logger(f"IST错误:{data}")
        elif code == constants.AITEST_AIUI_RESULT:
            # Transcription result.
            self._logger(f"IST转写结果:{data}")
            self._stop_record()
            if TTS_FLAG:
                self.tts_play(data)
        elif code == constants.AITEST_AIUI_LOG:
            self._logger(data)
        elif code == constants.AITEST_AIUI_NLP:
            self._logger(data)
            self._handle_nlp(data)

    @staticmethod
    def _parse_intent(data):
        """Extract ``(action, slots)`` from an NLP payload.

        Returns ``None`` when the payload carries no usable intent (empty
        intent, non-zero ``rc``, or missing fields).

        Raises:
            ValueError: If ``data`` is not valid JSON.
        """
        payload = json.loads(str(data))
        try:
            intent = payload["intent"]
            if not intent or intent["rc"] != 0:
                return None
            first = intent["semantic"][0]
            return first["intent"], first["slots"]
        except (KeyError, IndexError, TypeError):
            return None

    def _handle_nlp(self, data):
        """Run the business handler for an NLP result and speak its reply."""
        print('开始业务逻辑')
        try:
            parsed = self._parse_intent(data)
        except ValueError as err:
            self._logger(f"NLP结果解析失败:{err}")
            return
        if parsed is None:
            # NOTE(review): the original's indentation was lost. The fallback
            # speech below is assumed to belong to the "empty reply" case, so
            # a missing intent is ignored here - confirm.
            return

        intent_action, intent_slots = parsed
        print("intent_action: ", intent_action)
        print("intent_solts: ", intent_slots)

        if socketHadler is None:
            self._logger("业务处理器尚未初始化,已忽略本次指令")
            return

        # Author's note: calling the handler directly caused a double
        # dispatch. When the command opens a browser, webbrowser.open()
        # blocks, the message is delivered again and the browser opens twice.
        # The workaround is to disconnect the signal around the call. The
        # author reports the problem did not reproduce under PyQt6. Another
        # option the author noted is to launch the browser from the handler
        # via os.system('start <url>').
        self._ist.trigger.disconnect(self._proc_aiui)
        try:
            tts_text = socketHadler.handler(intent_action, intent_slots)
        finally:
            # Always reconnect, otherwise a failing handler would leave the
            # engine's messages permanently unhandled.
            self._ist.trigger.connect(self._proc_aiui)

        understood = bool(tts_text)
        self.tts_stop()
        self.tts_play(tts_text if understood else "我没有理解您说的话")
        if not understood:
            print("我没有理解你说的话啊")

    def _start_ist(self, stop=False):
        """Start the engine, or stop it when ``stop`` is true.

        On start the engine is registered with ``"opus-wb"`` when
        ``OPUS_FLAG`` is set and with ``"raw"`` otherwise.
        """
        if stop:
            print("stop")
            self._ist.stop_speech()
            return
        print("start")
        ext_params = "opus-wb" if OPUS_FLAG else "raw"
        self._ist.regist_engine(ext_params)
        self._ist.start_speech()

    def _compress_buf(self, data):
        """Opus-encode one PCM frame and prefix it with its length.

        Returns:
            ``bytes``: a 2-byte big-endian length followed by the encoded
            payload.
        """
        out = self.opus.encode(self.enc, bytes(data), self.frame_size, self.pcm_length)
        return struct.pack(">H", len(out)) + bytes(out)

    def show(self):
        """Load and show the form, then initialise the engine (and Opus if enabled)."""
        self._load_ui()
        self._init_ist()
        if OPUS_FLAG:
            self._init_opus()

    def closeEvent(self, event):
        """Quit the application when this window is closed."""
        super().closeEvent(event)
        QApplication.quit()
def create_socket_handler():
    """Create the module-wide ``BusinessHadler`` once.

    When a ``QApplication`` exists, the handler's ``cleanup`` is connected to
    its ``aboutToQuit`` signal. The running application is looked up through
    ``QApplication.instance()``, so the function does not depend on a global
    ``app`` that is only defined when the file runs as a script.
    """
    global socketHadler
    print('create_socket_handler')
    if socketHadler is not None:
        return
    socketHadler = BusinessHadler()
    qt_app = QApplication.instance()
    if qt_app is not None:
        # Run the handler's cleanup when the application is shutting down.
        qt_app.aboutToQuit.connect(socketHadler.cleanup)
if __name__ == '__main__':
    # The QApplication must exist before any widget is created.
    app = QApplication(sys.argv)
    win = MainUI()
    win.show()
    create_socket_handler()
    # Hand control to the Qt event loop and exit with its return code.
    sys.exit(app.exec_())
|