Forráskód Böngészése

讯飞智能回复,音频类型更改

linhaohong 1 éve
szülő
commit
c851eec230

+ 43 - 25
Controller/SmartReplyController.py

@@ -5,8 +5,10 @@ from Object.ResponseObject import ResponseObject
 from django.views import View
 import time
 from Object.WsParam.AIChatObject import ChatClient
+from Object.WsParam.AudioProcessorObject import AudioProcessor
 from Object.WsParam.WsParamRecognizeObject import WsParamRecognize
 from Object.WsParam.WsParamSynthesizeObject import WsParamSynthesize
+from django.http import FileResponse
 
 
 class WsParamService(View):
@@ -31,50 +33,66 @@ class WsParamService(View):
             return response.json(414)
 
     def smart_reply(self, request, request_dict, response):
+        app_id = "fcff8f4b"
+        api_key = "037571e7285e64e8dc321fa5b937fea2"
+        api_secret = "ZTU3NWMyNTI1MTI4NTU5ZGUxMDZhNmQ5"
+        gpt_url = "wss://spark-api.xf-yun.com/v3.5/chat"
+        domain = "generalv3.5"
         try:
-            audio = request.FILES.get('audio', None)
             system = request_dict.get('system', None)
+            audio = request.FILES.get('audio', None)
             history = request_dict.get('history', None)
-            audio_type = request_dict.get('audioType', 'pcm')
+            if audio is None:
+                return response.json(444)
+
             save_directory = 'static/demo_files/'
-            if not os.path.exists(save_directory):
-                os.makedirs(save_directory)
+            os.makedirs(save_directory, exist_ok=True)
 
-            start = time.time()
-            audio_path = os.path.join(save_directory, audio.name)
-            with open(audio_path, 'wb') as destination:
+            original_audio_path = os.path.join(save_directory, audio.name)
+            with open(original_audio_path, 'wb') as destination:
                 for chunk in audio.chunks():
                     destination.write(chunk)
-            end = time.time()
-            LOGGER.info(f"********smartReply保存文件所需时间为{end-start}秒********")
-            APPID = "fcff8f4b"
-            APIKey = "037571e7285e64e8dc321fa5b937fea2"
-            APISecret = "ZTU3NWMyNTI1MTI4NTU5ZGUxMDZhNmQ5"
-            gpt_url = 'wss://spark-api.xf-yun.com/v3.5/chat'
-            domain = 'generalv3.5'
-            AudioFile = audio_path
 
+            # 转码
+            pcm_audio_path = os.path.splitext(original_audio_path)[0] + '.pcm'
+            audio_processor = AudioProcessor()
+            audio_processor.convert_audio(original_audio_path, pcm_audio_path)
+            # 传入语音 -> 转文字 APPID, APISecret, APIKey, AudioFile
             start = time.time()
-            # 传入语音 -> 转文字 APPID, APISecret, APIKey
-            wsParamRecognize = WsParamRecognize(APPID, APISecret, APIKey, AudioFile)
+            audio_file = pcm_audio_path
+            wsParamRecognize = WsParamRecognize(app_id, api_secret, api_key, audio_file)
             query = wsParamRecognize.start()
             end = time.time()
-            LOGGER.info(f"********smartReply语音转文字所需时间为{end - start}秒,内容为{query}********")
+            LOGGER.info(f"********smart_reply语音转文字所需时间为{end - start}秒,内容为{query}********")
+            # 删除文件 pcm_audio_path 和 original_audio_path
+            os.remove(pcm_audio_path)
+            os.remove(original_audio_path)
 
-            start = time.time()
             # 大语言模型 APPID, APIKey, APISecret, gpt_url, domain, query, history=None, system=None
-            chat = ChatClient(APPID, APIKey, APISecret, gpt_url, domain, query, history, system)
+            start = time.time()
+            chat = ChatClient(app_id, api_key, api_secret, gpt_url, domain, query, history, system)
             answer = chat.start()
             end = time.time()
             LOGGER.info(f"********smartReplyAI回复所需时间为{end - start}秒,内容为{answer}********")
 
+            # 文字转音频 APPID, APIKey, APISecret, Text, AudioName="demo"
             start = time.time()
-            # 文字转编码 self, APPID, APIKey, APISecret, Text, AudioType = "pcm"
-            wsParamSynthesize = WsParamSynthesize(APPID, APIKey, APISecret, answer, audio_type)
-            answer_base = wsParamSynthesize.start()
+            audio_name = f"{os.path.splitext(audio.name)[0]}_answer"
+            wsParamSynthesize = WsParamSynthesize(app_id, api_key, api_secret, answer, audio_name)
+            wsParamSynthesize.start()
+            answer_audio_path = os.path.splitext(original_audio_path)[0] + '_answer.mp3'
+            g711a_audio_path = os.path.splitext(answer_audio_path)[0] + '.g711a'
+            print(answer_audio_path, g711a_audio_path)
+            # 如果有旧文件就删掉
+            if os.path.exists(g711a_audio_path):
+                os.remove(g711a_audio_path)
+            audio_processor.convert_audio(answer_audio_path, g711a_audio_path)
+            os.remove(answer_audio_path)
             end = time.time()
-            LOGGER.info(f"********文字转编码所需时间为{end - start}秒********")
-            return response.json(0, answer_base)
+            LOGGER.info(f"********smartReply文字转编码所需时间为{end - start}秒********")
+            return FileResponse(open(g711a_audio_path, 'rb'), as_attachment=True,
+                                filename=os.path.basename(g711a_audio_path))
         except Exception as e:
             LOGGER.error('*****WsParamService.smart_reply:errLine:{}, errMsg:{}'
                          .format(e.__traceback__.tb_lineno, repr(e)))
+            return response.json(500, 'error_line:{}, error_msg:{}'.format(e.__traceback__.tb_lineno, repr(e)))

+ 48 - 0
Object/WsParam/AudioProcessorObject.py

@@ -0,0 +1,48 @@
+import subprocess
+import os
+
+import subprocess
+
+
+class AudioProcessor:
+    """Convert audio files between formats by running the ``ffmpeg`` executable."""
+
+    def __init__(self, default_sample_rate=8000, default_channels=1):
+        """
+        Args:
+            default_sample_rate (int): Sample rate in Hz used for G.711 A-law audio.
+            default_channels (int): Channel count used for G.711 A-law audio.
+        """
+        self.default_sample_rate = default_sample_rate
+        self.default_channels = default_channels
+
+    def convert_audio(self, input_path, output_path):
+        """
+        Convert ``input_path`` to ``output_path`` with FFmpeg, overwriting the output.
+
+        A ``.g711a`` input is read as raw A-law; a ``.pcm`` output is written as raw
+        signed 16-bit little-endian PCM and a ``.g711a`` output as raw A-law. Any other
+        extension is left to FFmpeg's own format detection.
+
+        Raises:
+            subprocess.CalledProcessError: If FFmpeg exits with a non-zero status.
+            FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
+        """
+        input_ext = os.path.splitext(input_path)[1].lstrip('.')
+        output_ext = os.path.splitext(output_path)[1].lstrip('.')
+        layout_args = ['-ar', str(self.default_sample_rate), '-ac', str(self.default_channels)]
+        ffmpeg_cmd = ['ffmpeg', '-y']
+        # Raw A-law has no header, so format, rate and channels are given explicitly.
+        if input_ext == 'g711a':
+            ffmpeg_cmd.extend(['-f', 'alaw'] + layout_args)
+        ffmpeg_cmd.extend(['-i', input_path])
+
+        if output_ext == 'pcm':
+            ffmpeg_cmd.extend(['-acodec', 'pcm_s16le', '-f', 's16le'])
+        elif output_ext == 'g711a':
+            ffmpeg_cmd.extend(layout_args + ['-acodec', 'pcm_alaw', '-f', 'alaw'])
+        ffmpeg_cmd.append(output_path)
+
+        print(' '.join(ffmpeg_cmd))  # Echo the command for debugging.
+        # Pass argv as a list (no shell) so spaces or metacharacters in paths stay literal.
+        subprocess.run(ffmpeg_cmd, check=True)

+ 2 - 2
Object/WsParam/WsParamRecognizeObject.py

@@ -10,7 +10,7 @@ import ssl
 from wsgiref.handlers import format_date_time
 from datetime import datetime
 from time import mktime
-import _thread as thread
+import threading
 
 """
 调用讯飞模型 语音转文字
@@ -99,7 +99,7 @@ class WsParamRecognize:
                     time.sleep(intervel)
             ws.close()
 
-        thread.start_new_thread(run, ())
+        threading.Thread(target=run).start()
 
     def start(self):
         websocket.enableTrace(False)

+ 25 - 16
Object/WsParam/WsParamSynthesizeObject.py

@@ -9,7 +9,7 @@ import ssl
 from wsgiref.handlers import format_date_time
 from datetime import datetime
 from time import mktime
-import threading  # 使用更现代的threading代替_thread
+import threading
 
 """
 调用讯飞模型 文字转语音 
@@ -17,20 +17,21 @@ import threading  # 使用更现代的threading代替_thread
 
 
 class WsParamSynthesize:
-    def __init__(self, APPID, APIKey, APISecret, Text, AudioType="pcm"):
+    def __init__(self, APPID, APIKey, APISecret, Text, AudioName="demo"):
         self.APPID = APPID
         self.APIKey = APIKey
         self.APISecret = APISecret
         self.Text = Text
-        self.audio_data = ""
+        self.AudioType = "mp3"
+        self.AudioName = AudioName
 
         # 初始化其他需要的属性
         self.CommonArgs = {"app_id": self.APPID}
-        if AudioType == "mp3":
-            self.BusinessArgs = {"aue": "lame", "auf": "audio/L16;rate=16000", "vcn": "xiaoyan", "tte": "utf8",
+        if self.AudioType == "mp3":
+            self.BusinessArgs = {"aue": "lame", "auf": "audio/L16;rate=8000", "vcn": "xiaoyan", "tte": "utf8",
                                  "sfl": 1}
         else:
-            self.BusinessArgs = {"aue": "raw", "auf": "audio/L16;rate=16000", "vcn": "xiaoyan", "tte": "utf8"}
+            self.BusinessArgs = {"aue": "raw", "auf": "audio/L16;rate=8000", "vcn": "xiaoyan", "tte": "utf8"}
         self.Data = {"status": 2, "text": str(base64.b64encode(self.Text.encode('utf-8')), "UTF8")}
 
     def create_url(self):
@@ -66,20 +67,29 @@ class WsParamSynthesize:
             message = json.loads(message)
             code = message["code"]
             sid = message["sid"]
+            audio = message["data"]["audio"]
+
+            if code != 0:
+                errMsg = message["message"]
+                print(f"Error: {errMsg}, code: {code}")
+                return None
+
+            audio = base64.b64decode(audio)
             status = message["data"]["status"]
+
             if status == 2:
+                print("WebSocket is closed")
                 ws.close()
             if code != 0:
                 errMsg = message["message"]
-                print(f"sid:{sid} call error:{errMsg} code is:{code}")
+                print("sid:%s call error:%s code is:%s" % (sid, errMsg, code))
             else:
-                audio = message["data"]["audio"]
-                self.audio_data = audio
-                if status == 2:  # 最后一帧
-                    print("WebSocket connection is closed.")
-                    ws.close()
+                with open(f'static/demo_files/{self.AudioName}.{self.AudioType}', 'ab') as f:
+                    f.write(audio)
+
         except Exception as e:
-            print("Receive message, but parse exception:", e)
+            print("Exception while parsing message:", e)
+            return None
 
     # on_error和on_close方法类似地修改,可以访问类实例的属性
     def on_error(self, error):
@@ -95,7 +105,7 @@ class WsParamSynthesize:
             d = json.dumps(d)
             ws.send(d)
 
-        threading.Thread(target=run).start()  # 使用threading.Thread以提供更好的线程管理
+        threading.Thread(target=run).start()
 
     def start(self):
         websocket.enableTrace(False)
@@ -104,5 +114,4 @@ class WsParamSynthesize:
                                          on_error=lambda msg: self.on_error(msg),
                                          on_close=self.on_close,
                                          on_open=lambda ws: self.on_open(ws))  # 使用 lambda 来确保 ws 参数传递
-        self.ws.run_forever(sslopt={"cert_reqs": ssl.CERT_NONE})
-        return self.audio_data
+        self.ws.run_forever(sslopt={"cert_reqs": ssl.CERT_NONE})