{"id":153,"date":"2025-02-21T22:21:52","date_gmt":"2025-02-21T14:21:52","guid":{"rendered":"http:\/\/blog.sayoung.xyz\/?p=153"},"modified":"2026-02-10T15:27:55","modified_gmt":"2026-02-10T07:27:55","slug":"%e5%ad%a6%e4%b9%a0%e6%97%a5%e5%bf%97%e7%bb%93%e5%90%88gpt-sovites%e4%b8%8ellm%ef%bc%882%ef%bc%89","status":"publish","type":"post","link":"https:\/\/blog.sayoung.wang\/index.php\/2025\/02\/21\/%e5%ad%a6%e4%b9%a0%e6%97%a5%e5%bf%97%e7%bb%93%e5%90%88gpt-sovites%e4%b8%8ellm%ef%bc%882%ef%bc%89\/","title":{"rendered":"\u5b66\u4e60\u65e5\u5fd7|\u7ed3\u5408GPT-SoVites\u4e0eLLM\uff082\uff09"},"content":{"rendered":"\n<p>\u5728\u4e0a\u4e00\u7bc7\u5b66\u4e60\u65e5\u5fd7\u4e2d\uff0c\u6211\u4ecb\u7ecd\u4e86GSV\u7684API\u98df\u7528\u65b9\u6cd5\uff0c\u672c\u7bc7\u6211\u4eec\u7814\u7a76\u5982\u4f55\u5c06LLM\u4e0eGSV\u5bf9\u63a5\u3002<\/p>\n\n\n\n<p>\u8fd9\u4e4d\u4e00\u542c\u5f88\u7b80\u5355\uff0c\u53ea\u9700\u8981\u5c06\u5927\u6a21\u578b\u8fd4\u56de\u7684\u7ed3\u679c\u7b80\u5355\u5904\u7406\u4e4b\u540e\u4ea4\u7531GSV\u5904\u7406\u4e4b\u540e\u5c31\u884c\u4e86\u2026\u2026\u5bf9\u5417\uff1f\u601d\u8def\u6ca1\u95ee\u9898\uff0c\u786e\u5b9e\u662f\u53ef\u884c\u7684\uff0c\u4e0d\u8fc7\u7531\u4e8eGSV\u5904\u7406\u6587\u5b57\u7684\u65f6\u95f4\u8fc7\u957f\uff0c\u8fd9\u5bfc\u81f4\u5b9e\u9645\u5ef6\u8fdf\u65f6\u95f4\u8fc7\u957f\uff0c\u4f53\u9a8c\u6781\u5dee\u3002<\/p>\n\n\n\n<p>\u6211\u4eec\u771f\u6b63\u60f3\u8981\u7684\u6548\u679c\u662f\u8fd9\u6837\u7684\uff1a\u8ba9GSV\u5904\u7406\u5b8c\u4e00\u90e8\u5206\u97f3\u9891\u540e\u5c31\u7acb\u5373\u64ad\u653e\uff0c\u5269\u4e0b\u7684\u8fb9\u64ad\u653e\u8fb9\u5904\u7406\u3002\u8981\u5b9e\u73b0\u8fd9\u4e2a\u6548\u679c\uff0c\u6211\u7b2c\u4e00\u4e2a\u60f3\u5230\u7684\u662f\u6d41\u5f0f\u8f93\u51fa\uff0c\u4e0d\u8fc7\u53ef\u60dc\u7684\u662f\u76ee\u524d\u7684GSV\u4f3c\u4e4e\u8fd8\u4e0d\u652f\u6301\u8fd9\u6837\u7684\u8f93\u51fa\u65b9\u5f0f\u3002\u4e0d\u8fc7github\u4e0a\u7684\u5176\u4ed6\u5927\u4f6c\u4eec\u4f7f\u7528\u53e6\u4e00\u79cd\u65b9\u5f0f\u89e3\u51b3\u4e86\u8fd9\u4e2a\u95ee\u9898\uff1a\u5c06\u5927\u6bb5\u5185\u5bb9\u6309\u53e5\u5b50\u62c6\u5206\uff0c\u8fd9\u6837\u4fbf\u53ef\u4ee5\u5145\u5206\u5229\u7528\u64ad\u653e\u524d\u4e00\u6bb5\u97f3\u9891\u7684\u65f6\u95f4\u5904\u7406\u4e0b\u4e00\u6bb5\u5185\u5bb9\uff0c\u8fbe\u5230\u65e0\u7f1d\u8854\u63a5\u3002<\/p>\n\n\n\n<p>\u90a3\u4e48\u73b0\u5728\u7684\u5b9e\u73b0\u601d\u8def\u5982\u4e0b\uff1a<\/p>\n\n\n\n<p>\u7531LLM\u751f\u6210\u56de\u7b54\u2192\u5904\u7406\u6587\u5b57\uff08\u5207\u5206\u3001\u53bb\u9664\u591a\u4f59\u7b26\u53f7\uff09\u2192\u5229\u7528\u591a\u7ebf\u7a0b\u548c\u961f\u5217\u673a\u5236\u8ba9GSV\u751f\u6210\u97f3\u9891\u7684\u540c\u65f6\u64ad\u653e\u97f3\u9891\uff0c\u51cf\u5c11\u7b49\u5f85\u65f6\u95f4\u3002<\/p>\n\n\n\n<p>\u4f5c\u4e3a\u5c0f\u767d\u6765\u8bf4\uff0c\u591a\u7ebf\u7a0b\u673a\u5236\u8fd8\u662f\u6709\u4e9b\u592a\u590d\u6742\u4e86\uff08\uff09\u4e0d\u8fc7\u5e78\u597d\u6709deepseek\u5c0f\u59d0\u7684\u5e2e\u52a9\uff0c\u54b1\u6700\u540e\u8fd8\u662f<hm>\u7f1d\u5408<\/hm>\u5b8c\u6210\u4e86\u8fd9\u4e2a\u9879\u76ee(\u203e\u25e1\u25dd)<\/p>\n\n\n<div class=\"wp-block-syntaxhighlighter-code \"><pre class=\"brush: python; title: ; notranslate\" title=\"\">\nimport requests\nimport json\nimport re\n\nfrom pydub import AudioSegment\nfrom pydub.playback import play\nimport io\n\nimport threading\nimport queue\n\n#\u5206\u5272\u6587\u672c\uff0c\u5904\u7406\u591a\u4f59\u7b26\u53f7#\ndef split_text(text):\n    # \u4f7f\u7528\u6b63\u5219\u8868\u8fbe\u5f0f\u6839\u636e\u53e5\u53f7\u3001\u611f\u53f9\u53f7\u3001\u95ee\u53f7\u3001\u5206\u53f7\u3001\u5192\u53f7\u5206\u5272\u6587\u672c\n    sentences = re.split(r'&#x5B;\u3002\uff01\uff1f\uff1b\uff1a]', text)\n    # \u8fc7\u6ee4\u6389\u7a7a\u5b57\u7b26\u4e32\n    sentences = &#x5B;s.strip() for s in sentences if s.strip()]\n    return sentences\n\n#\u8c03\u7528GSV\u8fdb\u884c\u8f6c\u6362#\ndef tts(txt):\n    url = &quot;http:\/\/127.0.0.1:9880\/tts&quot;\n\n    data = {\n        &quot;text&quot;: f&quot;{txt}&quot;,                   # str.(required) text to be synthesized\n        &quot;text_lang&quot;: &quot;zh&quot;,               # str.(required) language of the text to be synthesized\n        &quot;ref_audio_path&quot;: &quot;D:\/Workspace\/RVC_GSV\/\u5e72\u5458\u62a5\u5230.wav&quot;,         # str.(required) reference audio path\n        &quot;aux_ref_audio_paths&quot;: &#x5B;],    # list.(optional) auxiliary reference audio paths for multi-speaker synthesis\n        &quot;prompt_text&quot;: &quot;\u661f\u8c61\u5b66\u8005\uff0c\u661f\u6781\uff0c\u4ee5\u8fd1\u536b\u5e72\u5458\u8eab\u4efd\u4efb\u804c\uff0c\u4eca\u540e\u5c31\u7531\u60a8\u5dee\u9063\u4e86\uff0c\u535a\u58eb\u3002&quot;,     #\u30a2\u30b9\u30c6\u30b7\u30a2\u3001\u5929\u6587\u5b66\u8005\u517c\u5360\u3044\u5e2b\u3088\u3002\u524d\u885b\u30aa\u30da\u30ec\u30fc\u30bf\u30fc\u3068\u3057\u3066\u304a\u4e16\u8a71\u306b\u306a\u308b\u308f\u3002\u3088\u308d\u3057\u304f\u3001\u30c9\u30af\u30bf\u30fc\u3002       # str.(optional) prompt text for the reference audio\n        &quot;prompt_lang&quot;: &quot;zh&quot;,            # str.(required) language of the prompt text for the reference audio\n        &quot;top_k&quot;: 5,                   # int. top k sampling\n        &quot;top_p&quot;: 1,                   # float. top p sampling\n        &quot;temperature&quot;: 1,             # float. temperature for sampling\n        &quot;text_split_method&quot;: &quot;cut5&quot;,  # str. text split method, see text_segmentation_method.py for details.\n        &quot;batch_size&quot;: 1,              # int. batch size for inference\n        &quot;batch_threshold&quot;: 0.75,      # float. threshold for batch splitting.\n        &quot;split_bucket&quot;: True,          # bool. whether to split the batch into multiple buckets.\n        &quot;speed_factor&quot;:1.0,           # float. control the speed of the synthesized audio.\n        &quot;fragment_interval&quot;:0.3,      # float. to control the interval of the audio fragment.\n        &quot;seed&quot;: -1,                   # int. random seed for reproducibility.\n        &quot;media_type&quot;: &quot;wav&quot;,          # str. media type of the output audio, support &quot;wav&quot;, &quot;raw&quot;, &quot;ogg&quot;, &quot;aac&quot;.\n        &quot;streaming_mode&quot;: False,      # bool. whether to return a streaming response.\n        &quot;parallel_infer&quot;: True,       # bool.(optional) whether to use parallel inference.\n        &quot;repetition_penalty&quot;: 1.35    # float.(optional) repetition penalty for T2S model.          \n    }\n\n    headers = {&quot;Content-Type&quot;: &quot;application\/json&quot;}\n\n    response = requests.post(url, data=json.dumps(data), headers=headers)\n\n    if response.status_code == 200:\n        # \u4fdd\u5b58\u751f\u6210\u7684\u97f3\u9891\n        &quot;&quot;&quot; with open(&quot;output2.wav&quot;, &quot;wb&quot;) as f:\n            f.write(response.content) &quot;&quot;&quot;\n        print(&quot;\u97f3\u9891\u751f\u6210\u6210\u529f\uff01&quot;)\n        return response.content\n    else:\n        print(f&quot;\u8bf7\u6c42\u5931\u8d25\uff0c\u72b6\u6001\u7801\uff1a{response.status_code}, \u9519\u8bef\u4fe1\u606f\uff1a{response.text}&quot;)\n\n\n\n\n#LLM#\ndef ai(content):\n        \n    url = &quot;https:\/\/qianfan.baidubce.com\/v2\/chat\/completions&quot;\n    \n    payload = json.dumps({\n        &quot;model&quot;: &quot;deepseek-v3&quot;,\n        &quot;messages&quot;: &#x5B;\n            {\n                &quot;role&quot;: &quot;user&quot;,\n                &quot;content&quot;: content\n            }\n        ],\n        &quot;disable_search&quot;: False,\n        &quot;enable_citation&quot;: False\n    }, ensure_ascii=False)\n    headers = {\n        'Content-Type': 'application\/json',\n        'appid': '',\n        'Authorization': 'YOUR KEY'\n    }\n    \n    response = requests.request(&quot;POST&quot;, url, headers=headers, data=payload.encode(&quot;utf-8&quot;))\n    \n    print(response.text)\n\n    data = json.loads(response.text)\n    text = re.sub(r'\\s+', '', data&#x5B;&quot;choices&quot;]&#x5B;0]&#x5B;&quot;message&quot;]&#x5B;&quot;content&quot;])\n    text_list = split_text(text)\n\n    with open('result.txt', 'w', encoding= 'utf-8') as f:\n        f.write(text)\n    return text_list\n\n\ndef wav_play(result_wav):\n    audio = AudioSegment.from_file(io.BytesIO(result_wav), format=&quot;wav&quot;)\n    play(audio)\n    print(f&quot;\u6b63\u5728\u64ad\u653e\uff1a{audio}&quot;)\n\ndef process_data(data_list, audio_queue):\n    for text in data_list:\n        audio = tts(text)\n        audio_queue.put(audio)  # \u5c06\u97f3\u9891\u653e\u5165\u961f\u5217\n    audio_queue.put(None)  # \u53d1\u9001\u7ed3\u675f\u4fe1\u53f7\n\ndef play_audio(audio_queue):\n    while True:\n        audio = audio_queue.get()  # \u4ece\u961f\u5217\u83b7\u53d6\u97f3\u9891\n        if audio is None:  # \u68c0\u67e5\u7ed3\u675f\u4fe1\u53f7\n            audio_queue.task_done()\n            break\n        wav_play(audio)  # \u64ad\u653e\n        audio_queue.task_done()  \n\nif __name__ == '__main__':\n\n    data_list = ai(&quot;\u4f60\u662f\u4e00\u888b\u732b\u7cae&quot;)\n    audio_queue = queue.Queue()\n\n    # \u521b\u5efa\u5e76\u542f\u52a8\u7ebf\u7a0b\n    processor = threading.Thread(target=process_data, args=(data_list, audio_queue))\n    player = threading.Thread(target=play_audio, args=(audio_queue,))\n\n    processor.start()\n    player.start()\n\n    # \u7b49\u5f85\u5904\u7406\u7ebf\u7a0b\u5b8c\u6210\n    processor.join()\n    # \u7b49\u5f85\u961f\u5217\u4e2d\u7684\u6240\u6709\u4efb\u52a1\u5904\u7406\u5b8c\u6bd5\n    audio_queue.join()\n    # \u786e\u4fdd\u64ad\u653e\u7ebf\u7a0b\u7ed3\u675f\n    player.join()\n\n    print(&quot;\u5b8c\u6210&quot;)\n<\/pre><\/div>\n\n\n<p>\u672c\u7740\u80fd\u8dd1\u5c31\u884c\u7684\u539f\u5219\uff0c\u4ee3\u7801\u5199\u5f97\u5f88\u7c97\u7cd9\uff0c\u8c28\u614e\u53c2\u8003\u3012\u25bd\u3012<\/p>\n\n\n\n<p>\u53e6\u5916\u5199\u8fd9\u4e2a\u7684\u65f6\u5019\u624d\u53d1\u73b0deepseek\u5c45\u7136\u628a\u5145\u503c\u6e20\u9053\u7ed9\u5173\u4e86\u2026\u2026\u53ea\u597d\u5148\u7528\u5176\u4ed6\u5e73\u53f0\u7684\u4ee3\u66ff\u4e86\u3002\u53c8\u4ee5\u53ca\uff0c\u5728\u4f7f\u7528\u8fd9\u4e2apydub\u65f6\u53ef\u80fd\u4f1a\u9047\u5230Permission denied\u7684\u62a5\u9519\uff0c\u53ea\u9700\u8981<code>pip install pyaudio<\/code>\u5b89\u88c5pyaudio\u5e93\u5c31\u597d\u4e86<sup data-fn=\"bfba5774-f628-49cf-9247-1deb31f7ddfb\" class=\"fn\"><a href=\"#bfba5774-f628-49cf-9247-1deb31f7ddfb\" id=\"bfba5774-f628-49cf-9247-1deb31f7ddfb-link\">1<\/a><\/sup>\u3002<\/p>\n\n\n<ol class=\"wp-block-footnotes\"><li id=\"bfba5774-f628-49cf-9247-1deb31f7ddfb\"><a href=\"https:\/\/blog.csdn.net\/weixin_41568999\/article\/details\/105235853\">https:\/\/blog.csdn.net\/weixin_41568999\/article\/details\/105235853<\/a> <a href=\"#bfba5774-f628-49cf-9247-1deb31f7ddfb-link\" aria-label=\"\u8df3\u8f6c\u5230\u811a\u6ce8\u5f15\u7528 1\">\u21a9\ufe0e<\/a><\/li><\/ol>\n\n\n<p><\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u5728\u4e0a\u4e00\u7bc7\u5b66\u4e60\u65e5\u5fd7\u4e2d\uff0c\u6211\u4ecb\u7ecd\u4e86GSV\u7684API\u98df\u7528\u65b9\u6cd5\uff0c\u672c\u7bc7\u6211\u4eec\u7814\u7a76\u5982\u4f55\u5c06LLM\u4e0eGSV\u5bf9\u63a5\u3002 \u8fd9\u4e4d\u4e00\u542c\u5f88\u7b80\u5355\uff0c\u53ea [&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"closed","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":"[{\"content\":\"<a href=\\\"https:\/\/blog.csdn.net\/weixin_41568999\/article\/details\/105235853\\\">https:\/\/blog.csdn.net\/weixin_41568999\/article\/details\/105235853<\/a>\",\"id\":\"bfba5774-f628-49cf-9247-1deb31f7ddfb\"}]"},"categories":[11,16],"tags":[12,13,7],"class_list":["post-153","post","type-post","status-publish","format-standard","hentry","category-11","category-16","tag-ai","tag-gpt-sovites","tag-python"],"_links":{"self":[{"href":"https:\/\/blog.sayoung.wang\/index.php\/wp-json\/wp\/v2\/posts\/153","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/blog.sayoung.wang\/index.php\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/blog.sayoung.wang\/index.php\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/blog.sayoung.wang\/index.php\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/blog.sayoung.wang\/index.php\/wp-json\/wp\/v2\/comments?post=153"}],"version-history":[{"count":3,"href":"https:\/\/blog.sayoung.wang\/index.php\/wp-json\/wp\/v2\/posts\/153\/revisions"}],"predecessor-version":[{"id":158,"href":"https:\/\/blog.sayoung.wang\/index.php\/wp-json\/wp\/v2\/posts\/153\/revisions\/158"}],"wp:attachment":[{"href":"https:\/\/blog.sayoung.wang\/index.php\/wp-json\/wp\/v2\/media?parent=153"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/blog.sayoung.wang\/index.php\/wp-json\/wp\/v2\/categories?post=153"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/blog.sayoung.wang\/index.php\/wp-json\/wp\/v2\/tags?post=153"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}