
Commit bd4c03e

Author: LittleMouse committed
[fix] Fix cosyvoice Deinit bug
1 parent 324f04d commit bd4c03e

5 files changed, +41 -28 lines changed

projects/llm_framework/main_cosy_voice/src/main.cpp

Lines changed: 24 additions & 20 deletions
@@ -65,7 +65,6 @@ class llm_task {
     std::atomic<bool> g_llm_finished{false};
     std::atomic<bool> g_stop{false};
     TokenBuffer g_token_buffer;
-    Token2Wav lToken2Wav;
 
     std::vector<int> prompt_text_token;
     std::vector<unsigned short> prompt_text_embeds;
@@ -81,6 +80,7 @@ class llm_task {
     LLMAttrType mode_config_;
     Token2WavAttr infer_mode_config_;
     std::unique_ptr<LLM> lLaMa_;
+    std::unique_ptr<Token2Wav> lToken2Wav_;
     std::string model_;
     std::string response_format_;
     std::vector<std::string> inputs_;
@@ -312,14 +312,15 @@ class llm_task {
             lLaMa_.reset();
             return -2;
         }
-        if (!lToken2Wav.Init(infer_mode_config_)) {
+        lToken2Wav_ = std::make_unique<Token2Wav>();
+        if (!lToken2Wav_->Init(infer_mode_config_)) {
             lLaMa_->Deinit();
             lLaMa_.reset();
             return -1;
         }
         lLaMa_->TextToken2Embeds(prompt_text_token, prompt_text_embeds);
         lLaMa_->SpeechToken2Embeds(prompt_speech_token, prompt_speech_embeds);
-        lToken2Wav.SpeechToken2Embeds(prompt_speech_token, prompt_speech_embeds_flow);
+        lToken2Wav_->SpeechToken2Embeds(prompt_speech_token, prompt_speech_embeds_flow);
 
     } catch (...) {
         SLOGE("config false");
@@ -358,7 +359,7 @@ class llm_task {
     {
         g_llm_finished = false;
         g_token_buffer.erase(g_token_buffer.begin(), g_token_buffer.end());
-        lToken2Wav.reset();
+        lToken2Wav_->clear();
     }
 
     void resample_audio(float *input_buffer, int input_length, float *output_buffer, int *output_length,
@@ -428,7 +429,7 @@ class llm_task {
         }
     }
 
-    int prompt_token_len = prompt_speech_embeds_flow.size() / lToken2Wav._attr.flow_embed_size;
+    int prompt_token_len = prompt_speech_embeds_flow.size() / lToken2Wav_->_attr.flow_embed_size;
     if (prompt_token_len < 75) {
         SLOGE("Error, prompt speech token len %d < 75", prompt_token_len);
         if (llm_thread.joinable()) llm_thread.join();
@@ -450,28 +451,28 @@ class llm_task {
     int token_offset = 0;
     int i = 0;
     while (true) {
-        this_token_hop_len = (token_offset == 0) ? lToken2Wav._attr.token_hop_len + promot_token_pad
-                                                 : lToken2Wav._attr.token_hop_len;
+        this_token_hop_len = (token_offset == 0) ? lToken2Wav_->_attr.token_hop_len + promot_token_pad
+                                                 : lToken2Wav_->_attr.token_hop_len;
         std::unique_lock<std::mutex> lock(g_buffer_mutex);
         g_buffer_cv.wait(lock, [&] {
             return (g_token_buffer.size() - token_offset >=
-                    this_token_hop_len + lToken2Wav._attr.pre_lookahead_len) ||
+                    this_token_hop_len + lToken2Wav_->_attr.pre_lookahead_len) ||
                    g_llm_finished.load() || g_stop.load();
         });
         if (g_stop) {
             lock.unlock();
             break;
         } else if (g_token_buffer.size() - token_offset >=
-                   this_token_hop_len + lToken2Wav._attr.pre_lookahead_len) {
+                   this_token_hop_len + lToken2Wav_->_attr.pre_lookahead_len) {
             std::vector<SpeechToken> token;
-            int start = token_offset - std::min(int(token_offset / lToken2Wav._attr.token_hop_len),
-                                                lToken2Wav._attr.max_infer_chunk_num - 1) *
-                                           lToken2Wav._attr.token_hop_len;
-            int end = token_offset + this_token_hop_len + lToken2Wav._attr.pre_lookahead_len;
+            int start = token_offset - std::min(int(token_offset / lToken2Wav_->_attr.token_hop_len),
+                                                lToken2Wav_->_attr.max_infer_chunk_num - 1) *
+                                           lToken2Wav_->_attr.token_hop_len;
+            int end = token_offset + this_token_hop_len + lToken2Wav_->_attr.pre_lookahead_len;
             token.insert(token.end(), g_token_buffer.begin() + start, g_token_buffer.begin() + end);
             lock.unlock();
-            auto speech = lToken2Wav.infer(token, prompt_speech_embeds_flow1, prompt_feat1, spk_embeds,
-                                           token_offset, false);
+            auto speech = lToken2Wav_->infer(token, prompt_speech_embeds_flow1, prompt_feat1, spk_embeds,
+                                             token_offset, false);
             token_offset += this_token_hop_len;
             output.insert(output.end(), speech.begin(), speech.end());
             double src_ratio =
@@ -507,12 +508,12 @@ class llm_task {
     }
 
     std::vector<SpeechToken> token;
-    int start = g_token_buffer.size() - std::min(int(g_token_buffer.size() / lToken2Wav._attr.token_hop_len),
-                                                 lToken2Wav._attr.max_infer_chunk_num - 1) *
-                                            lToken2Wav._attr.token_hop_len;
+    int start = g_token_buffer.size() - std::min(int(g_token_buffer.size() / lToken2Wav_->_attr.token_hop_len),
+                                                 lToken2Wav_->_attr.max_infer_chunk_num - 1) *
+                                            lToken2Wav_->_attr.token_hop_len;
     token.insert(token.end(), g_token_buffer.begin() + start, g_token_buffer.end());
-    auto speech = lToken2Wav.infer(token, prompt_speech_embeds_flow1, prompt_feat1, spk_embeds,
-                                   token_offset - start, true);
+    auto speech = lToken2Wav_->infer(token, prompt_speech_embeds_flow1, prompt_feat1, spk_embeds,
+                                     token_offset - start, true);
     output.insert(output.end(), speech.begin(), speech.end());
     double src_ratio =
         static_cast<double>(mode_config_.audio_rate) / static_cast<double>(mode_config_.mode_rate);
@@ -662,6 +663,9 @@ class llm_task {
         if (lLaMa_) {
             lLaMa_->Deinit();
         }
+        if (lToken2Wav_) {
+            lToken2Wav_->Deinit();
+        }
     }
 };
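The heart of the fix is in the first two hunks: `Token2Wav` used to be a by-value member (`Token2Wav lToken2Wav;`), so it was only destroyed with `llm_task` and was never `Deinit()`-ed in the exit path shown in the last hunk. It is now held as `std::unique_ptr<Token2Wav> lToken2Wav_`, constructed with `std::make_unique` at init time and explicitly `Deinit()`-ed behind a null check, mirroring the existing handling of `lLaMa_`. A minimal sketch of that ownership pattern, assuming a stand-in `Engine`/`Task` pair rather than the real `Token2Wav`/`llm_task` classes:

#include <memory>

struct Engine {
    bool Init() { /* acquire NPU/heap resources */ return true; }
    void Deinit() { /* must run before destruction to release them */ }
};

class Task {
    std::unique_ptr<Engine> engine_;  // was a by-value member; now owned explicitly

public:
    bool setup() {
        engine_ = std::make_unique<Engine>();
        if (!engine_->Init()) {
            engine_.reset();  // failed init: drop the half-built engine
            return false;
        }
        return true;
    }

    ~Task() {
        if (engine_) engine_->Deinit();  // guard: setup() may have failed or never run
    }
};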

projects/llm_framework/main_cosy_voice/src/runner/Token2wav.hpp

Lines changed: 1 addition & 1 deletion
@@ -464,7 +464,7 @@ class Token2Wav {
         // fade_in_mel_data is now modified in-place with the faded result.
     }
 
-    void reset()
+    void clear()
     {
         std::unordered_map<std::string, std::vector<float>>().swap(hift_cache_dict);
     }
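Renaming `reset()` to `clear()` goes with the pointer change above: now that the engine is reached through a `std::unique_ptr`, `lToken2Wav_->clear()` (empty the cache) reads very differently from `lToken2Wav_.reset()` (destroy the whole engine), so the old name was an accident waiting to happen. The swap-with-temporary in the body also releases the map's storage rather than merely emptying it. A self-contained illustration of that idiom, with a hypothetical `cache` variable standing in for `hift_cache_dict`:

#include <string>
#include <unordered_map>
#include <vector>

int main() {
    std::unordered_map<std::string, std::vector<float>> cache;
    cache["hift"] = std::vector<float>(1 << 20);  // ~4 MB of cached floats

    // cache.clear() would empty the map but may keep its bucket array allocated;
    // swapping with an empty temporary guarantees the storage is freed right away.
    std::unordered_map<std::string, std::vector<float>>().swap(cache);
    return 0;
}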

projects/llm_framework/main_llm/src/main.cpp

Lines changed: 13 additions & 4 deletions
@@ -404,6 +404,7 @@ class llm_task {
     bool pause()
     {
         if (lLaMa_) lLaMa_->Stop();
+        if (lLaMa_ctx_) lLaMa_ctx_->Stop();
         return true;
     }
 
@@ -414,8 +415,10 @@ class llm_task {
             waitpid(tokenizer_pid_, nullptr, 0);
             tokenizer_pid_ = -1;
         }
-        lLaMa_->Deinit();
-        lLaMa_.reset();
+        if (lLaMa_) lLaMa_->Deinit();
+        if (lLaMa_) lLaMa_.reset();
+        if (lLaMa_ctx_) lLaMa_ctx_->Deinit();
+        if (lLaMa_ctx_) lLaMa_ctx_.reset();
         return true;
     }
 
@@ -447,6 +450,7 @@ class llm_task {
         std::string par;
         async_list_.put(par);
         if (lLaMa_) lLaMa_->Stop();
+        if (lLaMa_ctx_) lLaMa_ctx_->Stop();
         inference_run_->join();
         inference_run_.reset();
     }
@@ -462,6 +466,9 @@ class llm_task {
         if (lLaMa_) {
             lLaMa_->Deinit();
         }
+        if (lLaMa_ctx_) {
+            lLaMa_ctx_->Deinit();
+        }
     }
 };
 
@@ -514,7 +521,8 @@ class llm_llm : public StackFlow {
         if (!(llm_task_obj && llm_channel)) {
            return;
         }
-        llm_task_obj->lLaMa_->Stop();
+        if (llm_task_obj->lLaMa_) llm_task_obj->lLaMa_->Stop();
+        if (llm_task_obj->lLaMa_ctx_) llm_task_obj->lLaMa_ctx_->Stop();
     }
 
     void pause(const std::string &work_id, const std::string &object, const std::string &data) override
@@ -605,7 +613,8 @@ class llm_llm : public StackFlow {
         if (!(llm_task_obj && llm_channel)) {
             return;
         }
-        llm_task_obj->lLaMa_->Stop();
+        if (llm_task_obj->lLaMa_) llm_task_obj->lLaMa_->Stop();
+        if (llm_task_obj->lLaMa_ctx_) llm_task_obj->lLaMa_ctx_->Stop();
     }
 
     int setup(const std::string &work_id, const std::string &object, const std::string &data) override
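main_llm gets the same defensive treatment for a second engine handle, `lLaMa_ctx_`: every `Stop()` and `Deinit()` call site is now behind a null check, so the stop, pause, and exit paths cannot dereference an engine that failed to initialize or was already torn down, and teardown stays idempotent. A compact sketch of that guarded-teardown idea, assuming a hypothetical `Engine` type and `teardown` helper rather than the real `LLM` class:

#include <memory>

struct Engine {
    void Stop()   { /* interrupt any in-flight inference */ }
    void Deinit() { /* release model memory */ }
};

// Guarded teardown: safe when either handle is null, and safe to call twice.
void teardown(std::unique_ptr<Engine>& llm, std::unique_ptr<Engine>& llm_ctx) {
    if (llm)     { llm->Deinit();     llm.reset();     }
    if (llm_ctx) { llm_ctx->Deinit(); llm_ctx.reset(); }
}

int main() {
    auto llm = std::make_unique<Engine>();
    std::unique_ptr<Engine> llm_ctx;  // e.g. context engine never initialized
    teardown(llm, llm_ctx);
    teardown(llm, llm_ctx);  // second call is a no-op
    return 0;
}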

projects/llm_framework/main_openai_api/SConstruct

Lines changed: 2 additions & 2 deletions
@@ -18,7 +18,7 @@ LINK_SEARCH_PATH = []
 STATIC_FILES = []
 
 
-ModuleLLMOpenAIPluginPath = wget_github_commit('https://github.com/m5stack/ModuleLLM-OpenAI-Plugin.git', '5298be215735f5b1c21bc9225c38d7cb9c1933db', True)
+ModuleLLMOpenAIPluginPath = wget_github_commit('https://github.com/m5stack/ModuleLLM-OpenAI-Plugin.git', 'a8f54b0430c478896b45828f612d0d8b0a6f2fa1', True)
 python_venv = check_wget_down("https://m5stack.oss-cn-shenzhen.aliyuncs.com/resource/linux/llm/m5stack_llm-openai-api-python-venv_v1.6.tar.gz", 'm5stack_llm-openai-api-python-venv_v1.6.tar.gz')
 
 
@@ -52,7 +52,7 @@ ignore['ignore'] = list(set(ignore['ignore']))
 with open('../dist/fileignore', 'w') as f:
     json.dump(ignore, f, indent=4)
 
-env['COMPONENTS'].append({'target':'llm_openai_api-1.8',
+env['COMPONENTS'].append({'target':'llm_openai_api-1.9',
                           'SRCS':SRCS,
                           'INCLUDE':INCLUDE,
                           'PRIVATE_INCLUDE':PRIVATE_INCLUDE,

projects/llm_framework/tools/llm_pack.py

Lines changed: 1 addition & 1 deletion
@@ -387,7 +387,7 @@ def create_bin_deb(package_name, version, src_folder, revision = 'm5stack1', dep
     'llm-depth-anything':[create_bin_deb,'llm-depth-anything', '1.7', src_folder, revision],
     'llm-vad':[create_bin_deb,'llm-vad', '1.8', src_folder, revision],
     'llm-whisper':[create_bin_deb,'llm-whisper', '1.8', src_folder, revision],
-    'llm-openai-api':[create_bin_deb,'llm-openai-api', '1.8', src_folder, revision],
+    'llm-openai-api':[create_bin_deb,'llm-openai-api', '1.9', src_folder, revision],
     'llm-cosy-voice':[create_bin_deb,'llm-cosy-voice', '1.8', src_folder, revision],
     # keyword spotting Audio file
     'llm-model-audio-en-us':[create_data_deb,'llm-model-audio-en-us', data_version, src_folder, revision],
