[update] Bump asr, kws, llm, vlm, vad, whisper, and melotts versions

LittleMouse · LittleMouse · commit e628093302bd · 2025-09-04T18:39:47.000+08:00
diff --git a/projects/llm_framework/main_asr/SConstruct b/projects/llm_framework/main_asr/SConstruct
@@ -26,7 +26,7 @@ REQUIREMENTS += ['ncnn', 'sherpa-ncnn-core']
 
 STATIC_FILES += Glob('mode_*.json')
 
-env['COMPONENTS'].append({'target':'llm_asr-1.6',
+env['COMPONENTS'].append({'target':'llm_asr-1.7',
                           'SRCS':SRCS,
                           'INCLUDE':INCLUDE,
                           'PRIVATE_INCLUDE':PRIVATE_INCLUDE,
diff --git a/projects/llm_framework/main_kws/SConstruct b/projects/llm_framework/main_kws/SConstruct
@@ -57,7 +57,7 @@ ignore['ignore'] = list(set(ignore['ignore']))
 with open('../dist/fileignore', 'w') as f:
     json.dump(ignore, f, indent=4)
 
-env['COMPONENTS'].append({'target':'llm_kws-1.8',
+env['COMPONENTS'].append({'target':'llm_kws-1.9',
                           'SRCS':SRCS,
                           'INCLUDE':INCLUDE,
                           'PRIVATE_INCLUDE':PRIVATE_INCLUDE,
diff --git a/projects/llm_framework/main_llm/SConstruct b/projects/llm_framework/main_llm/SConstruct
@@ -66,7 +66,7 @@ ignore['ignore'] = list(set(ignore['ignore']))
 with open('../dist/fileignore', 'w') as f:
     json.dump(ignore, f, indent=4)
 
-env['COMPONENTS'].append({'target':'llm_llm-1.8',
+env['COMPONENTS'].append({'target':'llm_llm-1.9',
                           'SRCS':SRCS,
                           'INCLUDE':INCLUDE,
                           'PRIVATE_INCLUDE':PRIVATE_INCLUDE,
diff --git a/projects/llm_framework/main_llm/src/main.cpp b/projects/llm_framework/main_llm/src/main.cpp
@@ -224,7 +224,7 @@ class llm_task {
                 if (!process_field(mode_config_.filename_tokenizer_model, "filename_tokenizer_model") &&
                     !process_field(mode_config_.url_tokenizer_model, "url_tokenizer_model")) {
                     mode_config_.filename_tokenizer_model = base_model + mode_config_.filename_tokenizer_model;
-                    SLOGE("filename_tokenizer_model: %s", mode_config_.filename_tokenizer_model.c_str());
+                    SLOGI("filename_tokenizer_model: %s", mode_config_.filename_tokenizer_model.c_str());
                 }
             }
             mode_config_.filename_tokens_embed     = base_model + mode_config_.filename_tokens_embed;
diff --git a/projects/llm_framework/main_llm/src/runner/LLM.hpp b/projects/llm_framework/main_llm/src/runner/LLM.hpp
@@ -55,8 +55,8 @@ struct LLMAttrType {
     bool enable_top_p_sampling = false;
     float top_p                = 0.7f;
 
-    bool enable_top_k_sampling = false;
-    int top_k                  = 50;
+    bool enable_top_k_sampling = true;
+    int top_k                  = 10;
 
     bool enable_repetition_penalty = false;
     float repetition_penalty       = 1.2f;
diff --git a/projects/llm_framework/main_melotts/SConstruct b/projects/llm_framework/main_melotts/SConstruct
@@ -32,7 +32,7 @@ REQUIREMENTS += ['onnxruntime']
 
 STATIC_FILES += Glob('models/mode_*.json')
 
-env['COMPONENTS'].append({'target':'llm_melotts-1.8',
+env['COMPONENTS'].append({'target':'llm_melotts-1.9',
                           'SRCS':SRCS,
                           'INCLUDE':INCLUDE,
                           'PRIVATE_INCLUDE':PRIVATE_INCLUDE,
diff --git a/projects/llm_framework/main_vad/SConstruct b/projects/llm_framework/main_vad/SConstruct
@@ -29,7 +29,7 @@ REQUIREMENTS += ['onnxruntime']
 
 STATIC_FILES += Glob('mode_*.json')
 
-env['COMPONENTS'].append({'target':'llm_vad-1.7',
+env['COMPONENTS'].append({'target':'llm_vad-1.8',
                           'SRCS':SRCS,
                           'INCLUDE':INCLUDE,
                           'PRIVATE_INCLUDE':PRIVATE_INCLUDE,
diff --git a/projects/llm_framework/main_vlm/SConstruct b/projects/llm_framework/main_vlm/SConstruct
@@ -73,7 +73,7 @@ ignore['ignore'] = list(set(ignore['ignore']))
 with open('../dist/fileignore', 'w') as f:
     json.dump(ignore, f, indent=4)
 
-env['COMPONENTS'].append({'target':'llm_vlm-1.8',
+env['COMPONENTS'].append({'target':'llm_vlm-1.9',
                           'SRCS':SRCS,
                           'INCLUDE':INCLUDE,
                           'PRIVATE_INCLUDE':PRIVATE_INCLUDE,
diff --git a/projects/llm_framework/main_vlm/src/main.cpp b/projects/llm_framework/main_vlm/src/main.cpp
@@ -157,7 +157,6 @@ class llm_task {
             CONFIG_AUTO_SET(file_body["mode_param"], filename_vpm_resampler_axmodedl);
             CONFIG_AUTO_SET(file_body["mode_param"], filename_image_encoder_axmodel);
             CONFIG_AUTO_SET(file_body["mode_param"], template_filename_axmodel);
-            CONFIG_AUTO_SET(file_body["mode_param"], b_use_topk);
             CONFIG_AUTO_SET(file_body["mode_param"], b_vpm_two_stage);
             CONFIG_AUTO_SET(file_body["mode_param"], b_bos);
             CONFIG_AUTO_SET(file_body["mode_param"], b_eos);
@@ -232,7 +231,7 @@ class llm_task {
                 if (!process_field(mode_config_.filename_tokenizer_model, "filename_tokenizer_model") &&
                     !process_field(mode_config_.url_tokenizer_model, "url_tokenizer_model")) {
                     mode_config_.filename_tokenizer_model = base_model + mode_config_.filename_tokenizer_model;
-                    SLOGE("filename_tokenizer_model: %s", mode_config_.filename_tokenizer_model.c_str());
+                    SLOGI("filename_tokenizer_model: %s", mode_config_.filename_tokenizer_model.c_str());
                 }
             }
             mode_config_.filename_tokens_embed          = base_model + mode_config_.filename_tokens_embed;
diff --git a/projects/llm_framework/main_vlm/src/runner/LLM.hpp b/projects/llm_framework/main_vlm/src/runner/LLM.hpp
@@ -20,69 +20,67 @@ typedef std::function<void(int *, int, const char *, float, void *)> LLMRuningCa
 
 struct LLMAttrType {
     std::string system_prompt;
+
     std::string template_filename_axmodel = "tinyllama-int8/tinyllama_l%d.axmodel";
+    std::string post_config_path          = "post_config.json";
     int axmodel_num                       = 22;
 
-    std::string filename_post_axmodel          = "tinyllama-int8/tinyllama_post.axmodel";
-    std::string filename_image_encoder_axmodel = "minicpmv/vpm_resampler_version0_fp16.axmodel";
-    std::string filename_vpm_encoder_axmodel   = "minicpmv/vpm_resampler_version0_fp16.axmodel";
+    std::string filename_image_encoder_axmodel  = "minicpmv/vpm_resampler_version0_fp16.axmodel";
+    std::string filename_vpm_encoder_axmodel    = "minicpmv/vpm_resampler_version0_fp16.axmodel";
     std::string filename_vpm_resampler_axmodedl = "minicpmv/vpm_resampler_version0_fp16.axmodel";
 
-    int image_encoder_width  = 448;
-    int image_encoder_height = 448;
-    int vpm_width            = 280;
-    int vpm_height           = 280;
-    bool b_vpm_two_stage     = false;
+    int image_encoder_width       = 448;
+    int image_encoder_height      = 448;
+    int vpm_width                 = 280;
+    int vpm_height                = 280;
+    bool b_vpm_two_stage          = false;
+    int IMAGE_CONTEXT_TOKEN       = 151667;
+    int IMAGE_START_TOKEN         = 151665;
+    int IMAGE_ENCODER_INPUT_NCHW  = -1;
+    int IMAGE_ENCODER_OUTPUT_BF16 = -1;
 
     int prefill_token_num     = 96;
     int prefill_max_token_num = 512;
-    std::vector<int> prefill_max_kv_cache_num_grp;
-    int precompute_len = 0;
-    int prefill_grpid  = -1;
+
+    std::string filename_post_axmodel = "tinyllama-int8/tinyllama_post.axmodel";
 
     TokenizerType tokenizer_type         = TKT_LLaMa;
     std::string filename_tokenizer_model = "tokenizer.model";
     std::string url_tokenizer_model;
-    bool b_bos = true, b_eos = false;
+    bool b_bos                        = true;
+    bool b_eos                        = false;
     std::string filename_tokens_embed = "tinyllama.model.embed_tokens.weight.bfloat16.bin";
     int tokens_embed_num              = 32000;
     int img_token_id                  = 151667;
     int tokens_embed_size             = 2048;
 
     int max_token_len = 127;
-
     int kv_cache_num  = 1024;
     int kv_cache_size = 256;
 
+    int precompute_len = 0;
+    std::vector<int> prefill_max_kv_cache_num_grp;
+    int prefill_grpid = -1;
+
     bool enable_temperature = false;
     float temperature       = 0.7f;
 
     bool enable_top_p_sampling = false;
     float top_p                = 0.7f;
 
-    bool enable_top_k_sampling = false;
-    int top_k                  = 50;
+    bool enable_top_k_sampling = true;
+    int top_k                  = 10;
 
     bool enable_repetition_penalty = false;
     float repetition_penalty       = 1.2f;
     int penalty_window             = 50;
 
     bool b_use_mmap_load_embed        = false;
     bool b_dynamic_load_axmodel_layer = false;
+    bool b_use_mmap_load_layer        = true;
 
-    bool b_use_mmap_load_layer = true;
-
-    bool b_use_topk              = false;
-    std::string post_config_path = "post_config.json";
-
-    // bool b_live_print = true;
     LLMRuningCallback runing_callback = nullptr;
     void *reserve                     = nullptr;
-
-    int IMAGE_CONTEXT_TOKEN       = 151667;
-    int IMAGE_START_TOKEN         = 151665;
-    int IMAGE_ENCODER_INPUT_NCHW  = -1;
-    int IMAGE_ENCODER_OUTPUT_BF16 = -1;
 };
 
 class LLM {
@@ -142,7 +140,6 @@ class LLM {
             return false;
         }
         update_cqdm(&cqdm, 1, "count", "embed_selector init ok");
-
         llama_layers.resize(attr.axmodel_num);
 
         char axmodel_path[1024];
@@ -241,13 +238,34 @@ class LLM {
 
             _attr.prefill_token_num = llama_layers[0].layer.get_input(prefill_grpid, "indices").vShape[1];
             ALOGI("prefill_token_num : %d", _attr.prefill_token_num);
-
             ALOGI("vpm_height : %d,vpm_width : %d", _attr.vpm_height, _attr.vpm_width);
         }
         if (attr.b_dynamic_load_axmodel_layer) {
             auto &layer = llama_layers[0];
             layer.layer.deinit();
         }
+        nlohmann::json dynamic_config;
+
+        dynamic_config["enable_temperature"] = _attr.enable_temperature;
+        dynamic_config["temperature"]        = _attr.temperature;
+
+        dynamic_config["enable_repetition_penalty"] = _attr.enable_repetition_penalty;
+        dynamic_config["repetition_penalty"]        = _attr.repetition_penalty;
+        dynamic_config["penalty_window"]            = _attr.penalty_window;
+
+        dynamic_config["enable_top_p_sampling"] = _attr.enable_top_p_sampling;
+        dynamic_config["top_p"]                 = _attr.top_p;
+
+        dynamic_config["enable_top_k_sampling"] = _attr.enable_top_k_sampling;
+        dynamic_config["top_k"]                 = _attr.top_k;
+
+        if (!postprocess.load_config(attr.post_config_path)) {
+            ALOGW("load postprocess config(%s) failed", attr.post_config_path.c_str());
+        }
+
+        if (!postprocess.load_config(dynamic_config)) {
+            ALOGW("load postprocess config(%s) failed", dynamic_config.dump(4).c_str());
+        }
 
         // Reset();
         ALOGI("LLM init ok");
@@ -483,19 +501,15 @@ class LLM {
             auto &input = llama_post.get_input("input");
             memcpy(input.pVirAddr, embed.data(), embed.size() * sizeof(unsigned short));
             llama_post.inference();
+
             int max_index;
-            if (_attr.b_use_topk) {
-                AX_SYS_MinvalidateCache(llama_post.get_output("indices").phyAddr,
-                                        llama_post.get_output("indices").pVirAddr,
-                                        llama_post.get_output("indices").nSize);
-                max_index = *(int *)llama_post.get_output("indices").pVirAddr;
-            } else {
-                auto &output_post = llama_post.get_output("output");
-                AX_SYS_MinvalidateCache(output_post.phyAddr, output_post.pVirAddr, output_post.nSize);
-                unsigned short *post_out = (unsigned short *)output_post.pVirAddr;
-                float max_val            = -MAXFLOAT;
-                max_index = post_process(postprocess, post_out, _attr.tokens_embed_num, token_ids, &max_val);
-            }
+
+            auto &output_post = llama_post.get_output("output");
+            AX_SYS_MinvalidateCache(output_post.phyAddr, output_post.pVirAddr, output_post.nSize);
+            unsigned short *post_out = (unsigned short *)output_post.pVirAddr;
+            float max_val            = -MAXFLOAT;
+            max_index                = post_process(postprocess, post_out, _attr.tokens_embed_num, token_ids, &max_val);
+
             next_token = max_index;
 
             token_ids.push_back(max_index);
@@ -574,18 +588,13 @@ class LLM {
                 memcpy(input.pVirAddr, embed.data(), embed.size() * sizeof(unsigned short));
                 llama_post.inference();
                 int max_index;
-                if (_attr.b_use_topk) {
-                    AX_SYS_MinvalidateCache(llama_post.get_output("indices").phyAddr,
-                                            llama_post.get_output("indices").pVirAddr,
-                                            llama_post.get_output("indices").nSize);
-                    max_index = *(int *)llama_post.get_output("indices").pVirAddr;
-                } else {
-                    auto &output_post = llama_post.get_output("output");
-                    AX_SYS_MinvalidateCache(output_post.phyAddr, output_post.pVirAddr, output_post.nSize);
-                    unsigned short *post_out = (unsigned short *)output_post.pVirAddr;
-                    float max_val            = -MAXFLOAT;
-                    max_index = post_process(postprocess, post_out, _attr.tokens_embed_num, token_ids, &max_val);
-                }
+
+                auto &output_post = llama_post.get_output("output");
+                AX_SYS_MinvalidateCache(output_post.phyAddr, output_post.pVirAddr, output_post.nSize);
+                unsigned short *post_out = (unsigned short *)output_post.pVirAddr;
+                float max_val            = -MAXFLOAT;
+                max_index = post_process(postprocess, post_out, _attr.tokens_embed_num, token_ids, &max_val);
+
                 next_token = max_index;
 
                 if (tokenizer->isEnd(max_index)) {
diff --git a/projects/llm_framework/main_whisper/SConstruct b/projects/llm_framework/main_whisper/SConstruct
@@ -33,7 +33,7 @@ LDFLAGS += ['-l:libopencc.a', '-l:libmarisa.a']
 
 STATIC_FILES += Glob('models/mode_*.json')
 
-env['COMPONENTS'].append({'target':'llm_whisper-1.7',
+env['COMPONENTS'].append({'target':'llm_whisper-1.8',
                           'SRCS':SRCS,
                           'INCLUDE':INCLUDE,
                           'PRIVATE_INCLUDE':PRIVATE_INCLUDE,
diff --git a/projects/llm_framework/tools/llm_pack.py b/projects/llm_framework/tools/llm_pack.py
@@ -356,18 +356,18 @@ def create_bin_deb(package_name, version, src_folder, revision = 'm5stack1', dep
         'lib-llm':[create_lib_deb,'lib-llm', '1.8', src_folder, revision],
         'llm-sys':[create_bin_deb,'llm-sys', '1.6', src_folder, revision],
         'llm-audio':[create_bin_deb,'llm-audio', '1.6', src_folder, revision],
-        'llm-kws':[create_bin_deb,'llm-kws', '1.8', src_folder, revision],
-        'llm-asr':[create_bin_deb,'llm-asr', '1.6', src_folder, revision],
-        'llm-llm':[create_bin_deb,'llm-llm', '1.8', src_folder, revision],
+        'llm-kws':[create_bin_deb,'llm-kws', '1.9', src_folder, revision],
+        'llm-asr':[create_bin_deb,'llm-asr', '1.7', src_folder, revision],
+        'llm-llm':[create_bin_deb,'llm-llm', '1.9', src_folder, revision],
         'llm-tts':[create_bin_deb,'llm-tts', '1.6', src_folder, revision],
-        'llm-melotts':[create_bin_deb,'llm-melotts', '1.8', src_folder, revision],
+        'llm-melotts':[create_bin_deb,'llm-melotts', '1.9', src_folder, revision],
         'llm-camera':[create_bin_deb,'llm-camera', '1.9', src_folder, revision, 'lib-llm'],
-        'llm-vlm':[create_bin_deb,'llm-vlm', '1.8', src_folder, revision],
+        'llm-vlm':[create_bin_deb,'llm-vlm', '1.9', src_folder, revision],
         'llm-yolo':[create_bin_deb,'llm-yolo', '1.9', src_folder, revision],
         'llm-skel':[create_bin_deb,'llm-skel', version, src_folder, revision],
         'llm-depth-anything':[create_bin_deb,'llm-depth-anything', '1.7', src_folder, revision],
-        'llm-vad':[create_bin_deb,'llm-vad', '1.7', src_folder, revision],
-        'llm-whisper':[create_bin_deb,'llm-whisper', '1.7', src_folder, revision],
+        'llm-vad':[create_bin_deb,'llm-vad', '1.8', src_folder, revision],
+        'llm-whisper':[create_bin_deb,'llm-whisper', '1.8', src_folder, revision],
         'llm-openai-api':[create_bin_deb,'llm-openai-api', '1.7', src_folder, revision],
         # keyword spotting Audio file
         'llm-model-audio-en-us':[create_data_deb,'llm-model-audio-en-us', data_version, src_folder, revision],

Original file line number	Diff line number	Diff line change
`@@ -224,7 +224,7 @@ class llm_task {`
`224`	`224`	`if (!process_field(mode_config_.filename_tokenizer_model, "filename_tokenizer_model") &&`
`225`	`225`	`!process_field(mode_config_.url_tokenizer_model, "url_tokenizer_model")) {`
`226`	`226`	`mode_config_.filename_tokenizer_model = base_model + mode_config_.filename_tokenizer_model;`
`227`		`- SLOGE("filename_tokenizer_model: %s", mode_config_.filename_tokenizer_model.c_str());`
	`227`	`+ SLOGI("filename_tokenizer_model: %s", mode_config_.filename_tokenizer_model.c_str());`
`228`	`228`	`}`
`229`	`229`	`}`
`230`	`230`	`mode_config_.filename_tokens_embed = base_model + mode_config_.filename_tokens_embed;`