Skip to content

Commit 8712328

Browse files
author
LittleMouse
committed
[update] Add qwen3-vl-2B-Int4-ax630c model
1 parent 9e70ce8 commit 8712328

File tree

4 files changed

+223
-6
lines changed

4 files changed

+223
-6
lines changed
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
{
2+
"mode": "qwen3-vl-2B-Int4-ax630c",
3+
"type": "vlm",
4+
"homepage": "https://huggingface.co/AXERA-TECH/Qwen3-VL-2B-Instruct",
5+
"capabilities": [
6+
"text_generation",
7+
"chat"
8+
],
9+
"input_type": [
10+
"vlm.chat_completion",
11+
"vlm.chat_completion.stream"
12+
],
13+
"output_type": [
14+
"vlm.utf-8",
15+
"vlm.utf-8.stream"
16+
],
17+
"mode_param": {
18+
"tokenizer_type": 2,
19+
"url_tokenizer_model": "http://localhost:8080",
20+
"filename_tokens_embed": "model.embed_tokens.weight.bfloat16.bin",
21+
"filename_post_axmodel": "qwen3_vl_text_post.axmodel",
22+
"template_filename_axmodel": "qwen3_vl_text_p64_l%d_together.axmodel",
23+
"filename_image_encoder_axmodel": "Qwen3-VL-2B-Instruct_vision_u8_384_ax630c.axmodel",
24+
"enable_temperature": true,
25+
"temperature": 0.7,
26+
"enable_top_p_sampling": false,
27+
"top_p": 0.9,
28+
"enable_top_k_sampling": true,
29+
"top_k": 40,
30+
"enable_repetition_penalty": false,
31+
"repetition_penalty": 1.1,
32+
"penalty_window": 50,
33+
"axmodel_num": 28,
34+
"tokens_embed_num": 151936,
35+
"tokens_embed_size": 2048,
36+
"b_use_mmap_load_embed": true,
37+
"b_video": false,
38+
"vision_config.temporal_patch_size": 2,
39+
"vision_config.tokens_per_second": 2,
40+
"vision_config.spatial_merge_size": 2,
41+
"vision_config.patch_size": 16,
42+
"vision_config.height": 384,
43+
"vision_config.width": 384,
44+
"vision_config.fps": 1,
45+
"image_token_id": 151655,
46+
"video_token_id": 151656,
47+
"vision_start_token_id": 151652,
48+
"precompute_len": 0,
49+
"cmm_size": 3582336,
50+
"ext_scripts": [
51+
"tokenizer_qwen3-vl-2B-Int4-ax630c.py"
52+
]
53+
}
54+
}
Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
from transformers import AutoTokenizer, PreTrainedTokenizerFast
2+
from transformers.tokenization_utils_base import AddedToken
3+
from http.server import HTTPServer, BaseHTTPRequestHandler
4+
import json
5+
import argparse
6+
7+
class Tokenizer_Http:
    """HTTP-facing wrapper around a HuggingFace tokenizer.

    Builds Qwen chat-template prompts (<|im_start|>/<|im_end|> markers, with
    optional <|vision_start|>...<|vision_end|> image/video pad blocks) and
    exposes encode/decode plus the special-token ids the C++ runner queries.
    """

    def __init__(self, model_id, system_content="You are a helpful assistant."):
        # use_fast=False: force the slow tokenizer so trust_remote_code
        # custom tokenizers load consistently.
        self.tokenizer = AutoTokenizer.from_pretrained(
            model_id,
            trust_remote_code=True,
            use_fast=False
        )
        # Accumulates streamed token ids across decode() calls until they
        # form a valid UTF-8 string (multi-byte chars can split across tokens).
        self.token_ids_cache = []
        self.system_content = system_content

    def encode(self, content):
        """Encode a plain text chat turn; returns a flat list of token ids."""
        text = [
            f'<|im_start|>system\n{self.system_content}<|im_end|>\n'
            f'<|im_start|>user\n{content}<|im_end|>\n'
            f'<|im_start|>assistant\n'
        ]
        input_ids = self.tokenizer(text)
        return input_ids["input_ids"][0]

    def encode_vpm_image(self, content="Describe this image.", num_img=1, img_token_num=256):
        """Encode a chat turn carrying num_img images.

        Each image contributes its own <|vision_start|>...<|vision_end|>
        block of img_token_num <|image_pad|> placeholders.
        """
        imgs_token = (
            '<|vision_start|>'
            + '<|image_pad|>' * img_token_num
            + '<|vision_end|>'
        )
        # One full vision block per image.
        imgs_token *= num_img
        text = (
            f'<|im_start|>system\n{self.system_content}<|im_end|>\n'
            f'<|im_start|>user\n{imgs_token}{content}<|im_end|>\n'
            f'<|im_start|>assistant\n'
        )
        text_inputs = self.tokenizer([text])
        return text_inputs["input_ids"][0]

    def encode_vpm_video(self, content="Describe this image.", num_img=1, img_token_num=256):
        """Encode a chat turn carrying one video of num_img frames.

        Unlike encode_vpm_image, all frames share a single
        <|vision_start|>...<|vision_end|> block of <|video_pad|> placeholders.
        """
        imgs_token = (
            '<|vision_start|>'
            + '<|video_pad|>' * img_token_num * num_img
            + '<|vision_end|>'
        )
        # Fix: honor the configured system prompt instead of a hardcoded one,
        # matching encode() and encode_vpm_image().
        text = (
            f'<|im_start|>system\n{self.system_content}<|im_end|>\n'
            f'<|im_start|>user\n{imgs_token}{content}<|im_end|>\n'
            f'<|im_start|>assistant\n'
        )
        text_inputs = self.tokenizer([text])
        return text_inputs["input_ids"][0]

    def decode(self, token_ids):
        """Decode streamed token ids; buffers partial UTF-8 sequences.

        Returns "" (and keeps the ids cached) while the accumulated ids still
        decode to a replacement character; returns the full text and clears
        the cache once the sequence is valid.
        """
        self.token_ids_cache += token_ids
        text = self.tokenizer.decode(self.token_ids_cache)
        if "\ufffd" in text:
            # Incomplete multi-byte character: wait for more tokens.
            print("text 中包含非法字符")
            return ""
        else:
            self.token_ids_cache.clear()
            return text

    @property
    def bos_id(self):
        # May be None for models without a BOS token (caller maps None -> -1).
        return self.tokenizer.bos_token_id

    @property
    def eos_id(self):
        return self.tokenizer.eos_token_id

    @property
    def bos_token(self):
        return self.tokenizer.bos_token

    @property
    def eos_token(self):
        return self.tokenizer.eos_token

    @property
    def img_start_token(self):
        """Token id of <|vision_start|>."""
        return self.tokenizer.encode("<|vision_start|>")[0]

    @property
    def img_context_token(self):
        """Token id of <|image_pad|>."""
        return self.tokenizer.encode("<|image_pad|>")[0]
class Request(BaseHTTPRequestHandler):
    """Minimal HTTP front-end over the module-level `tokenizer` instance.

    GET  serves special-token ids as single-key JSON objects.
    POST serves /encode and /decode. Unknown paths answer the literal
    string 'error' with status 200 (the C++ client checks the body).
    """

    timeout = 5
    server_version = 'Apache'

    def do_GET(self):
        print(self.path)
        self.send_response(200)
        self.send_header("type", "get")
        self.end_headers()
        # Path -> (JSON key, value getter); None token ids map to -1.
        getters = {
            '/bos_id': ('bos_id', lambda: tokenizer.bos_id),
            '/eos_id': ('eos_id', lambda: tokenizer.eos_id),
            '/img_start_token': ('img_start_token', lambda: tokenizer.img_start_token),
            '/img_context_token': ('img_context_token', lambda: tokenizer.img_context_token),
        }
        entry = getters.get(self.path)
        if entry is None:
            msg = 'error'
        else:
            key, getter = entry
            value = getter()
            msg = json.dumps({key: -1 if value is None else value})
        print(msg)
        self.wfile.write(str(msg).encode())

    def do_POST(self):
        body = self.rfile.read(int(self.headers['content-length']))
        req = json.loads(body.decode())
        if self.path == "/encode":
            prompt = req['text']
            if req.get('img_prompt', False):
                # Vision prompt: pick image vs video padding layout.
                kind = req.get('img_type', 'image')
                num_img = req.get("num_img", 1)
                img_token_num = req.get("img_token_num", 256)
                if kind == 'image':
                    token_ids = tokenizer.encode_vpm_image(prompt, num_img, img_token_num)
                elif kind == 'video':
                    token_ids = tokenizer.encode_vpm_video(prompt, num_img, img_token_num)
                else:
                    token_ids = tokenizer.encode(prompt)
            else:
                token_ids = tokenizer.encode(prompt)
            msg = json.dumps({'token_ids': -1 if token_ids is None else token_ids})
        elif self.path == "/decode":
            text = tokenizer.decode(req['token_ids'])
            msg = json.dumps({'text': "" if text is None else text})
        else:
            msg = 'error'
        self.send_response(200)
        self.end_headers()
        self.wfile.write(str(msg).encode())
150+
151+
if __name__ == "__main__":
    # CLI entry point: load the tokenizer and serve it over HTTP (blocking).
    parser = argparse.ArgumentParser()
    parser.add_argument('--host', type=str, default='localhost')
    parser.add_argument('--port', type=int, default=8080)
    parser.add_argument('--model_id', type=str, default='tokenizer')
    parser.add_argument('--content', type=str, default='You are a helpful assistant.')
    opts = parser.parse_args()

    # Module-level global: Request handlers look this up by name.
    tokenizer = Tokenizer_Http(opts.model_id, system_content=opts.content)
    address = (opts.host, opts.port)
    print(f"http://{opts.host}:{opts.port}")
    server = HTTPServer(address, Request)
    server.serve_forever()

projects/llm_framework/main_vlm/src/runner/Tokenizer/Tokenizer.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -162,9 +162,9 @@ class Tokenizer_Http : public BaseTokenizer {
162162

163163
try {
164164
cli = std::make_shared<httplib::Client>(base_url);
165-
cli->set_connection_timeout(1);
166-
cli->set_read_timeout(1);
167-
cli->set_write_timeout(1);
165+
cli->set_connection_timeout(10);
166+
cli->set_read_timeout(10);
167+
cli->set_write_timeout(10);
168168
{
169169
auto ret = cli->Get("/bos_id");
170170
auto rep = ret.value();
@@ -209,9 +209,9 @@ class Tokenizer_Http : public BaseTokenizer {
209209

210210
try {
211211
cli = std::make_shared<httplib::Client>(base_url);
212-
cli->set_connection_timeout(1);
213-
cli->set_read_timeout(1);
214-
cli->set_write_timeout(1);
212+
cli->set_connection_timeout(10);
213+
cli->set_read_timeout(10);
214+
cli->set_write_timeout(10);
215215
{
216216
auto ret = cli->Get("/bos_id");
217217
auto rep = ret.value();

projects/llm_framework/tools/llm_pack.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -465,6 +465,7 @@ def create_bin_deb(package_name, version, src_folder, revision = 'm5stack1', dep
465465
'llm-model-qwen2.5-1.5B-Int4-ax630c':[create_data_deb,'llm-model-qwen2.5-1.5B-Int4-ax630c', '0.4', src_folder, revision],
466466
'llm-model-qwen2.5-coder-0.5B-ax630c':[create_data_deb,'llm-model-qwen2.5-coder-0.5B-ax630c', data_version, src_folder, revision],
467467
'llm-model-qwen3-0.6B-ax630c':[create_data_deb,'llm-model-qwen3-0.6B-ax630c', '0.4', src_folder, revision],
468+
'llm-model-qwen3-vl-2B-Int4-ax630c':[create_data_deb,'llm-model-qwen3-vl-2B-Int4-ax630c', '0.5', src_folder, revision],
468469
## AX650
469470
'llm-model-qwen2.5-0.5B-Int4-ax650':[create_data_deb,'llm-model-qwen2.5-0.5B-Int4-ax650', '0.4', src_folder, revision],
470471
'llm-model-qwen2.5-HA-0.5B-ctx-ax650':[create_data_deb,'llm-model-qwen2.5-HA-0.5B-ctx-ax650', '0.5', src_folder, revision],

0 commit comments

Comments
 (0)