
Commit f4ec257

update model & fix CI.
1 parent 83317ba commit f4ec257


5 files changed, +26 −7 lines


docs/models.md

Lines changed: 6 additions & 0 deletions
```diff
@@ -124,6 +124,12 @@
 * LLaDA (`LLaDA2MoeModelLM`)
     * [x] [mini-preview](https://huggingface.co/inclusionAI/LLaDA2.0-mini-preview/tree/d25d3b2ac0b966b64da11d6c791f8bf4bc31e90c)

+    Supported options (`--set OPTION VALUE`):
+    - `block_length`: default 32
+    - `steps`: default 32
+    - `minimal_topk`: default 1
+    - `threshold`: default 0.95
+
 * LlaMA-like (`LlamaForCausalLM`, `Llama4ForConditionalGeneration`):
     * [x] All LlaMA-1 models
     * [x] LlaMA-2: [Chat-7B](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf), etc
```
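These options are passed at run time with the `--set OPTION VALUE` syntax shown in the doc, e.g. `--set block_length 32 --set threshold 0.95` (the values here just restate the defaults). How the four options interact during decoding is sketched after the models/bailing.cpp diff below.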

models/bailing.cpp

Lines changed: 3 additions & 4 deletions
```diff
@@ -1,5 +1,6 @@
 #include <algorithm>
 #include <numeric>
+#include <cstring>
 #include <functional>
 #include "deepseek.h"
 #include "qwen.h"
@@ -464,8 +465,6 @@ namespace chatllm::bailing::llada
         if (gen_max_tokens > 0)
             gen_max_tokens = n_past + (int)curr_input_ids.size() + gen_max_tokens;

-        bool first_call = true;
-
         if (performance)
             performance->Reset();

@@ -516,7 +515,7 @@ namespace chatllm::bailing::llada
             }
         }

-        std::memcpy(block_result.data(), curr_input_ids.data(), curr_input_ids.size() * sizeof(curr_input_ids[0]));
+        memcpy(block_result.data(), curr_input_ids.data(), curr_input_ids.size() * sizeof(curr_input_ids[0]));
         int next_pos_to_add = (int)(curr_input_ids.size());
         int block_prefilled_size = next_pos_to_add;
         curr_input_ids.clear();
@@ -525,7 +524,7 @@ namespace chatllm::bailing::llada
         {
             for (int step = 0; !completed && (step < steps); step++)
             {
-                // TODO: don't re-run "known" tokens again and again.
+                // Note: we have to run a whole block again and again.
                 std::vector<float> lm_logits;
                 generate_next_block(block_result.data(), block_length, gen_config, &lm_logits, nullptr);

```
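The comment rewrite in the last hunk (TODO → Note) records a real constraint of LLaDA-style block-diffusion decoding: the set of masked positions inside a block changes on every denoising step, so per-token results cannot be cached and reused the way an autoregressive decoder reuses its KV cache; the whole block must be re-scored each step. Below is a minimal, self-contained sketch of that loop shape, wired to the four options from docs/models.md. Everything named here (`MASK_ID`, `score_block`, `denoise_step`) is an illustrative stand-in, not the repo's actual API:

```cpp
#include <algorithm>
#include <utility>
#include <vector>

// Illustrative stand-in for the tokenizer's mask token id.
constexpr int MASK_ID = -1;

// Dummy stand-in for one forward pass over the WHOLE block: the real code
// runs the transformer over all block_length positions every step.
// Returns a (confidence, predicted token) pair per position.
static std::vector<std::pair<float, int>> score_block(const std::vector<int> &block)
{
    std::vector<std::pair<float, int>> out;
    for (int tok : block)
        out.push_back({tok == MASK_ID ? 0.5f : 1.0f, /*dummy prediction*/ 42});
    return out;
}

// One denoising step: commit every masked position whose confidence clears
// `threshold`, but always at least `minimal_topk` of them so the step is
// guaranteed to make progress. Returns true once the block is fully decoded.
static bool denoise_step(std::vector<int> &block, float threshold, int minimal_topk)
{
    auto scored = score_block(block); // the whole block is re-run, every step

    std::vector<size_t> masked;
    for (size_t i = 0; i < block.size(); i++)
        if (block[i] == MASK_ID)
            masked.push_back(i);

    // Most confident masked positions first.
    std::sort(masked.begin(), masked.end(),
              [&](size_t a, size_t b) { return scored[a].first > scored[b].first; });

    int committed = 0;
    for (size_t i : masked)
    {
        if (scored[i].first >= threshold || committed < minimal_topk)
        {
            block[i] = scored[i].second;
            committed++;
        }
    }
    return committed == (int)masked.size();
}

int main()
{
    // Defaults from docs/models.md (--set OPTION VALUE).
    const int   block_length = 32, steps = 32, minimal_topk = 1;
    const float threshold    = 0.95f;

    std::vector<int> block(block_length, MASK_ID);
    for (int step = 0; step < steps; step++)
        if (denoise_step(block, threshold, minimal_topk))
            break;
}
```

In the actual code, the `memcpy` above prefills `block_result` with the already-known tokens of the block, and `generate_next_block` plays the role of `score_block`.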
models/dots.cpp

Lines changed: 0 additions & 2 deletions
```diff
@@ -424,7 +424,6 @@ namespace chatllm::dots::ocr

         vision::image_arrange(scaled, w, patch_size, image.data, vision::PatchesFormat::PatchesLeftRightDown_MergeN_ChannelsRGB_PixelsLeftRightDown);

-        const int merge_length = vis_config->spatial_merge_size * vis_config->spatial_merge_size;
         image.emb_vec_number = image.grid_width * image.grid_height;

         const int id_start = tok->get_image_total_emb_vectors() - image.emb_vec_number + tok->vocab_size;
@@ -556,7 +555,6 @@ namespace chatllm::dots::ocr

     void ConditionalGeneration::set_additional_args(const std::map<std::string, std::string> &args)
     {
-        Tokenizer *tok = dynamic_cast<Tokenizer *>(tokenizer);
     }

     int64_t ConditionalGeneration::get_param_num(bool effective_only) const
```

scripts/models.json

Lines changed: 16 additions & 0 deletions
```diff
@@ -3450,5 +3450,21 @@
         }
       }
     }
+  },
+  "llada2.0": {
+    "brief": "LLaDA2.0-mini-preview is a diffusion language model featuring a 16BA1B Mixture-of-Experts (MoE) architecture.",
+    "default": "mini-preview",
+    "license": "Apache License 2.0",
+    "variants": {
+      "mini-preview": {
+        "default": "q8",
+        "quantized": {
+          "q8": {
+            "size": 17277819200,
+            "url": "chatllm_quantized_bailing/llada2.0-mini-preview.bin"
+          }
+        }
+      }
+    }
   }
 }
```
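For scale, `"size": 17277819200` is the size of the q8 artifact in bytes (about 17.3 GB, consistent with an 8-bit quantization of a ~16B-parameter MoE). With this entry, the download tooling should be able to resolve the model by the id `llada2.0`, defaulting to the `mini-preview` variant and its `q8` quantization; the exact fetch command lives in the scripts, which this commit does not touch.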

src/models.cpp

Lines changed: 1 addition & 1 deletion
```diff
@@ -1474,7 +1474,7 @@ namespace chatllm
     {
         const int qlen = ggml::get_dim(hidden_states, 1);
         const int batch = ggml::get_dim(hidden_states, 2);
-        CHATLLM_CHECK(qlen >= last_n);
+        const int last_n = qlen >= this->last_n ? this->last_n : qlen;
         order = nullptr;

         hidden_states = ggml::view_3d(ctx, hidden_states, model->hidden_size, last_n, batch,
```
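This one-line change relaxes a hard assertion into a clamp: when fewer than `last_n` positions are available (`qlen < this->last_n`), the code now introduces a local `last_n` equal to `qlen` (shadowing the member) so the `ggml::view_3d` below simply takes every position that exists instead of aborting. Presumably this is the "fix CI" half of the commit message, hit when a short sequence reached this path.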
