From f72d234e6f4438a741a9f95ff72be68600f1d59c Mon Sep 17 00:00:00 2001
From: Wenhua Cheng
Date: Fri, 28 Nov 2025 15:42:53 +0800
Subject: [PATCH 1/2] improve accuracy for 2-bit with auto-round-best

---
 auto_round/autoround.py        |  4 +++-
 auto_round/compressors/base.py | 10 ++++++++--
 docs/alg_202508.md             | 18 ++++++++++--------
 3 files changed, 21 insertions(+), 11 deletions(-)

diff --git a/auto_round/autoround.py b/auto_round/autoround.py
index f2d2b1346..211cf1642 100644
--- a/auto_round/autoround.py
+++ b/auto_round/autoround.py
@@ -41,7 +41,7 @@ class AutoRound:
     the quantization of LLMs." arXiv:2309.05516 (2023).
 
     Attributes:
-        model (torch.nn.Module): The loaded PyTorch model in eval mode.
+        model (torch.nn.Module | str): The loaded PyTorch model in eval mode.
         tokenizer: Tokenizer used to prepare input text for calibration/tuning.
         platform (str): The platform to load the pretrained model, options: ["hf", "model_scope"]
         bits (int): Weight quantization bits.
@@ -85,6 +85,8 @@ def __new__(
         enable_adam: bool = False,
         # for MLLM and Diffusion
         extra_config: ExtraConfig = None,
+        enable_alg_ext: bool = False,
+        disable_opt_rtn: bool = False,
         low_cpu_mem_usage: bool = False,
         **kwargs,
     ) -> BaseCompressor:
diff --git a/auto_round/compressors/base.py b/auto_round/compressors/base.py
index f94777773..ffd91a610 100644
--- a/auto_round/compressors/base.py
+++ b/auto_round/compressors/base.py
@@ -341,7 +341,13 @@ def __init__(
         if self.iters == 0:
             self.lr = 5e-3
         else:
-            self.lr = lr or (1.0 / self.iters)  # must place after iter setting
+            if not lr:
+                # TODO need to check 3/4 bits lr setting for auto-round-best
+                self.lr = 2.0/self.iters if (self.iters>=1000 and self.bits==2) else 1.0/self.iters
+                if (self.iters>=1000 and self.bits==2):
+                    logger.info("set the lr to 2.0/iters for better accuracy")
+            else:
+                self.lr = lr
         self.minmax_lr = minmax_lr or self.lr
         self.enable_alg_ext = enable_alg_ext
         self.sampler = sampler
@@ -510,7 +516,7 @@ def _set_device(self, device_map: Union[str, torch.device, int, dict]) -> None:
         else:
             raise TypeError(f"device_map should be [str, torch.device, int, dict], but got {type(device_map)}")
 
-    def _parse_and_set_scheme(self, scheme: Union[str, dict, QuantizationScheme], kwargs) -> QuantizationScheme:
+    def _parse_and_set_scheme(self, scheme: Union[str, dict, QuantizationScheme], kwargs) -> tuple[QuantizationScheme,bool]:
         """Parse and set the quantization scheme."""
 
         def _parse_and_set(scheme, kwargs):
diff --git a/docs/alg_202508.md b/docs/alg_202508.md
index bd36cee79..4c5a62520 100644
--- a/docs/alg_202508.md
+++ b/docs/alg_202508.md
@@ -3,12 +3,14 @@
 We use **lm-eval** for evaluation. For LLaMA, we enabled `add_bos_token` in [modeling_llama.py](https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/modeling_llama.py#L52C1-L52C40)
 to stabilize accuracy during evaluation. All other settings follow the default configurations of AutoRound and lm-eval.
 
-| Qwen3-8B W2G64 | Avg. | arc_challenge | hellaswag | gsm8k | lambada_openai | mmlu | mmlupro | truthfulqa_mc1 | winogrande |
-|:-------------------|:--------:|:--------:|:--------:|:--------:|:--------:|:--------:|:--------:|:--------:|:--------:|
-| AutoRound | 0.4373 | 0.4019 | 0.4437 | 0.4215 | 0.4826 | 0.5474 | 0.2630 | 0.3072 | 0.6314 |
-| AutoRound+alg_ext | 0.4787 | 0.4275 | 0.4516 | 0.5944 | 0.5181 | 0.5773 | 0.2807 | 0.3305 | 0.6496 |
+| Qwen3-8B W2G64                | Avg.   | arc_challenge | hellaswag | gsm8k  | lambada_openai | mmlu   | mmlupro | truthfulqa_mc1 | winogrande |
+|:------------------------------|:------:|:-------------:|:---------:|:------:|:--------------:|:------:|:-------:|:--------------:|:----------:|
+| AutoRound                     | 0.4373 | 0.4019        | 0.4437    | 0.4215 | 0.4826         | 0.5474 | 0.2630  | 0.3072         | 0.6314     |
+| AutoRound+alg_ext             | 0.4787 | 0.4275        | 0.4516    | 0.5944 | 0.5181         | 0.5773 | 0.2807  | 0.3305         | 0.6496     |
+| AutoRoundBest+alg_ext lr 2e-3 | 0.4937 | 0.4505        | 0.4740    | 0.5906 | 0.5556         | 0.6028 | 0.3127  | 0.3109         | 0.6527     |
 
-| Llama3.1-8B-Instruct W2G64 | Avg. | arc_challenge | hellaswag | gsm8k | lambada_openai | mmlu | mmlupro | truthfulqa_mc1 | winogrande |
-|:---------------------------|:--------:|:--------:|:--------:|:--------:|:--------:|:--------:|:--------:|:--------:|:--------:|
-| AutoRound | 0.3820 | 0.3635 | 0.4562 | 0.1622 | 0.5069 | 0.4411 | 0.1661 | 0.3207 | 0.6393 |
-| AutoRound+alg_ext | 0.4166 | 0.3712 | 0.4729 | 0.2039 | 0.5946 | 0.4981 | 0.2163 | 0.3011 | 0.6748 |
+| Llama3.1-8B-Instruct W2G64    | Avg.   | arc_challenge | hellaswag | gsm8k  | lambada_openai | mmlu   | mmlupro | truthfulqa_mc1 | winogrande |
+|:------------------------------|:------:|:-------------:|:---------:|:------:|:--------------:|:------:|:-------:|:--------------:|:----------:|
+| AutoRound                     | 0.3820 | 0.3635        | 0.4562    | 0.1622 | 0.5069         | 0.4411 | 0.1661  | 0.3207         | 0.6393     |
+| AutoRound+alg_ext             | 0.4166 | 0.3712        | 0.4729    | 0.2039 | 0.5946         | 0.4981 | 0.2163  | 0.3011         | 0.6748     |
+| AutoRoundBest+alg_ext lr 2e-3 | 0.4539 | 0.4138        | 0.4999    | 0.3071 | 0.6233         | 0.5279 | 0.2364  | 0.3231         | 0.6993     |

From b3172aa75624815c14fd691d3c50ccf889c83fe1 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 28 Nov 2025 07:45:34 +0000
Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 auto_round/compressors/base.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/auto_round/compressors/base.py b/auto_round/compressors/base.py
index ffd91a610..524dd90b9 100644
--- a/auto_round/compressors/base.py
+++ b/auto_round/compressors/base.py
@@ -343,8 +343,8 @@ def __init__(
         else:
             if not lr:
                 # TODO need to check 3/4 bits lr setting for auto-round-best
-                self.lr = 2.0/self.iters if (self.iters>=1000 and self.bits==2) else 1.0/self.iters
-                if (self.iters>=1000 and self.bits==2):
+                self.lr = 2.0 / self.iters if (self.iters >= 1000 and self.bits == 2) else 1.0 / self.iters
+                if self.iters >= 1000 and self.bits == 2:
                     logger.info("set the lr to 2.0/iters for better accuracy")
             else:
                 self.lr = lr
@@ -516,7 +516,9 @@ def _set_device(self, device_map: Union[str, torch.device, int, dict]) -> None:
         else:
             raise TypeError(f"device_map should be [str, torch.device, int, dict], but got {type(device_map)}")
 
-    def _parse_and_set_scheme(self, scheme: Union[str, dict, QuantizationScheme], kwargs) -> tuple[QuantizationScheme,bool]:
+    def _parse_and_set_scheme(
+        self, scheme: Union[str, dict, QuantizationScheme], kwargs
+    ) -> tuple[QuantizationScheme, bool]:
         """Parse and set the quantization scheme."""
 
         def _parse_and_set(scheme, kwargs):
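
A quick reviewer note on the lr rule this series lands: with the new defaults, `iters=1000` and `bits=2` yield `lr = 2.0/1000 = 2e-3`, which is the "lr 2e-3" called out in the new table rows. Below is a minimal standalone sketch of that selection logic; `pick_default_lr` is a hypothetical helper name used only for illustration (the real logic lives inline in `BaseCompressor.__init__` above), but the branches mirror the patched code.

```python
# Sketch of the patched default-lr selection; `pick_default_lr` is a
# hypothetical name for illustration, not part of auto-round's API.
def pick_default_lr(iters: int, bits: int, lr: float | None = None) -> float:
    if iters == 0:
        return 5e-3  # the iters == 0 path keeps its fixed default
    if lr:
        return lr  # an explicitly passed lr always wins
    # auto-round-best style runs (>= 1000 iters) at 2 bits double the old
    # 1.0/iters default; the 3/4-bit setting is still a TODO in the patch
    return 2.0 / iters if (iters >= 1000 and bits == 2) else 1.0 / iters


assert pick_default_lr(iters=1000, bits=2) == 2e-3  # the "lr 2e-3" rows above
assert pick_default_lr(iters=200, bits=2) == 5e-3   # below the 1000-iter gate: 1.0/200
assert pick_default_lr(iters=1000, bits=4) == 1e-3  # non-2-bit default unchanged
```

Note the `if not lr` guard means `lr=0` (or `None`) falls through to the computed default, matching the old `lr or (1.0 / self.iters)` behavior for falsy inputs.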