From 617dd1201d97e19a9ca7ac8d6cbc195e8d1dc5dd Mon Sep 17 00:00:00 2001
From: mgoin
Date: Tue, 9 Dec 2025 01:28:38 +0000
Subject: [PATCH 1/2] Fix compressed-tensors models failing to load with
 transformers backend

Signed-off-by: mgoin
---
 .../compressed_tensors/compressed_tensors.py | 39 ++++++++++++++-----
 1 file changed, 30 insertions(+), 9 deletions(-)

diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py
index b91ecb59fee1..9a692cf6cdd4 100644
--- a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py
+++ b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py
@@ -116,16 +116,37 @@ def get_name(self) -> QuantizationMethods:
         return "compressed-tensors"
 
     def apply_vllm_mapper(self, hf_to_vllm_mapper: "WeightsMapper"):
-        self.target_scheme_map = hf_to_vllm_mapper.apply_dict(self.target_scheme_map)
-        self.ignore = hf_to_vllm_mapper.apply_list(self.ignore)
-        self.sparsity_scheme_map = hf_to_vllm_mapper.apply_dict(
-            self.sparsity_scheme_map
-        )
-        self.sparsity_ignore_list = hf_to_vllm_mapper.apply_list(
-            self.sparsity_ignore_list
-        )
+        """
+        Transform layer paths in config targets to match vLLM's naming.
+
+        The WeightsMapper is designed for weight paths, but some backends
+        (e.g. transformers) use broad prefix mappings like "" -> "model."
+        which would incorrectly transform non-path targets.
+
+        compressed-tensors targets can be:
+        - Layer paths: "layers.0.self_attn.q_proj" -> transformed
+        - Module class names: "Linear" -> preserved (no ".")
+        - Regex patterns: "re:.*proj" -> preserved (starts with "re:")
+        """
+
+        def _map_target(target: str) -> str | None:
+            is_layer_path = "." in target and not target.startswith("re:")
+            if is_layer_path:
+                return hf_to_vllm_mapper._map_name(target)
+            return target
+
+        def _apply_dict(d: dict) -> dict:
+            return {k: v for t, v in d.items() if (k := _map_target(t))}
+
+        def _apply_list(lst: list) -> list:
+            return [t for x in lst if (t := _map_target(x))]
+
+        self.target_scheme_map = _apply_dict(self.target_scheme_map)
+        self.ignore = _apply_list(self.ignore)
+        self.sparsity_scheme_map = _apply_dict(self.sparsity_scheme_map)
+        self.sparsity_ignore_list = _apply_list(self.sparsity_ignore_list)
         if self.kv_cache_scheme is not None:
-            self.kv_cache_scheme = hf_to_vllm_mapper.apply_dict(self.kv_cache_scheme)
+            self.kv_cache_scheme = _apply_dict(self.kv_cache_scheme)
 
     def get_quant_method(
         self,

From 0d84c651189d0b16d3fe96cc53ea3f77a3a1bcc9 Mon Sep 17 00:00:00 2001
From: Michael Goin
Date: Mon, 8 Dec 2025 20:37:55 -0500
Subject: [PATCH 2/2] Update
 vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Signed-off-by: Michael Goin
---
 .../quantization/compressed_tensors/compressed_tensors.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py
index 9a692cf6cdd4..22036f1efd77 100644
--- a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py
+++ b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py
@@ -136,10 +136,10 @@ def _map_target(target: str) -> str | None:
             return target
 
         def _apply_dict(d: dict) -> dict:
-            return {k: v for t, v in d.items() if (k := _map_target(t))}
+            return {k: v for t, v in d.items() if (k := _map_target(t)) is not None}
 
         def _apply_list(lst: list) -> list:
-            return [t for x in lst if (t := _map_target(x))]
+            return [t for x in lst if (t := _map_target(x)) is not None]
 
         self.target_scheme_map = _apply_dict(self.target_scheme_map)
         self.ignore = _apply_list(self.ignore)