
Commit 3f382a4
quant ops: Dequantize weight in-place (#10935)
In flux2 these weights are huge (200 MB). As plain_tensor is a throw-away deep copy, do this multiplication in-place to save VRAM.

1 file changed (+2, -1 lines)
comfy/quant_ops.py

@@ -425,7 +425,8 @@ def quantize(cls, tensor, scale=None, dtype=torch.float8_e4m3fn, stochastic_roun
     @staticmethod
     def dequantize(qdata, scale, orig_dtype, **kwargs):
         plain_tensor = torch.ops.aten._to_copy.default(qdata, dtype=orig_dtype)
-        return plain_tensor * scale
+        plain_tensor.mul_(scale)
+        return plain_tensor

     @classmethod
     def get_plain_tensors(cls, qtensor):
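
For context on why the in-place version saves memory: _to_copy already materializes a fresh tensor at orig_dtype, so that copy can be scaled in place, whereas plain_tensor * scale allocates a second full-size tensor before the first becomes garbage. The sketch below is hypothetical, not code from this repository; it compares peak CUDA allocations of the two variants using a plain .to(dtype=...) cast (which behaves like torch.ops.aten._to_copy.default for this purpose), and assumes a CUDA device plus a PyTorch build with torch.float8_e4m3fn.

import torch

def dequant_out_of_place(qdata, scale, orig_dtype):
    plain_tensor = qdata.to(dtype=orig_dtype)   # fresh copy at orig_dtype
    return plain_tensor * scale                 # allocates a second full-size tensor

def dequant_in_place(qdata, scale, orig_dtype):
    plain_tensor = qdata.to(dtype=orig_dtype)   # fresh copy at orig_dtype
    plain_tensor.mul_(scale)                    # reuses that copy; no extra allocation
    return plain_tensor

if torch.cuda.is_available():
    # Hypothetical weight: 100M elements stored as fp8, dequantized to bf16 (~200 MB).
    q = torch.randn(100_000_000, device="cuda").to(torch.float8_e4m3fn)
    s = torch.tensor(2.0, device="cuda")
    for fn in (dequant_out_of_place, dequant_in_place):
        torch.cuda.empty_cache()
        torch.cuda.reset_peak_memory_stats()
        w = fn(q, s, torch.bfloat16)
        peak_mib = torch.cuda.max_memory_allocated() / 2**20
        print(f"{fn.__name__}: peak {peak_mib:.0f} MiB")
        del w

On such sizes the out-of-place variant peaks roughly one extra weight-sized buffer (~200 MB) higher, which is exactly what the one-line change in dequantize avoids.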
