Commit acfaa5c

Don't try fp8 matrix mult in quantized ops if not supported by hardware. (#10874)
1 parent b680542


comfy/ops.py

Lines changed: 3 additions & 2 deletions
@@ -646,11 +646,12 @@ def forward(self, input, *args, **kwargs):
     return MixedPrecisionOps
 
 def pick_operations(weight_dtype, compute_dtype, load_device=None, disable_fast_fp8=False, fp8_optimizations=False, scaled_fp8=None, model_config=None):
+    fp8_compute = comfy.model_management.supports_fp8_compute(load_device) # TODO: if we support more ops this needs to be more granular
+
     if model_config and hasattr(model_config, 'layer_quant_config') and model_config.layer_quant_config:
         logging.info(f"Using mixed precision operations: {len(model_config.layer_quant_config)} quantized layers")
-        return mixed_precision_ops(model_config.layer_quant_config, compute_dtype)
+        return mixed_precision_ops(model_config.layer_quant_config, compute_dtype, full_precision_mm=not fp8_compute)
 
-    fp8_compute = comfy.model_management.supports_fp8_compute(load_device)
     if scaled_fp8 is not None:
         return scaled_fp8_ops(fp8_matrix_mult=fp8_compute and fp8_optimizations, scale_input=fp8_optimizations, override_dtype=scaled_fp8)

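What the change does: the `supports_fp8_compute(load_device)` check is hoisted above the mixed precision branch, and `mixed_precision_ops` now receives `full_precision_mm=not fp8_compute`, so quantized layers fall back to a full-precision matrix multiply on hardware without fp8 compute. A minimal sketch of what such a fallback could look like (illustrative only; `quantized_linear`, `weight_scale`, and the dequantize-then-matmul fallback are assumptions, not code from comfy/ops.py):

import torch.nn.functional as F

def quantized_linear(x, weight_fp8, weight_scale, bias=None, full_precision_mm=False):
    # Illustrative sketch (not from comfy/ops.py): how a full_precision_mm
    # flag could route around fp8 matrix mult on unsupported hardware.
    if full_precision_mm:
        # No fp8 compute available: dequantize the fp8 weight into the
        # activation dtype and run a plain full-precision matmul instead.
        weight = weight_fp8.to(x.dtype) * weight_scale
        return F.linear(x, weight, bias)
    # On fp8-capable GPUs an fp8 matrix multiply would run here instead
    # (PyTorch exposes one via torch._scaled_mm).
    raise NotImplementedError("fp8 matmul requires fp8-capable hardware")

The fallback trades the speed and memory savings of a native fp8 matmul for correctness: the weights stay stored in fp8, and only the multiply itself is promoted to the compute dtype.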