1 parent 71362ff commit 3999442
tests/v1/spec_decode/test_tree_attention.py
@@ -3,6 +3,7 @@

 import math

+import pytest
 import torch

 from tests.v1.attention.utils import (
@@ -11,9 +12,16 @@
     try_get_attention_backend,
 )
 from vllm.attention.backends.registry import AttentionBackendEnum
+from vllm.attention.utils.fa_utils import is_flash_attn_varlen_func_available
 from vllm.config import ParallelConfig, SpeculativeConfig
 from vllm.v1.attention.backends.utils import CommonAttentionMetadata

+if not is_flash_attn_varlen_func_available():
+    pytest.skip(
+        "This test requires flash_attn_varlen_func, but it's not available.",
+        allow_module_level=True,
+    )
+

 class MockAttentionLayer(torch.nn.Module):
     _q_scale = torch.tensor(1.0, dtype=torch.float32, device="cuda")
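For context, the added guard relies on pytest's module-level skip: pytest.skip() normally may only be called from inside a test, but passing allow_module_level=True lets it run at import time and mark every test in the file as skipped. The following is a minimal, self-contained sketch of the same pattern; the has_flash_varlen() probe and the test name are illustrative placeholders, not part of vLLM.

import pytest


def has_flash_varlen() -> bool:
    # Illustrative availability probe; the real diff instead calls
    # is_flash_attn_varlen_func_available() from vllm.attention.utils.fa_utils.
    try:
        import flash_attn  # noqa: F401
    except ImportError:
        return False
    return True


if not has_flash_varlen():
    # With allow_module_level=True, this skips the entire module at collection
    # time instead of raising the usual "skip outside of a test" error.
    pytest.skip("flash_attn_varlen_func is unavailable", allow_module_level=True)


def test_needs_flash_varlen():
    assert has_flash_varlen()

A plain pytest.importorskip("flash_attn") at the top of the module would give a similar module-level skip for a pure import check; an explicit helper call like the one in the diff is the better fit when availability depends on more than the import succeeding.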