1 parent 71362ff commit 3999442
tests/v1/spec_decode/test_tree_attention.py
@@ -3,6 +3,7 @@

 import math

+import pytest
 import torch

 from tests.v1.attention.utils import (
@@ -11,9 +12,16 @@
     try_get_attention_backend,
 )
 from vllm.attention.backends.registry import AttentionBackendEnum
+from vllm.attention.utils.fa_utils import is_flash_attn_varlen_func_available
 from vllm.config import ParallelConfig, SpeculativeConfig
 from vllm.v1.attention.backends.utils import CommonAttentionMetadata

+if not is_flash_attn_varlen_func_available():
+    pytest.skip(
+        "This test requires flash_attn_varlen_func, but it's not available.",
+        allow_module_level=True,
+    )
+

 class MockAttentionLayer(torch.nn.Module):
     _q_scale = torch.tensor(1.0, dtype=torch.float32, device="cuda")
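For context, the added guard relies on pytest's module-level skip: pytest.skip() normally may only be called from inside a test, but passing allow_module_level=True lets it run at import time and mark every test in the file as skipped. The following is a minimal, self-contained sketch of the same pattern; the has_flash_varlen() probe and the test name are illustrative placeholders, not part of vLLM.

import pytest


def has_flash_varlen() -> bool:
    # Illustrative availability probe; the real diff instead calls
    # is_flash_attn_varlen_func_available() from vllm.attention.utils.fa_utils.
    try:
        import flash_attn  # noqa: F401
    except ImportError:
        return False
    return True


if not has_flash_varlen():
    # With allow_module_level=True, this skips the entire module at collection
    # time instead of raising the usual "skip outside of a test" error.
    pytest.skip("flash_attn_varlen_func is unavailable", allow_module_level=True)


def test_needs_flash_varlen():
    assert has_flash_varlen()

A plain pytest.importorskip("flash_attn") at the top of the module would give a similar module-level skip for a pure import check; an explicit helper call like the one in the diff is the better fit when availability depends on more than the import succeeding.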