[REMOVE ME] Patch _grouped_size_compiled_for_decode_kernels for flashinfer v0.2.0
james-p-xu committed Nov 14, 2024
1 parent 6434149 commit 2693a1f
Showing 1 changed file with 6 additions and 1 deletion.
python/sglang/srt/layers/attention/flashinfer_backend.py (6 additions & 1 deletion)
@@ -30,14 +30,19 @@
     BatchPrefillWithRaggedKVCacheWrapper,
 )
 from flashinfer.cascade import merge_state
-from flashinfer.decode import _grouped_size_compiled_for_decode_kernels


 class WrapperDispatch(Enum):
     SLIDING_WINDOW = auto()
     CROSS_ATTENTION = auto()


+def _grouped_size_compiled_for_decode_kernels(
+    num_qo_heads: int, num_kv_heads: int
+) -> bool:  # TODO: Remove me! https://github.com/flashinfer-ai/flashinfer/issues/549
+    return (num_qo_heads // num_kv_heads) in [1, 2, 4, 8]
+
+
 class FlashInferAttnBackend(AttentionBackend):
     """Flashinfer attention kernels."""
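Context on the patch: flashinfer v0.2.0 no longer exports _grouped_size_compiled_for_decode_kernels (see the issue linked in the TODO), so the commit inlines an equivalent check: it returns True only for the GQA group sizes (num_qo_heads // num_kv_heads of 1, 2, 4, or 8) that the shipped decode kernels were compiled for. Below is a minimal, self-contained sketch of how such a helper is typically consumed to pick a decode path; choose_decode_path and use_tensor_cores are illustrative names assumed here, not part of this diff.

def _grouped_size_compiled_for_decode_kernels(
    num_qo_heads: int, num_kv_heads: int
) -> bool:
    # True when a precompiled decode kernel exists for this GQA group size
    # (the same check the commit adds as a local shim).
    return (num_qo_heads // num_kv_heads) in [1, 2, 4, 8]


def choose_decode_path(num_qo_heads: int, num_kv_heads: int) -> bool:
    # Assumed usage pattern: when no precompiled decode kernel covers the
    # group size, fall back to an alternative (e.g. tensor-core) decode path.
    use_tensor_cores = not _grouped_size_compiled_for_decode_kernels(
        num_qo_heads, num_kv_heads
    )
    return use_tensor_cores


if __name__ == "__main__":
    # 32 query heads / 8 KV heads -> group size 4: precompiled kernel exists.
    print(choose_decode_path(32, 8))  # False
    # 28 query heads / 4 KV heads -> group size 7: not precompiled.
    print(choose_decode_path(28, 4))  # True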
