Unverified commit 1a9c2c92, authored by Lianmin Zheng, committed by GitHub
Browse files

Fix AMD speculative decoding (#7252)

parent 873ae12c
...@@ -27,14 +27,14 @@ from sglang.srt.utils import is_cuda, is_hip, next_power_of_2 ...@@ -27,14 +27,14 @@ from sglang.srt.utils import is_cuda, is_hip, next_power_of_2
if is_cuda(): if is_cuda():
from sgl_kernel import ( from sgl_kernel import (
fast_topk,
top_k_renorm_prob, top_k_renorm_prob,
top_p_renorm_prob, top_p_renorm_prob,
tree_speculative_sampling_target_only, tree_speculative_sampling_target_only,
verify_tree_greedy, verify_tree_greedy,
) )
from sgl_kernel.top_k import fast_topk
elif is_hip(): elif is_hip():
from sgl_kernel import verify_tree_greedy from sgl_kernel import fast_topk, verify_tree_greedy
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment