Commit a1175a4e authored by maxiao1

Merge remote-tracking branch 'origin/v0.5.4_dev' into sglang_v0.5.5

parents 0c006b88 31653dd9
...@@ -9,6 +9,58 @@ def is_hip() -> bool:
_is_hip = is_hip()
# The dcu_* helpers below are thin Python wrappers that dispatch to the DCU
# custom ops registered under torch.ops.sgl_kernel by the compiled extension.
def dcu_create_extend_after_decode_spec_info(
    verified_id: torch.Tensor,
    seq_lens: torch.Tensor,
    accept_lens: torch.Tensor,
    positions: torch.Tensor,
    new_verified_id: torch.Tensor,
    bs: int,
):
    torch.ops.sgl_kernel.dcu_create_extend_after_decode_spec_info(
        verified_id,
        seq_lens,
        accept_lens,
        positions,
        new_verified_id,
        bs,
    )


def dcu_alloc_extend_kernel(
    pre_lens_ptr: torch.Tensor,
    seq_lens_ptr: torch.Tensor,
    last_loc_ptr: torch.Tensor,
    free_page_ptr: torch.Tensor,
    out_indices: torch.Tensor,
    bs: int,
    page_size: int,
):
    torch.ops.sgl_kernel.dcu_alloc_extend_kernel(
        pre_lens_ptr,
        seq_lens_ptr,
        last_loc_ptr,
        free_page_ptr,
        out_indices,
        bs,
        page_size,
    )


def dcu_alloc_decode_kernel(
    seq_lens_ptr: torch.Tensor,
    last_loc_ptr: torch.Tensor,
    free_page_ptr: torch.Tensor,
    out_indices: torch.Tensor,
    bs: int,
    page_size: int,
):
    torch.ops.sgl_kernel.dcu_alloc_decode_kernel(
        seq_lens_ptr,
        last_loc_ptr,
        free_page_ptr,
        out_indices,
        bs,
        page_size,
    )
def transfer_kv_per_layer(
    src_k: torch.Tensor,
...@@ -305,3 +357,76 @@ def transfer_kv_all_layer_mla_lf_pf(
        block_quota,
        num_warps_per_block,
    )
def dcu_assign_req_to_token_pool(
    req_pool_indices: torch.Tensor,
    req_to_token: torch.Tensor,
    allocate_lens: torch.Tensor,
    new_allocate_lens: torch.Tensor,
    out_cache_loc: torch.Tensor,
    shape: int,
    bs: int,
):
    torch.ops.sgl_kernel.dcu_assign_req_to_token_pool(
        req_pool_indices,
        req_to_token,
        allocate_lens,
        new_allocate_lens,
        out_cache_loc,
        shape,
        bs,
    )


def dcu_get_last_loc(
    req_to_token: torch.Tensor,
    req_pool_indices: torch.Tensor,
    prefix_lens: torch.Tensor,
):
    result = torch.ops.sgl_kernel.dcu_get_last_loc(
        req_to_token,
        req_pool_indices,
        prefix_lens,
    )
    return result


def dcu_assign_extend_cache_locs(
    req_pool_indices: torch.Tensor,
    req_to_token: torch.Tensor,
    start_offset: torch.Tensor,
    end_offset: torch.Tensor,
    out_cache_loc: torch.Tensor,
    pool_len: int,
    bs: int,
):
    torch.ops.sgl_kernel.dcu_assign_extend_cache_locs(
        req_pool_indices,
        req_to_token,
        start_offset,
        end_offset,
        out_cache_loc,
        pool_len,
        bs,
    )


def dcu_create_chunked_prefix_cache_kv_indices(
    req_to_token: torch.Tensor,
    req_pool_indices: torch.Tensor,
    chunk_starts: torch.Tensor,
    chunk_seq_lens: torch.Tensor,
    chunk_cu_seq_lens: torch.Tensor,
    chunk_kv_indices: torch.Tensor,
    col_num: int,
    bs: int,
):
    torch.ops.sgl_kernel.dcu_create_chunked_prefix_cache_kv_indices(
        req_to_token,
        req_pool_indices,
        chunk_starts,
        chunk_seq_lens,
        chunk_cu_seq_lens,
        chunk_kv_indices,
        col_num,
        bs,
    )
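
A minimal, hypothetical usage sketch (not part of this commit): it assumes the compiled sgl_kernel.common_ops extension built by the setup.py below is installed so the torch.ops.sgl_kernel ops are registered, and that the kernels operate on device-resident integer tensors; shapes and dtypes here are illustrative only.

# Hypothetical example; assumes the DCU ops are registered and expect
# device integer tensors. Shapes/dtypes are illustrative, not authoritative.
import torch

req_to_token = torch.zeros((4, 128), dtype=torch.int64, device="cuda")
req_pool_indices = torch.arange(4, dtype=torch.int64, device="cuda")
prefix_lens = torch.tensor([0, 3, 7, 1], dtype=torch.int64, device="cuda")

# Each wrapper simply forwards its arguments to the registered custom op;
# dcu_get_last_loc is the only wrapper above that returns a value.
last_loc = dcu_get_last_loc(req_to_token, req_pool_indices, prefix_lens)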
# Copyright 2025 SGLang Team. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import os
import platform
import sys
from pathlib import Path
import torch
from setuptools import find_packages, setup
from torch.utils.cpp_extension import BuildExtension, CUDAExtension
root = Path(__file__).parent.resolve()
arch = platform.machine().lower()
def _get_version():
    with open(root / "pyproject.toml") as f:
        for line in f:
            if line.startswith("version"):
                return line.split("=")[1].strip().strip('"')


operator_namespace = "sgl_kernel"
include_dirs = [
    root / "include",
    root / "include" / "impl",
    root / "csrc",
]

sources = [
    "csrc/allreduce/custom_all_reduce.hip",
    "csrc/allreduce/quick_all_reduce.cu",
    "csrc/common_extension_rocm.cc",
    "csrc/elementwise/activation.cu",
    "csrc/grammar/apply_token_bitmask_inplace_cuda.cu",
    "csrc/moe/moe_align_kernel.cu",
    "csrc/moe/moe_topk_softmax_kernels.cu",
    "csrc/speculative/eagle_utils.cu",
    "csrc/kvcacheio/transfer.cu",
    "csrc/attention/merge_attn_states.cu",
]

cxx_flags = ["-O3"]
libraries = ["hiprtc", "amdhip64", "c10", "torch", "torch_python"]
extra_link_args = ["-Wl,-rpath,$ORIGIN/../../torch/lib", f"-L/usr/lib/{arch}-linux-gnu"]

hipcc_flags = [
    "-DNDEBUG",
    f"-DOPERATOR_NAMESPACE={operator_namespace}",
    "-O3",
    "-Xcompiler",
    "-fPIC",
    "-std=c++17",
    "-DENABLE_BF16",
    "-DENABLE_FP8",
]

ext_modules = [
    CUDAExtension(
        name="sgl_kernel.common_ops",
        sources=sources,
        include_dirs=include_dirs,
        extra_compile_args={
            "nvcc": hipcc_flags,
            "cxx": cxx_flags,
        },
        libraries=libraries,
        extra_link_args=extra_link_args,
        py_limited_api=False,
    ),
]

setup(
    name="sgl-kernel",
    version=_get_version(),
    packages=find_packages(where="python"),
    package_dir={"": "python"},
    ext_modules=ext_modules,
    cmdclass={"build_ext": BuildExtension.with_options(use_ninja=True)},
    options={"bdist_wheel": {"py_limited_api": "cp39"}},
)
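
A hedged illustration (not part of this commit) of how the extension built by this setup.py connects to the wrappers above: once the wheel is installed, importing the compiled module registers the operators that csrc/common_extension_rocm.cc declares under the sgl_kernel namespace, after which torch.ops.sgl_kernel.* calls resolve. The import path used here is an assumption based on the extension name sgl_kernel.common_ops.

# Hypothetical loading sketch; assumes the installed package exposes the
# compiled module as sgl_kernel.common_ops and that the C++ side registers
# its ops under the "sgl_kernel" namespace.
import torch
import sgl_kernel.common_ops  # noqa: F401  # importing registers the custom ops

op = torch.ops.sgl_kernel.dcu_get_last_loc  # resolves once registration has run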