Unverified Commit 8abf74e3 authored by Lianmin Zheng, committed by GitHub

Rename files in sgl kernel to avoid nested folder structure (#4213)


Co-authored-by: zhyncs <me@zhyncs.com>
parent ee132a45
@@ -5,7 +5,7 @@ on:
     branches:
       - main
     paths:
-      - sgl-kernel/src/sgl-kernel/version.py
+      - sgl-kernel/python/sgl_kernel/version.py
   workflow_dispatch:

 concurrency:
@@ -9,7 +9,7 @@ on:
     branches:
       - main
     paths:
-      - sgl-kernel/src/sgl-kernel/version.py
+      - sgl-kernel/python/sgl_kernel/version.py

 jobs:
   build-wheels:
@@ -59,7 +59,7 @@ jobs:
         id: set_tag_name
         run: |
           if [ -z "${{ inputs.tag_name }}" ]; then
-            TAG_NAME="v$(cat sgl-kernel/src/sgl-kernel/version.py | cut -d'"' -f2)"
+            TAG_NAME="v$(cat sgl-kernel/python/sgl_kernel/version.py | cut -d'"' -f2)"
             echo "tag_name=$TAG_NAME" >> $GITHUB_OUTPUT
           else
             echo "tag_name=${{ inputs.tag_name }}" >> $GITHUB_OUTPUT
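For reference, a minimal Python sketch of what the `TAG_NAME` shell pipeline above computes, assuming `version.py` stores the version as a single double-quoted string (its exact contents are not part of this diff):

```python
from pathlib import Path

# Equivalent of: v$(cat sgl-kernel/python/sgl_kernel/version.py | cut -d'"' -f2)
# `cut -d'"' -f2` yields the text between the first pair of double quotes.
text = Path("sgl-kernel/python/sgl_kernel/version.py").read_text()
tag_name = "v" + text.split('"')[1]
print(tag_name)  # e.g. v0.4.3 for a line like: version = "0.4.3"
```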
@@ -75,42 +75,42 @@ else:
         rank: int,
         full_nvlink: bool,
     ) -> int:
-        return sgl_kernel.ops.allreduce.init_custom_ar(
+        return sgl_kernel.allreduce.init_custom_ar(
             meta, rank_data, handles, offsets, rank, full_nvlink
         )

     def all_reduce_reg(fa: int, inp: torch.Tensor, out: torch.Tensor) -> None:
-        sgl_kernel.ops.allreduce.all_reduce_reg(fa, inp, out)
+        sgl_kernel.allreduce.all_reduce_reg(fa, inp, out)

     def all_reduce_unreg(
         fa: int, inp: torch.Tensor, reg_buffer: torch.Tensor, out: torch.Tensor
     ) -> None:
-        sgl_kernel.ops.allreduce.all_reduce_unreg(fa, inp, reg_buffer, out)
+        sgl_kernel.allreduce.all_reduce_unreg(fa, inp, reg_buffer, out)

     def dispose(fa: int) -> None:
-        sgl_kernel.ops.allreduce.dispose(fa)
+        sgl_kernel.allreduce.dispose(fa)

     def meta_size() -> int:
-        return sgl_kernel.ops.allreduce.meta_size()
+        return sgl_kernel.allreduce.meta_size()

     def register_buffer(
         fa: int, t: torch.Tensor, handles: List[str], offsets: List[int]
     ) -> None:
-        return sgl_kernel.ops.allreduce.register_buffer(fa, t, handles, offsets)
+        return sgl_kernel.allreduce.register_buffer(fa, t, handles, offsets)

     def get_graph_buffer_ipc_meta(fa: int) -> Tuple[torch.Tensor, List[int]]:
-        return sgl_kernel.ops.allreduce.get_graph_buffer_ipc_meta(fa)
+        return sgl_kernel.allreduce.get_graph_buffer_ipc_meta(fa)

     def register_graph_buffers(
         fa: int, handles: List[str], offsets: List[List[int]]
     ) -> None:
-        sgl_kernel.ops.allreduce.register_graph_buffers(fa, handles, offsets)
+        sgl_kernel.allreduce.register_graph_buffers(fa, handles, offsets)

     def allocate_meta_buffer(size: int) -> torch.Tensor:
-        return sgl_kernel.ops.allreduce.allocate_meta_buffer(size)
+        return sgl_kernel.allreduce.allocate_meta_buffer(size)

     def get_meta_buffer_ipc_handle(inp: torch.Tensor) -> torch.Tensor:
-        return sgl_kernel.ops.allreduce.get_meta_buffer_ipc_handle(inp)
+        return sgl_kernel.allreduce.get_meta_buffer_ipc_handle(inp)

 else:
     # TRTLLM custom allreduce
@@ -123,7 +123,7 @@ else:
         barrier_in: List[int],
         barrier_out: List[int],
     ) -> int:
-        return sgl_kernel.ops.init_custom_reduce(
+        return sgl_kernel.init_custom_reduce(
             rank_id,
             world_size,
             rank_data_base,
@@ -134,15 +134,15 @@ else:
         )

     def all_reduce(fa: int, inp: torch.Tensor, out: torch.Tensor) -> None:
-        sgl_kernel.ops.custom_reduce(fa, inp, out)
+        sgl_kernel.custom_reduce(fa, inp, out)

     def dispose(fa: int) -> None:
-        sgl_kernel.ops.custom_dispose(fa)
+        sgl_kernel.custom_dispose(fa)

     def get_graph_buffer_ipc_meta(fa: int) -> Tuple[List[int], List[int]]:
-        return sgl_kernel.ops.get_graph_buffer_ipc_meta(fa)
+        return sgl_kernel.get_graph_buffer_ipc_meta(fa)

     def register_graph_buffers(
         fa: int, handles: List[List[int]], offsets: List[List[int]]
     ) -> None:
-        sgl_kernel.ops.register_graph_buffers(fa, handles, offsets)
+        sgl_kernel.register_graph_buffers(fa, handles, offsets)
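The visible effect of the flattening for callers is that the intermediate `ops` module disappears. A minimal compatibility sketch, using only the two entry-point names that appear in this diff (this helper is illustrative and not part of the commit):

```python
import sgl_kernel

def resolve_custom_reduce():
    """Return the custom_reduce entry point under either package layout:
    the old nested layout (sgl_kernel.ops.custom_reduce) or the flat
    layout introduced by this commit (sgl_kernel.custom_reduce)."""
    ops = getattr(sgl_kernel, "ops", None)
    if ops is not None and hasattr(ops, "custom_reduce"):
        return ops.custom_reduce  # pre-rename layout
    return sgl_kernel.custom_reduce  # post-rename layout
```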
@@ -38,12 +38,12 @@ test: ## Run all tests

 format: check-deps ## Format all source files
 	@echo "Formatting source files..."
-	@find src tests -name '*.cc' -o -name '*.cu' -o -name '*.cuh' -o -name '*.h' -o -name '*.hpp' | xargs clang-format -i
-	@find src tests -name '*.py' | xargs isort
-	@find src tests -name '*.py' | xargs black
+	@find csrc tests -name '*.cc' -o -name '*.cu' -o -name '*.cuh' -o -name '*.h' -o -name '*.hpp' | xargs clang-format -i
+	@find python tests -name '*.py' | xargs isort
+	@find python tests -name '*.py' | xargs black
 	@pre-commit run --all-files

-FILES_TO_UPDATE = src/sgl-kernel/version.py \
+FILES_TO_UPDATE = python/sgl_kernel/version.py \
 		pyproject.toml

 update: ## Update version numbers across project files. Usage: make update <new_version>
@@ -51,7 +51,7 @@ update: ## Update version numbers across project files. Usage: make update <new_
 		echo "Version required. Usage: make update <new_version>"; \
 		exit 1; \
 	fi
-	@OLD_VERSION=$$(grep "version" src/sgl-kernel/version.py | cut -d '"' -f2); \
+	@OLD_VERSION=$$(grep "version" python/sgl_kernel/version.py | cut -d '"' -f2); \
 	NEW_VERSION=$(filter-out $@,$(MAKECMDGOALS)); \
 	echo "Updating version from $$OLD_VERSION to $$NEW_VERSION"; \
 	for file in $(FILES_TO_UPDATE); do \
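For illustration, a hedged Python equivalent of the `update` target's core logic under the new layout. `bump_version` is a hypothetical helper, not a script in the repo, and it assumes both files store the version as a double-quoted string:

```python
from pathlib import Path

FILES_TO_UPDATE = ["python/sgl_kernel/version.py", "pyproject.toml"]

def bump_version(new_version: str) -> None:
    # Read the current version, like: grep "version" ... | cut -d '"' -f2
    version_file = Path("python/sgl_kernel/version.py")
    old_version = version_file.read_text().split('"')[1]
    print(f"Updating version from {old_version} to {new_version}")
    # Rewrite the version string in every tracked file, like the sed loop.
    for name in FILES_TO_UPDATE:
        path = Path(name)
        path.write_text(path.read_text().replace(old_version, new_version))

# bump_version("0.0.4")  # hypothetical new version
```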
@@ -45,12 +45,11 @@ Third-party libraries:

 Steps to add a new kernel:

-1. Implement in [src/sgl-kernel/csrc/](https://github.com/sgl-project/sglang/tree/main/sgl-kernel/src/sgl-kernel/csrc)
-2. Expose interface in [src/sgl-kernel/include/sgl_kernels_ops.h](https://github.com/sgl-project/sglang/blob/main/sgl-kernel/src/sgl-kernel/include/sgl_kernels_ops.h)
-3. Create torch extension in [src/sgl-kernel/torch_extension.cc](https://github.com/sgl-project/sglang/blob/main/sgl-kernel/src/sgl-kernel/torch_extension.cc)
-4. Create Python wrapper in [src/sgl-kernel/ops/\_\_init\_\_.py](https://github.com/sgl-project/sglang/blob/main/sgl-kernel/src/sgl-kernel/ops/__init__.py)
-5. Expose Python interface in [src/sgl-kernel/\_\_init\_\_.py](https://github.com/sgl-project/sglang/blob/main/sgl-kernel/src/sgl-kernel/__init__.py)
-6. Update [setup.py](https://github.com/sgl-project/sglang/blob/main/sgl-kernel/setup.py) to include new CUDA source
+1. Implement the kernel in [csrc](https://github.com/sgl-project/sglang/tree/main/sgl-kernel/csrc)
+2. Expose the interface in [include/sgl_kernel_ops.h](https://github.com/sgl-project/sglang/blob/main/sgl-kernel/include/sgl_kernel_ops.h)
+3. Create torch extension in [csrc/torch_extension.cc](https://github.com/sgl-project/sglang/blob/main/sgl-kernel/csrc/torch_extension.cc)
+4. Update [setup.py](https://github.com/sgl-project/sglang/blob/main/sgl-kernel/setup.py) to include new CUDA source
+5. Expose Python interface in [python](https://github.com/sgl-project/sglang/blob/main/sgl-kernel/python/sgl_kernel)

 ### Build & Install

@@ -72,4 +71,4 @@ The `sgl-kernel` is rapidly evolving. If you experience a compilation failure, t
 ### Release new version

-Update version in [pyproject.toml](https://github.com/sgl-project/sglang/blob/main/sgl-kernel/pyproject.toml) and [version.py](https://github.com/sgl-project/sglang/blob/main/sgl-kernel/src/sgl-kernel/version.py)
+Update version in [pyproject.toml](https://github.com/sgl-project/sglang/blob/main/sgl-kernel/pyproject.toml) and [version.py](https://github.com/sgl-project/sglang/blob/main/sgl-kernel/python/sgl_kernel/version.py)
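To make step 5 of the README's updated list concrete, a minimal sketch of a Python-side wrapper under the new `python/sgl_kernel` layout. `my_kernel` is a hypothetical op name, and the assumption that the compiled extension registers ops under a `torch.ops.sgl_kernel` namespace is illustrative only; the actual registration mechanism lives in csrc/torch_extension.cc and is not shown in this diff:

```python
# python/sgl_kernel/__init__.py -- illustrative fragment, not the real file
import torch

def my_kernel(x: torch.Tensor) -> torch.Tensor:
    """Thin wrapper so callers can write sgl_kernel.my_kernel(x).

    Assumes the C++ extension registered `my_kernel` (hypothetical)
    under the torch.ops.sgl_kernel namespace at import time.
    """
    return torch.ops.sgl_kernel.my_kernel(x)
```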