Unverified Commit 8abf74e3 authored by Lianmin Zheng, committed by GitHub
Browse files

Rename files in sgl kernel to avoid nested folder structure (#4213)


Co-authored-by: zhyncs <me@zhyncs.com>
parent ee132a45
......@@ -5,7 +5,7 @@ on:
branches:
- main
paths:
- sgl-kernel/src/sgl-kernel/version.py
- sgl-kernel/python/sgl_kernel/version.py
workflow_dispatch:
concurrency:
......
......@@ -9,7 +9,7 @@ on:
branches:
- main
paths:
- sgl-kernel/src/sgl-kernel/version.py
- sgl-kernel/python/sgl_kernel/version.py
jobs:
build-wheels:
......@@ -59,7 +59,7 @@ jobs:
id: set_tag_name
run: |
if [ -z "${{ inputs.tag_name }}" ]; then
TAG_NAME="v$(cat sgl-kernel/src/sgl-kernel/version.py | cut -d'"' -f2)"
TAG_NAME="v$(cat sgl-kernel/python/sgl_kernel/version.py | cut -d'"' -f2)"
echo "tag_name=$TAG_NAME" >> $GITHUB_OUTPUT
else
echo "tag_name=${{ inputs.tag_name }}" >> $GITHUB_OUTPUT
......
......@@ -75,42 +75,42 @@ else:
rank: int,
full_nvlink: bool,
) -> int:
return sgl_kernel.ops.allreduce.init_custom_ar(
return sgl_kernel.allreduce.init_custom_ar(
meta, rank_data, handles, offsets, rank, full_nvlink
)
def all_reduce_reg(fa: int, inp: torch.Tensor, out: torch.Tensor) -> None:
sgl_kernel.ops.allreduce.all_reduce_reg(fa, inp, out)
sgl_kernel.allreduce.all_reduce_reg(fa, inp, out)
def all_reduce_unreg(
fa: int, inp: torch.Tensor, reg_buffer: torch.Tensor, out: torch.Tensor
) -> None:
sgl_kernel.ops.allreduce.all_reduce_unreg(fa, inp, reg_buffer, out)
sgl_kernel.allreduce.all_reduce_unreg(fa, inp, reg_buffer, out)
def dispose(fa: int) -> None:
sgl_kernel.ops.allreduce.dispose(fa)
sgl_kernel.allreduce.dispose(fa)
def meta_size() -> int:
return sgl_kernel.ops.allreduce.meta_size()
return sgl_kernel.allreduce.meta_size()
def register_buffer(
fa: int, t: torch.Tensor, handles: List[str], offsets: List[int]
) -> None:
return sgl_kernel.ops.allreduce.register_buffer(fa, t, handles, offsets)
return sgl_kernel.allreduce.register_buffer(fa, t, handles, offsets)
def get_graph_buffer_ipc_meta(fa: int) -> Tuple[torch.Tensor, List[int]]:
return sgl_kernel.ops.allreduce.get_graph_buffer_ipc_meta(fa)
return sgl_kernel.allreduce.get_graph_buffer_ipc_meta(fa)
def register_graph_buffers(
fa: int, handles: List[str], offsets: List[List[int]]
) -> None:
sgl_kernel.ops.allreduce.register_graph_buffers(fa, handles, offsets)
sgl_kernel.allreduce.register_graph_buffers(fa, handles, offsets)
def allocate_meta_buffer(size: int) -> torch.Tensor:
return sgl_kernel.ops.allreduce.allocate_meta_buffer(size)
return sgl_kernel.allreduce.allocate_meta_buffer(size)
def get_meta_buffer_ipc_handle(inp: torch.Tensor) -> torch.Tensor:
return sgl_kernel.ops.allreduce.get_meta_buffer_ipc_handle(inp)
return sgl_kernel.allreduce.get_meta_buffer_ipc_handle(inp)
else:
# TRTLLM custom allreduce
......@@ -123,7 +123,7 @@ else:
barrier_in: List[int],
barrier_out: List[int],
) -> int:
return sgl_kernel.ops.init_custom_reduce(
return sgl_kernel.init_custom_reduce(
rank_id,
world_size,
rank_data_base,
......@@ -134,15 +134,15 @@ else:
)
def all_reduce(fa: int, inp: torch.Tensor, out: torch.Tensor) -> None:
sgl_kernel.ops.custom_reduce(fa, inp, out)
sgl_kernel.custom_reduce(fa, inp, out)
def dispose(fa: int) -> None:
sgl_kernel.ops.custom_dispose(fa)
sgl_kernel.custom_dispose(fa)
def get_graph_buffer_ipc_meta(fa: int) -> Tuple[List[int], List[int]]:
return sgl_kernel.ops.get_graph_buffer_ipc_meta(fa)
return sgl_kernel.get_graph_buffer_ipc_meta(fa)
def register_graph_buffers(
fa: int, handles: List[List[int]], offsets: List[List[int]]
) -> None:
sgl_kernel.ops.register_graph_buffers(fa, handles, offsets)
sgl_kernel.register_graph_buffers(fa, handles, offsets)
......@@ -38,12 +38,12 @@ test: ## Run all tests
format: check-deps ## Format all source files
@echo "Formatting source files..."
@find src tests -name '*.cc' -o -name '*.cu' -o -name '*.cuh' -o -name '*.h' -o -name '*.hpp' | xargs clang-format -i
@find src tests -name '*.py' | xargs isort
@find src tests -name '*.py' | xargs black
@find csrc tests -name '*.cc' -o -name '*.cu' -o -name '*.cuh' -o -name '*.h' -o -name '*.hpp' | xargs clang-format -i
@find python tests -name '*.py' | xargs isort
@find python tests -name '*.py' | xargs black
@pre-commit run --all-files
FILES_TO_UPDATE = src/sgl-kernel/version.py \
FILES_TO_UPDATE = python/sgl_kernel/version.py \
pyproject.toml
update: ## Update version numbers across project files. Usage: make update <new_version>
......@@ -51,7 +51,7 @@ update: ## Update version numbers across project files. Usage: make update <new_
echo "Version required. Usage: make update <new_version>"; \
exit 1; \
fi
@OLD_VERSION=$$(grep "version" src/sgl-kernel/version.py | cut -d '"' -f2); \
@OLD_VERSION=$$(grep "version" python/sgl_kernel/version.py | cut -d '"' -f2); \
NEW_VERSION=$(filter-out $@,$(MAKECMDGOALS)); \
echo "Updating version from $$OLD_VERSION to $$NEW_VERSION"; \
for file in $(FILES_TO_UPDATE); do \
......
......@@ -45,12 +45,11 @@ Third-party libraries:
Steps to add a new kernel:
1. Implement in [src/sgl-kernel/csrc/](https://github.com/sgl-project/sglang/tree/main/sgl-kernel/src/sgl-kernel/csrc)
2. Expose interface in [src/sgl-kernel/include/sgl_kernels_ops.h](https://github.com/sgl-project/sglang/blob/main/sgl-kernel/src/sgl-kernel/include/sgl_kernels_ops.h)
3. Create torch extension in [src/sgl-kernel/torch_extension.cc](https://github.com/sgl-project/sglang/blob/main/sgl-kernel/src/sgl-kernel/torch_extension.cc)
4. Create Python wrapper in [src/sgl-kernel/ops/\_\_init\_\_.py](https://github.com/sgl-project/sglang/blob/main/sgl-kernel/src/sgl-kernel/ops/__init__.py)
5. Expose Python interface in [src/sgl-kernel/\_\_init\_\_.py](https://github.com/sgl-project/sglang/blob/main/sgl-kernel/src/sgl-kernel/__init__.py)
6. Update [setup.py](https://github.com/sgl-project/sglang/blob/main/sgl-kernel/setup.py) to include new CUDA source
1. Implement the kernel in [csrc](https://github.com/sgl-project/sglang/tree/main/sgl-kernel/csrc)
2. Expose the interface in [include/sgl_kernel_ops.h](https://github.com/sgl-project/sglang/blob/main/sgl-kernel/include/sgl_kernel_ops.h)
3. Create torch extension in [csrc/torch_extension.cc](https://github.com/sgl-project/sglang/blob/main/sgl-kernel/csrc/torch_extension.cc)
4. Update [setup.py](https://github.com/sgl-project/sglang/blob/main/sgl-kernel/setup.py) to include new CUDA source
5. Expose Python interface in [python](https://github.com/sgl-project/sglang/blob/main/sgl-kernel/python/sgl_kernel)
### Build & Install
......@@ -72,4 +71,4 @@ The `sgl-kernel` is rapidly evolving. If you experience a compilation failure, t
### Release new version
Update version in [pyproject.toml](https://github.com/sgl-project/sglang/blob/main/sgl-kernel/pyproject.toml) and [version.py](https://github.com/sgl-project/sglang/blob/main/sgl-kernel/src/sgl-kernel/version.py)
Update version in [pyproject.toml](https://github.com/sgl-project/sglang/blob/main/sgl-kernel/pyproject.toml) and [version.py](https://github.com/sgl-project/sglang/blob/main/sgl-kernel/python/sgl_kernel/version.py)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment