Unverified commit 9949aa2e, authored by Wentao Ye, committed by GitHub
Browse files

[Perf] Apply torch.compile for `per_block_cast_to_fp8` (#24611)


Signed-off-by: yewentao256 <zhyanwentao@126.com>
parent 0b7bed9c
...@@ -135,7 +135,7 @@ DEFAULT_BLOCK_SIZE = [128, 128] ...@@ -135,7 +135,7 @@ DEFAULT_BLOCK_SIZE = [128, 128]
# Taken from https://github.com/deepseek-ai/DeepGEMM/blob/dd6ed14acbc7445dcef224248a77ab4d22b5f240/deep_gemm/utils/math.py#L38 # Taken from https://github.com/deepseek-ai/DeepGEMM/blob/dd6ed14acbc7445dcef224248a77ab4d22b5f240/deep_gemm/utils/math.py#L38
# TODO(wentao): optimize this function, using triton or cuda kernel @torch.compile(dynamic=True, backend=current_platform.simple_compile_backend)
def per_block_cast_to_fp8( def per_block_cast_to_fp8(
x: torch.Tensor, x: torch.Tensor,
block_size: list[int] = DEFAULT_BLOCK_SIZE, block_size: list[int] = DEFAULT_BLOCK_SIZE,
...@@ -187,4 +187,4 @@ __all__ = [ ...@@ -187,4 +187,4 @@ __all__ = [
"is_deep_gemm_e8m0_used", "is_deep_gemm_e8m0_used",
"is_deep_gemm_supported", "is_deep_gemm_supported",
"should_use_deepgemm_for_fp8_linear", "should_use_deepgemm_for_fp8_linear",
] ]
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment