"src/git@developer.sourcefind.cn:gaoqiong/migraphx.git" did not exist on "298fc26b54ef827ec3261e7f84d976b7a347c4e2"
Unverified Commit c5df7938 authored by Lei Wang, committed by GitHub
Browse files

[Enhancement] Add `--ptxas-options=--register-usage-level=10` option (#684)



* Add `--ptxas-options=--register-usage-level=10` option

* lint fix

---------
Co-authored-by: Chenggang Zhao <chenggangz@deepseek.com>
parent 950ed16c
...@@ -25,6 +25,7 @@ TVM_REGISTER_PASS_CONFIG_OPTION(kDisableDynamicTailSplit, Bool); ...@@ -25,6 +25,7 @@ TVM_REGISTER_PASS_CONFIG_OPTION(kDisableDynamicTailSplit, Bool);
TVM_REGISTER_PASS_CONFIG_OPTION(kDynamicAlignment, Integer); TVM_REGISTER_PASS_CONFIG_OPTION(kDynamicAlignment, Integer);
TVM_REGISTER_PASS_CONFIG_OPTION(kEnableAggressiveSharedMemoryMerge, Bool); TVM_REGISTER_PASS_CONFIG_OPTION(kEnableAggressiveSharedMemoryMerge, Bool);
TVM_REGISTER_PASS_CONFIG_OPTION(kDisableFastMath, Bool); TVM_REGISTER_PASS_CONFIG_OPTION(kDisableFastMath, Bool);
TVM_REGISTER_PASS_CONFIG_OPTION(kPtxasRegisterUsageLevel, Integer);
TVM_REGISTER_PASS_CONFIG_OPTION(kEnablePTXASVerboseOutput, Bool); TVM_REGISTER_PASS_CONFIG_OPTION(kEnablePTXASVerboseOutput, Bool);
#define TIR_DEFINE_TL_BUILTIN(OpName) \ #define TIR_DEFINE_TL_BUILTIN(OpName) \
......
...@@ -28,6 +28,8 @@ static constexpr const char *kConfigIndexBitwidth = "tl.config_index_bitwidth"; ...@@ -28,6 +28,8 @@ static constexpr const char *kConfigIndexBitwidth = "tl.config_index_bitwidth";
static constexpr const char *kEnableAggressiveSharedMemoryMerge = static constexpr const char *kEnableAggressiveSharedMemoryMerge =
"tl.enable_aggressive_shared_memory_merge"; "tl.enable_aggressive_shared_memory_merge";
static constexpr const char *kDisableFastMath = "tl.disable_fast_math"; static constexpr const char *kDisableFastMath = "tl.disable_fast_math";
static constexpr const char *kPtxasRegisterUsageLevel =
"tl.ptxas_register_usage_level";
static constexpr const char *kEnablePTXASVerboseOutput = static constexpr const char *kEnablePTXASVerboseOutput =
"tl.enable_ptxas_verbose_output"; "tl.enable_ptxas_verbose_output";
......
...@@ -73,6 +73,8 @@ class LibraryGenerator(object): ...@@ -73,6 +73,8 @@ class LibraryGenerator(object):
libpath = src.name.replace(".cu", ".so") libpath = src.name.replace(".cu", ".so")
disable_fast_math = self.pass_configs.get(PassConfigKey.TL_DISABLE_FAST_MATH, False) disable_fast_math = self.pass_configs.get(PassConfigKey.TL_DISABLE_FAST_MATH, False)
ptxas_usage_level = self.pass_configs.get(PassConfigKey.TL_PTXAS_REGISTER_USAGE_LEVEL,
None)
verbose_ptxas_output = self.pass_configs.get( verbose_ptxas_output = self.pass_configs.get(
PassConfigKey.TL_ENABLE_PTXAS_VERBOSE_OUTPUT, False) PassConfigKey.TL_ENABLE_PTXAS_VERBOSE_OUTPUT, False)
...@@ -93,10 +95,10 @@ class LibraryGenerator(object): ...@@ -93,10 +95,10 @@ class LibraryGenerator(object):
] ]
if not disable_fast_math: if not disable_fast_math:
command += ["--use_fast_math"] command += ["--use_fast_math"]
if ptxas_usage_level is not None:
command += [f"--ptxas-options=--register-usage-level={ptxas_usage_level}"]
if verbose_ptxas_output: if verbose_ptxas_output:
command += ["--ptxas-options", "-v"] command += ["--ptxas-options=--verbose"]
if compute_version == "90a":
command += ["-D", "CUTE_SM90_EXTENDED_MMA_SHAPES_ENABLED"]
command += [ command += [
"-I" + CUTLASS_INCLUDE_DIR, "-I" + CUTLASS_INCLUDE_DIR,
] ]
......
...@@ -21,6 +21,10 @@ class PassConfigKey(str, Enum): ...@@ -21,6 +21,10 @@ class PassConfigKey(str, Enum):
TL_DISABLE_FAST_MATH = "tl.disable_fast_math" TL_DISABLE_FAST_MATH = "tl.disable_fast_math"
"""Disable fast math optimization. Default: False""" """Disable fast math optimization. Default: False"""
TL_PTXAS_REGISTER_USAGE_LEVEL = "tl.ptxas_register_usage_level"
"""The PTXAS register usage level in [0, 10], which controls the
aggressiveness of optimizations that affect register usage. Default: None"""
TL_ENABLE_PTXAS_VERBOSE_OUTPUT = "tl.enable_ptxas_verbose_output" TL_ENABLE_PTXAS_VERBOSE_OUTPUT = "tl.enable_ptxas_verbose_output"
"""Enable ptxas verbose output. Default: False""" """Enable ptxas verbose output. Default: False"""
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment