Unverified commit c5df7938, authored by Lei Wang and committed by GitHub
Browse files

[Enhancement] Add `--ptxas-options=--register-usage-level=10` option (#684)



* Add `--ptxas-options=--register-usage-level=10` option

* lint fix

---------
Co-authored-by: Chenggang Zhao <chenggangz@deepseek.com>
parent 950ed16c
......@@ -25,6 +25,7 @@ TVM_REGISTER_PASS_CONFIG_OPTION(kDisableDynamicTailSplit, Bool);
// Pass-config option registrations. Each invocation pairs a config-key
// constant with the name of the value type that option carries (Bool or
// Integer).
// NOTE(review): the macro definition is not visible here; presumably it makes
// the key a recognized pass-context option of the given type — confirm.
TVM_REGISTER_PASS_CONFIG_OPTION(kDynamicAlignment, Integer);
TVM_REGISTER_PASS_CONFIG_OPTION(kEnableAggressiveSharedMemoryMerge, Bool);
TVM_REGISTER_PASS_CONFIG_OPTION(kDisableFastMath, Bool);
// Integer-valued, unlike the Bool toggles around it.
TVM_REGISTER_PASS_CONFIG_OPTION(kPtxasRegisterUsageLevel, Integer);
TVM_REGISTER_PASS_CONFIG_OPTION(kEnablePTXASVerboseOutput, Bool);
#define TIR_DEFINE_TL_BUILTIN(OpName) \
......
......@@ -28,6 +28,8 @@ static constexpr const char *kConfigIndexBitwidth = "tl.config_index_bitwidth";
// String keys for pass-config options; every key lives under the "tl."
// prefix.
static constexpr const char *kEnableAggressiveSharedMemoryMerge =
"tl.enable_aggressive_shared_memory_merge";
static constexpr const char *kDisableFastMath = "tl.disable_fast_math";
// Key for the ptxas register-usage level. The string is identical to the
// Python-side PassConfigKey.TL_PTXAS_REGISTER_USAGE_LEVEL value, so the two
// must be kept in sync.
static constexpr const char *kPtxasRegisterUsageLevel =
"tl.ptxas_register_usage_level";
// Key for toggling verbose ptxas output.
static constexpr const char *kEnablePTXASVerboseOutput =
"tl.enable_ptxas_verbose_output";
......
......@@ -73,6 +73,8 @@ class LibraryGenerator(object):
libpath = src.name.replace(".cu", ".so")
disable_fast_math = self.pass_configs.get(PassConfigKey.TL_DISABLE_FAST_MATH, False)
ptxas_usage_level = self.pass_configs.get(PassConfigKey.TL_PTXAS_REGISTER_USAGE_LEVEL,
None)
verbose_ptxas_output = self.pass_configs.get(
PassConfigKey.TL_ENABLE_PTXAS_VERBOSE_OUTPUT, False)
......@@ -93,10 +95,10 @@ class LibraryGenerator(object):
]
if not disable_fast_math:
command += ["--use_fast_math"]
if ptxas_usage_level is not None:
command += [f"--ptxas-options=--register-usage-level={ptxas_usage_level}"]
if verbose_ptxas_output:
command += ["--ptxas-options", "-v"]
if compute_version == "90a":
command += ["-D", "CUTE_SM90_EXTENDED_MMA_SHAPES_ENABLED"]
command += ["--ptxas-options=--verbose"]
command += [
"-I" + CUTLASS_INCLUDE_DIR,
]
......
......@@ -21,6 +21,10 @@ class PassConfigKey(str, Enum):
TL_DISABLE_FAST_MATH = "tl.disable_fast_math"
"""Disable fast math optimization. Default: False"""
TL_PTXAS_REGISTER_USAGE_LEVEL = "tl.ptxas_register_usage_level"
"""The PTXAS register usage level in [0, 10], which controls the
aggressiveness of optimizations that affect register usage. Default: None"""
TL_ENABLE_PTXAS_VERBOSE_OUTPUT = "tl.enable_ptxas_verbose_output"
"""Enable ptxas verbose output. Default: False"""
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment