Commit b6fe9582 authored by Lei Wang's avatar Lei Wang Committed by LeiWang1999
Browse files

[Enhancement] Add CPU utilization and count settings for Auto-Tuning (#630)

* [Enhancement] Add CPU utilization and count settings for Auto-Tuning

- Introduced environment variables for CPU utilization, counts, and maximum CPU count for auto-tuning.
- Updated the AutoTuner class to utilize these new settings, improving flexibility and performance in multi-threaded environments.
- Enhanced logging to provide better insights into the auto-tuning process based on the configured CPU settings.

* typo fix
parent 498db845
......@@ -25,7 +25,13 @@ import threading
import traceback
from pathlib import Path
from tilelang.env import TILELANG_CACHE_DIR, is_cache_enabled
from tilelang.env import (
TILELANG_CACHE_DIR,
TILELANG_AUTO_TUNING_CPU_UTILITIES,
TILELANG_AUTO_TUNING_CPU_COUNTS,
TILELANG_AUTO_TUNING_MAX_CPU_COUNT,
is_cache_enabled,
)
from tilelang.autotuner.param import CompileArgs, ProfileArgs, AutotuneResult
from tilelang.jit.param import _P, _RProg
from tilelang.version import __version__
......@@ -419,8 +425,28 @@ class AutoTuner:
kernel=jit_kernel)
self._memory_cache[key] = autotuner_result
return autotuner_result
# get the cpu count
available_cpu_count = get_available_cpu_count()
cpu_utilizations = float(TILELANG_AUTO_TUNING_CPU_UTILITIES)
cpu_counts = int(TILELANG_AUTO_TUNING_CPU_COUNTS)
max_cpu_count = int(TILELANG_AUTO_TUNING_MAX_CPU_COUNT)
if cpu_counts > 0:
num_workers = min(cpu_counts, available_cpu_count)
logger.info(
f"Auto-tuning with {cpu_counts} CPU counts, {available_cpu_count} CPUs available, {num_workers} CPUs will be used"
)
else:
num_workers = max(1, int(available_cpu_count * cpu_utilizations))
logger.info(
f"Auto-tuning with {cpu_utilizations} CPU utilizations, {available_cpu_count} CPUs available, {num_workers} CPUs will be used"
)
if max_cpu_count > 0 and num_workers > max_cpu_count:
logger.warning(
f"Auto-tuning with {cpu_utilizations} CPU utilizations, {available_cpu_count} CPUs available, {num_workers} CPUs will be used, but the max CPU count is {max_cpu_count}, so we will use {max_cpu_count} CPUs"
)
num_workers = max_cpu_count
num_workers = max(1, int(get_available_cpu_count() * 0.9))
pool = concurrent.futures.ThreadPoolExecutor(max_workers=num_workers)
futures = []
future_to_index = {}
......
......@@ -77,6 +77,17 @@ TILELANG_CACHE_DIR: str = os.environ.get("TILELANG_CACHE_DIR",
# Auto-clear cache if environment variable is set
TILELANG_CLEAR_CACHE = os.environ.get("TILELANG_CLEAR_CACHE", "0")

# Fraction of the available CPUs used as auto-tuning workers, default is 0.9.
# Only consulted when TILELANG_AUTO_TUNING_CPU_COUNTS is not positive.
TILELANG_AUTO_TUNING_CPU_UTILITIES: str = os.environ.get("TILELANG_AUTO_TUNING_CPU_UTILITIES",
                                                         "0.9")
# Explicit number of CPUs for auto-tuning (capped at the available CPU count).
# Default is -1, which falls back to
# TILELANG_AUTO_TUNING_CPU_UTILITIES * get_available_cpu_count()
TILELANG_AUTO_TUNING_CPU_COUNTS: str = os.environ.get("TILELANG_AUTO_TUNING_CPU_COUNTS", "-1")
# Upper bound on the number of auto-tuning workers.
# Default is -1; a non-positive value means no upper bound is applied.
TILELANG_AUTO_TUNING_MAX_CPU_COUNT: str = os.environ.get("TILELANG_AUTO_TUNING_MAX_CPU_COUNT", "-1")

# SETUP ENVIRONMENT VARIABLES
# Both literals must live inside the parentheses so that implicit string
# concatenation joins them; a literal placed after the closing parenthesis is
# a no-op expression statement and the message would be truncated.
CUTLASS_NOT_FOUND_MESSAGE = ("CUTLASS is not installed or found in the expected path"
                             ", which may lead to compilation bugs when utilize tilelang backend.")
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment