"...git@developer.sourcefind.cn:yangql/composable_kernel.git" did not exist on "5bf0475afdf33e823ed8c0247b3a8711326601d3"
Commit 9a7a569d authored by Haodong Tian's avatar Haodong Tian Committed by LeiWang1999
Browse files

[Bugfix] Adjust Autotuner threadpool `max_workers` limit to available CPUs (#368)

* [Bugfix] Adjust Autotuner threadpool `max_workers` limit to available CPUs

* [Example] Small fix on example_blocksparse_gemm.py
parent d3536d9e
...@@ -4,7 +4,7 @@ import tilelang ...@@ -4,7 +4,7 @@ import tilelang
import tilelang.language as T import tilelang.language as T
from tilelang.autotuner import AutoTuner from tilelang.autotuner import AutoTuner
from tilelang.engine.param import KernelParam from tilelang.engine.param import KernelParam
from tilelang.utils.tensor import get_tensor_supply from tilelang.utils.tensor import get_tensor_supply, TensorSupplyType
import torch import torch
from typing import List from typing import List
...@@ -27,7 +27,7 @@ args = parser.parse_args() ...@@ -27,7 +27,7 @@ args = parser.parse_args()
M, N, K = args.m, args.n, args.k M, N, K = args.m, args.n, args.k
sparsity = args.sparsity sparsity = args.sparsity
use_autotune = args.use_autotune use_autotune = args.use_autotune
default_tensor_supply = get_tensor_supply() default_tensor_supply = get_tensor_supply(TensorSupplyType.Auto)
print(f"Running BlockSparse MatMul Benchmark for M={M}, N={N}, K={K}") print(f"Running BlockSparse MatMul Benchmark for M={M}, N={N}, K={K}")
print(f"Target Block Sparsity: {sparsity}") print(f"Target Block Sparsity: {sparsity}")
......
...@@ -269,7 +269,7 @@ class AutoTuner: ...@@ -269,7 +269,7 @@ class AutoTuner:
new_args = tuple(new_args) new_args = tuple(new_args)
config_args.append(new_args) config_args.append(new_args)
num_workers = max(1, int(os.cpu_count() * 0.9)) num_workers = max(1, int(get_available_cpu_count() * 0.9))
pool = concurrent.futures.ThreadPoolExecutor(max_workers=num_workers) pool = concurrent.futures.ThreadPoolExecutor(max_workers=num_workers)
futures = [] futures = []
future_to_index = {} future_to_index = {}
...@@ -453,3 +453,14 @@ def check_tensor_list_compatibility( ...@@ -453,3 +453,14 @@ def check_tensor_list_compatibility(
return False return False
return all(tensor1.shape == tensor2.shape for tensor1, tensor2 in zip(list1, list2)) return all(tensor1.shape == tensor2.shape for tensor1, tensor2 in zip(list1, list2))
def get_available_cpu_count() -> int:
    """Return the number of CPU cores available to the current process.

    Prefers the process CPU affinity mask (``os.sched_getaffinity``), which
    reflects restrictions on which CPUs this process may run on. On platforms
    where ``os.sched_getaffinity`` does not exist, falls back to
    ``os.cpu_count()``.

    Returns:
        The number of usable CPUs, always at least 1.
    """
    try:
        cpu_count = len(os.sched_getaffinity(0))
    except AttributeError:
        # sched_getaffinity is not provided on every platform.
        cpu_count = os.cpu_count()
    # os.cpu_count() returns None when the count cannot be determined;
    # callers do arithmetic on the result, so never hand back None (or 0).
    return cpu_count or 1
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment