Unverified commit 1dab9bc8, authored by Travis Johnson and committed by GitHub
Browse files

[Bugfix] set OMP_NUM_THREADS to 1 by default for multiprocessing (#6109)


Signed-off-by: Travis Johnson <tsjohnso@us.ibm.com>
Co-authored-by: Nick Hill <nickhill@us.ibm.com>
parent 3de6e6a3
@@ -37,6 +37,11 @@ class MultiprocessingGPUExecutor(DistributedGPUExecutor):
 # Disable torch async compiling which won't work with daemonic processes
 os.environ["TORCHINDUCTOR_COMPILE_THREADS"] = "1"
+# Set OMP_NUM_THREADS to 1 if it is not set explicitly, avoids CPU
+# contention amongst the shards
+if "OMP_NUM_THREADS" not in os.environ:
+    os.environ["OMP_NUM_THREADS"] = "1"
 assert world_size <= cuda_device_count_stateless(), (
     "please set tensor_parallel_size to less than max local gpu count")
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment