Unverified Commit 6557f493 authored by Li, Jiang's avatar Li, Jiang Committed by GitHub
Browse files

[Bugfix][CPU] Skip set_num_threads after thread binding (#38535)


Signed-off-by: default avatarjiang1.li <jiang1.li@intel.com>
parent 677424c7
#!/bin/bash #!/bin/bash
set -euox pipefail set -euox pipefail
export VLLM_CPU_CI_ENV=0 export VLLM_CPU_CI_ENV=0
export VLLM_CPU_KVCACHE_SPACE=1 # avoid OOM
echo "--- PP+TP" echo "--- PP+TP"
vllm serve meta-llama/Llama-3.2-3B-Instruct -tp=2 -pp=2 & vllm serve meta-llama/Llama-3.2-3B-Instruct -tp=2 -pp=2 --max-model-len=4096 &
server_pid=$! server_pid=$!
timeout 600 bash -c "until curl localhost:8000/v1/models > /dev/null 2>&1; do sleep 1; done" || exit 1 timeout 600 bash -c "until curl localhost:8000/v1/models > /dev/null 2>&1; do sleep 1; done" || exit 1
vllm bench serve \ vllm bench serve \
...@@ -23,7 +24,7 @@ if [ "$failed_req" -ne 0 ]; then ...@@ -23,7 +24,7 @@ if [ "$failed_req" -ne 0 ]; then
fi fi
echo "--- DP+TP" echo "--- DP+TP"
vllm serve meta-llama/Llama-3.2-3B-Instruct -tp=2 -dp=2 & vllm serve meta-llama/Llama-3.2-3B-Instruct -tp=2 -dp=2 --max-model-len=4096 &
server_pid=$! server_pid=$!
timeout 600 bash -c "until curl localhost:8000/v1/models > /dev/null 2>&1; do sleep 1; done" || exit 1 timeout 600 bash -c "until curl localhost:8000/v1/models > /dev/null 2>&1; do sleep 1; done" || exit 1
vllm bench serve \ vllm bench serve \
......
...@@ -108,6 +108,15 @@ class CPUWorker(Worker): ...@@ -108,6 +108,15 @@ class CPUWorker(Worker):
if ret: if ret:
logger.info(ret) logger.info(ret)
# After thread binding, changing the number of threads is not allowed.
def skip_set_num_threads(x: int) -> None:
    """Log a warning instead of changing the thread count.

    Installed in place of `torch.set_num_threads` once thread binding is
    done. The requested thread count `x` is accepted for signature
    compatibility but is otherwise ignored.
    """
    message = (
        "CPU backend doesn't allow to use "
        "`torch.set_num_threads` after the thread binding, skip it."
    )
    logger.warning(message)
torch.set_num_threads = skip_set_num_threads
# Note: unique identifier for creating allreduce shared memory # Note: unique identifier for creating allreduce shared memory
os.environ["VLLM_DIST_IDENT"] = self.distributed_init_method.split(":")[-1] os.environ["VLLM_DIST_IDENT"] = self.distributed_init_method.split(":")[-1]
# Initialize the distributed environment. # Initialize the distributed environment.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment