Commit 26aebea5 authored by shihm's avatar shihm
Browse files

update

parent 646210d2
...@@ -117,8 +117,7 @@ ray start --address='x.x.x.x:6379' --num-gpus=8 --num-cpus=32 ...@@ -117,8 +117,7 @@ ray start --address='x.x.x.x:6379' --num-gpus=8 --num-cpus=32
vllm serve /path/to/Baichuan-M3-235B vllm serve /path/to/Baichuan-M3-235B
--host x.x.x.x --port 8000 --host x.x.x.x --port 8000
--distributed-executor-backend ray --distributed-executor-backend ray
--tensor-parallel-size 8 --tensor-parallel-size 16
--pipeline-parallel-size 2
--gpu-memory-utilization 0.9 --gpu-memory-utilization 0.9
--served-model-name baichuan-m3 --served-model-name baichuan-m3
--reasoning-parser deepseek_r1 --reasoning-parser deepseek_r1
...@@ -149,12 +148,13 @@ curl http://localhost:8000/v1/chat/completions \ ...@@ -149,12 +148,13 @@ curl http://localhost:8000/v1/chat/completions \
### transformers ### transformers
#### 单机推理 #### 单机推理
```bash ```bash
python
from transformers import AutoTokenizer, AutoModelForCausalLM from transformers import AutoTokenizer, AutoModelForCausalLM
model_path = "/path/to/Baichuan-M3-235B"
import os import os
import torch import torch
os.environ['TRANSFORMERS_OFFLINE'] = '1' os.environ['TRANSFORMERS_OFFLINE'] = '1'
os.environ['MODELSCOPE_OFFLINE'] = '1' os.environ['MODELSCOPE_OFFLINE'] = '1'
model_path = "/path/to/Baichuan-M3-235B"
model = AutoModelForCausalLM.from_pretrained( model = AutoModelForCausalLM.from_pretrained(
model_path, model_path,
trust_remote_code=True, trust_remote_code=True,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment