Commit 26aebea5 authored by shihm's avatar shihm
Browse files

update

parent 646210d2
......@@ -117,8 +117,7 @@ ray start --address='x.x.x.x:6379' --num-gpus=8 --num-cpus=32
vllm serve /path/to/Baichuan-M3-235B
--host x.x.x.x --port 8000
--distributed-executor-backend ray
--tensor-parallel-size 8
--pipeline-parallel-size 2
--tensor-parallel-size 16
--gpu-memory-utilization 0.9
--served-model-name baichuan-m3
--reasoning-parser deepseek_r1
......@@ -149,12 +148,13 @@ curl http://localhost:8000/v1/chat/completions \
### transformers
#### 单机推理
```bash
python
from transformers import AutoTokenizer, AutoModelForCausalLM
model_path = "/path/to/Baichuan-M3-235B"
import os
import torch
os.environ['TRANSFORMERS_OFFLINE'] = '1'
os.environ['MODELSCOPE_OFFLINE'] = '1'
model_path = "/path/to/Baichuan-M3-235B"
model = AutoModelForCausalLM.from_pretrained(
model_path,
trust_remote_code=True,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment