Unverified commit 3104bc2c, authored by Xiaomeng Zhao, committed by GitHub

Update compose.yaml

parent 350bcd75
# Documentation:
# https://docs.sglang.ai/backend/server_arguments.html#common-launch-commands
# https://github.com/opendatalab/MinerU/tree/master?tab=readme-ov-file#23-using-sglang-to-accelerate-vlm-model-inference
services:
  mineru-sglang:
    image: mineru-sglang:latest
    container_name: mineru-sglang
    restart: always
    ports:
      - 30000:30000
    environment:
      MINERU_MODEL_SOURCE: local
    entrypoint: mineru-sglang-server
    command:
      --host 0.0.0.0
      --port 30000
      # --enable-torch-compile # You can also enable torch.compile to accelerate inference speed by approximately 15%
      # --dp 2 # If you have more than two GPUs with 24GB VRAM or above, you can use sglang's multi-GPU parallel mode to increase throughput
      # --tp 2 # If you have two GPUs with 12GB or 16GB VRAM, you can use the Tensor Parallel (TP) mode
      # --mem-fraction-static 0.7 # If you have two GPUs with 11GB VRAM, in addition to Tensor Parallel mode, you need to reduce the KV cache size
    ulimits:
      memlock: -1
      stack: 67108864
@@ -33,4 +29,4 @@ services:
          devices:
            - driver: nvidia
              device_ids: ["0"]
              capabilities: [gpu]
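
The commented --tp / --dp flags and the device_ids list are the only pieces that change for a multi-GPU setup. Below is a minimal sketch of how they might be combined, assuming a host with two 16GB GPUs exposed as device IDs 0 and 1, and assuming the devices reservation sits under Compose's standard deploy.resources.reservations keys (the intermediate lines are elided in the diff view above). Saved as, for example, compose.override.yaml, Docker Compose merges it over the base file automatically:

# Hypothetical override for a dual-GPU host (two 16GB cards, IDs 0 and 1).
# Tensor Parallel (--tp 2) splits the model across both GPUs, per the
# comments in the base file; all other settings are inherited.
services:
  mineru-sglang:
    command:
      --host 0.0.0.0
      --port 30000
      --tp 2
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["0", "1"]
              capabilities: [gpu]

With the override in place, docker compose up -d brings the server up across both GPUs. Note that Compose replaces the base command wholly rather than appending, which is why --host and --port are repeated here; on two 11GB cards you would also append --mem-fraction-static 0.7, as the last comment in the command block notes.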