---
# Docker Compose service: MinerU document-parsing server accelerated by SGLang.
# Server args reference:
# https://github.com/sgl-project/sglang/blob/main/python/sglang/srt/server_args.py
# MinerU + SGLang usage:
# https://github.com/opendatalab/MinerU/tree/master?tab=readme-ov-file#23-using-sglang-to-accelerate-vlm-model-inference

services:
  mineru-sglang:
    image: mineru-sglang:latest
    container_name: mineru-sglang
    volumes:
      # Optional model-cache mounts — uncomment the one matching your model source.
      # - ${HF_HOME}:/root/.cache/huggingface
      # - ${MODELSCOPE_CACHE}:/root/.cache/modelscope
      # Persist torch.compile / Inductor artifacts across container restarts.
      - ./inductor_root_cache:/root/inductor_root_cache
    restart: always
    ports:
      # Quoted to avoid YAML 1.1 "digits:digits" sexagesimal-integer parsing.
      - "30000:30000"
    environment:
      # Load model weights from a local path rather than downloading.
      MINERU_MODEL_SOURCE: local
      # TORCHINDUCTOR_CACHE_DIR: /root/inductor_root_cache
      # NO_PROXY: 0.0.0.0,localhost,127.0.0.1
    entrypoint: mineru-sglang-server
    # List form keeps each argument explicit and lets optional flags be
    # toggled by commenting a single line.
    command:
      - --host
      - 0.0.0.0
      - --port
      - "30000"
      # - --enable-torch-compile
    ulimits:
      # Unlimited locked memory and a large stack, as SGLang/CUDA require.
      memlock: -1
      stack: 67108864
    ipc: host
    healthcheck:
      test: ["CMD-SHELL", "curl -f http://localhost:30000/health || exit 1"]
    deploy:
      resources:
        reservations:
          devices:
            # Pin the service to GPU 0; adjust device_ids for other GPUs.
            - driver: nvidia
              device_ids: ["0"]
              capabilities: [gpu]