---
# compose.yaml
# Runs a single SGLang server container (python3 -m sglang.launch_server)
# listening on port 30000, with one NVIDIA GPU reserved.
services:
  sglang:
    image: lmsysorg/sglang:latest
    container_name: sglang
    volumes:
      # Share the host Hugging Face cache with the container.
      - ${HOME}/.cache/huggingface:/root/.cache/huggingface
      # If you use ModelScope, you also need to mount this directory:
      # - ${HOME}/.cache/modelscope:/root/.cache/modelscope
    restart: always
    network_mode: host  # required by RDMA
    privileged: true  # required by RDMA
    # Alternatively, publish only port 30000:
    # ports:
    #   - "30000:30000"
    environment:
      # Placeholder: replace <secret> with your own token before starting.
      HF_TOKEN: <secret>
      # If you use ModelScope to download the model, also set this variable
      # (as a key under this mapping, not as a list item):
      # SGLANG_USE_MODELSCOPE: "true"
    entrypoint: python3 -m sglang.launch_server
    # Folded scalar: the lines below are joined with single spaces into one
    # argument string that is appended to the entrypoint.
    command: >-
      --model-path meta-llama/Llama-3.1-8B-Instruct
      --host 0.0.0.0
      --port 30000
    ulimits:
      memlock: -1
      stack: 67108864
    ipc: host
    healthcheck:
      # Probes the /health endpoint on the port passed via --port above.
      test: ["CMD-SHELL", "curl -f http://localhost:30000/health || exit 1"]
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              # Quoted so the GPU ID stays a string.
              device_ids: ["0"]
              capabilities: [gpu]