# compose.yaml
# Docker Compose definition for a single SGLang server container.
services:
  sglang:
    image: lmsysorg/sglang:latest
    container_name: sglang
    volumes:
      # Mount the host's Hugging Face cache directory into the container.
      - ${HOME}/.cache/huggingface:/root/.cache/huggingface
      # If you use ModelScope, mount its cache directory as well:
      # - ${HOME}/.cache/modelscope:/root/.cache/modelscope
    restart: always
    network_mode: host
    # Alternatively, replace `network_mode: host` with a published port
    # (the two options cannot be combined):
    # ports:
    #   - "30000:30000"
    environment:
      # Placeholder value: substitute your own Hugging Face access token.
      HF_TOKEN: <secret>
      # If you download models through ModelScope, also set this variable
      # (mapping syntax, to match the entry above; value quoted as a string):
      # SGLANG_USE_MODELSCOPE: "true"
    entrypoint: python3 -m sglang.launch_server
    # Folded scalar: the lines below are joined with single spaces into one
    # argument string that is appended to the entrypoint.
    command: >-
      --model-path meta-llama/Meta-Llama-3.1-8B-Instruct
      --host 0.0.0.0
      --port 30000
    ulimits:
      # -1 removes the locked-memory limit.
      memlock: -1
      # Stack size limit in bytes (64 MiB).
      stack: 67108864
    # Use the host's IPC namespace.
    ipc: host
    healthcheck:
      # `curl -f` exits non-zero on HTTP error responses, so the check fails
      # unless the /health endpoint on port 30000 answers successfully.
      test: ["CMD-SHELL", "curl -f http://localhost:30000/health || exit 1"]
    deploy:
      resources:
        reservations:
          devices:
            # Reserve the NVIDIA GPU with device id '0' for this container.
            - driver: nvidia
              device_ids: ['0']
              capabilities: [gpu]