# Documentation:
# https://github.com/sgl-project/sglang/blob/main/python/sglang/srt/server_args.py
# https://github.com/opendatalab/MinerU/tree/master?tab=readme-ov-file#23-using-sglang-to-accelerate-vlm-model-inference

services:
  mineru-sglang:
    image: mineru-sglang:latest
    container_name: mineru-sglang
    volumes:
      # - ${HF_HOME}:/root/.cache/huggingface
      # - ${MODELSCOPE_CACHE}:/root/.cache/modelscope
      - ./inductor_root_cache:/root/inductor_root_cache
    restart: always
    ports:
      - 30000:30000
    environment:
      MINERU_MODEL_SOURCE: local
      # TORCHINDUCTOR_CACHE_DIR: /root/inductor_root_cache
      # NO_PROXY: 0.0.0.0,localhost,127.0.0.1
    entrypoint: mineru-sglang-server
    command: --host 0.0.0.0 --port 30000  # --enable-torch-compile
    ulimits:
      memlock: -1
      stack: 67108864
    ipc: host
    healthcheck:
      test: ["CMD-SHELL", "curl -f http://localhost:30000/health || exit 1"]
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["0"]
              capabilities: [gpu]
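
# Usage (a minimal sketch, assuming the mineru-sglang:latest image has already been
# built locally and the NVIDIA Container Toolkit is installed on the host):
#
#   docker compose up -d
#   curl http://localhost:30000/health    # returns success once the server is ready
#
# The healthcheck above polls the same /health endpoint, so `docker compose ps`
# will report the container as healthy when the sglang server is accepting requests.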