# Documentation:
# https://github.com/sgl-project/sglang/blob/main/python/sglang/srt/server_args.py
# https://github.com/opendatalab/MinerU/tree/master?tab=readme-ov-file#23-using-sglang-to-accelerate-vlm-model-inference

services:
  mineru-sglang:
    image: mineru-sglang:latest
    container_name: mineru-sglang
    volumes:
      # - ${HF_HOME}:/root/.cache/huggingface
      # - ${MODELSCOPE_CACHE}:/root/.cache/modelscope
      - ./inductor_root_cache:/root/inductor_root_cache
    restart: always
    ports:
      - 30000:30000
    environment:
      MINERU_MODEL_SOURCE: local
      # TORCHINDUCTOR_CACHE_DIR: /root/inductor_root_cache
      # NO_PROXY: 0.0.0.0,localhost,127.0.0.1
    entrypoint: mineru-sglang-server
    command: --host 0.0.0.0 --port 30000  # --enable-torch-compile
    ulimits:
      memlock: -1
      stack: 67108864
    ipc: host
    healthcheck:
      test: ["CMD-SHELL", "curl -f http://localhost:30000/health || exit 1"]
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["0"]
              capabilities: [gpu]
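
# Usage (a minimal sketch, assuming the mineru-sglang:latest image has already been
# built locally and the NVIDIA Container Toolkit is installed on the host):
#
#   docker compose up -d
#   curl http://localhost:30000/health    # returns success once the server is ready
#
# The healthcheck above polls the same /health endpoint, so `docker compose ps`
# will report the container as healthy when the sglang server is accepting requests.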