# SPDX-License-Identifier: Apache-2.0

# The CLI entrypoint to vLLM.
import os
import signal
import sys

import vllm.entrypoints.cli.benchmark.main
import vllm.entrypoints.cli.openai
import vllm.entrypoints.cli.serve
import vllm.version
from vllm.logger import init_logger
from vllm.utils import FlexibleArgumentParser

logger = init_logger(__name__)

# Modules that contribute subcommands. Each one is expected to expose a
# `cmd_init()` callable returning the command objects registered by `main()`.
CMD_MODULES = [
    vllm.entrypoints.cli.openai,
    vllm.entrypoints.cli.serve,
    vllm.entrypoints.cli.benchmark.main,
]


def register_signal_handlers() -> None:
    """Install handlers that exit the process with status 0 on SIGINT/SIGTSTP.

    SIGINT is always registered. SIGTSTP is registered only when the running
    platform defines it, so calling this function does not raise
    `AttributeError` on platforms where the `signal` module lacks that name.
    """

    def signal_handler(sig, frame):
        sys.exit(0)

    signal.signal(signal.SIGINT, signal_handler)
    # The `signal` module only exposes the signal names the host system
    # defines; SIGTSTP is missing on some platforms (e.g. Windows).
    if hasattr(signal, "SIGTSTP"):
        signal.signal(signal.SIGTSTP, signal_handler)


def env_setup() -> None:
    """Default `VLLM_WORKER_MULTIPROC_METHOD` to `spawn` when it is unset.

    A value already present in the environment is left untouched.
    """
    # The safest multiprocessing method is `spawn`, as the default `fork` method
    # is not compatible with some accelerators. The default method will be
    # changing in future versions of Python, so we should use it explicitly when
    # possible.
    #
    # We only set it here in the CLI entrypoint, because changing to `spawn`
    # could break some existing code using vLLM as a library. `spawn` will cause
    # unexpected behavior if the code is not protected by
    # `if __name__ == "__main__":`.
    #
    # References:
    # - https://docs.python.org/3/library/multiprocessing.html#contexts-and-start-methods
    # - https://pytorch.org/docs/stable/notes/multiprocessing.html#cuda-in-multiprocessing
    # - https://pytorch.org/docs/stable/multiprocessing.html#sharing-cuda-tensors
    # - https://docs.habana.ai/en/latest/PyTorch/Getting_Started_with_PyTorch_and_Gaudi/Getting_Started_with_PyTorch.html?highlight=multiprocessing#torch-multiprocessing-for-dataloaders
    if "VLLM_WORKER_MULTIPROC_METHOD" not in os.environ:
        logger.debug("Setting VLLM_WORKER_MULTIPROC_METHOD to 'spawn'")
        os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"


def main() -> None:
    """Parse the command line and run the selected vLLM subcommand.

    Steps, in order:
      1. Apply environment defaults via `env_setup()`.
      2. Build the top-level parser with a `-v/--version` flag.
      3. Register every command returned by each module's `cmd_init()` as a
         subparser, recording the command's handler as `dispatch_function`.
      4. Parse the arguments, let the selected command validate them, then
         call its handler. If the parsed namespace has no
         `dispatch_function` (e.g. no subcommand was given), print the help
         text instead.
    """
    env_setup()

    parser = FlexibleArgumentParser(description="vLLM CLI")
    parser.add_argument('-v',
                        '--version',
                        action='version',
                        version=vllm.version.__version__)
    # The subcommand is optional so that a bare invocation can fall through
    # to the help output below.
    subparsers = parser.add_subparsers(required=False, dest="subparser")

    # Maps subcommand name -> command object, used for validation after
    # parsing.
    cmds = {}
    for cmd_module in CMD_MODULES:
        for cmd in cmd_module.cmd_init():
            cmd.subparser_init(subparsers).set_defaults(
                dispatch_function=cmd.cmd)
            cmds[cmd.name] = cmd

    args = parser.parse_args()
    if args.subparser in cmds:
        cmds[args.subparser].validate(args)

    if hasattr(args, "dispatch_function"):
        args.dispatch_function(args)
    else:
        parser.print_help()


if __name__ == "__main__":
    main()