# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# Packaging metadata and developer-tool configuration for the `ai-dynamo`
# distribution. Backend-specific requirements live under
# [project.optional-dependencies]; tool settings live under [tool.*].

[project]
name = "ai-dynamo"
version = "0.9.0"
description = "Distributed Inference Framework"
readme = "README.md"
authors = [
    { name = "NVIDIA Inc.", email = "sw-dl-dynamo@nvidia.com" },
]
license = { text = "Apache-2.0" }
license-files = ["LICENSE"]
requires-python = ">=3.10"
dependencies = [
    "ai-dynamo-runtime==0.9.0",
    "transformers>=4.56.0",
    "pytest>=8.3.4",
    "types-psutil>=7.0.0.20250218",
    "kubernetes>=32.0.1,<33.0.0",
    "fastapi>=0.115.0",
    "distro",
    # filelock: required by planner
    "filelock",
    "typer",
    "click<8.2.0",
    "setuptools",
    "prometheus_client>=0.23.1,<1.0",
]
classifiers = [
    "Development Status :: 4 - Beta",
    "Intended Audience :: Developers",
    "Intended Audience :: Science/Research",
    "Intended Audience :: Information Technology",
    "License :: OSI Approved :: Apache Software License",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Topic :: Scientific/Engineering",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
    "Operating System :: POSIX :: Linux",
]
keywords = ["llm", "genai", "inference", "nvidia", "distributed", "dynamo"]

[project.urls]
Repository = "https://github.com/ai-dynamo/dynamo.git"

# One extra per inference backend; each pins that backend's engine version.
[project.optional-dependencies]
trtllm = [
    "uvloop",
    "tensorrt-llm==1.3.0rc1",
]
vllm = [
    "uvloop",
    "nixl[cu12]<=0.9.0",
    "vllm[flashinfer,runai]==0.14.1",
]
sglang = [
    "uvloop",
    "sglang==0.5.8",
    "nixl[cu12]<=0.9.0",
    "cupy-cuda12x>=13.0.0",
]

# `pytest11` is the entry-point group pytest scans for plugins, so these
# backend conftest modules are loaded as plugins when the package is installed.
[project.entry-points.pytest11]
vllm_tests = "dynamo.vllm.tests.conftest"
trtllm_tests = "dynamo.trtllm.tests.conftest"
sglang_tests = "dynamo.sglang.tests.conftest"

[dependency-groups]
docs = [
    # Core Sphinx
    "sphinx>=8.1",
    "nvidia-sphinx-theme>=0.0.8",
    # Sphinx extensions
    "ablog>=0.11",
    "sphinx-copybutton>=0.5",
    "sphinx-design>=0.6",
    "sphinx-prompt>=1.9",
    "sphinx-sitemap>=2.6",
    "sphinx-tabs>=3.4",
    "sphinx-book-theme>=1.1",
    "sphinxcontrib-mermaid>=1.0",
    "sphinxcontrib-bibtex>=2.6",
    "sphinx-reredirects>=1.0.0",
    # Markdown and notebook support
    "myst-parser>=4.0",
    "myst-nb>=1.2",
    "nbsphinx>=0.9",
]

[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[tool.hatch.build.hooks.custom]
path = "hatch_build.py"

[tool.hatch.build.targets.wheel]
packages = [
    "components/src/dynamo",
]

[tool.hatch.metadata]
allow-direct-references = true

[tool.codespell]
# note: pre-commit passes explicit lists of files here, which this skip file list doesn't override -
# this is only to allow you to run codespell interactively
# this also overrides the grpc_generated folder, since it is generated
# Ignore data files and auto-generated files
skip = "./.git,./.github,./lib/llm/tests/data,*.lock,*.sum"
# ignore allowed words used in code
ignore-words-list = "afterall,ser,ende"
# use the 'clear' dictionary for unambiguous spelling mistakes
builtin = "clear"
# use custom dictionary in addition to the built-in one
dictionary = "./codespell.txt"
# disable warnings about binary files and wrong encoding
quiet-level = 3

[tool.isort]
profile = "black"
use_parentheses = true
multi_line_output = 3
include_trailing_comma = true
force_grid_wrap = 0
ensure_newline_before_comments = true
line_length = 88
balanced_wrapping = true
# NOTE(review): this is a single-space indent, while black formats with four
# spaces - confirm the value is intended.
indent = " "
skip = ["build"]
known_first_party = ["dynamo"]

[tool.pytest.ini_options]
minversion = "8.0"
tmp_path_retention_policy = "failed"

# NOTE
# We ignore model.py explicitly here to avoid mypy errors with duplicate modules
# pytest overrides the default mypy exclude configuration and so we exclude here as well
addopts = [
    "-ra",
    "--showlocals",
    "--strict-markers",
    "--strict-config",
    "--mypy",
    "--ignore-glob=*model.py",
    "--ignore-glob=*vllm_integration*",
    "--ignore-glob=*trtllm_integration*",
    "--ignore-glob=*kvbm/python/kvbm*",
    "--ignore-glob=*_inc.py",
    "--ignore-glob=*/llm/tensorrtllm*",
    "--ignore-glob=docs/*",
    "--ignore-glob=components/src/dynamo/sglang/request_handlers/*",
    "--ignore-glob=components/src/dynamo/sglang/multimodal_utils/*",
    "--ignore-glob=components/src/dynamo/vllm/multimodal_utils/*",
    "--ignore-glob=components/src/dynamo/vllm/multimodal_handlers/*",
    "--ignore-glob=examples/backends/sglang/slurm_jobs/*",
    # FIXME: Get relative/generic glob paths to work here
]
xfail_strict = true
log_cli_level = "INFO"
filterwarnings = [
    # Global policy: every warning is an error. The entries below are the
    # explicit exceptions.
    "error",
    # Need this to avoid deprecation warnings from CUDA in tensorrt_llm.
    # NOTE(review): `cuda*` means "cud" followed by zero or more "a";
    # `.*cuda.*` was probably intended - confirm before changing.
    "ignore:.*cuda*:DeprecationWarning",
    "ignore:.*pkg_resources.*:DeprecationWarning",
    "ignore:.*pkg_resources.*:UserWarning",
    "ignore:.*multipart.*:PendingDeprecationWarning",
    # Ignore protobuf deprecation warning
    "ignore:.*PyType_Spec.*custom tp_new.*:DeprecationWarning",
    # Ignore unclosed socket warnings
    "ignore:.*unclosed.*socket.*:ResourceWarning",
    # Ignore unclosed event loop warnings
    "ignore:.*unclosed event loop.*:ResourceWarning",
    # Ignore unraisable exception warnings
    "ignore:.*Exception ignored in.*:pytest.PytestUnraisableExceptionWarning",
    # Ignore pynvml deprecation warning, temporary until upstream library updates to nvidia-ml-py
    "ignore:The pynvml package is deprecated.*:FutureWarning",
    # Pydantic V2 deprecation warnings from TRTLLM dependencies (raised at import time during collection)
    "ignore:Support for class-based `config`.*:pydantic.warnings.PydanticDeprecatedSince20",
    "ignore:Using extra keyword arguments on `Field`.*:pydantic.warnings.PydanticDeprecatedSince20",
    "ignore:The `schema` method is deprecated.*:pydantic.warnings.PydanticDeprecatedSince20",
    # Pydantic warning about field shadowing in tensorrt_llm.serve.openai_protocol.ResponseFormat
    'ignore:Field name "schema" in "ResponseFormat" shadows an attribute in parent:UserWarning',
    # pytest-benchmark automatically disables when xdist is active, ignore the warning
    "ignore:.*Benchmarks are automatically disabled.*:pytest_benchmark.logger.PytestBenchmarkWarning",

    # TRT-LLM
    # torchao sometimes emits SyntaxWarning from docstrings (e.g. invalid escape sequences) at import
    # time; our global `error` policy would otherwise fail test collection. Do not rely on module=
    # matching here because these can be raised during compilation where the module field may not
    # match as expected.
    "ignore:.*invalid escape sequence.*:SyntaxWarning",
    # torchao deprecation warnings for import path changes (see https://github.com/pytorch/ao/issues/2752)
    "ignore:Importing.*torchao\\.dtypes.*:DeprecationWarning",
    # nvidia-modelopt warning about transformers version incompatibility
    "ignore:transformers version .* is incompatible with nvidia-modelopt.*:UserWarning",
]

# NOTE: Can also manually mark tests with @pytest.mark.asyncio
asyncio_mode = "auto"

# IMPORTANT: tests/conftest.py also registers a subset of these markers for
# environments where pyproject.toml is not available (e.g. some CI containers).
# Keep the marker definitions here and in tests/conftest.py synchronized.
markers = [
    "pre_merge: marks tests to run before merging",
    "post_merge: marks tests to run after merge",
    "parallel: marks tests that can run in parallel with pytest-xdist",
    "nightly: marks tests to run nightly",
    "weekly: marks tests to run weekly",
    "release: marks tests to run on release pipelines",
    "gpu_0: marks tests that don't require GPU",
    "gpu_1: marks tests to run on GPU",
    "gpu_2: marks tests to run on 2GPUs",
    "gpu_4: marks tests to run on 4GPUs",
    "gpu_8: marks tests to run on 8GPUs",
    "e2e: marks tests as end-to-end tests",
    "integration: marks tests as integration tests",
    "unit: marks tests as unit tests",
    "stress: marks tests as stress tests",
    "performance: marks tests as performance tests",
    "benchmark: marks tests as benchmark tests",
    "vllm: marks tests as requiring vllm",
    "trtllm: marks tests as requiring trtllm",
    "sglang: marks tests as requiring sglang",
    "multimodal: marks tests as multimodal (image/video) tests",
    "slow: marks tests as known to be slow",
    "h100: marks tests to run on H100",
    "router: marks tests for router component",
    "planner: marks tests for planner component",
    "kvbm: marks tests for KV behavior and model determinism",
    "kvbm_concurrency: marks concurrency stress tests for KVBM (runs separately)",
    "model: model id used by a test or parameter",
    "custom_build: marks tests that require custom builds or special setup (e.g., MoE models)",
    "k8s: marks tests as requiring Kubernetes",
    "fault_tolerance: marks tests as fault tolerance tests",
    # Built-in markers
    "skip: skip this test",
    "skipif: skip if condition is true",
    "xfail: expected failure",
    "usefixtures: use fixtures",
    "parametrize: parameterized test",
    "filterwarnings: filter warnings",
    "asyncio: asyncio test marker",
]

# Linting/formatting
[tool.ruff]
# Same as Black.
line-length = 88
indent-width = 4

[tool.ruff.lint.extend-per-file-ignores]
"icp/tests/**/test_*.py" = ["F811", "F401"]
"*_inc.py" = ["F821"]

[tool.mypy]

# --disable-error-code: WAR large set of errors due to mypy not being run
# previously. We can slowly enable sets of errors to fix over time.
# disable_error_code = []

# --explicit-package-bases: WAR errors about duplicate module names used
# throughout the llm examples. For example, the common module in
# tensorrt_llm and vllm are both named common.
explicit_package_bases = true

# --ignore-missing-imports: WAR too many errors when developing outside
# of container environment with PYTHONPATH set and packages installed.
# NOTE: Can possibly move mypy from pre-commit to a github action run only in
# a container with the expected environment and PYTHONPATH setup.
ignore_missing_imports = true

check_untyped_defs = true

[[tool.mypy.overrides]]
# Skip mypy analysis on internal dependencies of vllm
module = ["vllm.*"]
follow_imports = "skip"
ignore_missing_imports = true

[tool.sphinx]

# extra-content-head
extra_content_head = [
    ''' ''',
]

#extra-content-footer
extra_content_footer = [
    ''' ''',
]