# Packaging metadata and tool configuration for megatron-energon.

[build-system]
requires = ["hatchling", "hatch-vcs"]
build-backend = "hatchling.build"

[project]
name = "megatron-energon"
# The version is not hard-coded; it is supplied by [tool.hatch.version] below.
dynamic = ["version"]
authors = [
    { name = "Lukas Vögtle", email = "lvoegtle@nvidia.com" },
    { name = "Philipp Fischer", email = "pfischer@nvidia.com" },
]
description = "Megatron's multi-modal data loader"
readme = "README.md"
license = "BSD-3-Clause"
requires-python = ">=3.10"
classifiers = [
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Operating System :: OS Independent",
]
dependencies = [
    "braceexpand",
    "click",
    # NOTE(review): this marker can never match while requires-python is
    # ">=3.10", so the requirement is inert. Consider removing it.
    "dataslots; python_version<'3.10'",
    "mfusepy",
    "multi-storage-client>=0.18.0,<0.26.0",
    "numpy",
    # WEBP vulnerability fixed starting from 10.0.1
    "pillow>=10.0.1",
    "pyyaml",
    "rapidyaml>=0.10.0",
    "s3fs",
    "torch",
    "tqdm",
    "webdataset",
]

[project.optional-dependencies]
dev = [
    "ruff",
    "sphinxcontrib-napoleon",
    "sphinx",
    "myst-parser",
    "soundfile",
    "sphinx-rtd-theme",
    "sphinx-click",
]
transforms = [
    # Needed for megatron.energon.transforms
    "torchvision",
]

# Storage services for MSC (multi-storage-client); each extra pulls in the
# matching multi-storage-client extra.
s3 = [
    "multi-storage-client[boto3]",
]
aistore = [
    "multi-storage-client[aistore]",
]
azure-storage-blob = [
    "multi-storage-client[azure-storage-blob]",
]
google-cloud-storage = [
    "multi-storage-client[google-cloud-storage]",
]
oci = [
    "multi-storage-client[oci]",
]

# Dependencies needed for efficient audio and video file decoding
av_decode = [
    "bitstring>=4.2.3",
    "sortedcontainers>=2.4.0",
    "filetype>=1.2.0",
    "ebmlite>=3.3.1",
    "av>=14.4.0",
]

# If using guess_content=True for decoding
# NOTE(review): av_decode requires filetype>=1.2.0 while this extra allows
# >=1.0.0; confirm whether the lower bounds are meant to differ.
guess_content = [
    "filetype>=1.0.0",
]

[project.urls]
Homepage = "https://github.com/NVIDIA/Megatron-Energon"

[project.scripts]
energon = "megatron.energon.cli.main:main"

[tool.hatch.version]
# Derive the package version from version-control metadata (hatch-vcs).
source = "vcs"

[tool.hatch.build.targets.wheel]
packages = ["src/megatron"]

[tool.hatch.build.targets.sdist]
packages = ["src/megatron"]

[tool.ruff]
line-length = 100
target-version = "py310"
include = ["**/*.py", "**/*.pyi"]
exclude = [
    ".idea",
    "docs",
]

[tool.ruff.lint]
# "I" enables the import-sorting (isort) rules on top of the default rule set.
extend-select = ["I"]
# E741: ambiguous variable name; E731: lambda assigned to a name.
ignore = ["E741", "E731"]