pyproject.toml 2.21 KB
Newer Older
maming's avatar
maming committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# Build backend: Hatchling, with the hatch-vcs plugin available at build time
# (the project version is dynamic and `[tool.hatch.version]` uses the "vcs" source).
[build-system]
requires = ["hatchling", "hatch-vcs"]
build-backend = "hatchling.build"

# Core package metadata (PEP 621).
[project]
name = "megatron-energon"
# The version is not hard-coded; it is provided by the build backend.
dynamic = ["version"]
authors = [
  { name = "Lukas Vögtle", email = "lvoegtle@nvidia.com" },
  { name = "Philipp Fischer", email = "pfischer@nvidia.com" },
]
description = "Megatron's multi-modal data loader"
readme = "README.md"
license = "BSD-3-Clause"
requires-python = ">=3.10"
classifiers = [
  "Programming Language :: Python :: 3",
  "Programming Language :: Python :: 3.10",
  "Programming Language :: Python :: 3.11",
  "Programming Language :: Python :: 3.12",
  "Operating System :: OS Independent",
]
# Runtime requirements, one PEP 508 string per line, sorted alphabetically.
# No entry may carry a `python_version < '3.10'` marker: `requires-python`
# above already excludes those interpreters, so such an entry can never apply.
dependencies = [
  "braceexpand",
  "click",
  "mfusepy",
  "multi-storage-client>=0.18.0,<0.26.0",
  "numpy",
  "pillow>=10.0.1",  # WEBP vulnerability fixed starting from 10.0.1
  "pyyaml",
  "rapidyaml>=0.10.0",
  "s3fs",
  "torch",
  "tqdm",
  "webdataset",
]

# Optional extras, installable as `megatron-energon[<extra>]`.
[project.optional-dependencies]
# Linting, documentation build and other development tooling.
dev = [
  "myst-parser",
  "ruff",
  "soundfile",
  "sphinx",
  "sphinx-click",
  "sphinx-rtd-theme",
  "sphinxcontrib-napoleon",
]
# Required by megatron.energon.transforms.
transforms = [
  "torchvision",
]

# Storage backends, each forwarding to the matching multi-storage-client (MSC) extra.
s3 = [
  "multi-storage-client[boto3]",
]
aistore = [
  "multi-storage-client[aistore]",
]
azure-storage-blob = [
  "multi-storage-client[azure-storage-blob]",
]
google-cloud-storage = [
  "multi-storage-client[google-cloud-storage]",
]
oci = [
  "multi-storage-client[oci]",
]

# Efficient decoding of audio and video files.
av_decode = [
  "av>=14.4.0",
  "bitstring>=4.2.3",
  "ebmlite>=3.3.1",
  "filetype>=1.2.0",
  "sortedcontainers>=2.4.0",
]
# Needed when decoding with guess_content=True.
guess_content = [
  "filetype>=1.0.0",
]

[project.urls]
Homepage = "https://github.com/NVIDIA/Megatron-Energon"

# Console entry point: the `energon` command maps to megatron.energon.cli.main:main.
[project.scripts]
energon = "megatron.energon.cli.main:main"

# Version is taken from version control rather than a static string.
[tool.hatch.version]
source = "vcs"

# Both distribution formats are built from the same package directory.
[tool.hatch.build.targets.sdist]
packages = ["src/megatron"]

[tool.hatch.build.targets.wheel]
packages = ["src/megatron"]

# Ruff: general settings first, lint rule selection in the sub-table below.
[tool.ruff]
line-length = 100
target-version = "py310"
include = ["**/*.py", "**/*.pyi"]
exclude = [
  ".idea",
  "docs",
]

[tool.ruff.lint]
# "I" enables the isort-style import sorting rules on top of the defaults.
extend-select = ["I"]
# E741: ambiguous variable name; E731: lambda assigned to a name.
ignore = ["E741", "E731"]