# Copyright (c) 2025, NVIDIA CORPORATION.
# SPDX-License-Identifier: BSD-3-Clause
"""Resolve ``__version__`` and re-export names from ``megatron.energon`` modules.

The version string is read from the installed ``megatron-energon``
distribution metadata; when that lookup fails, ``__version__`` is set to
``"unknown"``. Every name imported below is also listed in ``__all__``.
"""

try:
    from importlib.metadata import version

    # Version string recorded in the metadata of the installed
    # "megatron-energon" distribution.
    __version__ = version("megatron-energon")
except Exception:
    # Fallback for development mode when package is not installed
    # The catch is broad: any failure of the lookup above (not only a missing
    # distribution) ends up here, so the module still imports.
    __version__ = "unknown"

# Re-exports: each name imported from the modules below is also listed in
# ``__all__`` further down.
from megatron.energon.cache import (
    CachePool,
    DecodeFileStore,
    DirectLazy,
    FileCacheLazy,
    FileStore,
    FileStoreCachePool,
    FileStoreDecoder,
    Lazy,
    MockLazy,
    NoCachePool,
    SystemFileStore,
)
from megatron.energon.edataclass import edataclass
from megatron.energon.flavors import (
    BaseCoreDatasetFactory,
    BaseWebdatasetFactory,
    CaptioningSample,
    CaptioningWebdataset,
    CrudeJsonlDatasetFactory,
    CrudeSample,
    CrudeWebdataset,
    DefaultCrudeJsonlDatasetFactory,
    DefaultDecoderWebdatasetFactory,
    DefaultGenericWebdatasetFactory,
    ImageClassificationSample,
    ImageClassificationWebdataset,
    ImageSample,
    ImageWebdataset,
    InterleavedSample,
    InterleavedWebdataset,
    JoinedWebdatasetFactory,
    MultiChoiceVQASample,
    MultiChoiceVQAWebdataset,
    OCRSample,
    OCRWebdataset,
    Sample,
    SampleDecoder,
    SavableDataset,
    SimilarityInterleavedSample,
    SimilarityInterleavedWebdataset,
    StandardWebdatasetFactory,
    TextSample,
    TextWebdataset,
    VidQASample,
    VidQAWebdataset,
    VQAOCRWebdataset,
    VQASample,
    VQAWebdataset,
)
from megatron.energon.loader import get_loader, get_savable_loader
from megatron.energon.metadataset import (
    DatasetLoader,
    DatasetLoaderInterface,
    Metadataset,
    MetadatasetV2,
    load_dataset,
    prepare_metadataset,
)
from megatron.energon.savable_loader import SavableDataLoader
from megatron.energon.source_info import SourceInfo
from megatron.energon.task_encoder import (
    AugmentTaskEncoder,
    Batch,
    Cooker,
    DefaultTaskEncoder,
    TaskEncoder,
    basic_sample_keys,
    batch_list,
    batch_pad_stack,
    batch_stack,
    cooker,
    generic_batch,
    get_train_dataset,
    get_val_dataset,
    get_val_datasets,
    stateless,
)
from megatron.energon.worker import WorkerConfig
from megatron.energon.wrappers import (
    BatchDataset,
    BlendDataset,
    ConcatDataset,
    EpochizeDataset,
    FilterDataset,
    GcDataset,
    GroupBatchDataset,
    IterMapDataset,
    LimitDataset,
    LogSampleDataset,
    MapDataset,
    MixBatchDataset,
    PackingDataset,
    RepeatDataset,
    ShuffleBufferDataset,
    SkipSample,
    concat_pad,
    generic_concat,
    homogeneous_concat_mix,
)

# Names exported by a star-import of this module: ``__version__`` plus every
# name imported above. When adding a new import, add its name here as well.
# NOTE(review): entries look alphabetized ignoring case — confirm the project's
# sorting rule before inserting new names.
__all__ = [
    "__version__",
    "AugmentTaskEncoder",
    "BaseCoreDatasetFactory",
    "BaseWebdatasetFactory",
    "basic_sample_keys",
    "batch_list",
    "batch_pad_stack",
    "batch_stack",
    "Batch",
    "BatchDataset",
    "BlendDataset",
    "CachePool",
    "CaptioningSample",
    "CaptioningWebdataset",
    "concat_pad",
    "ConcatDataset",
    "cooker",
    "Cooker",
    "CrudeJsonlDatasetFactory",
    "CrudeSample",
    "CrudeWebdataset",
    "DatasetLoader",
    "DatasetLoaderInterface",
    "DecodeFileStore",
    "DefaultCrudeJsonlDatasetFactory",
    "DefaultDecoderWebdatasetFactory",
    "DefaultGenericWebdatasetFactory",
    "DefaultTaskEncoder",
    "DirectLazy",
    "edataclass",
    "EpochizeDataset",
    "FileCacheLazy",
    "FileStore",
    "FileStoreCachePool",
    "FileStoreDecoder",
    "FilterDataset",
    "GcDataset",
    "generic_batch",
    "generic_concat",
    "get_loader",
    "get_savable_loader",
    "get_train_dataset",
    "get_val_dataset",
    "get_val_datasets",
    "GroupBatchDataset",
    "homogeneous_concat_mix",
    "ImageClassificationSample",
    "ImageClassificationWebdataset",
    "ImageSample",
    "ImageWebdataset",
    "InterleavedSample",
    "InterleavedWebdataset",
    "IterMapDataset",
    "JoinedWebdatasetFactory",
    "Lazy",
    "LimitDataset",
    "load_dataset",
    "LogSampleDataset",
    "MapDataset",
    "Metadataset",
    "MetadatasetV2",
    "MixBatchDataset",
    "MockLazy",
    "MultiChoiceVQASample",
    "MultiChoiceVQAWebdataset",
    "NoCachePool",
    "OCRSample",
    "OCRWebdataset",
    "PackingDataset",
    "prepare_metadataset",
    "RepeatDataset",
    "Sample",
    "SampleDecoder",
    "SavableDataLoader",
    "SavableDataset",
    "ShuffleBufferDataset",
    "SimilarityInterleavedSample",
    "SimilarityInterleavedWebdataset",
    "SkipSample",
    "SourceInfo",
    "StandardWebdatasetFactory",
    "stateless",
    "SystemFileStore",
    "TaskEncoder",
    "TextSample",
    "TextWebdataset",
    "VidQASample",
    "VidQAWebdataset",
    "VQAOCRWebdataset",
    "VQASample",
    "VQAWebdataset",
    "WorkerConfig",
]