av_data.py 706 Bytes
Newer Older
maming's avatar
maming committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
# Copyright (c) 2025, NVIDIA CORPORATION.
# SPDX-License-Identifier: BSD-3-Clause

from typing import Optional

import torch

from megatron.energon.edataclass import edataclass


@edataclass
class AVData:
    """Container for video and audio clips together with their time ranges.

    All four fields are annotated ``Optional`` and may therefore be ``None``.

    NOTE(review): presumably the fields of a modality are ``None`` when that
    modality is absent or was not requested -- confirm against the code that
    constructs this object.
    NOTE(review): each ``*_timestamps`` list looks intended to be
    index-aligned with the matching ``*_clips`` list (one ``(start, end)``
    pair per clip); nothing in this class enforces that -- confirm.
    """

    #: Video clips, one tensor per clip, each of shape (frames, channels, h, w).
    video_clips: Optional[list[torch.Tensor]]
    #: Time ranges of the video clips as (start, end) tuples, in seconds.
    video_timestamps: Optional[list[tuple[float, float]]]
    #: Audio clips, one tensor per clip, each of shape (channels, samples).
    audio_clips: Optional[list[torch.Tensor]]
    #: Time ranges of the audio clips as (start, end) tuples, in seconds.
    audio_timestamps: Optional[list[tuple[float, float]]]