total_len:int# Total token count in the sample, including text and image tokens
labels:torch.Tensor=None
@dataclass
class ImageTaskSamplePacked(Sample):
    """Dataclass to store a single packed sample (not a batch).

    P = Number of sub-samples in the packed sample
    seq_len = Total sequence length
    num_imgs = Number of images across all samples in the packed sample
    """

    __key__: str  # Sample name
    __restore_key__: Tuple[Union[str, int, tuple], ...]  # Key used by the dataset library to restore this sample
    __subflavor__: Dict  # Sample metadata. Deprecated.
    __subflavors__: Dict  # Sample metadata.
    tokens: torch.Tensor  # Input tokens packed into a single tensor (seq_len,)
    labels: torch.Tensor  # Target tokens packed into a single tensor (seq_len,)
    imgs: List[torch.Tensor]  # Input images
    num_tiles: List[int]  # Number of tiles for each image of each sample (num_imgs)
    max_length: int  # Maximum length across sub-samples.
    cu_lengths: List[int]  # Cumulative length of each sub-sample in this packed sample incl. text and image tokens (P,)
# Typing for the resulting batch data after encode_batch()
@dataclass
class ImageTaskBatchPacked(Batch):
    """Dataclass to store a batch of packed samples.

    N = Batch size
    P = Number of samples in the packed sample
    seq_len = Maximum sequence length
    num_imgs = Number of images across all samples in the packed sample
    """

    __key__: List[str]  # Sample names
    __restore_key__: Tuple[Union[str, int, tuple], ...]  # Key used by the dataset library to restore this batch
    __subflavor__: Dict  # Sample metadata. Deprecated.
    __subflavors__: List[Dict]  # Sample metadatas.
    tokens: torch.Tensor  # Input tokens packed and padded (N, seq_len)
    labels: torch.Tensor  # Target tokens packed and padded (N, seq_len)
    imgs: torch.Tensor  # All image tiles stacked into a single tensor (num_tiles, C, H, W)
    num_tiles: List[List[int]]  # Number of tiles per image (N, num_imgs)
    max_lengths: List[int]  # Maximum length across sub-samples (N,)
    cu_lengths: List[List[int]]  # Cumulative length of each sub-sample in each packed sample of the batch (N, P)
# Based on https://github.com/hiyouga/LLaMA-Factory/blob/641d0dab08d96a93c34657742213d8994d9ed476/src/llamafactory/data/processors/processor_utils.py#L19
def search_for_fit(numbers: List[int], capacity: int) -> int:
    """Finds the index of largest number that fits into the knapsack with the given capacity.

    Args:
        numbers: Candidate lengths, sorted in ascending order (required by bisect).
        capacity: Remaining capacity of the knapsack.

    Returns:
        Index of the largest element that is <= capacity, or -1 if no element fits
        (including when ``numbers`` is empty).
    """
    # bisect.bisect returns the insertion point, i.e. the number of elements <= capacity,
    # so the largest fitting element sits at index - 1. An insertion point of 0 means
    # even the smallest element exceeds the capacity.
    index = bisect.bisect(numbers, capacity)
    return -1 if index == 0 else (index - 1)
# Based on https://github.com/hiyouga/LLaMA-Factory/blob/641d0dab08d96a93c34657742213d8994d9ed476/src/llamafactory/data/processors/processor_utils.py#L27