among ranks in multigpu setting or only pulling a sample of documents
among ranks in multigpu setting or only pulling a sample of documents
"""
"""
returnislice(raw_iterator,rank,limit,world_size)
returnislice(raw_iterator,rank,limit,world_size)
classCollator:
"""
A class for reordering and batching elements of an array.
This class allows for sorting an array based on a provided sorting function, grouping elements based on a grouping function, and generating batches from the sorted and grouped data.
"""
def__init__(
self,
arr:List,
sort_fn:Callable,
group_fn:Callable=lambdax:x[1],
grouping:bool=False,
)->None:
self.grouping=grouping
self.fn=sort_fn
self.group_fn=lambdax:group_fn(x[1])# first index are enumerated indices