Commit e4bdffd0 authored by haileyschoelkopf's avatar haileyschoelkopf
Browse files

add first_n sampler

parent cc547c7b
class Sampler:
class ContextSampler:
def __init__(self, docs, task, fewshot_indices=None, rnd=None) -> None:
self.rnd = rnd
assert self.rnd, "must pass rnd to FewShotSampler!"
......@@ -71,7 +71,19 @@ class Sampler:
return self.rnd.sample(self.docs, n)
class BalancedSampler(Sampler):
class FirstNSampler(ContextSampler):
def sample(self, n) -> None:
"""
Draw the first `n` samples in order from the specified split.
Used for tasks with "canonical" ordered fewshot examples, such as MMLU and CMMLU.
"""
assert n <= len(
self.docs
), f"Error: number of fewshot samples requested exceeds the {len(self.docs)} that are available."
return self.docs[:n]
class BalancedSampler(ContextSampler):
def sample(self, n) -> None:
"""
TODO: this should return approximately class-balanced samples from our fewshot examples.
......@@ -81,12 +93,27 @@ class BalancedSampler(Sampler):
pass
class ManualSampler(Sampler):
class ManualSampler(ContextSampler):
def sample(self, n) -> None:
""" """
pass
SAMPLER_REGISTRY = {
"default": ContextSampler,
"first_n": FirstNSampler,
}
def get_sampler(name):
try:
return SAMPLER_REGISTRY[name]
except KeyError:
raise ValueError(
f"Attempted to use contextsampler '{name}', but no sampling strategy for this name found! Supported model names: {', '.join(SAMPLER_REGISTRY.keys())}"
)
# TODO: how should we do design here? might be better to have a single sampler and pass more kwargs at init.
# Depends what's easier for new user to add own functionality on top of
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment