Commit 3e8135ce authored by Baber

Merge branch 'main' into comma

parents 8e560c96 0c134ee9
......@@ -2,7 +2,7 @@ import logging
import os
__version__ = "0.4.9"
__version__ = "0.4.9.1"
# Lazy-load .evaluator module to improve CLI startup
......
......@@ -5,8 +5,9 @@ import traceback
from typing import Iterator, List, Sequence, Tuple, TypeVar
# This is a cpp module. Compile janitor_util.cpp with:
# c++ -O3 -Wall -shared -std=c++11 -fPIC $(python3 -m pybind11 --includes) janitor_util.cpp -o janitor_util$(python3-config --extension-suffix) -undefined dynamic_lookup
# This is a cpp module.
# See scripts/clean_training_data/README.md for instructions to compile janitor_util.cpp
try:
import janitor_util
......
......@@ -71,13 +71,6 @@ class SteeredModel(HFLM):
"""
HFLM with a steered forward pass.
To derive steering vectors from a sparse model loadable with sparsify or sae_lens,
provide the path to a CSV file with the following columns (example rows are provided below):
loader,action,sparse_model,hookpoint,feature_index,steering_coefficient,sae_id,description,
sparsify,add,EleutherAI/sae-pythia-70m-32k,layers.3,30,10.0,,,
sae_lens,add,gemma-scope-2b-pt-res-canonical,layers.20,12082,240.0,layer_20/width_16k/canonical,increase dogs,
To load steering vectors directly, provide the path to a pytorch (.pt) file with content in the following format:
{
......@@ -86,9 +79,17 @@ class SteeredModel(HFLM):
"steering_coefficient": <float>,
"action": <Literal["add", "clamp"]>,
"bias": <torch.Tensor | None>,
"head_index": <int | None>,
},
...
}
To derive steering vectors from a sparse model loadable with sparsify or sae_lens,
provide the path to a CSV file with the following columns (example rows are provided below):
loader,action,sparse_model,hookpoint,feature_index,steering_coefficient,head_index,sae_id,description,
sparsify,add,EleutherAI/sae-pythia-70m-32k,layers.3,30,10.0,,,,
sae_lens,add,gemma-scope-2b-pt-res-canonical,layers.20,12082,240.0,,layer_20/width_16k/canonical,increase dogs,
"""
super().__init__(pretrained=pretrained, device=device, **kwargs)
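For reference, a minimal sketch of building a .pt file in the layout documented above, assuming the top-level keys are hookpoint names as in the steer_config loop below; the hookpoint name, vector size, and output path are illustrative.

import torch

# Sketch of the documented .pt layout; names and sizes are illustrative.
steer_config = {
    "layers.3": {
        "steering_vector": torch.randn(512),   # shape [features]
        "steering_coefficient": 10.0,
        "action": "add",                       # or "clamp"
        "bias": None,                          # optional Tensor, only used by "clamp"
        "head_index": None,                    # optional attention-head index
    },
}
torch.save(steer_config, "steer_config.pt")    # output path is illustrative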
......@@ -105,27 +106,31 @@ class SteeredModel(HFLM):
hook_to_steer = {}
for hookpoint, steer_info in steer_config.items():
action = steer_info["action"]
steering_coefficient = steer_info["steering_coefficient"]
steering_vector = (
steer_info["steering_vector"].to(self.device).to(self.model.dtype)
)
bias = (
steer_info["bias"].to(self.device).to(self.model.dtype)
if steer_info["bias"] is not None
else None
)
steering_coefficient = float(steer_info.get("steering_coefficient", 1.0))
head_index = steer_info.get("head_index", None)
bias = steer_info.get("bias", None)
if bias is not None:
bias = bias.to(self.device).to(self.model.dtype)
if action == "add":
# Steers the model by adding some multiple of a steering vector to all sequence positions.
hook_to_steer[hookpoint] = (
lambda acts: acts + steering_coefficient * steering_vector
# Steer the model by adding a multiple of a steering vector to all sequence positions.
assert bias is None, "Bias is not supported for the `add` action."
hook_to_steer[hookpoint] = partial(
self.add,
vector=steering_vector * steering_coefficient,
head_index=head_index,
)
elif action == "clamp":
# Steer the model by clamping the activations to a value in the direction of the steering vector.
hook_to_steer[hookpoint] = partial(
self.clamp,
steering_vector=steering_vector,
direction=steering_vector / torch.norm(steering_vector),
value=steering_coefficient,
bias=bias,
head_index=head_index,
)
else:
raise ValueError(f"Unknown hook type: {action}")
......@@ -195,34 +200,62 @@ class SteeredModel(HFLM):
return steer_data
@classmethod
def add(
cls,
acts: Tensor,
vector: Tensor,
head_index: Optional[int],
):
"""Adds the given vector to the activations.
Args:
acts (Tensor): The activations tensor to edit of shape [batch, pos, ..., features]
vector (Tensor): A vector to add of shape [features]
head_index (int | None): Optional attention head index to add to
"""
if head_index is not None:
acts[:, :, head_index, :] = acts[:, :, head_index, :] + vector
else:
acts = acts + vector
return acts
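As a quick sanity check, the add semantics above can be replicated standalone; shapes below are illustrative, assuming a [batch, pos, heads, features] layout when head_index is given.

import torch
from typing import Optional

def add(acts: torch.Tensor, vector: torch.Tensor, head_index: Optional[int]) -> torch.Tensor:
    # Mirrors SteeredModel.add: shift one head, or every position, by `vector`.
    if head_index is not None:
        acts[:, :, head_index, :] = acts[:, :, head_index, :] + vector
    else:
        acts = acts + vector
    return acts

acts = torch.zeros(1, 3, 4, 8)                  # [batch, pos, heads, features]
steered = add(acts.clone(), torch.ones(8), head_index=2)
assert steered[:, :, 2, :].sum() == 3 * 8       # only head 2 is shifted
assert steered[:, :, 0, :].sum() == 0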
@classmethod
def clamp(
cls,
acts: Tensor,
steering_vector: Tensor,
direction: Tensor,
value: float,
head_index: Optional[int],
bias: Optional[Tensor] = None,
):
"""Clamps a direction of the activations to be the steering vector * the value.
"""Clamps the activations to a given value in a specified direction. The direction
must be a unit vector.
Args:
acts (Tensor): The activations tensor to edit of shape [batch, pos, features]
steering_vector (Tensor): A direction to clamp of shape [features]
acts (Tensor): The activations tensor to edit of shape [batch, pos, ..., features]
direction (Tensor): A direction to clamp of shape [features]
value (float): Value to clamp the direction to
head_index (int | None): Optional attention head index to clamp
bias (Tensor | None): Optional bias to add to the activations
Returns:
Tensor: The modified activations with the specified direction clamped
"""
if bias is not None:
acts = acts - bias
direction = steering_vector / torch.norm(steering_vector)
proj_magnitude = torch.sum(acts * direction, dim=-1, keepdim=True)
orthogonal_component = acts - proj_magnitude * direction
if head_index is not None:
x = acts[:, :, head_index, :]
proj = (x * direction).sum(dim=-1, keepdim=True)
clamped = orthogonal_component + direction * value
clamped = acts.clone()
clamped[:, :, head_index, :] = x + direction * (value - proj)
else:
proj = torch.sum(acts * direction, dim=-1, keepdim=True)
clamped = acts + direction * (value - proj)
if bias is not None:
return clamped + bias
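A quick numerical check of the clamp math in the else branch above: after clamping, the projection of the activations onto the unit direction equals value. Tensor shapes and values are illustrative.

import torch

direction = torch.tensor([3.0, 4.0])
direction = direction / torch.norm(direction)    # clamp expects a unit vector
acts = torch.randn(2, 5, 2)                      # [batch, pos, features]
value = 7.0

proj = torch.sum(acts * direction, dim=-1, keepdim=True)
clamped = acts + direction * (value - proj)

new_proj = torch.sum(clamped * direction, dim=-1)
assert torch.allclose(new_proj, torch.full_like(new_proj, value))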
......
......@@ -680,10 +680,19 @@ class HFLM(TemplateLM):
"0.4.0"
):
raise AssertionError("load_in_4bit requires peft >= 0.4.0")
if self._model.config.vocab_size != len(self.tokenizer):
# Compatible with Gemma3 (multimodal) and old models
if hasattr(self._model.config, "text_config") and hasattr(
self._model.config.text_config, "vocab_size"
):
vocab_size = self._model.config.text_config.vocab_size
else:
vocab_size = self._model.config.vocab_size
if vocab_size != len(self.tokenizer):
# resize model for LoRAs with added tokens
eval_logger.info(
f"Model config indicates vocab_size='{self._model.config.vocab_size}', but found tokenizer with vocab size '{len(self.tokenizer)}'. Resizing model embedding layer..."
f"Model config indicates vocab_size='{vocab_size}', but found tokenizer with vocab size '{len(self.tokenizer)}'. Resizing model embedding layer..."
)
self._model.resize_token_embeddings(len(self.tokenizer))
self._model = PeftModel.from_pretrained(
......
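The vocab-size resolution above can be summarized as a small helper; this is a sketch assuming a transformers PretrainedConfig-style object, where multimodal configs such as Gemma 3 nest the language-model vocab under text_config.

def resolve_vocab_size(config) -> int:
    # Multimodal configs (e.g. Gemma 3) keep the LM vocab under text_config.
    if hasattr(config, "text_config") and hasattr(config.text_config, "vocab_size"):
        return config.text_config.vocab_size
    return config.vocab_size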
......@@ -289,7 +289,7 @@ class OpenAIChatCompletion(LocalChatCompletion):
"seed": seed,
**gen_kwargs,
}
if "o1" in self.model:
if "o1" in self.model or "5" in self.model:
output.pop("stop")
output["temperature"] = 1
elif "o3" in self.model:
......
......@@ -28,9 +28,8 @@ class OptimumLM(HFLM):
**kwargs,
) -> None:
if "backend" in kwargs:
# optimum currently only supports causal models
assert kwargs["backend"] == "causal", (
"Currently, only OVModelForCausalLM is supported."
assert kwargs["backend"] in ["causal", "seq2seq"], (
"Currently, only OVModelForCausalLM or OVModelForSeq2SeqLM are supported."
)
self.openvino_device = device
......@@ -54,7 +53,7 @@ class OptimumLM(HFLM):
"package `optimum` is not installed. Please install it via `pip install optimum[openvino]`"
)
else:
from optimum.intel.openvino import OVModelForCausalLM
from optimum.intel.openvino import OVModelForCausalLM, OVModelForSeq2SeqLM
model_kwargs = kwargs if kwargs else {}
if "ov_config" in model_kwargs:
......@@ -76,17 +75,14 @@ class OptimumLM(HFLM):
model_kwargs["ov_config"]["MODEL_DISTRIBUTION_POLICY"] = (
"PIPELINE_PARALLEL"
)
model_file = Path(pretrained) / "openvino_model.xml"
if model_file.exists():
export = False
else:
export = True
self._model = OVModelForCausalLM.from_pretrained(
model_cls = (
OVModelForCausalLM if self.backend == "causal" else OVModelForSeq2SeqLM
)
self._model = model_cls.from_pretrained(
pretrained,
revision=revision,
trust_remote_code=trust_remote_code,
export=export,
device=self.openvino_device.upper(),
**model_kwargs,
)
......@@ -216,7 +216,7 @@ class SGLangLM(TemplateLM):
# we group requests by their generation_kwargs,
# so that we don't try to execute e.g. greedy sampling and temp=0.8 sampling
# in the same batch.
re_ords = Collator(requests, _collate_gen, group_by="gen_kwargs")
re_ords = Collator(requests, _collate_gen, group_by=None)
chunks = re_ords.get_batched(
n=int(self.batch_size) if self.batch_size != "auto" else 0, batch_fn=None
)
......@@ -232,36 +232,41 @@ class SGLangLM(TemplateLM):
context_and_encoding, all_gen_kwargs = zip(*chunk)
context, context_encoding = zip(*context_and_encoding)
# we assume all gen kwargs in the batch are the same
# this is safe to assume because the `grouper` object ensures it.
gen_kwargs = all_gen_kwargs[0]
# unpack our keyword arguments.
if isinstance(gen_kwargs, dict):
kwargs = copy.deepcopy(gen_kwargs) # edge case for repeats > 1
# add EOS token to stop sequences
until = handle_stop_sequences(kwargs.pop("until", None), eos=eos)
else:
raise ValueError(
f"Expected `kwargs` to be of type `dict` but got {type(gen_kwargs)}"
context_encoding_truncated = []
sampling_params = []
for x, gen_kwargs in zip(context_encoding, all_gen_kwargs):
# unpack our keyword arguments.
if isinstance(gen_kwargs, dict):
kwargs = copy.deepcopy(gen_kwargs) # edge case for repeats > 1
# add EOS token to stop sequences
until = handle_stop_sequences(kwargs.pop("until", None), eos=eos)
else:
raise ValueError(
f"Expected `kwargs` to be of type `dict` but got {type(gen_kwargs)}"
)
if "max_gen_toks" in kwargs.keys():
max_gen_toks = kwargs.pop("max_gen_toks")
else:
max_gen_toks = self.max_gen_toks
# set the max length in tokens of inputs ("context_enc")
# max len for inputs = max length, minus room to generate the max new tokens
max_ctx_len = self.max_length - max_gen_toks
if len(x) > max_ctx_len:
context_encoding_truncated.append(x[-max_ctx_len:])
else:
context_encoding_truncated.append(x)
# create sampling params
kwargs = self.modify_gen_kwargs(kwargs)
sampling_params.append(
kwargs | {"max_new_tokens": max_gen_toks, "stop": until}
)
if "max_gen_toks" in kwargs.keys():
max_gen_toks = kwargs.pop("max_gen_toks")
else:
max_gen_toks = self.max_gen_toks
# set the max length in tokens of inputs ("context_enc")
# max len for inputs = max length, minus room to generate the max new tokens
max_ctx_len = self.max_length - max_gen_toks
context_encoding = [x[-max_ctx_len:] for x in context_encoding]
# perform batched generation
# cont is a list of dicts. See https://github.com/sgl-project/sglang/blob/0a6f18f068e4095fc228e798454e8496c9749214/python/sglang/srt/entrypoints/engine.py#L111 .
cont = self._model_generate(
requests=context_encoding,
requests=context_encoding_truncated,
generate=True,
max_tokens=max_gen_toks,
stop=until,
**kwargs,
sampling_params=sampling_params,
)
# cache generations
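Illustrative pairing behind the loop above: each encoded context gets its own sampling-params dict (keys follow SGLang's sampling parameters; token ids and values below are made up), so requests with different gen_kwargs can share one batch.

contexts = [[101, 7592], [101, 2088, 999]]               # illustrative token ids
all_gen_kwargs = [{"temperature": 0.0}, {"temperature": 0.8}]

sampling_params = [
    kwargs | {"max_new_tokens": 64, "stop": ["</s>"]}    # mirrors the loop above
    for kwargs in all_gen_kwargs
]
assert len(sampling_params) == len(contexts)             # one entry per request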
......@@ -284,28 +289,22 @@ class SGLangLM(TemplateLM):
self,
requests: List[List[int]] = None,
generate: bool = False,
max_tokens: int = None,
stop: Optional[List[str]] = None,
sampling_params: Union[List[Dict], Dict, None] = None,
return_logprob: bool = False,
top_logprobs_num: int = 1,
logprob_start_len: int = -1,
**kwargs,
):
# check sglang sampling parameters: https://github.com/sgl-project/sglang/blob/main/python/sglang/srt/sampling/sampling_params.py#L21 and https://docs.sglang.ai/references/sampling_params.html.
if generate:
kwargs = self.modify_gen_kwargs(kwargs)
sampling_params = {
"max_new_tokens": max_tokens,
"stop": stop,
}
sampling_params.update(kwargs)
else:
sampling_params = {
"temperature": 0,
"max_new_tokens": 1,
}
sampling_params.update(kwargs)
if not generate:
sampling_params = sampling_params if sampling_params else {}
sampling_params.update(
{
"temperature": 0,
"max_new_tokens": 1,
}
)
if not isinstance(sampling_params, List):
sampling_params = [sampling_params] * len(requests)
# Refer to: https://docs.sglang.ai/backend/offline_engine_api.html
outputs = self.model.generate(
input_ids=requests,
......
import copy
import gc
import inspect
import logging
import os
from importlib.metadata import version
......@@ -33,7 +32,7 @@ from lm_eval.utils import (
try:
import ray
from vllm import LLM, SamplingParams
from vllm import LLM, SamplingParams, TokensPrompt
from vllm.lora.request import LoRARequest
from vllm.transformers_utils.tokenizer import get_tokenizer
from vllm.utils import get_open_port
......@@ -51,7 +50,7 @@ eval_logger = logging.getLogger(__name__)
def _vllm_mp_worker(
model_args: dict,
sampling_params: "SamplingParams",
sampling_params: list["SamplingParams"],
requests: list[list[int]],
lora_request: "LoRARequest",
result_queue: "Queue",
......@@ -79,7 +78,7 @@ def _vllm_mp_worker(
try:
llm = LLM(**model_args)
res = llm.generate(
prompt_token_ids=requests,
[TokensPrompt(prompt_token_ids=request) for request in requests],
sampling_params=sampling_params,
lora_request=lora_request,
)
......@@ -196,6 +195,12 @@ class VLLM(TemplateLM):
self.batch_size = "auto"
eval_logger.info("Manual batching is not compatible with data parallelism.")
if "gemma" in pretrained.lower():
add_bos_token = True
eval_logger.info(
"Found 'gemma' in model name, a BOS token will be used as Gemma series models underperform without it."
)
from transformers import AutoConfig
self._config = AutoConfig.from_pretrained(
......@@ -214,11 +219,6 @@ class VLLM(TemplateLM):
"enable_thinking", enable_thinking
)
self.add_bos_token = add_bos_token
if "gemma" in pretrained.lower():
self.add_bos_token = True
eval_logger.info(
"Found 'gemma' in model name, a BOS token will be used as Gemma series models underperform without it."
)
if parse_version(version("vllm")) >= parse_version("0.8.3"):
kwargs_resolve_hf_chat_template = {
......@@ -239,13 +239,6 @@ class VLLM(TemplateLM):
model_config = engine_args.create_model_config()
kwargs_resolve_hf_chat_template["model_config"] = model_config
# https://github.com/vllm-project/vllm/pull/18259
if (
"trsut_remote_code"
in inspect.signature(resolve_hf_chat_template).parameters
):
kwargs_resolve_hf_chat_template["trsut_remote_code"] = trust_remote_code
else:
kwargs_resolve_hf_chat_template["trust_remote_code"] = trust_remote_code
......@@ -371,17 +364,14 @@ class VLLM(TemplateLM):
self,
requests: List[List[int]] = None,
generate: bool = False,
max_tokens: int = None,
stop: Optional[List[str]] = None,
**kwargs,
sampling_params: Union[List["SamplingParams"], "SamplingParams", None] = None,
):
if generate:
kwargs = self.modify_gen_kwargs(kwargs)
sampling_params = SamplingParams(max_tokens=max_tokens, stop=stop, **kwargs)
else:
if not generate or sampling_params is None:
sampling_params = SamplingParams(
temperature=0, prompt_logprobs=1, max_tokens=1, detokenize=False
)
if not isinstance(sampling_params, List):
sampling_params = [sampling_params] * len(requests)
if self.data_parallel_size > 1 and not self.V1:
# vLLM hangs if resources are set in ray.remote
# also seems to only work with decorator and not with ray.remote() fn
......@@ -389,13 +379,13 @@ class VLLM(TemplateLM):
@ray.remote
def run_inference_one_model(
model_args: dict,
sampling_params: SamplingParams,
sampling_params: List["SamplingParams"],
requests: List[List[int]],
lora_request: LoRARequest,
lora_request: "LoRARequest",
):
llm = LLM(**model_args)
return llm.generate(
prompt_token_ids=requests,
[TokensPrompt(prompt_token_ids=request) for request in requests],
sampling_params=sampling_params,
lora_request=lora_request,
)
......@@ -403,9 +393,12 @@ class VLLM(TemplateLM):
# dispatch requests to all self.data_parallel_size workers, in interleaved fashion
# interleaved important to balance context lengths across workers
requests = [list(x) for x in distribute(self.data_parallel_size, requests)]
sampling_params = [
list(sp) for sp in distribute(self.data_parallel_size, sampling_params)
]
inputs = (
(self.model_args, sampling_params, req, self.lora_request)
for req in requests
(self.model_args, sp, req, self.lora_request)
for req, sp in zip(requests, sampling_params)
)
object_refs = [run_inference_one_model.remote(*x) for x in inputs]
results = ray.get(object_refs)
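For context on the interleaved split above, a small sketch assuming distribute here is more_itertools.distribute: splitting requests and their per-request params with the same call keeps each worker's request/params pairs aligned. Values are stand-ins.

from more_itertools import distribute

requests = [[1], [2], [3], [4], [5]]
sampling_params = ["sp1", "sp2", "sp3", "sp4", "sp5"]        # stand-ins for SamplingParams

req_shards = [list(x) for x in distribute(2, requests)]
sp_shards = [list(x) for x in distribute(2, sampling_params)]

assert req_shards == [[[1], [3], [5]], [[2], [4]]]           # interleaved split
assert sp_shards == [["sp1", "sp3", "sp5"], ["sp2", "sp4"]]  # same order, pairs stay aligned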
......@@ -420,16 +413,18 @@ class VLLM(TemplateLM):
dp_master_port = os.environ.get("VLLM_DP_MASTER_PORT") or get_open_port()
requests = (list(x) for x in distribute(self.data_parallel_size, requests))
sampling_params = (
list(sp) for sp in distribute(self.data_parallel_size, sampling_params)
)
procs, resq = [], Queue()
# We use Process as it is non-daemonic
try:
for rank, req in enumerate(requests):
for rank, (sp, req) in enumerate(zip(requests, sampling_params)):
proc = Process(
target=_vllm_mp_worker,
args=(
self.model_args.copy(),
sampling_params,
sp,
req,
self.lora_request,
resq,
......@@ -484,7 +479,7 @@ class VLLM(TemplateLM):
else:
outputs = self.model.generate(
prompt_token_ids=requests,
[TokensPrompt(prompt_token_ids=request) for request in requests],
sampling_params=sampling_params,
use_tqdm=True if self.batch_size == "auto" else False,
lora_request=self.lora_request,
......@@ -583,10 +578,11 @@ class VLLM(TemplateLM):
# - any OOMs will happen right away rather than near the end
return -len(_requests[0][1]), _requests[0][0]
# we group requests by their generation_kwargs,
# so that we don't try to execute e.g. greedy sampling and temp=0.8 sampling
# in the same batch.
re_ords = Collator(requests, _collate_gen, group_by="gen_kwargs")
re_ords = Collator(
requests,
_collate_gen,
group_by=None,
)
chunks = re_ords.get_batched(
n=int(self.batch_size) if self.batch_size != "auto" else 0, batch_fn=None
)
......@@ -601,41 +597,44 @@ class VLLM(TemplateLM):
for chunk in chunks:
context_and_encoding, all_gen_kwargs = zip(*chunk)
context, context_encoding = zip(*context_and_encoding)
# we assume all gen kwargs in the batch are the same
# this is safe to assume because the `grouper` object ensures it.
gen_kwargs = all_gen_kwargs[0]
# unpack our keyword arguments.
if isinstance(gen_kwargs, dict):
kwargs = copy.deepcopy(gen_kwargs) # edge case for repeats > 1
# add EOS token to stop sequences
until = handle_stop_sequences(kwargs.pop("until", None), eos=eos)
else:
raise ValueError(
f"Expected `kwargs` to be of type `dict` but got {type(gen_kwargs)}"
)
if "max_gen_toks" in kwargs.keys():
max_gen_toks = kwargs.pop("max_gen_toks")
else:
max_gen_toks = self.max_gen_toks
# set the max length in tokens of inputs ("context_enc")
# max len for inputs = max length, minus room to generate the max new tokens
max_ctx_len = self.max_length - max_gen_toks
all_lengths = [len(x) for x in context_encoding]
for length in all_lengths:
if length > max_ctx_len:
context_encoding_truncated = []
sampling_params = []
for x, gen_kwargs in zip(context_encoding, all_gen_kwargs):
# unpack our keyword arguments.
if isinstance(gen_kwargs, dict):
kwargs = copy.deepcopy(gen_kwargs) # edge case for repeats > 1
# add EOS token to stop sequences
until = handle_stop_sequences(kwargs.pop("until", None), eos=eos)
else:
raise ValueError(
f"Expected `kwargs` to be of type `dict` but got {type(gen_kwargs)}"
)
if "max_gen_toks" in kwargs.keys():
max_gen_toks = kwargs.pop("max_gen_toks")
else:
max_gen_toks = self.max_gen_toks
# set the max length in tokens of inputs ("context_enc")
# max len for inputs = max length, minus room to generate the max new tokens
max_ctx_len = self.max_length - max_gen_toks
if len(x) > max_ctx_len:
eval_logger.warning(
f"Context length {length} exceeds max length (context + max gen tokens): {max_ctx_len}. Truncating context."
f"Context length {len(x)} exceeds max length (context + max gen tokens): {max_ctx_len}. Truncating context."
)
context_encoding = [x[-max_ctx_len:] for x in context_encoding]
context_encoding_truncated.append(x[-max_ctx_len:])
else:
context_encoding_truncated.append(x)
# create sampling params
kwargs = self.modify_gen_kwargs(kwargs)
sampling_params.append(
SamplingParams(max_tokens=max_gen_toks, stop=until, **kwargs)
)
# perform batched generation
cont = self._model_generate(
requests=context_encoding,
requests=context_encoding_truncated,
generate=True,
max_tokens=max_gen_toks,
stop=until,
**kwargs,
sampling_params=sampling_params,
)
# cache generations
......
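A sketch of the per-request SamplingParams construction above; values are illustrative and the vllm package is required.

from vllm import SamplingParams

per_request_kwargs = [{"temperature": 0.0}, {"temperature": 0.8, "top_p": 0.95}]
sampling_params = [
    SamplingParams(max_tokens=256, stop=["</s>"], **kw)   # one per request
    for kw in per_request_kwargs
]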
......@@ -81,7 +81,7 @@ class TaskManager:
task_index = {}
for task_dir in all_paths:
tasks = self._get_task_and_group(task_dir)
task_index = {**tasks, **task_index}
task_index = {**task_index, **tasks}
return task_index
......
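The merge-order change above gives later task directories precedence, since in a dict merge the right-hand operand wins on duplicate keys. The task name and values below are made up.

task_index = {"my_task": {"yaml_path": "first_dir/my_task.yaml"}}
tasks = {"my_task": {"yaml_path": "later_dir/my_task.yaml"}}

merged = {**task_index, **tasks}       # mirrors task_index = {**task_index, **tasks}
assert merged["my_task"]["yaml_path"] == "later_dir/my_task.yaml"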
......@@ -2,7 +2,6 @@ tag:
- adr_tasks
- adr_prompt_1
dataset_path: masakhane/diacritics-restoration
dataset_kwargs: {trust_remote_code: True}
doc_to_target: target
output_type: generate_until
fewshot_split: dev
......
......@@ -2,7 +2,6 @@ tag:
- adr_tasks
- adr_prompt_2
dataset_path: masakhane/diacritics-restoration
dataset_kwargs: {trust_remote_code: True}
doc_to_target: target
output_type: generate_until
fewshot_split: dev
......
......@@ -2,7 +2,6 @@ tag:
- adr_tasks
- adr_prompt_3
dataset_path: masakhane/diacritics-restoration
dataset_kwargs: {trust_remote_code: True}
doc_to_target: target
output_type: generate_until
fewshot_split: dev
......
......@@ -2,7 +2,6 @@ tag:
- adr_tasks
- adr_prompt_4
dataset_path: masakhane/diacritics-restoration
dataset_kwargs: {trust_remote_code: True}
doc_to_target: target
output_type: generate_until
fewshot_split: dev
......
......@@ -2,7 +2,6 @@ tag:
- adr_tasks
- adr_prompt_5
dataset_path: masakhane/diacritics-restoration
dataset_kwargs: {trust_remote_code: True}
doc_to_target: target
output_type: generate_until
fewshot_split: dev
......
......@@ -4,7 +4,6 @@ tag:
task: null
dataset_path: masakhane/afrisenti
dataset_name: null
dataset_kwargs: {trust_remote_code: True}
output_type: multiple_choice
validation_split: validation
test_split: test
......
......@@ -3,7 +3,6 @@ tag:
- afrisent_prompt_2
dataset_path: masakhane/afrisenti
dataset_name: null
dataset_kwargs: {trust_remote_code: True}
output_type: multiple_choice
validation_split: validation
test_split: test
......
......@@ -3,7 +3,6 @@ tag:
- afrisenti_prompt_3
dataset_path: masakhane/afrisenti
dataset_name: null
dataset_kwargs: {trust_remote_code: True}
output_type: multiple_choice
validation_split: validation
test_split: test
......
......@@ -3,7 +3,6 @@ tag:
- afrisenti_prompt_4
dataset_path: masakhane/afrisenti
dataset_name: null
dataset_kwargs: {trust_remote_code: True}
output_type: multiple_choice
validation_split: validation
test_split: test
......
......@@ -3,7 +3,6 @@ tag:
- afrisenti_prompt_5
dataset_path: masakhane/afrisenti
dataset_name: null
dataset_kwargs: {trust_remote_code: True}
output_type: multiple_choice
validation_split: validation
test_split: test
......