# Optional dataset identifier; stays None unless a value is supplied.
dataset_name: Optional[str] = field(
    default=None,
    metadata={
        "help": "Name of a dataset from the datasets package",
    },
)
dataset_config_name:Optional[str]=field(
default=None,metadata={"help":"The configuration name of the dataset to use (via the datasets library)."}
# Training dataset identifier(s). Per the help text, several dataset ids can be
# combined by joining them with a '+' symbol.
# The annotation is Optional[str] because the declared default is None.
# NOTE(review): the concatenated help text contains a double space before
# "librispeech"; it is reproduced byte-for-byte here.
train_dataset_name: Optional[str] = field(
    default=None,
    metadata={
        "help": (
            "The name of the training dataset to use (via the datasets library). Load and combine "
            "multiple datasets by separating dataset ids by a '+' symbol. For example, to load and combine "
            " librispeech and common voice, set `train_dataset_name='librispeech_asr+common_voice'`."
        ),
    },
)
train_file:Optional[str]=field(
default=None,metadata={"help":"A file containing the training audio paths and labels."}
# Configuration name(s) for the training dataset; None by default. Per the
# help text, several configs can be combined by joining them with '+'.
train_dataset_config_name: Optional[str] = field(
    default=None,
    metadata={
        "help": (
            "The configuration name of the training dataset to use (via the datasets library). "
            "Load and combine multiple datasets by separating dataset configs by a '+' symbol."
        ),
    },
)
eval_file:Optional[str]=field(
default=None,metadata={"help":"A file containing the validation audio paths and labels."}
# Sample count(s) for the training data, carried as a string; per the help
# text, several counts can be combined by joining them with '+'.
# The annotation is Optional[str] because the declared default is None.
train_dataset_samples: Optional[str] = field(
    default=None,
    metadata={
        "help": (
            "Number of samples in the training data. Load and combine "
            "multiple datasets by separating dataset samples by a '+' symbol."
        ),
    },
)
train_split_name:str=field(
default="train",
eval_dataset_name:str=field(
default=None,
metadata={
"help":"The name of the training dataset split to use (via the datasets library). Defaults to 'train'"
"help":"The name of the evaluation dataset to use (via the datasets library). Defaults to the training dataset name if unspecified."
},
)
eval_split_name:str=field(
default="validation",
eval_dataset_config_name:Optional[str]=field(
default=None,
metadata={
"help":(
"The name of the training data set split to use (via the datasets library). Defaults to 'validation'"
)
"help":"The configuration name of the evaluation dataset to use (via the datasets library). Defaults to the training dataset config name if unspecified"
},
)
# Dataset column that holds the audio data; defaults to "audio".
audio_column_name: str = field(
    default="audio",
    metadata={
        "help": "The name of the dataset column containing the audio data. Defaults to 'audio'",
    },
)
label_column_name:str=field(
default="label",metadata={"help":"The name of the dataset column containing the labels. Defaults to 'label'"}
# Label column used for the train set; defaults to "label".
train_label_column_name: str = field(
    default="label",
    metadata={
        "help": "The name of the dataset column containing the labels in the train set. Defaults to 'label'",
    },
)
# Label column used for the eval set; defaults to "label".
eval_label_column_name: str = field(
    default="label",
    metadata={
        "help": "The name of the dataset column containing the labels in the eval set. Defaults to 'label'",
    },
)
max_train_samples:Optional[int]=field(
default=None,
...
...
@@ -159,12 +174,6 @@ class ModelArguments:
)
},
)
# Deprecated option (see the help text, which points users to `token`).
# NOTE(review): annotated `bool` while the default is None; the annotation is
# left untouched because argument parsers may key their flag handling on it.
use_auth_token: bool = field(
    default=None,
    metadata={
        "help": (
            "The `use_auth_token` argument is deprecated and will be removed in v4.34. "
            "Please use `token` instead."
        ),
    },
)
trust_remote_code:bool=field(
default=False,
metadata={
...
...
@@ -175,29 +184,153 @@ class ModelArguments:
)
},
)
# Tri-state switch (None / True / False) for freezing the feature extractor
# layers; defaults to None.
freeze_feature_extractor: Optional[bool] = field(
    default=None,
    metadata={
        "help": "Whether to freeze the feature extractor layers of the model.",
    },
)
ignore_mismatched_sizes:bool=field(
default=False,
metadata={"help":"Will enable to load a pretrained model whose head dimensions are different."},