-    dataset_name: Optional[str] = field(default=None, metadata={"help": "Name of a dataset from the datasets package"})
-    dataset_config_name: Optional[str] = field(
-        default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."}
-    )
-    train_file: Optional[str] = field(
-        default=None, metadata={"help": "A file containing the training audio paths and labels."}
-    )
-    eval_file: Optional[str] = field(
-        default=None, metadata={"help": "A file containing the validation audio paths and labels."}
-    )
-    train_split_name: str = field(
-        default="train",
-        metadata={
-            "help": "The name of the training dataset split to use (via the datasets library). Defaults to 'train'"
-        },
-    )
-    eval_split_name: str = field(
-        default="validation",
-        metadata={
-            "help": (
-                "The name of the training data set split to use (via the datasets library). Defaults to 'validation'"
-            )
-        },
-    )
+    train_dataset_name: str = field(
+        default=None,
+        metadata={
+            "help": "The name of the training dataset to use (via the datasets library). Load and combine "
+            "multiple datasets by separating dataset ids by a '+' symbol. For example, to load and combine "
+            " librispeech and common voice, set `train_dataset_name='librispeech_asr+common_voice'`."
+        },
+    )
+    train_dataset_config_name: Optional[str] = field(
+        default=None,
+        metadata={
+            "help": "The configuration name of the training dataset to use (via the datasets library). Load and combine "
+            "multiple datasets by separating dataset configs by a '+' symbol."
+        },
+    )
+    train_dataset_samples: str = field(
+        default=None,
+        metadata={
+            "help": "Number of samples in the training data. Load and combine "
+            "multiple datasets by separating dataset samples by a '+' symbol."
+        },
+    )
+    eval_dataset_name: str = field(
+        default=None,
+        metadata={
+            "help": "The name of the evaluation dataset to use (via the datasets library). Defaults to the training dataset name if unspecified."
+        },
+    )
+    eval_dataset_config_name: Optional[str] = field(
+        default=None,
+        metadata={
+            "help": "The configuration name of the evaluation dataset to use (via the datasets library). Defaults to the training dataset config name if unspecified"
+        },
+    )
     audio_column_name: str = field(
         default="audio",
         metadata={"help": "The name of the dataset column containing the audio data. Defaults to 'audio'"},
     )
-    label_column_name: str = field(
-        default="label", metadata={"help": "The name of the dataset column containing the labels. Defaults to 'label'"}
-    )
+    train_label_column_name: str = field(
+        default="label",
+        metadata={
+            "help": "The name of the dataset column containing the labels in the train set. Defaults to 'label'"
+        },
+    )
+    eval_label_column_name: str = field(
+        default="label",
+        metadata={"help": "The name of the dataset column containing the labels in the eval set. Defaults to 'label'"},
+    )
     max_train_samples: Optional[int] = field(
         default=None,
...
@@ -159,12 +174,6 @@ class ModelArguments:
             )
         },
     )
-    use_auth_token: bool = field(
-        default=None,
-        metadata={
-            "help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token` instead."
-        },
-    )
     trust_remote_code: bool = field(
         default=False,
         metadata={
...
@@ -175,29 +184,153 @@ class ModelArguments:
             )
         },
     )
-    freeze_feature_extractor: Optional[bool] = field(
-        default=None, metadata={"help": "Whether to freeze the feature extractor layers of the model."}
-    )
     ignore_mismatched_sizes: bool = field(
         default=False,
         metadata={"help": "Will enable to load a pretrained model whose head dimensions are different."},
     )