"include/vscode:/vscode.git/clone" did not exist on "67f39ad17a16b9f1a7e6658b6325601905872953"
Commit b03a236d authored by sanchit-gandhi
Browse files

always filter by max len

parent b73698c1
......@@ -171,11 +171,11 @@ class DataTrainingArguments:
)
},
)
max_length_seconds: float = field(
max_length_seconds: Optional[float] = field(
default=20,
metadata={"help": "Audio samples will be randomly cut to this length during training if the value is set."},
)
min_length_seconds: float = field(
min_length_seconds: Optional[float] = field(
default=5,
metadata={"help": "Audio samples less than this value will be filtered during training if the value is set."},
)
......@@ -551,10 +551,11 @@ def main():
)
# filter training data with inputs < min_input_length
max_input_length = data_args.max_length_seconds * sampling_rate
min_input_length = data_args.min_length_seconds * sampling_rate
def is_audio_valid(audio):
return len(audio["array"]) > min_input_length
return max_input_length > len(audio["array"]) > min_input_length
raw_datasets = raw_datasets.filter(
is_audio_valid,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment