Commit 924c624a (unverified), authored by zeyuyun1, committed by GitHub
Browse files

quick fix on concatenating text to support more datasets (#8474)

parent 17b1fd80
@@ -254,7 +254,7 @@ def main():
 tokenize_function,
 batched=True,
 num_proc=data_args.preprocessing_num_workers,
-remove_columns=[text_column_name],
+remove_columns=column_names,
 load_from_cache_file=not data_args.overwrite_cache,
 )
......
@@ -292,7 +292,7 @@ def main():
 tokenize_function,
 batched=True,
 num_proc=data_args.preprocessing_num_workers,
-remove_columns=[text_column_name],
+remove_columns=column_names,
 load_from_cache_file=not data_args.overwrite_cache,
 )
......
@@ -279,7 +279,7 @@ def main():
 tokenize_function,
 batched=True,
 num_proc=data_args.preprocessing_num_workers,
-remove_columns=[text_column_name],
+remove_columns=column_names,
 load_from_cache_file=not data_args.overwrite_cache,
 )
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment