"...ggml/git@developer.sourcefind.cn:OpenDAS/ollama.git" did not exist on "bc8909fb38525c89dda842d4ecfc86a933089a99"
Unverified commit 924c624a, authored by zeyuyun1 and committed by GitHub
Browse files

quick fix on concatenating text to support more datasets (#8474)

parent 17b1fd80
...@@ -254,7 +254,7 @@ def main(): ...@@ -254,7 +254,7 @@ def main():
tokenize_function, tokenize_function,
batched=True, batched=True,
num_proc=data_args.preprocessing_num_workers, num_proc=data_args.preprocessing_num_workers,
remove_columns=[text_column_name], remove_columns=column_names,
load_from_cache_file=not data_args.overwrite_cache, load_from_cache_file=not data_args.overwrite_cache,
) )
......
...@@ -292,7 +292,7 @@ def main(): ...@@ -292,7 +292,7 @@ def main():
tokenize_function, tokenize_function,
batched=True, batched=True,
num_proc=data_args.preprocessing_num_workers, num_proc=data_args.preprocessing_num_workers,
remove_columns=[text_column_name], remove_columns=column_names,
load_from_cache_file=not data_args.overwrite_cache, load_from_cache_file=not data_args.overwrite_cache,
) )
......
...@@ -279,7 +279,7 @@ def main(): ...@@ -279,7 +279,7 @@ def main():
tokenize_function, tokenize_function,
batched=True, batched=True,
num_proc=data_args.preprocessing_num_workers, num_proc=data_args.preprocessing_num_workers,
remove_columns=[text_column_name], remove_columns=column_names,
load_from_cache_file=not data_args.overwrite_cache, load_from_cache_file=not data_args.overwrite_cache,
) )
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment