Unverified commit b74a9553, authored by Wonjae Kim, committed by GitHub
Browse files

fix `run_clm.py` seeking text column name twice (#16624)

parent 3663fca4
......@@ -347,10 +347,6 @@ def main():
column_names = raw_datasets["train"].column_names
text_column_name = "text" if "text" in column_names else column_names[0]
# First we tokenize all the texts.
column_names = raw_datasets["train"].column_names
text_column_name = "text" if "text" in column_names else column_names[0]
def tokenize_function(examples):
return tokenizer(examples[text_column_name])
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment