Unverified commit 07f0bb69, authored by Stas Bekman, committed by GitHub
Browse files

[examples run_clm] fix _LazyModule hasher error (#11168)

* fix _LazyModule hasher error

* reword
parent c161dd56
...@@ -317,8 +317,10 @@ def main(): ...@@ -317,8 +317,10 @@ def main():
column_names = datasets["validation"].column_names column_names = datasets["validation"].column_names
text_column_name = "text" if "text" in column_names else column_names[0] text_column_name = "text" if "text" in column_names else column_names[0]
# since this will be pickled, to avoid the _LazyModule error in Hasher, force logger loading before tokenize_function
tok_logger = transformers.utils.logging.get_logger("transformers.tokenization_utils_base")
def tokenize_function(examples): def tokenize_function(examples):
tok_logger = transformers.utils.logging.get_logger("transformers.tokenization_utils_base")
with CaptureLogger(tok_logger) as cl: with CaptureLogger(tok_logger) as cl:
output = tokenizer(examples[text_column_name]) output = tokenizer(examples[text_column_name])
# clm input could be much much longer than block_size # clm input could be much much longer than block_size
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment