"vscode:/vscode.git/clone" did not exist on "ec7f8af1061658701acae1d3e14bc50f68d8c62c"
Unverified commit 07f0bb69, authored by Stas Bekman, committed by GitHub
Browse files

[examples run_clm] fix _LazyModule hasher error (#11168)

* fix _LazyModule hasher error

* reword
parent c161dd56
......@@ -317,8 +317,10 @@ def main():
column_names = datasets["validation"].column_names
text_column_name = "text" if "text" in column_names else column_names[0]
def tokenize_function(examples):
# Force the logger to load before tokenize_function is defined: the function will be pickled, and this avoids a _LazyModule error in Hasher.
tok_logger = transformers.utils.logging.get_logger("transformers.tokenization_utils_base")
def tokenize_function(examples):
with CaptureLogger(tok_logger) as cl:
output = tokenizer(examples[text_column_name])
# clm input could be much much longer than block_size
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment