Unverified Commit 149cb0cc authored by Yih-Dar's avatar Yih-Dar Committed by GitHub
Browse files

Add `token` argument in example scripts (#25172)



* fix

* fix

* fix

* fix

* fix

* fix

* fix

---------
Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
parent c6a8768d
...@@ -21,6 +21,7 @@ Fine-tuning XLNet for question answering with beam search using a slightly adapt ...@@ -21,6 +21,7 @@ Fine-tuning XLNet for question answering with beam search using a slightly adapt
import logging import logging
import os import os
import sys import sys
import warnings
from dataclasses import dataclass, field from dataclasses import dataclass, field
from typing import Optional from typing import Optional
...@@ -78,15 +79,21 @@ class ModelArguments: ...@@ -78,15 +79,21 @@ class ModelArguments:
default="main", default="main",
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
@dataclass @dataclass
...@@ -226,6 +233,12 @@ def main(): ...@@ -226,6 +233,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_qa_beam_search", model_args, data_args) send_example_telemetry("run_qa_beam_search", model_args, data_args)
...@@ -288,7 +301,7 @@ def main(): ...@@ -288,7 +301,7 @@ def main():
data_args.dataset_name, data_args.dataset_name,
data_args.dataset_config_name, data_args.dataset_config_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
data_files = {} data_files = {}
...@@ -306,7 +319,7 @@ def main(): ...@@ -306,7 +319,7 @@ def main():
data_files=data_files, data_files=data_files,
field="data", field="data",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
# https://huggingface.co/docs/datasets/loading_datasets.html. # https://huggingface.co/docs/datasets/loading_datasets.html.
...@@ -320,13 +333,13 @@ def main(): ...@@ -320,13 +333,13 @@ def main():
model_args.config_name if model_args.config_name else model_args.model_name_or_path, model_args.config_name if model_args.config_name else model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
tokenizer = XLNetTokenizerFast.from_pretrained( tokenizer = XLNetTokenizerFast.from_pretrained(
model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
model = XLNetForQuestionAnswering.from_pretrained( model = XLNetForQuestionAnswering.from_pretrained(
model_args.model_name_or_path, model_args.model_name_or_path,
...@@ -334,7 +347,7 @@ def main(): ...@@ -334,7 +347,7 @@ def main():
config=config, config=config,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# Preprocessing the datasets. # Preprocessing the datasets.
......
...@@ -21,6 +21,7 @@ Fine-tuning the library's seq2seq models for question answering using the 🤗 S ...@@ -21,6 +21,7 @@ Fine-tuning the library's seq2seq models for question answering using the 🤗 S
import logging import logging
import os import os
import sys import sys
import warnings
from dataclasses import dataclass, field from dataclasses import dataclass, field
from typing import List, Optional, Tuple from typing import List, Optional, Tuple
...@@ -80,15 +81,21 @@ class ModelArguments: ...@@ -80,15 +81,21 @@ class ModelArguments:
default="main", default="main",
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
@dataclass @dataclass
...@@ -273,6 +280,12 @@ def main(): ...@@ -273,6 +280,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_seq2seq_qa", model_args, data_args) send_example_telemetry("run_seq2seq_qa", model_args, data_args)
...@@ -335,7 +348,7 @@ def main(): ...@@ -335,7 +348,7 @@ def main():
data_args.dataset_name, data_args.dataset_name,
data_args.dataset_config_name, data_args.dataset_config_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
data_files = {} data_files = {}
...@@ -353,7 +366,7 @@ def main(): ...@@ -353,7 +366,7 @@ def main():
data_files=data_files, data_files=data_files,
field="data", field="data",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
# https://huggingface.co/docs/datasets/loading_datasets.html. # https://huggingface.co/docs/datasets/loading_datasets.html.
...@@ -367,14 +380,14 @@ def main(): ...@@ -367,14 +380,14 @@ def main():
model_args.config_name if model_args.config_name else model_args.model_name_or_path, model_args.config_name if model_args.config_name else model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
tokenizer = AutoTokenizer.from_pretrained( tokenizer = AutoTokenizer.from_pretrained(
model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_fast=model_args.use_fast_tokenizer, use_fast=model_args.use_fast_tokenizer,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
model = AutoModelForSeq2SeqLM.from_pretrained( model = AutoModelForSeq2SeqLM.from_pretrained(
model_args.model_name_or_path, model_args.model_name_or_path,
...@@ -382,7 +395,7 @@ def main(): ...@@ -382,7 +395,7 @@ def main():
config=config, config=config,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# We resize the embeddings only when necessary to avoid index errors. If you are creating a model from scratch # We resize the embeddings only when necessary to avoid index errors. If you are creating a model from scratch
......
...@@ -18,6 +18,7 @@ import logging ...@@ -18,6 +18,7 @@ import logging
import os import os
import random import random
import sys import sys
import warnings
from dataclasses import dataclass, field from dataclasses import dataclass, field
from typing import Optional from typing import Optional
...@@ -241,15 +242,21 @@ class ModelArguments: ...@@ -241,15 +242,21 @@ class ModelArguments:
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
) )
image_processor_name: str = field(default=None, metadata={"help": "Name or path of preprocessor config."}) image_processor_name: str = field(default=None, metadata={"help": "Name or path of preprocessor config."})
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
def main(): def main():
...@@ -265,6 +272,12 @@ def main(): ...@@ -265,6 +272,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_semantic_segmentation", model_args, data_args) send_example_telemetry("run_semantic_segmentation", model_args, data_args)
...@@ -379,7 +392,7 @@ def main(): ...@@ -379,7 +392,7 @@ def main():
id2label=id2label, id2label=id2label,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
model = AutoModelForSemanticSegmentation.from_pretrained( model = AutoModelForSemanticSegmentation.from_pretrained(
model_args.model_name_or_path, model_args.model_name_or_path,
...@@ -387,13 +400,13 @@ def main(): ...@@ -387,13 +400,13 @@ def main():
config=config, config=config,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
image_processor = AutoImageProcessor.from_pretrained( image_processor = AutoImageProcessor.from_pretrained(
model_args.image_processor_name or model_args.model_name_or_path, model_args.image_processor_name or model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# Define torchvision transforms to be applied to each image + target. # Define torchvision transforms to be applied to each image + target.
......
...@@ -229,15 +229,21 @@ class DataTrainingArguments: ...@@ -229,15 +229,21 @@ class DataTrainingArguments:
) )
}, },
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"If :obj:`True`, will use the token generated when running" "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
":obj:`huggingface-cli login` as HTTP bearer authorization for remote files." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
unk_token: str = field( unk_token: str = field(
default="[UNK]", default="[UNK]",
metadata={"help": "The unk token for the tokenizer"}, metadata={"help": "The unk token for the tokenizer"},
...@@ -379,6 +385,12 @@ def main(): ...@@ -379,6 +385,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if data_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if data_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
data_args.token = data_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_speech_recognition_ctc", model_args, data_args) send_example_telemetry("run_speech_recognition_ctc", model_args, data_args)
...@@ -427,7 +439,7 @@ def main(): ...@@ -427,7 +439,7 @@ def main():
data_args.dataset_name, data_args.dataset_name,
data_args.dataset_config_name, data_args.dataset_config_name,
split=data_args.train_split_name, split=data_args.train_split_name,
use_auth_token=data_args.use_auth_token, token=data_args.token,
) )
if data_args.audio_column_name not in raw_datasets["train"].column_names: if data_args.audio_column_name not in raw_datasets["train"].column_names:
...@@ -452,7 +464,7 @@ def main(): ...@@ -452,7 +464,7 @@ def main():
data_args.dataset_name, data_args.dataset_name,
data_args.dataset_config_name, data_args.dataset_config_name,
split=data_args.eval_split_name, split=data_args.eval_split_name,
use_auth_token=data_args.use_auth_token, token=data_args.token,
) )
if data_args.max_eval_samples is not None: if data_args.max_eval_samples is not None:
...@@ -490,7 +502,9 @@ def main(): ...@@ -490,7 +502,9 @@ def main():
# the tokenizer # the tokenizer
# load config # load config
config = AutoConfig.from_pretrained( config = AutoConfig.from_pretrained(
model_args.model_name_or_path, cache_dir=model_args.cache_dir, use_auth_token=data_args.use_auth_token model_args.model_name_or_path,
cache_dir=model_args.cache_dir,
token=data_args.token,
) )
# 4. Next, if no tokenizer file is defined, # 4. Next, if no tokenizer file is defined,
...@@ -546,11 +560,13 @@ def main(): ...@@ -546,11 +560,13 @@ def main():
# load feature_extractor and tokenizer # load feature_extractor and tokenizer
tokenizer = AutoTokenizer.from_pretrained( tokenizer = AutoTokenizer.from_pretrained(
tokenizer_name_or_path, tokenizer_name_or_path,
use_auth_token=data_args.use_auth_token, token=data_args.token,
**tokenizer_kwargs, **tokenizer_kwargs,
) )
feature_extractor = AutoFeatureExtractor.from_pretrained( feature_extractor = AutoFeatureExtractor.from_pretrained(
model_args.model_name_or_path, cache_dir=model_args.cache_dir, use_auth_token=data_args.use_auth_token model_args.model_name_or_path,
cache_dir=model_args.cache_dir,
token=data_args.token,
) )
# adapt config # adapt config
...@@ -578,7 +594,7 @@ def main(): ...@@ -578,7 +594,7 @@ def main():
model_args.model_name_or_path, model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
config=config, config=config,
use_auth_token=data_args.use_auth_token, token=data_args.token,
) )
# freeze encoder # freeze encoder
......
...@@ -232,15 +232,21 @@ class DataTrainingArguments: ...@@ -232,15 +232,21 @@ class DataTrainingArguments:
) )
}, },
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"If :obj:`True`, will use the token generated when running" "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
":obj:`huggingface-cli login` as HTTP bearer authorization for remote files." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
unk_token: str = field( unk_token: str = field(
default="[UNK]", default="[UNK]",
metadata={"help": "The unk token for the tokenizer"}, metadata={"help": "The unk token for the tokenizer"},
...@@ -375,6 +381,12 @@ def main(): ...@@ -375,6 +381,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if data_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if data_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
data_args.token = data_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_speech_recognition_ctc_adapter", model_args, data_args) send_example_telemetry("run_speech_recognition_ctc_adapter", model_args, data_args)
...@@ -423,7 +435,7 @@ def main(): ...@@ -423,7 +435,7 @@ def main():
data_args.dataset_name, data_args.dataset_name,
data_args.dataset_config_name, data_args.dataset_config_name,
split=data_args.train_split_name, split=data_args.train_split_name,
use_auth_token=data_args.use_auth_token, token=data_args.token,
) )
if data_args.audio_column_name not in raw_datasets["train"].column_names: if data_args.audio_column_name not in raw_datasets["train"].column_names:
...@@ -448,7 +460,7 @@ def main(): ...@@ -448,7 +460,7 @@ def main():
data_args.dataset_name, data_args.dataset_name,
data_args.dataset_config_name, data_args.dataset_config_name,
split=data_args.eval_split_name, split=data_args.eval_split_name,
use_auth_token=data_args.use_auth_token, token=data_args.token,
) )
if data_args.max_eval_samples is not None: if data_args.max_eval_samples is not None:
...@@ -486,7 +498,9 @@ def main(): ...@@ -486,7 +498,9 @@ def main():
# the tokenizer # the tokenizer
# load config # load config
config = AutoConfig.from_pretrained( config = AutoConfig.from_pretrained(
model_args.model_name_or_path, cache_dir=model_args.cache_dir, use_auth_token=data_args.use_auth_token model_args.model_name_or_path,
cache_dir=model_args.cache_dir,
token=data_args.token,
) )
# 4. Next, if no tokenizer file is defined, # 4. Next, if no tokenizer file is defined,
...@@ -500,7 +514,10 @@ def main(): ...@@ -500,7 +514,10 @@ def main():
vocab_dict = {} vocab_dict = {}
if tokenizer_name_or_path is not None: if tokenizer_name_or_path is not None:
# load vocabulary of other adapter languages so that new language can be appended # load vocabulary of other adapter languages so that new language can be appended
tokenizer = AutoTokenizer.from_pretrained(tokenizer_name_or_path, use_auth_token=data_args.use_auth_token) tokenizer = AutoTokenizer.from_pretrained(
tokenizer_name_or_path,
token=data_args.token,
)
vocab_dict = tokenizer.vocab.copy() vocab_dict = tokenizer.vocab.copy()
if tokenizer.target_lang is None: if tokenizer.target_lang is None:
raise ValueError("Make sure to load a multi-lingual tokenizer with a set target language.") raise ValueError("Make sure to load a multi-lingual tokenizer with a set target language.")
...@@ -566,11 +583,13 @@ def main(): ...@@ -566,11 +583,13 @@ def main():
# load feature_extractor and tokenizer # load feature_extractor and tokenizer
tokenizer = AutoTokenizer.from_pretrained( tokenizer = AutoTokenizer.from_pretrained(
tokenizer_name_or_path, tokenizer_name_or_path,
use_auth_token=data_args.use_auth_token, token=data_args.token,
**tokenizer_kwargs, **tokenizer_kwargs,
) )
feature_extractor = AutoFeatureExtractor.from_pretrained( feature_extractor = AutoFeatureExtractor.from_pretrained(
model_args.model_name_or_path, cache_dir=model_args.cache_dir, use_auth_token=data_args.use_auth_token model_args.model_name_or_path,
cache_dir=model_args.cache_dir,
token=data_args.token,
) )
# adapt config # adapt config
...@@ -595,7 +614,7 @@ def main(): ...@@ -595,7 +614,7 @@ def main():
model_args.model_name_or_path, model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
config=config, config=config,
use_auth_token=data_args.use_auth_token, token=data_args.token,
ignore_mismatched_sizes=True, ignore_mismatched_sizes=True,
) )
......
...@@ -22,6 +22,7 @@ Fine-tuning the library models for sequence to sequence speech recognition. ...@@ -22,6 +22,7 @@ Fine-tuning the library models for sequence to sequence speech recognition.
import logging import logging
import os import os
import sys import sys
import warnings
from dataclasses import dataclass, field from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Union from typing import Any, Dict, List, Optional, Union
...@@ -85,15 +86,21 @@ class ModelArguments: ...@@ -85,15 +86,21 @@ class ModelArguments:
default="main", default="main",
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
freeze_feature_encoder: bool = field( freeze_feature_encoder: bool = field(
default=True, metadata={"help": "Whether to freeze the feature encoder layers of the model."} default=True, metadata={"help": "Whether to freeze the feature encoder layers of the model."}
) )
...@@ -278,6 +285,12 @@ def main(): ...@@ -278,6 +285,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_speech_recognition_seq2seq", model_args, data_args) send_example_telemetry("run_speech_recognition_seq2seq", model_args, data_args)
...@@ -336,7 +349,7 @@ def main(): ...@@ -336,7 +349,7 @@ def main():
data_args.dataset_config_name, data_args.dataset_config_name,
split=data_args.train_split_name, split=data_args.train_split_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
if training_args.do_eval: if training_args.do_eval:
...@@ -345,7 +358,7 @@ def main(): ...@@ -345,7 +358,7 @@ def main():
data_args.dataset_config_name, data_args.dataset_config_name,
split=data_args.eval_split_name, split=data_args.eval_split_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
if data_args.audio_column_name not in next(iter(raw_datasets.values())).column_names: if data_args.audio_column_name not in next(iter(raw_datasets.values())).column_names:
...@@ -370,7 +383,7 @@ def main(): ...@@ -370,7 +383,7 @@ def main():
model_args.config_name if model_args.config_name else model_args.model_name_or_path, model_args.config_name if model_args.config_name else model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
config.update({"forced_decoder_ids": model_args.forced_decoder_ids, "suppress_tokens": model_args.suppress_tokens}) config.update({"forced_decoder_ids": model_args.forced_decoder_ids, "suppress_tokens": model_args.suppress_tokens})
...@@ -383,21 +396,21 @@ def main(): ...@@ -383,21 +396,21 @@ def main():
model_args.feature_extractor_name if model_args.feature_extractor_name else model_args.model_name_or_path, model_args.feature_extractor_name if model_args.feature_extractor_name else model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
tokenizer = AutoTokenizer.from_pretrained( tokenizer = AutoTokenizer.from_pretrained(
model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_fast=model_args.use_fast_tokenizer, use_fast=model_args.use_fast_tokenizer,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
model = AutoModelForSpeechSeq2Seq.from_pretrained( model = AutoModelForSpeechSeq2Seq.from_pretrained(
model_args.model_name_or_path, model_args.model_name_or_path,
config=config, config=config,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
if model.config.decoder_start_token_id is None: if model.config.decoder_start_token_id is None:
......
...@@ -21,6 +21,7 @@ Fine-tuning the library models for sequence to sequence. ...@@ -21,6 +21,7 @@ Fine-tuning the library models for sequence to sequence.
import logging import logging
import os import os
import sys import sys
import warnings
from dataclasses import dataclass, field from dataclasses import dataclass, field
from typing import Optional from typing import Optional
...@@ -99,15 +100,21 @@ class ModelArguments: ...@@ -99,15 +100,21 @@ class ModelArguments:
default="main", default="main",
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
resize_position_embeddings: Optional[bool] = field( resize_position_embeddings: Optional[bool] = field(
default=None, default=None,
metadata={ metadata={
...@@ -312,6 +319,12 @@ def main(): ...@@ -312,6 +319,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_summarization", model_args, data_args) send_example_telemetry("run_summarization", model_args, data_args)
...@@ -386,7 +399,7 @@ def main(): ...@@ -386,7 +399,7 @@ def main():
data_args.dataset_name, data_args.dataset_name,
data_args.dataset_config_name, data_args.dataset_config_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
data_files = {} data_files = {}
...@@ -403,7 +416,7 @@ def main(): ...@@ -403,7 +416,7 @@ def main():
extension, extension,
data_files=data_files, data_files=data_files,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
# https://huggingface.co/docs/datasets/loading_datasets.html. # https://huggingface.co/docs/datasets/loading_datasets.html.
...@@ -417,14 +430,14 @@ def main(): ...@@ -417,14 +430,14 @@ def main():
model_args.config_name if model_args.config_name else model_args.model_name_or_path, model_args.config_name if model_args.config_name else model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
tokenizer = AutoTokenizer.from_pretrained( tokenizer = AutoTokenizer.from_pretrained(
model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_fast=model_args.use_fast_tokenizer, use_fast=model_args.use_fast_tokenizer,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
model = AutoModelForSeq2SeqLM.from_pretrained( model = AutoModelForSeq2SeqLM.from_pretrained(
model_args.model_name_or_path, model_args.model_name_or_path,
...@@ -432,7 +445,7 @@ def main(): ...@@ -432,7 +445,7 @@ def main():
config=config, config=config,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# We resize the embeddings only when necessary to avoid index errors. If you are creating a model from scratch # We resize the embeddings only when necessary to avoid index errors. If you are creating a model from scratch
......
...@@ -20,6 +20,7 @@ import logging ...@@ -20,6 +20,7 @@ import logging
import os import os
import random import random
import sys import sys
import warnings
from dataclasses import dataclass, field from dataclasses import dataclass, field
from typing import List, Optional from typing import List, Optional
...@@ -227,15 +228,21 @@ class ModelArguments: ...@@ -227,15 +228,21 @@ class ModelArguments:
default="main", default="main",
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
ignore_mismatched_sizes: bool = field( ignore_mismatched_sizes: bool = field(
default=False, default=False,
metadata={"help": "Will enable to load a pretrained model whose head dimensions are different."}, metadata={"help": "Will enable to load a pretrained model whose head dimensions are different."},
...@@ -268,6 +275,12 @@ def main(): ...@@ -268,6 +275,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_classification", model_args, data_args) send_example_telemetry("run_classification", model_args, data_args)
...@@ -327,7 +340,7 @@ def main(): ...@@ -327,7 +340,7 @@ def main():
data_args.dataset_name, data_args.dataset_name,
data_args.dataset_config_name, data_args.dataset_config_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# Try print some info about the dataset # Try print some info about the dataset
logger.info(f"Dataset loaded: {raw_datasets}") logger.info(f"Dataset loaded: {raw_datasets}")
...@@ -358,7 +371,7 @@ def main(): ...@@ -358,7 +371,7 @@ def main():
"csv", "csv",
data_files=data_files, data_files=data_files,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
# Loading a dataset from local json files # Loading a dataset from local json files
...@@ -366,7 +379,7 @@ def main(): ...@@ -366,7 +379,7 @@ def main():
"json", "json",
data_files=data_files, data_files=data_files,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# See more about loading any type of standard or custom dataset at # See more about loading any type of standard or custom dataset at
...@@ -468,7 +481,7 @@ def main(): ...@@ -468,7 +481,7 @@ def main():
finetuning_task="text-classification", finetuning_task="text-classification",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
if is_regression: if is_regression:
...@@ -486,7 +499,7 @@ def main(): ...@@ -486,7 +499,7 @@ def main():
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_fast=model_args.use_fast_tokenizer, use_fast=model_args.use_fast_tokenizer,
revision=model_args.model_revision, revision=model_args.model_revision,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
model = AutoModelForSequenceClassification.from_pretrained( model = AutoModelForSequenceClassification.from_pretrained(
model_args.model_name_or_path, model_args.model_name_or_path,
...@@ -494,7 +507,7 @@ def main(): ...@@ -494,7 +507,7 @@ def main():
config=config, config=config,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
ignore_mismatched_sizes=model_args.ignore_mismatched_sizes, ignore_mismatched_sizes=model_args.ignore_mismatched_sizes,
) )
......
...@@ -20,6 +20,7 @@ import logging ...@@ -20,6 +20,7 @@ import logging
import os import os
import random import random
import sys import sys
import warnings
from dataclasses import dataclass, field from dataclasses import dataclass, field
from typing import Optional from typing import Optional
...@@ -188,15 +189,21 @@ class ModelArguments: ...@@ -188,15 +189,21 @@ class ModelArguments:
default="main", default="main",
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
ignore_mismatched_sizes: bool = field( ignore_mismatched_sizes: bool = field(
default=False, default=False,
metadata={"help": "Will enable to load a pretrained model whose head dimensions are different."}, metadata={"help": "Will enable to load a pretrained model whose head dimensions are different."},
...@@ -216,6 +223,12 @@ def main(): ...@@ -216,6 +223,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_glue", model_args, data_args) send_example_telemetry("run_glue", model_args, data_args)
...@@ -281,7 +294,7 @@ def main(): ...@@ -281,7 +294,7 @@ def main():
"glue", "glue",
data_args.task_name, data_args.task_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
elif data_args.dataset_name is not None: elif data_args.dataset_name is not None:
# Downloading and loading a dataset from the hub. # Downloading and loading a dataset from the hub.
...@@ -289,7 +302,7 @@ def main(): ...@@ -289,7 +302,7 @@ def main():
data_args.dataset_name, data_args.dataset_name,
data_args.dataset_config_name, data_args.dataset_config_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
# Loading a dataset from your local files. # Loading a dataset from your local files.
...@@ -318,7 +331,7 @@ def main(): ...@@ -318,7 +331,7 @@ def main():
"csv", "csv",
data_files=data_files, data_files=data_files,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
# Loading a dataset from local json files # Loading a dataset from local json files
...@@ -326,7 +339,7 @@ def main(): ...@@ -326,7 +339,7 @@ def main():
"json", "json",
data_files=data_files, data_files=data_files,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# See more about loading any type of standard or custom dataset at # See more about loading any type of standard or custom dataset at
# https://huggingface.co/docs/datasets/loading_datasets.html. # https://huggingface.co/docs/datasets/loading_datasets.html.
...@@ -361,14 +374,14 @@ def main(): ...@@ -361,14 +374,14 @@ def main():
finetuning_task=data_args.task_name, finetuning_task=data_args.task_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
tokenizer = AutoTokenizer.from_pretrained( tokenizer = AutoTokenizer.from_pretrained(
model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_fast=model_args.use_fast_tokenizer, use_fast=model_args.use_fast_tokenizer,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
model = AutoModelForSequenceClassification.from_pretrained( model = AutoModelForSequenceClassification.from_pretrained(
model_args.model_name_or_path, model_args.model_name_or_path,
...@@ -376,7 +389,7 @@ def main(): ...@@ -376,7 +389,7 @@ def main():
config=config, config=config,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
ignore_mismatched_sizes=model_args.ignore_mismatched_sizes, ignore_mismatched_sizes=model_args.ignore_mismatched_sizes,
) )
......
...@@ -21,6 +21,7 @@ import logging ...@@ -21,6 +21,7 @@ import logging
import os import os
import random import random
import sys import sys
import warnings
from dataclasses import dataclass, field from dataclasses import dataclass, field
from typing import Optional from typing import Optional
...@@ -152,15 +153,21 @@ class ModelArguments: ...@@ -152,15 +153,21 @@ class ModelArguments:
default="main", default="main",
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
ignore_mismatched_sizes: bool = field( ignore_mismatched_sizes: bool = field(
default=False, default=False,
metadata={"help": "Will enable to load a pretrained model whose head dimensions are different."}, metadata={"help": "Will enable to load a pretrained model whose head dimensions are different."},
...@@ -175,6 +182,12 @@ def main(): ...@@ -175,6 +182,12 @@ def main():
parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments)) parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments))
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_xnli", model_args) send_example_telemetry("run_xnli", model_args)
...@@ -232,7 +245,7 @@ def main(): ...@@ -232,7 +245,7 @@ def main():
model_args.language, model_args.language,
split="train", split="train",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
train_dataset = load_dataset( train_dataset = load_dataset(
...@@ -240,7 +253,7 @@ def main(): ...@@ -240,7 +253,7 @@ def main():
model_args.train_language, model_args.train_language,
split="train", split="train",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
label_list = train_dataset.features["label"].names label_list = train_dataset.features["label"].names
...@@ -250,7 +263,7 @@ def main(): ...@@ -250,7 +263,7 @@ def main():
model_args.language, model_args.language,
split="validation", split="validation",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
label_list = eval_dataset.features["label"].names label_list = eval_dataset.features["label"].names
...@@ -260,7 +273,7 @@ def main(): ...@@ -260,7 +273,7 @@ def main():
model_args.language, model_args.language,
split="test", split="test",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
label_list = predict_dataset.features["label"].names label_list = predict_dataset.features["label"].names
...@@ -278,7 +291,7 @@ def main(): ...@@ -278,7 +291,7 @@ def main():
finetuning_task="xnli", finetuning_task="xnli",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
tokenizer = AutoTokenizer.from_pretrained( tokenizer = AutoTokenizer.from_pretrained(
model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
...@@ -286,7 +299,7 @@ def main(): ...@@ -286,7 +299,7 @@ def main():
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_fast=model_args.use_fast_tokenizer, use_fast=model_args.use_fast_tokenizer,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
model = AutoModelForSequenceClassification.from_pretrained( model = AutoModelForSequenceClassification.from_pretrained(
model_args.model_name_or_path, model_args.model_name_or_path,
...@@ -294,7 +307,7 @@ def main(): ...@@ -294,7 +307,7 @@ def main():
config=config, config=config,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
ignore_mismatched_sizes=model_args.ignore_mismatched_sizes, ignore_mismatched_sizes=model_args.ignore_mismatched_sizes,
) )
......
...@@ -22,6 +22,7 @@ Fine-tuning the library models for token classification. ...@@ -22,6 +22,7 @@ Fine-tuning the library models for token classification.
import logging import logging
import os import os
import sys import sys
import warnings
from dataclasses import dataclass, field from dataclasses import dataclass, field
from typing import Optional from typing import Optional
...@@ -79,15 +80,21 @@ class ModelArguments: ...@@ -79,15 +80,21 @@ class ModelArguments:
default="main", default="main",
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
ignore_mismatched_sizes: bool = field( ignore_mismatched_sizes: bool = field(
default=False, default=False,
metadata={"help": "Will enable to load a pretrained model whose head dimensions are different."}, metadata={"help": "Will enable to load a pretrained model whose head dimensions are different."},
...@@ -217,6 +224,12 @@ def main(): ...@@ -217,6 +224,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_ner", model_args, data_args) send_example_telemetry("run_ner", model_args, data_args)
...@@ -279,7 +292,7 @@ def main(): ...@@ -279,7 +292,7 @@ def main():
data_args.dataset_name, data_args.dataset_name,
data_args.dataset_config_name, data_args.dataset_config_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
data_files = {} data_files = {}
...@@ -348,7 +361,7 @@ def main(): ...@@ -348,7 +361,7 @@ def main():
finetuning_task=data_args.task_name, finetuning_task=data_args.task_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
tokenizer_name_or_path = model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path tokenizer_name_or_path = model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path
...@@ -358,7 +371,7 @@ def main(): ...@@ -358,7 +371,7 @@ def main():
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_fast=True, use_fast=True,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
add_prefix_space=True, add_prefix_space=True,
) )
else: else:
...@@ -367,7 +380,7 @@ def main(): ...@@ -367,7 +380,7 @@ def main():
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_fast=True, use_fast=True,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
model = AutoModelForTokenClassification.from_pretrained( model = AutoModelForTokenClassification.from_pretrained(
...@@ -376,7 +389,7 @@ def main(): ...@@ -376,7 +389,7 @@ def main():
config=config, config=config,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
ignore_mismatched_sizes=model_args.ignore_mismatched_sizes, ignore_mismatched_sizes=model_args.ignore_mismatched_sizes,
) )
......
...@@ -21,6 +21,7 @@ Fine-tuning the library models for sequence to sequence. ...@@ -21,6 +21,7 @@ Fine-tuning the library models for sequence to sequence.
import logging import logging
import os import os
import sys import sys
import warnings
from dataclasses import dataclass, field from dataclasses import dataclass, field
from typing import Optional from typing import Optional
...@@ -89,15 +90,21 @@ class ModelArguments: ...@@ -89,15 +90,21 @@ class ModelArguments:
default="main", default="main",
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
@dataclass @dataclass
...@@ -261,6 +268,12 @@ def main(): ...@@ -261,6 +268,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_translation", model_args, data_args) send_example_telemetry("run_translation", model_args, data_args)
...@@ -335,7 +348,7 @@ def main(): ...@@ -335,7 +348,7 @@ def main():
data_args.dataset_name, data_args.dataset_name,
data_args.dataset_config_name, data_args.dataset_config_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
data_files = {} data_files = {}
...@@ -352,7 +365,7 @@ def main(): ...@@ -352,7 +365,7 @@ def main():
extension, extension,
data_files=data_files, data_files=data_files,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
# https://huggingface.co/docs/datasets/loading_datasets.html. # https://huggingface.co/docs/datasets/loading_datasets.html.
...@@ -366,14 +379,14 @@ def main(): ...@@ -366,14 +379,14 @@ def main():
model_args.config_name if model_args.config_name else model_args.model_name_or_path, model_args.config_name if model_args.config_name else model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
tokenizer = AutoTokenizer.from_pretrained( tokenizer = AutoTokenizer.from_pretrained(
model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_fast=model_args.use_fast_tokenizer, use_fast=model_args.use_fast_tokenizer,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
model = AutoModelForSeq2SeqLM.from_pretrained( model = AutoModelForSeq2SeqLM.from_pretrained(
model_args.model_name_or_path, model_args.model_name_or_path,
...@@ -381,7 +394,7 @@ def main(): ...@@ -381,7 +394,7 @@ def main():
config=config, config=config,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# We resize the embeddings only when necessary to avoid index errors. If you are creating a model from scratch # We resize the embeddings only when necessary to avoid index errors. If you are creating a model from scratch
......
...@@ -26,6 +26,7 @@ Text models: BERT, ROBERTa (https://huggingface.co/models?filter=fill-mask) ...@@ -26,6 +26,7 @@ Text models: BERT, ROBERTa (https://huggingface.co/models?filter=fill-mask)
import logging import logging
import os import os
import sys import sys
import warnings
from dataclasses import dataclass, field from dataclasses import dataclass, field
from typing import Optional from typing import Optional
...@@ -92,15 +93,21 @@ class ModelArguments: ...@@ -92,15 +93,21 @@ class ModelArguments:
default=True, default=True,
metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."}, metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."},
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
freeze_vision_model: bool = field( freeze_vision_model: bool = field(
default=False, metadata={"help": "Whether to freeze the vision model parameters or not."} default=False, metadata={"help": "Whether to freeze the vision model parameters or not."}
) )
...@@ -245,6 +252,12 @@ def main(): ...@@ -245,6 +252,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
if model_args.model_name_or_path is not None: if model_args.model_name_or_path is not None:
if model_args.vision_model_name_or_path is not None or model_args.text_model_name_or_path is not None: if model_args.vision_model_name_or_path is not None or model_args.text_model_name_or_path is not None:
raise ValueError( raise ValueError(
...@@ -315,7 +328,7 @@ def main(): ...@@ -315,7 +328,7 @@ def main():
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
keep_in_memory=False, keep_in_memory=False,
data_dir=data_args.data_dir, data_dir=data_args.data_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
data_files = {} data_files = {}
...@@ -332,7 +345,7 @@ def main(): ...@@ -332,7 +345,7 @@ def main():
extension, extension,
data_files=data_files, data_files=data_files,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
# https://huggingface.co/docs/datasets/loading_datasets.html. # https://huggingface.co/docs/datasets/loading_datasets.html.
...@@ -362,14 +375,14 @@ def main(): ...@@ -362,14 +375,14 @@ def main():
model_args.image_processor_name or model_args.model_name_or_path, model_args.image_processor_name or model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
with training_args.strategy.scope(): with training_args.strategy.scope():
model = TFAutoModel.from_pretrained( model = TFAutoModel.from_pretrained(
model_args.model_name_or_path, model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
# Load image_processor, in this script we only use this to get the mean and std for normalization. # Load image_processor, in this script we only use this to get the mean and std for normalization.
...@@ -377,14 +390,14 @@ def main(): ...@@ -377,14 +390,14 @@ def main():
model_args.image_processor_name or model_args.vision_model_name_or_path, model_args.image_processor_name or model_args.vision_model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
with training_args.strategy.scope(): with training_args.strategy.scope():
model = TFVisionTextDualEncoderModel.from_vision_text_pretrained( model = TFVisionTextDualEncoderModel.from_vision_text_pretrained(
vision_model_name_or_path=model_args.vision_model_name_or_path, vision_model_name_or_path=model_args.vision_model_name_or_path,
text_model_name_or_path=model_args.text_model_name_or_path, text_model_name_or_path=model_args.text_model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
config = model.config config = model.config
......
...@@ -23,6 +23,7 @@ import json ...@@ -23,6 +23,7 @@ import json
import logging import logging
import os import os
import sys import sys
import warnings
from dataclasses import dataclass, field from dataclasses import dataclass, field
from typing import Optional from typing import Optional
...@@ -157,15 +158,21 @@ class ModelArguments: ...@@ -157,15 +158,21 @@ class ModelArguments:
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
) )
image_processor_name: str = field(default=None, metadata={"help": "Name or path of preprocessor config."}) image_processor_name: str = field(default=None, metadata={"help": "Name or path of preprocessor config."})
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
ignore_mismatched_sizes: bool = field( ignore_mismatched_sizes: bool = field(
default=False, default=False,
metadata={"help": "Will enable to load a pretrained model whose head dimensions are different."}, metadata={"help": "Will enable to load a pretrained model whose head dimensions are different."},
...@@ -226,6 +233,12 @@ def main(): ...@@ -226,6 +233,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
if not (training_args.do_train or training_args.do_eval or training_args.do_predict): if not (training_args.do_train or training_args.do_eval or training_args.do_predict):
exit("Must specify at least one of --do_train, --do_eval or --do_predict!") exit("Must specify at least one of --do_train, --do_eval or --do_predict!")
...@@ -275,7 +288,7 @@ def main(): ...@@ -275,7 +288,7 @@ def main():
data_args.dataset_config_name, data_args.dataset_config_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
task="image-classification", task="image-classification",
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
data_files = {} data_files = {}
...@@ -309,13 +322,13 @@ def main(): ...@@ -309,13 +322,13 @@ def main():
finetuning_task="image-classification", finetuning_task="image-classification",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
image_processor = AutoImageProcessor.from_pretrained( image_processor = AutoImageProcessor.from_pretrained(
model_args.image_processor_name or model_args.model_name_or_path, model_args.image_processor_name or model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# If we don't have a validation split, split off a percentage of train as validation. # If we don't have a validation split, split off a percentage of train as validation.
...@@ -435,7 +448,7 @@ def main(): ...@@ -435,7 +448,7 @@ def main():
from_pt=bool(".bin" in model_path), from_pt=bool(".bin" in model_path),
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
ignore_mismatched_sizes=model_args.ignore_mismatched_sizes, ignore_mismatched_sizes=model_args.ignore_mismatched_sizes,
) )
num_replicas = training_args.strategy.num_replicas_in_sync num_replicas = training_args.strategy.num_replicas_in_sync
......
...@@ -30,6 +30,7 @@ import math ...@@ -30,6 +30,7 @@ import math
import os import os
import random import random
import sys import sys
import warnings
from dataclasses import dataclass, field from dataclasses import dataclass, field
from itertools import chain from itertools import chain
from pathlib import Path from pathlib import Path
...@@ -112,15 +113,21 @@ class ModelArguments: ...@@ -112,15 +113,21 @@ class ModelArguments:
default="main", default="main",
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
def __post_init__(self): def __post_init__(self):
if self.config_overrides is not None and (self.config_name is not None or self.model_name_or_path is not None): if self.config_overrides is not None and (self.config_name is not None or self.model_name_or_path is not None):
...@@ -220,6 +227,12 @@ def main(): ...@@ -220,6 +227,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_clm", model_args, data_args, framework="tensorflow") send_example_telemetry("run_clm", model_args, data_args, framework="tensorflow")
...@@ -287,7 +300,7 @@ def main(): ...@@ -287,7 +300,7 @@ def main():
data_args.dataset_name, data_args.dataset_name,
data_args.dataset_config_name, data_args.dataset_config_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
if "validation" not in raw_datasets.keys(): if "validation" not in raw_datasets.keys():
raw_datasets["validation"] = load_dataset( raw_datasets["validation"] = load_dataset(
...@@ -295,14 +308,14 @@ def main(): ...@@ -295,14 +308,14 @@ def main():
data_args.dataset_config_name, data_args.dataset_config_name,
split=f"train[:{data_args.validation_split_percentage}%]", split=f"train[:{data_args.validation_split_percentage}%]",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
raw_datasets["train"] = load_dataset( raw_datasets["train"] = load_dataset(
data_args.dataset_name, data_args.dataset_name,
data_args.dataset_config_name, data_args.dataset_config_name,
split=f"train[{data_args.validation_split_percentage}%:]", split=f"train[{data_args.validation_split_percentage}%:]",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
data_files = {} data_files = {}
...@@ -323,7 +336,7 @@ def main(): ...@@ -323,7 +336,7 @@ def main():
extension, extension,
data_files=data_files, data_files=data_files,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
**dataset_args, **dataset_args,
) )
# If no validation data is there, validation_split_percentage will be used to divide the dataset. # If no validation data is there, validation_split_percentage will be used to divide the dataset.
...@@ -333,7 +346,7 @@ def main(): ...@@ -333,7 +346,7 @@ def main():
data_files=data_files, data_files=data_files,
split=f"train[:{data_args.validation_split_percentage}%]", split=f"train[:{data_args.validation_split_percentage}%]",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
**dataset_args, **dataset_args,
) )
raw_datasets["train"] = load_dataset( raw_datasets["train"] = load_dataset(
...@@ -341,7 +354,7 @@ def main(): ...@@ -341,7 +354,7 @@ def main():
data_files=data_files, data_files=data_files,
split=f"train[{data_args.validation_split_percentage}%:]", split=f"train[{data_args.validation_split_percentage}%:]",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
**dataset_args, **dataset_args,
) )
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
......
...@@ -28,6 +28,7 @@ import math ...@@ -28,6 +28,7 @@ import math
import os import os
import random import random
import sys import sys
import warnings
from dataclasses import dataclass, field from dataclasses import dataclass, field
from itertools import chain from itertools import chain
from pathlib import Path from pathlib import Path
...@@ -110,15 +111,21 @@ class ModelArguments: ...@@ -110,15 +111,21 @@ class ModelArguments:
default="main", default="main",
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
def __post_init__(self): def __post_init__(self):
if self.config_overrides is not None and (self.config_name is not None or self.model_name_or_path is not None): if self.config_overrides is not None and (self.config_name is not None or self.model_name_or_path is not None):
...@@ -226,6 +233,12 @@ def main(): ...@@ -226,6 +233,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_mlm", model_args, data_args, framework="tensorflow") send_example_telemetry("run_mlm", model_args, data_args, framework="tensorflow")
...@@ -296,20 +309,20 @@ def main(): ...@@ -296,20 +309,20 @@ def main():
raw_datasets = load_dataset( raw_datasets = load_dataset(
data_args.dataset_name, data_args.dataset_name,
data_args.dataset_config_name, data_args.dataset_config_name,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
if "validation" not in raw_datasets.keys(): if "validation" not in raw_datasets.keys():
raw_datasets["validation"] = load_dataset( raw_datasets["validation"] = load_dataset(
data_args.dataset_name, data_args.dataset_name,
data_args.dataset_config_name, data_args.dataset_config_name,
split=f"train[:{data_args.validation_split_percentage}%]", split=f"train[:{data_args.validation_split_percentage}%]",
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
raw_datasets["train"] = load_dataset( raw_datasets["train"] = load_dataset(
data_args.dataset_name, data_args.dataset_name,
data_args.dataset_config_name, data_args.dataset_config_name,
split=f"train[{data_args.validation_split_percentage}%:]", split=f"train[{data_args.validation_split_percentage}%:]",
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
data_files = {} data_files = {}
...@@ -323,7 +336,7 @@ def main(): ...@@ -323,7 +336,7 @@ def main():
raw_datasets = load_dataset( raw_datasets = load_dataset(
extension, extension,
data_files=data_files, data_files=data_files,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
......
...@@ -22,6 +22,7 @@ import json ...@@ -22,6 +22,7 @@ import json
import logging import logging
import os import os
import sys import sys
import warnings
from dataclasses import dataclass, field from dataclasses import dataclass, field
from itertools import chain from itertools import chain
from pathlib import Path from pathlib import Path
...@@ -146,15 +147,21 @@ class ModelArguments: ...@@ -146,15 +147,21 @@ class ModelArguments:
default="main", default="main",
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
@dataclass @dataclass
...@@ -239,6 +246,12 @@ def main(): ...@@ -239,6 +246,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_swag", model_args, data_args, framework="tensorflow") send_example_telemetry("run_swag", model_args, data_args, framework="tensorflow")
...@@ -301,7 +314,7 @@ def main(): ...@@ -301,7 +314,7 @@ def main():
extension, extension,
data_files=data_files, data_files=data_files,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
# Downloading and loading the swag dataset from the hub. # Downloading and loading the swag dataset from the hub.
...@@ -309,7 +322,7 @@ def main(): ...@@ -309,7 +322,7 @@ def main():
"swag", "swag",
"regular", "regular",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
# https://huggingface.co/docs/datasets/loading_datasets.html. # https://huggingface.co/docs/datasets/loading_datasets.html.
...@@ -335,14 +348,14 @@ def main(): ...@@ -335,14 +348,14 @@ def main():
config_path, config_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
tokenizer = AutoTokenizer.from_pretrained( tokenizer = AutoTokenizer.from_pretrained(
model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_fast=model_args.use_fast_tokenizer, use_fast=model_args.use_fast_tokenizer,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# endregion # endregion
...@@ -428,7 +441,7 @@ def main(): ...@@ -428,7 +441,7 @@ def main():
config=config, config=config,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
num_replicas = training_args.strategy.num_replicas_in_sync num_replicas = training_args.strategy.num_replicas_in_sync
......
...@@ -22,6 +22,7 @@ import json ...@@ -22,6 +22,7 @@ import json
import logging import logging
import os import os
import sys import sys
import warnings
from dataclasses import dataclass, field from dataclasses import dataclass, field
from pathlib import Path from pathlib import Path
from typing import Optional from typing import Optional
...@@ -77,15 +78,21 @@ class ModelArguments: ...@@ -77,15 +78,21 @@ class ModelArguments:
default="main", default="main",
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
@dataclass @dataclass
...@@ -245,6 +252,12 @@ def main(): ...@@ -245,6 +252,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_qa", model_args, data_args, framework="tensorflow") send_example_telemetry("run_qa", model_args, data_args, framework="tensorflow")
...@@ -304,7 +317,7 @@ def main(): ...@@ -304,7 +317,7 @@ def main():
data_args.dataset_name, data_args.dataset_name,
data_args.dataset_config_name, data_args.dataset_config_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
data_files = {} data_files = {}
...@@ -323,7 +336,7 @@ def main(): ...@@ -323,7 +336,7 @@ def main():
data_files=data_files, data_files=data_files,
field="data", field="data",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
# https://huggingface.co/docs/datasets/loading_datasets.html. # https://huggingface.co/docs/datasets/loading_datasets.html.
...@@ -338,14 +351,14 @@ def main(): ...@@ -338,14 +351,14 @@ def main():
model_args.config_name if model_args.config_name else model_args.model_name_or_path, model_args.config_name if model_args.config_name else model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
tokenizer = AutoTokenizer.from_pretrained( tokenizer = AutoTokenizer.from_pretrained(
model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_fast=True, use_fast=True,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# endregion # endregion
...@@ -625,7 +638,7 @@ def main(): ...@@ -625,7 +638,7 @@ def main():
config=config, config=config,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
if training_args.do_train: if training_args.do_train:
training_dataset = model.prepare_tf_dataset( training_dataset = model.prepare_tf_dataset(
......
...@@ -22,6 +22,7 @@ import json ...@@ -22,6 +22,7 @@ import json
import logging import logging
import os import os
import sys import sys
import warnings
from dataclasses import dataclass, field from dataclasses import dataclass, field
from typing import Optional from typing import Optional
...@@ -99,15 +100,21 @@ class ModelArguments: ...@@ -99,15 +100,21 @@ class ModelArguments:
default="main", default="main",
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
@dataclass @dataclass
...@@ -287,6 +294,12 @@ def main(): ...@@ -287,6 +294,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_summarization", model_args, data_args, framework="tensorflow") send_example_telemetry("run_summarization", model_args, data_args, framework="tensorflow")
...@@ -355,7 +368,7 @@ def main(): ...@@ -355,7 +368,7 @@ def main():
data_args.dataset_name, data_args.dataset_name,
data_args.dataset_config_name, data_args.dataset_config_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
data_files = {} data_files = {}
...@@ -372,7 +385,7 @@ def main(): ...@@ -372,7 +385,7 @@ def main():
extension, extension,
data_files=data_files, data_files=data_files,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
# https://huggingface.co/docs/datasets/loading_datasets.html. # https://huggingface.co/docs/datasets/loading_datasets.html.
...@@ -388,14 +401,14 @@ def main(): ...@@ -388,14 +401,14 @@ def main():
model_args.config_name if model_args.config_name else model_args.model_name_or_path, model_args.config_name if model_args.config_name else model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
tokenizer = AutoTokenizer.from_pretrained( tokenizer = AutoTokenizer.from_pretrained(
model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_fast=model_args.use_fast_tokenizer, use_fast=model_args.use_fast_tokenizer,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
prefix = data_args.source_prefix if data_args.source_prefix is not None else "" prefix = data_args.source_prefix if data_args.source_prefix is not None else ""
...@@ -513,7 +526,7 @@ def main(): ...@@ -513,7 +526,7 @@ def main():
config=config, config=config,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# We resize the embeddings only when necessary to avoid index errors. If you are creating a model from scratch # We resize the embeddings only when necessary to avoid index errors. If you are creating a model from scratch
......
...@@ -20,6 +20,7 @@ import json ...@@ -20,6 +20,7 @@ import json
import logging import logging
import os import os
import sys import sys
import warnings
from dataclasses import dataclass, field from dataclasses import dataclass, field
from typing import Optional from typing import Optional
...@@ -164,15 +165,21 @@ class ModelArguments: ...@@ -164,15 +165,21 @@ class ModelArguments:
default="main", default="main",
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
# endregion # endregion
...@@ -192,6 +199,12 @@ def main(): ...@@ -192,6 +199,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_glue", model_args, data_args, framework="tensorflow") send_example_telemetry("run_glue", model_args, data_args, framework="tensorflow")
...@@ -242,7 +255,7 @@ def main(): ...@@ -242,7 +255,7 @@ def main():
"glue", "glue",
data_args.task_name, data_args.task_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# See more about loading any type of standard or custom dataset at # See more about loading any type of standard or custom dataset at
# https://huggingface.co/docs/datasets/loading_datasets.html. # https://huggingface.co/docs/datasets/loading_datasets.html.
...@@ -284,14 +297,14 @@ def main(): ...@@ -284,14 +297,14 @@ def main():
finetuning_task=data_args.task_name, finetuning_task=data_args.task_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
tokenizer = AutoTokenizer.from_pretrained( tokenizer = AutoTokenizer.from_pretrained(
model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_fast=model_args.use_fast_tokenizer, use_fast=model_args.use_fast_tokenizer,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# endregion # endregion
...@@ -374,7 +387,7 @@ def main(): ...@@ -374,7 +387,7 @@ def main():
config=config, config=config,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# endregion # endregion
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment