Unverified Commit 149cb0cc authored by Yih-Dar's avatar Yih-Dar Committed by GitHub
Browse files

Add `token` argument in example scripts (#25172)



* fix

* fix

* fix

* fix

* fix

* fix

* fix

---------
Co-authored-by: default avatarydshieh <ydshieh@users.noreply.github.com>
parent c6a8768d
...@@ -22,6 +22,7 @@ import logging ...@@ -22,6 +22,7 @@ import logging
import os import os
import sys import sys
import time import time
import warnings
from dataclasses import asdict, dataclass, field from dataclasses import asdict, dataclass, field
from enum import Enum from enum import Enum
from functools import partial from functools import partial
...@@ -182,15 +183,21 @@ class ModelArguments: ...@@ -182,15 +183,21 @@ class ModelArguments:
) )
}, },
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
@dataclass @dataclass
...@@ -389,6 +396,12 @@ def main(): ...@@ -389,6 +396,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_image_captioning", model_args, data_args, framework="flax") send_example_telemetry("run_image_captioning", model_args, data_args, framework="flax")
...@@ -448,7 +461,7 @@ def main(): ...@@ -448,7 +461,7 @@ def main():
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
keep_in_memory=False, keep_in_memory=False,
data_dir=data_args.data_dir, data_dir=data_args.data_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
data_files = {} data_files = {}
...@@ -465,7 +478,7 @@ def main(): ...@@ -465,7 +478,7 @@ def main():
extension, extension,
data_files=data_files, data_files=data_files,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
# https://huggingface.co/docs/datasets/loading_datasets.html. # https://huggingface.co/docs/datasets/loading_datasets.html.
...@@ -475,18 +488,18 @@ def main(): ...@@ -475,18 +488,18 @@ def main():
model_args.model_name_or_path, model_args.model_name_or_path,
seed=training_args.seed, seed=training_args.seed,
dtype=getattr(jnp, model_args.dtype), dtype=getattr(jnp, model_args.dtype),
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
image_processor = AutoImageProcessor.from_pretrained( image_processor = AutoImageProcessor.from_pretrained(
model_args.model_name_or_path, model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
tokenizer = AutoTokenizer.from_pretrained( tokenizer = AutoTokenizer.from_pretrained(
model_args.model_name_or_path, model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_fast=model_args.use_fast_tokenizer, use_fast=model_args.use_fast_tokenizer,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
tokenizer.pad_token = tokenizer.convert_ids_to_tokens(model.config.pad_token_id) tokenizer.pad_token = tokenizer.convert_ids_to_tokens(model.config.pad_token_id)
......
...@@ -26,6 +26,7 @@ import math ...@@ -26,6 +26,7 @@ import math
import os import os
import sys import sys
import time import time
import warnings
from dataclasses import asdict, dataclass, field from dataclasses import asdict, dataclass, field
from enum import Enum from enum import Enum
from itertools import chain from itertools import chain
...@@ -168,15 +169,21 @@ class ModelArguments: ...@@ -168,15 +169,21 @@ class ModelArguments:
) )
}, },
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
@dataclass @dataclass
...@@ -463,6 +470,12 @@ def main(): ...@@ -463,6 +470,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_bart_dlm", model_args, data_args, framework="flax") send_example_telemetry("run_bart_dlm", model_args, data_args, framework="flax")
...@@ -517,7 +530,7 @@ def main(): ...@@ -517,7 +530,7 @@ def main():
data_args.dataset_name, data_args.dataset_name,
data_args.dataset_config_name, data_args.dataset_config_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
if "validation" not in datasets.keys(): if "validation" not in datasets.keys():
...@@ -526,14 +539,14 @@ def main(): ...@@ -526,14 +539,14 @@ def main():
data_args.dataset_config_name, data_args.dataset_config_name,
split=f"train[:{data_args.validation_split_percentage}%]", split=f"train[:{data_args.validation_split_percentage}%]",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
datasets["train"] = load_dataset( datasets["train"] = load_dataset(
data_args.dataset_name, data_args.dataset_name,
data_args.dataset_config_name, data_args.dataset_config_name,
split=f"train[{data_args.validation_split_percentage}%:]", split=f"train[{data_args.validation_split_percentage}%:]",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
data_files = {} data_files = {}
...@@ -548,7 +561,7 @@ def main(): ...@@ -548,7 +561,7 @@ def main():
extension, extension,
data_files=data_files, data_files=data_files,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
if "validation" not in datasets.keys(): if "validation" not in datasets.keys():
...@@ -557,14 +570,14 @@ def main(): ...@@ -557,14 +570,14 @@ def main():
data_files=data_files, data_files=data_files,
split=f"train[:{data_args.validation_split_percentage}%]", split=f"train[:{data_args.validation_split_percentage}%]",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
datasets["train"] = load_dataset( datasets["train"] = load_dataset(
extension, extension,
data_files=data_files, data_files=data_files,
split=f"train[{data_args.validation_split_percentage}%:]", split=f"train[{data_args.validation_split_percentage}%:]",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
# https://huggingface.co/docs/datasets/loading_datasets.html. # https://huggingface.co/docs/datasets/loading_datasets.html.
...@@ -576,14 +589,14 @@ def main(): ...@@ -576,14 +589,14 @@ def main():
model_args.tokenizer_name, model_args.tokenizer_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_fast=model_args.use_fast_tokenizer, use_fast=model_args.use_fast_tokenizer,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
elif model_args.model_name_or_path: elif model_args.model_name_or_path:
tokenizer = AutoTokenizer.from_pretrained( tokenizer = AutoTokenizer.from_pretrained(
model_args.model_name_or_path, model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_fast=model_args.use_fast_tokenizer, use_fast=model_args.use_fast_tokenizer,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
raise ValueError( raise ValueError(
...@@ -596,13 +609,13 @@ def main(): ...@@ -596,13 +609,13 @@ def main():
model_args.config_name, model_args.config_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
vocab_size=len(tokenizer), vocab_size=len(tokenizer),
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
elif model_args.model_name_or_path: elif model_args.model_name_or_path:
config = BartConfig.from_pretrained( config = BartConfig.from_pretrained(
model_args.model_name_or_path, model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
config = CONFIG_MAPPING[model_args.model_type]() config = CONFIG_MAPPING[model_args.model_type]()
...@@ -707,7 +720,7 @@ def main(): ...@@ -707,7 +720,7 @@ def main():
config=config, config=config,
seed=training_args.seed, seed=training_args.seed,
dtype=getattr(jnp, model_args.dtype), dtype=getattr(jnp, model_args.dtype),
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
config.vocab_size = len(tokenizer) config.vocab_size = len(tokenizer)
......
...@@ -27,6 +27,7 @@ import math ...@@ -27,6 +27,7 @@ import math
import os import os
import sys import sys
import time import time
import warnings
from dataclasses import asdict, dataclass, field from dataclasses import asdict, dataclass, field
from enum import Enum from enum import Enum
from itertools import chain from itertools import chain
...@@ -169,15 +170,21 @@ class ModelArguments: ...@@ -169,15 +170,21 @@ class ModelArguments:
) )
}, },
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
@dataclass @dataclass
...@@ -334,6 +341,12 @@ def main(): ...@@ -334,6 +341,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_clm", model_args, data_args, framework="flax") send_example_telemetry("run_clm", model_args, data_args, framework="flax")
...@@ -397,7 +410,7 @@ def main(): ...@@ -397,7 +410,7 @@ def main():
data_args.dataset_config_name, data_args.dataset_config_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
keep_in_memory=False, keep_in_memory=False,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
if "validation" not in dataset.keys(): if "validation" not in dataset.keys():
...@@ -406,14 +419,14 @@ def main(): ...@@ -406,14 +419,14 @@ def main():
data_args.dataset_config_name, data_args.dataset_config_name,
split=f"train[:{data_args.validation_split_percentage}%]", split=f"train[:{data_args.validation_split_percentage}%]",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
dataset["train"] = load_dataset( dataset["train"] = load_dataset(
data_args.dataset_name, data_args.dataset_name,
data_args.dataset_config_name, data_args.dataset_config_name,
split=f"train[{data_args.validation_split_percentage}%:]", split=f"train[{data_args.validation_split_percentage}%:]",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
data_files = {} data_files = {}
...@@ -431,7 +444,7 @@ def main(): ...@@ -431,7 +444,7 @@ def main():
data_files=data_files, data_files=data_files,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
**dataset_args, **dataset_args,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
if "validation" not in dataset.keys(): if "validation" not in dataset.keys():
...@@ -441,7 +454,7 @@ def main(): ...@@ -441,7 +454,7 @@ def main():
split=f"train[:{data_args.validation_split_percentage}%]", split=f"train[:{data_args.validation_split_percentage}%]",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
**dataset_args, **dataset_args,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
dataset["train"] = load_dataset( dataset["train"] = load_dataset(
extension, extension,
...@@ -449,7 +462,7 @@ def main(): ...@@ -449,7 +462,7 @@ def main():
split=f"train[{data_args.validation_split_percentage}%:]", split=f"train[{data_args.validation_split_percentage}%:]",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
**dataset_args, **dataset_args,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
# https://huggingface.co/docs/datasets/loading_datasets.html. # https://huggingface.co/docs/datasets/loading_datasets.html.
...@@ -463,13 +476,13 @@ def main(): ...@@ -463,13 +476,13 @@ def main():
config = AutoConfig.from_pretrained( config = AutoConfig.from_pretrained(
model_args.config_name, model_args.config_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
elif model_args.model_name_or_path: elif model_args.model_name_or_path:
config = AutoConfig.from_pretrained( config = AutoConfig.from_pretrained(
model_args.model_name_or_path, model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
config = CONFIG_MAPPING[model_args.model_type]() config = CONFIG_MAPPING[model_args.model_type]()
...@@ -480,14 +493,14 @@ def main(): ...@@ -480,14 +493,14 @@ def main():
model_args.tokenizer_name, model_args.tokenizer_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_fast=model_args.use_fast_tokenizer, use_fast=model_args.use_fast_tokenizer,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
elif model_args.model_name_or_path: elif model_args.model_name_or_path:
tokenizer = AutoTokenizer.from_pretrained( tokenizer = AutoTokenizer.from_pretrained(
model_args.model_name_or_path, model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_fast=model_args.use_fast_tokenizer, use_fast=model_args.use_fast_tokenizer,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
raise ValueError( raise ValueError(
...@@ -501,7 +514,7 @@ def main(): ...@@ -501,7 +514,7 @@ def main():
config=config, config=config,
seed=training_args.seed, seed=training_args.seed,
dtype=getattr(jnp, model_args.dtype), dtype=getattr(jnp, model_args.dtype),
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
model = FlaxAutoModelForCausalLM.from_config( model = FlaxAutoModelForCausalLM.from_config(
......
...@@ -26,6 +26,7 @@ import math ...@@ -26,6 +26,7 @@ import math
import os import os
import sys import sys
import time import time
import warnings
from dataclasses import asdict, dataclass, field from dataclasses import asdict, dataclass, field
from enum import Enum from enum import Enum
from itertools import chain from itertools import chain
...@@ -174,15 +175,21 @@ class ModelArguments: ...@@ -174,15 +175,21 @@ class ModelArguments:
) )
}, },
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
@dataclass @dataclass
...@@ -377,6 +384,12 @@ def main(): ...@@ -377,6 +384,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_mlm", model_args, data_args, framework="flax") send_example_telemetry("run_mlm", model_args, data_args, framework="flax")
...@@ -434,7 +447,7 @@ def main(): ...@@ -434,7 +447,7 @@ def main():
data_args.dataset_name, data_args.dataset_name,
data_args.dataset_config_name, data_args.dataset_config_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
if "validation" not in datasets.keys(): if "validation" not in datasets.keys():
...@@ -443,14 +456,14 @@ def main(): ...@@ -443,14 +456,14 @@ def main():
data_args.dataset_config_name, data_args.dataset_config_name,
split=f"train[:{data_args.validation_split_percentage}%]", split=f"train[:{data_args.validation_split_percentage}%]",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
datasets["train"] = load_dataset( datasets["train"] = load_dataset(
data_args.dataset_name, data_args.dataset_name,
data_args.dataset_config_name, data_args.dataset_config_name,
split=f"train[{data_args.validation_split_percentage}%:]", split=f"train[{data_args.validation_split_percentage}%:]",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
data_files = {} data_files = {}
...@@ -465,7 +478,7 @@ def main(): ...@@ -465,7 +478,7 @@ def main():
extension, extension,
data_files=data_files, data_files=data_files,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
if "validation" not in datasets.keys(): if "validation" not in datasets.keys():
...@@ -474,14 +487,14 @@ def main(): ...@@ -474,14 +487,14 @@ def main():
data_files=data_files, data_files=data_files,
split=f"train[:{data_args.validation_split_percentage}%]", split=f"train[:{data_args.validation_split_percentage}%]",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
datasets["train"] = load_dataset( datasets["train"] = load_dataset(
extension, extension,
data_files=data_files, data_files=data_files,
split=f"train[{data_args.validation_split_percentage}%:]", split=f"train[{data_args.validation_split_percentage}%:]",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
# https://huggingface.co/docs/datasets/loading_datasets.html. # https://huggingface.co/docs/datasets/loading_datasets.html.
...@@ -495,13 +508,13 @@ def main(): ...@@ -495,13 +508,13 @@ def main():
config = AutoConfig.from_pretrained( config = AutoConfig.from_pretrained(
model_args.config_name, model_args.config_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
elif model_args.model_name_or_path: elif model_args.model_name_or_path:
config = AutoConfig.from_pretrained( config = AutoConfig.from_pretrained(
model_args.model_name_or_path, model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
config = CONFIG_MAPPING[model_args.model_type]() config = CONFIG_MAPPING[model_args.model_type]()
...@@ -512,14 +525,14 @@ def main(): ...@@ -512,14 +525,14 @@ def main():
model_args.tokenizer_name, model_args.tokenizer_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_fast=model_args.use_fast_tokenizer, use_fast=model_args.use_fast_tokenizer,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
elif model_args.model_name_or_path: elif model_args.model_name_or_path:
tokenizer = AutoTokenizer.from_pretrained( tokenizer = AutoTokenizer.from_pretrained(
model_args.model_name_or_path, model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_fast=model_args.use_fast_tokenizer, use_fast=model_args.use_fast_tokenizer,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
raise ValueError( raise ValueError(
...@@ -638,7 +651,7 @@ def main(): ...@@ -638,7 +651,7 @@ def main():
config=config, config=config,
seed=training_args.seed, seed=training_args.seed,
dtype=getattr(jnp, model_args.dtype), dtype=getattr(jnp, model_args.dtype),
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
model = FlaxAutoModelForMaskedLM.from_config( model = FlaxAutoModelForMaskedLM.from_config(
......
...@@ -25,6 +25,7 @@ import math ...@@ -25,6 +25,7 @@ import math
import os import os
import sys import sys
import time import time
import warnings
from dataclasses import asdict, dataclass, field from dataclasses import asdict, dataclass, field
# You can also adapt this script on your own masked language modeling task. Pointers for this are left as comments. # You can also adapt this script on your own masked language modeling task. Pointers for this are left as comments.
...@@ -168,15 +169,21 @@ class ModelArguments: ...@@ -168,15 +169,21 @@ class ModelArguments:
) )
}, },
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
@dataclass @dataclass
...@@ -504,6 +511,12 @@ def main(): ...@@ -504,6 +511,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_t5_mlm", model_args, data_args, framework="flax") send_example_telemetry("run_t5_mlm", model_args, data_args, framework="flax")
...@@ -558,7 +571,7 @@ def main(): ...@@ -558,7 +571,7 @@ def main():
data_args.dataset_name, data_args.dataset_name,
data_args.dataset_config_name, data_args.dataset_config_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
if "validation" not in datasets.keys(): if "validation" not in datasets.keys():
...@@ -567,14 +580,14 @@ def main(): ...@@ -567,14 +580,14 @@ def main():
data_args.dataset_config_name, data_args.dataset_config_name,
split=f"train[:{data_args.validation_split_percentage}%]", split=f"train[:{data_args.validation_split_percentage}%]",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
datasets["train"] = load_dataset( datasets["train"] = load_dataset(
data_args.dataset_name, data_args.dataset_name,
data_args.dataset_config_name, data_args.dataset_config_name,
split=f"train[{data_args.validation_split_percentage}%:]", split=f"train[{data_args.validation_split_percentage}%:]",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
data_files = {} data_files = {}
...@@ -589,7 +602,7 @@ def main(): ...@@ -589,7 +602,7 @@ def main():
extension, extension,
data_files=data_files, data_files=data_files,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
if "validation" not in datasets.keys(): if "validation" not in datasets.keys():
...@@ -598,14 +611,14 @@ def main(): ...@@ -598,14 +611,14 @@ def main():
data_files=data_files, data_files=data_files,
split=f"train[:{data_args.validation_split_percentage}%]", split=f"train[:{data_args.validation_split_percentage}%]",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
datasets["train"] = load_dataset( datasets["train"] = load_dataset(
extension, extension,
data_files=data_files, data_files=data_files,
split=f"train[{data_args.validation_split_percentage}%:]", split=f"train[{data_args.validation_split_percentage}%:]",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
# https://huggingface.co/docs/datasets/loading_datasets.html. # https://huggingface.co/docs/datasets/loading_datasets.html.
...@@ -617,14 +630,14 @@ def main(): ...@@ -617,14 +630,14 @@ def main():
model_args.tokenizer_name, model_args.tokenizer_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_fast=model_args.use_fast_tokenizer, use_fast=model_args.use_fast_tokenizer,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
elif model_args.model_name_or_path: elif model_args.model_name_or_path:
tokenizer = AutoTokenizer.from_pretrained( tokenizer = AutoTokenizer.from_pretrained(
model_args.model_name_or_path, model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_fast=model_args.use_fast_tokenizer, use_fast=model_args.use_fast_tokenizer,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
raise ValueError( raise ValueError(
...@@ -637,13 +650,13 @@ def main(): ...@@ -637,13 +650,13 @@ def main():
model_args.config_name, model_args.config_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
vocab_size=len(tokenizer), vocab_size=len(tokenizer),
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
elif model_args.model_name_or_path: elif model_args.model_name_or_path:
config = T5Config.from_pretrained( config = T5Config.from_pretrained(
model_args.model_name_or_path, model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
config = CONFIG_MAPPING[model_args.model_type]() config = CONFIG_MAPPING[model_args.model_type]()
...@@ -738,7 +751,7 @@ def main(): ...@@ -738,7 +751,7 @@ def main():
config=config, config=config,
seed=training_args.seed, seed=training_args.seed,
dtype=getattr(jnp, model_args.dtype), dtype=getattr(jnp, model_args.dtype),
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
config.vocab_size = len(tokenizer) config.vocab_size = len(tokenizer)
......
...@@ -25,6 +25,7 @@ import os ...@@ -25,6 +25,7 @@ import os
import random import random
import sys import sys
import time import time
import warnings
from dataclasses import asdict, dataclass, field from dataclasses import asdict, dataclass, field
from enum import Enum from enum import Enum
from pathlib import Path from pathlib import Path
...@@ -155,15 +156,21 @@ class ModelArguments: ...@@ -155,15 +156,21 @@ class ModelArguments:
default="main", default="main",
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
dtype: Optional[str] = field( dtype: Optional[str] = field(
default="float32", default="float32",
metadata={ metadata={
...@@ -438,6 +445,12 @@ def main(): ...@@ -438,6 +445,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_qa", model_args, data_args, framework="flax") send_example_telemetry("run_qa", model_args, data_args, framework="flax")
...@@ -487,7 +500,7 @@ def main(): ...@@ -487,7 +500,7 @@ def main():
data_args.dataset_name, data_args.dataset_name,
data_args.dataset_config_name, data_args.dataset_config_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
# Loading the dataset from local csv or json file. # Loading the dataset from local csv or json file.
...@@ -507,7 +520,7 @@ def main(): ...@@ -507,7 +520,7 @@ def main():
data_files=data_files, data_files=data_files,
field="data", field="data",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
# https://huggingface.co/docs/datasets/loading_datasets.html. # https://huggingface.co/docs/datasets/loading_datasets.html.
...@@ -520,14 +533,14 @@ def main(): ...@@ -520,14 +533,14 @@ def main():
model_args.config_name if model_args.config_name else model_args.model_name_or_path, model_args.config_name if model_args.config_name else model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
tokenizer = AutoTokenizer.from_pretrained( tokenizer = AutoTokenizer.from_pretrained(
model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_fast=True, use_fast=True,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# endregion # endregion
...@@ -874,7 +887,7 @@ def main(): ...@@ -874,7 +887,7 @@ def main():
config=config, config=config,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
seed=training_args.seed, seed=training_args.seed,
dtype=getattr(jnp, model_args.dtype), dtype=getattr(jnp, model_args.dtype),
) )
......
...@@ -24,6 +24,7 @@ import math ...@@ -24,6 +24,7 @@ import math
import os import os
import sys import sys
import time import time
import warnings
from dataclasses import asdict, dataclass, field from dataclasses import asdict, dataclass, field
from enum import Enum from enum import Enum
from functools import partial from functools import partial
...@@ -188,15 +189,21 @@ class ModelArguments: ...@@ -188,15 +189,21 @@ class ModelArguments:
) )
}, },
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
@dataclass @dataclass
...@@ -417,6 +424,12 @@ def main(): ...@@ -417,6 +424,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_summarization", model_args, data_args, framework="flax") send_example_telemetry("run_summarization", model_args, data_args, framework="flax")
...@@ -475,7 +488,7 @@ def main(): ...@@ -475,7 +488,7 @@ def main():
data_args.dataset_config_name, data_args.dataset_config_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
keep_in_memory=False, keep_in_memory=False,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
data_files = {} data_files = {}
...@@ -492,7 +505,7 @@ def main(): ...@@ -492,7 +505,7 @@ def main():
extension, extension,
data_files=data_files, data_files=data_files,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
# https://huggingface.co/docs/datasets/loading_datasets.html. # https://huggingface.co/docs/datasets/loading_datasets.html.
...@@ -503,13 +516,13 @@ def main(): ...@@ -503,13 +516,13 @@ def main():
config = AutoConfig.from_pretrained( config = AutoConfig.from_pretrained(
model_args.config_name, model_args.config_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
elif model_args.model_name_or_path: elif model_args.model_name_or_path:
config = AutoConfig.from_pretrained( config = AutoConfig.from_pretrained(
model_args.model_name_or_path, model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
config = CONFIG_MAPPING[model_args.model_type]() config = CONFIG_MAPPING[model_args.model_type]()
...@@ -520,14 +533,14 @@ def main(): ...@@ -520,14 +533,14 @@ def main():
model_args.tokenizer_name, model_args.tokenizer_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_fast=model_args.use_fast_tokenizer, use_fast=model_args.use_fast_tokenizer,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
elif model_args.model_name_or_path: elif model_args.model_name_or_path:
tokenizer = AutoTokenizer.from_pretrained( tokenizer = AutoTokenizer.from_pretrained(
model_args.model_name_or_path, model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_fast=model_args.use_fast_tokenizer, use_fast=model_args.use_fast_tokenizer,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
raise ValueError( raise ValueError(
...@@ -541,7 +554,7 @@ def main(): ...@@ -541,7 +554,7 @@ def main():
config=config, config=config,
seed=training_args.seed, seed=training_args.seed,
dtype=getattr(jnp, model_args.dtype), dtype=getattr(jnp, model_args.dtype),
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
model = FlaxAutoModelForSeq2SeqLM.from_config( model = FlaxAutoModelForSeq2SeqLM.from_config(
......
...@@ -21,6 +21,7 @@ import os ...@@ -21,6 +21,7 @@ import os
import random import random
import sys import sys
import time import time
import warnings
from dataclasses import dataclass, field from dataclasses import dataclass, field
from pathlib import Path from pathlib import Path
from typing import Any, Callable, Dict, Optional, Tuple from typing import Any, Callable, Dict, Optional, Tuple
...@@ -101,15 +102,21 @@ class ModelArguments: ...@@ -101,15 +102,21 @@ class ModelArguments:
default="main", default="main",
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
@dataclass @dataclass
...@@ -321,6 +328,12 @@ def main(): ...@@ -321,6 +328,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_glue", model_args, data_args, framework="flax") send_example_telemetry("run_glue", model_args, data_args, framework="flax")
...@@ -368,7 +381,7 @@ def main(): ...@@ -368,7 +381,7 @@ def main():
raw_datasets = load_dataset( raw_datasets = load_dataset(
"glue", "glue",
data_args.task_name, data_args.task_name,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
# Loading the dataset from local csv or json file. # Loading the dataset from local csv or json file.
...@@ -381,7 +394,7 @@ def main(): ...@@ -381,7 +394,7 @@ def main():
raw_datasets = load_dataset( raw_datasets = load_dataset(
extension, extension,
data_files=data_files, data_files=data_files,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# See more about loading any type of standard or custom dataset at # See more about loading any type of standard or custom dataset at
# https://huggingface.co/docs/datasets/loading_datasets.html. # https://huggingface.co/docs/datasets/loading_datasets.html.
...@@ -411,17 +424,17 @@ def main(): ...@@ -411,17 +424,17 @@ def main():
model_args.model_name_or_path, model_args.model_name_or_path,
num_labels=num_labels, num_labels=num_labels,
finetuning_task=data_args.task_name, finetuning_task=data_args.task_name,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
tokenizer = AutoTokenizer.from_pretrained( tokenizer = AutoTokenizer.from_pretrained(
model_args.model_name_or_path, model_args.model_name_or_path,
use_fast=not model_args.use_slow_tokenizer, use_fast=not model_args.use_slow_tokenizer,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
model = FlaxAutoModelForSequenceClassification.from_pretrained( model = FlaxAutoModelForSequenceClassification.from_pretrained(
model_args.model_name_or_path, model_args.model_name_or_path,
config=config, config=config,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# Preprocessing the datasets # Preprocessing the datasets
......
...@@ -21,6 +21,7 @@ import os ...@@ -21,6 +21,7 @@ import os
import random import random
import sys import sys
import time import time
import warnings
from dataclasses import asdict, dataclass, field from dataclasses import asdict, dataclass, field
from enum import Enum from enum import Enum
from itertools import chain from itertools import chain
...@@ -149,15 +150,21 @@ class ModelArguments: ...@@ -149,15 +150,21 @@ class ModelArguments:
default="main", default="main",
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
@dataclass @dataclass
...@@ -377,6 +384,12 @@ def main(): ...@@ -377,6 +384,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_ner", model_args, data_args, framework="flax") send_example_telemetry("run_ner", model_args, data_args, framework="flax")
...@@ -422,7 +435,7 @@ def main(): ...@@ -422,7 +435,7 @@ def main():
data_args.dataset_name, data_args.dataset_name,
data_args.dataset_config_name, data_args.dataset_config_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
# Loading the dataset from local csv or json file. # Loading the dataset from local csv or json file.
...@@ -436,7 +449,7 @@ def main(): ...@@ -436,7 +449,7 @@ def main():
extension, extension,
data_files=data_files, data_files=data_files,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# See more about loading any type of standard or custom dataset at # See more about loading any type of standard or custom dataset at
# https://huggingface.co/docs/datasets/loading_datasets.html. # https://huggingface.co/docs/datasets/loading_datasets.html.
...@@ -490,7 +503,7 @@ def main(): ...@@ -490,7 +503,7 @@ def main():
finetuning_task=data_args.task_name, finetuning_task=data_args.task_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
tokenizer_name_or_path = model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path tokenizer_name_or_path = model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path
if config.model_type in {"gpt2", "roberta"}: if config.model_type in {"gpt2", "roberta"}:
...@@ -498,7 +511,7 @@ def main(): ...@@ -498,7 +511,7 @@ def main():
tokenizer_name_or_path, tokenizer_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
add_prefix_space=True, add_prefix_space=True,
) )
else: else:
...@@ -506,14 +519,14 @@ def main(): ...@@ -506,14 +519,14 @@ def main():
tokenizer_name_or_path, tokenizer_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
model = FlaxAutoModelForTokenClassification.from_pretrained( model = FlaxAutoModelForTokenClassification.from_pretrained(
model_args.model_name_or_path, model_args.model_name_or_path,
config=config, config=config,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# Preprocessing the datasets # Preprocessing the datasets
......
...@@ -24,6 +24,7 @@ import logging ...@@ -24,6 +24,7 @@ import logging
import os import os
import sys import sys
import time import time
import warnings
from dataclasses import asdict, dataclass, field from dataclasses import asdict, dataclass, field
from enum import Enum from enum import Enum
from pathlib import Path from pathlib import Path
...@@ -159,15 +160,21 @@ class ModelArguments: ...@@ -159,15 +160,21 @@ class ModelArguments:
) )
}, },
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
@dataclass @dataclass
...@@ -257,6 +264,12 @@ def main(): ...@@ -257,6 +264,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_image_classification", model_args, data_args, framework="flax") send_example_telemetry("run_image_classification", model_args, data_args, framework="flax")
...@@ -338,7 +351,7 @@ def main(): ...@@ -338,7 +351,7 @@ def main():
num_labels=len(train_dataset.classes), num_labels=len(train_dataset.classes),
image_size=data_args.image_size, image_size=data_args.image_size,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
elif model_args.model_name_or_path: elif model_args.model_name_or_path:
config = AutoConfig.from_pretrained( config = AutoConfig.from_pretrained(
...@@ -346,7 +359,7 @@ def main(): ...@@ -346,7 +359,7 @@ def main():
num_labels=len(train_dataset.classes), num_labels=len(train_dataset.classes),
image_size=data_args.image_size, image_size=data_args.image_size,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
config = CONFIG_MAPPING[model_args.model_type]() config = CONFIG_MAPPING[model_args.model_type]()
...@@ -358,7 +371,7 @@ def main(): ...@@ -358,7 +371,7 @@ def main():
config=config, config=config,
seed=training_args.seed, seed=training_args.seed,
dtype=getattr(jnp, model_args.dtype), dtype=getattr(jnp, model_args.dtype),
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
model = FlaxAutoModelForImageClassification.from_config( model = FlaxAutoModelForImageClassification.from_config(
......
...@@ -152,15 +152,21 @@ class ModelArguments: ...@@ -152,15 +152,21 @@ class ModelArguments:
attention_mask: bool = field( attention_mask: bool = field(
default=True, metadata={"help": "Whether to generate an attention mask in the feature extractor."} default=True, metadata={"help": "Whether to generate an attention mask in the feature extractor."}
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
freeze_feature_extractor: Optional[bool] = field( freeze_feature_extractor: Optional[bool] = field(
default=None, metadata={"help": "Whether to freeze the feature extractor layers of the model."} default=None, metadata={"help": "Whether to freeze the feature extractor layers of the model."}
) )
...@@ -198,6 +204,12 @@ def main(): ...@@ -198,6 +204,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_audio_classification", model_args, data_args) send_example_telemetry("run_audio_classification", model_args, data_args)
...@@ -250,13 +262,13 @@ def main(): ...@@ -250,13 +262,13 @@ def main():
data_args.dataset_name, data_args.dataset_name,
data_args.dataset_config_name, data_args.dataset_config_name,
split=data_args.train_split_name, split=data_args.train_split_name,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
raw_datasets["eval"] = load_dataset( raw_datasets["eval"] = load_dataset(
data_args.dataset_name, data_args.dataset_name,
data_args.dataset_config_name, data_args.dataset_config_name,
split=data_args.eval_split_name, split=data_args.eval_split_name,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
if data_args.audio_column_name not in raw_datasets["train"].column_names: if data_args.audio_column_name not in raw_datasets["train"].column_names:
...@@ -280,7 +292,7 @@ def main(): ...@@ -280,7 +292,7 @@ def main():
return_attention_mask=model_args.attention_mask, return_attention_mask=model_args.attention_mask,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# `datasets` takes care of automatically loading and resampling the audio, # `datasets` takes care of automatically loading and resampling the audio,
...@@ -340,7 +352,7 @@ def main(): ...@@ -340,7 +352,7 @@ def main():
finetuning_task="audio-classification", finetuning_task="audio-classification",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
model = AutoModelForAudioClassification.from_pretrained( model = AutoModelForAudioClassification.from_pretrained(
model_args.model_name_or_path, model_args.model_name_or_path,
...@@ -348,7 +360,7 @@ def main(): ...@@ -348,7 +360,7 @@ def main():
config=config, config=config,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
ignore_mismatched_sizes=model_args.ignore_mismatched_sizes, ignore_mismatched_sizes=model_args.ignore_mismatched_sizes,
) )
......
...@@ -26,6 +26,7 @@ Text models: BERT, ROBERTa (https://huggingface.co/models?filter=fill-mask) ...@@ -26,6 +26,7 @@ Text models: BERT, ROBERTa (https://huggingface.co/models?filter=fill-mask)
import logging import logging
import os import os
import sys import sys
import warnings
from dataclasses import dataclass, field from dataclasses import dataclass, field
from typing import Optional from typing import Optional
...@@ -86,15 +87,21 @@ class ModelArguments: ...@@ -86,15 +87,21 @@ class ModelArguments:
default=True, default=True,
metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."}, metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."},
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
freeze_vision_model: bool = field( freeze_vision_model: bool = field(
default=False, metadata={"help": "Whether to freeze the vision model parameters or not."} default=False, metadata={"help": "Whether to freeze the vision model parameters or not."}
) )
...@@ -235,6 +242,12 @@ def main(): ...@@ -235,6 +242,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_clip", model_args, data_args) send_example_telemetry("run_clip", model_args, data_args)
...@@ -294,7 +307,7 @@ def main(): ...@@ -294,7 +307,7 @@ def main():
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
keep_in_memory=False, keep_in_memory=False,
data_dir=data_args.data_dir, data_dir=data_args.data_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
data_files = {} data_files = {}
...@@ -311,7 +324,7 @@ def main(): ...@@ -311,7 +324,7 @@ def main():
extension, extension,
data_files=data_files, data_files=data_files,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
# https://huggingface.co/docs/datasets/loading_datasets.html. # https://huggingface.co/docs/datasets/loading_datasets.html.
...@@ -336,14 +349,14 @@ def main(): ...@@ -336,14 +349,14 @@ def main():
model_args.image_processor_name or model_args.model_name_or_path, model_args.image_processor_name or model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
model = AutoModel.from_pretrained( model = AutoModel.from_pretrained(
model_args.model_name_or_path, model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
config = model.config config = model.config
......
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
import logging import logging
import os import os
import sys import sys
import warnings
from dataclasses import dataclass, field from dataclasses import dataclass, field
from typing import Optional from typing import Optional
...@@ -142,15 +143,21 @@ class ModelArguments: ...@@ -142,15 +143,21 @@ class ModelArguments:
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
) )
image_processor_name: str = field(default=None, metadata={"help": "Name or path of preprocessor config."}) image_processor_name: str = field(default=None, metadata={"help": "Name or path of preprocessor config."})
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
ignore_mismatched_sizes: bool = field( ignore_mismatched_sizes: bool = field(
default=False, default=False,
metadata={"help": "Will enable to load a pretrained model whose head dimensions are different."}, metadata={"help": "Will enable to load a pretrained model whose head dimensions are different."},
...@@ -176,6 +183,12 @@ def main(): ...@@ -176,6 +183,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_image_classification", model_args, data_args) send_example_telemetry("run_image_classification", model_args, data_args)
...@@ -229,7 +242,7 @@ def main(): ...@@ -229,7 +242,7 @@ def main():
data_args.dataset_config_name, data_args.dataset_config_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
task="image-classification", task="image-classification",
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
data_files = {} data_files = {}
...@@ -276,7 +289,7 @@ def main(): ...@@ -276,7 +289,7 @@ def main():
finetuning_task="image-classification", finetuning_task="image-classification",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
model = AutoModelForImageClassification.from_pretrained( model = AutoModelForImageClassification.from_pretrained(
model_args.model_name_or_path, model_args.model_name_or_path,
...@@ -284,14 +297,14 @@ def main(): ...@@ -284,14 +297,14 @@ def main():
config=config, config=config,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
ignore_mismatched_sizes=model_args.ignore_mismatched_sizes, ignore_mismatched_sizes=model_args.ignore_mismatched_sizes,
) )
image_processor = AutoImageProcessor.from_pretrained( image_processor = AutoImageProcessor.from_pretrained(
model_args.image_processor_name or model_args.model_name_or_path, model_args.image_processor_name or model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# Define torchvision transforms to be applied to each image. # Define torchvision transforms to be applied to each image.
......
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
import logging import logging
import os import os
import sys import sys
import warnings
from dataclasses import dataclass, field from dataclasses import dataclass, field
from typing import Optional from typing import Optional
...@@ -133,15 +134,21 @@ class ModelArguments: ...@@ -133,15 +134,21 @@ class ModelArguments:
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
) )
image_processor_name: str = field(default=None, metadata={"help": "Name or path of preprocessor config."}) image_processor_name: str = field(default=None, metadata={"help": "Name or path of preprocessor config."})
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
mask_ratio: float = field( mask_ratio: float = field(
default=0.75, metadata={"help": "The ratio of the number of masked tokens in the input sequence."} default=0.75, metadata={"help": "The ratio of the number of masked tokens in the input sequence."}
) )
...@@ -175,6 +182,12 @@ def main(): ...@@ -175,6 +182,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_mae", model_args, data_args) send_example_telemetry("run_mae", model_args, data_args)
...@@ -224,7 +237,7 @@ def main(): ...@@ -224,7 +237,7 @@ def main():
data_args.dataset_config_name, data_args.dataset_config_name,
data_files=data_args.data_files, data_files=data_args.data_files,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# If we don't have a validation split, split off a percentage of train as validation. # If we don't have a validation split, split off a percentage of train as validation.
...@@ -242,7 +255,7 @@ def main(): ...@@ -242,7 +255,7 @@ def main():
config_kwargs = { config_kwargs = {
"cache_dir": model_args.cache_dir, "cache_dir": model_args.cache_dir,
"revision": model_args.model_revision, "revision": model_args.model_revision,
"use_auth_token": True if model_args.use_auth_token else None, "token": model_args.token,
} }
if model_args.config_name: if model_args.config_name:
config = ViTMAEConfig.from_pretrained(model_args.config_name, **config_kwargs) config = ViTMAEConfig.from_pretrained(model_args.config_name, **config_kwargs)
...@@ -280,7 +293,7 @@ def main(): ...@@ -280,7 +293,7 @@ def main():
config=config, config=config,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
logger.info("Training new model from scratch") logger.info("Training new model from scratch")
......
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
import logging import logging
import os import os
import sys import sys
import warnings
from dataclasses import dataclass, field from dataclasses import dataclass, field
from typing import Optional from typing import Optional
...@@ -153,15 +154,21 @@ class ModelArguments: ...@@ -153,15 +154,21 @@ class ModelArguments:
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
) )
image_processor_name: str = field(default=None, metadata={"help": "Name or path of preprocessor config."}) image_processor_name: str = field(default=None, metadata={"help": "Name or path of preprocessor config."})
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
image_size: Optional[int] = field( image_size: Optional[int] = field(
default=None, default=None,
metadata={ metadata={
...@@ -239,6 +246,12 @@ def main(): ...@@ -239,6 +246,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_mim", model_args, data_args) send_example_telemetry("run_mim", model_args, data_args)
...@@ -288,7 +301,7 @@ def main(): ...@@ -288,7 +301,7 @@ def main():
data_args.dataset_config_name, data_args.dataset_config_name,
data_files=data_args.data_files, data_files=data_args.data_files,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# If we don't have a validation split, split off a percentage of train as validation. # If we don't have a validation split, split off a percentage of train as validation.
...@@ -305,7 +318,7 @@ def main(): ...@@ -305,7 +318,7 @@ def main():
config_kwargs = { config_kwargs = {
"cache_dir": model_args.cache_dir, "cache_dir": model_args.cache_dir,
"revision": model_args.model_revision, "revision": model_args.model_revision,
"use_auth_token": True if model_args.use_auth_token else None, "token": model_args.token,
} }
if model_args.config_name_or_path: if model_args.config_name_or_path:
config = AutoConfig.from_pretrained(model_args.config_name_or_path, **config_kwargs) config = AutoConfig.from_pretrained(model_args.config_name_or_path, **config_kwargs)
...@@ -357,7 +370,7 @@ def main(): ...@@ -357,7 +370,7 @@ def main():
config=config, config=config,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
logger.info("Training new model from scratch") logger.info("Training new model from scratch")
......
...@@ -25,6 +25,7 @@ import logging ...@@ -25,6 +25,7 @@ import logging
import math import math
import os import os
import sys import sys
import warnings
from dataclasses import dataclass, field from dataclasses import dataclass, field
from itertools import chain from itertools import chain
from typing import Optional from typing import Optional
...@@ -111,15 +112,21 @@ class ModelArguments: ...@@ -111,15 +112,21 @@ class ModelArguments:
default="main", default="main",
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
torch_dtype: Optional[str] = field( torch_dtype: Optional[str] = field(
default=None, default=None,
metadata={ metadata={
...@@ -238,6 +245,12 @@ def main(): ...@@ -238,6 +245,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_clm", model_args, data_args) send_example_telemetry("run_clm", model_args, data_args)
...@@ -300,7 +313,7 @@ def main(): ...@@ -300,7 +313,7 @@ def main():
data_args.dataset_name, data_args.dataset_name,
data_args.dataset_config_name, data_args.dataset_config_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
streaming=data_args.streaming, streaming=data_args.streaming,
) )
if "validation" not in raw_datasets.keys(): if "validation" not in raw_datasets.keys():
...@@ -309,7 +322,7 @@ def main(): ...@@ -309,7 +322,7 @@ def main():
data_args.dataset_config_name, data_args.dataset_config_name,
split=f"train[:{data_args.validation_split_percentage}%]", split=f"train[:{data_args.validation_split_percentage}%]",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
streaming=data_args.streaming, streaming=data_args.streaming,
) )
raw_datasets["train"] = load_dataset( raw_datasets["train"] = load_dataset(
...@@ -317,7 +330,7 @@ def main(): ...@@ -317,7 +330,7 @@ def main():
data_args.dataset_config_name, data_args.dataset_config_name,
split=f"train[{data_args.validation_split_percentage}%:]", split=f"train[{data_args.validation_split_percentage}%:]",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
streaming=data_args.streaming, streaming=data_args.streaming,
) )
else: else:
...@@ -339,7 +352,7 @@ def main(): ...@@ -339,7 +352,7 @@ def main():
extension, extension,
data_files=data_files, data_files=data_files,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
**dataset_args, **dataset_args,
) )
# If no validation data is there, validation_split_percentage will be used to divide the dataset. # If no validation data is there, validation_split_percentage will be used to divide the dataset.
...@@ -349,7 +362,7 @@ def main(): ...@@ -349,7 +362,7 @@ def main():
data_files=data_files, data_files=data_files,
split=f"train[:{data_args.validation_split_percentage}%]", split=f"train[:{data_args.validation_split_percentage}%]",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
**dataset_args, **dataset_args,
) )
raw_datasets["train"] = load_dataset( raw_datasets["train"] = load_dataset(
...@@ -357,7 +370,7 @@ def main(): ...@@ -357,7 +370,7 @@ def main():
data_files=data_files, data_files=data_files,
split=f"train[{data_args.validation_split_percentage}%:]", split=f"train[{data_args.validation_split_percentage}%:]",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
**dataset_args, **dataset_args,
) )
...@@ -373,7 +386,7 @@ def main(): ...@@ -373,7 +386,7 @@ def main():
config_kwargs = { config_kwargs = {
"cache_dir": model_args.cache_dir, "cache_dir": model_args.cache_dir,
"revision": model_args.model_revision, "revision": model_args.model_revision,
"use_auth_token": True if model_args.use_auth_token else None, "token": model_args.token,
} }
if model_args.config_name: if model_args.config_name:
config = AutoConfig.from_pretrained(model_args.config_name, **config_kwargs) config = AutoConfig.from_pretrained(model_args.config_name, **config_kwargs)
...@@ -391,7 +404,7 @@ def main(): ...@@ -391,7 +404,7 @@ def main():
"cache_dir": model_args.cache_dir, "cache_dir": model_args.cache_dir,
"use_fast": model_args.use_fast_tokenizer, "use_fast": model_args.use_fast_tokenizer,
"revision": model_args.model_revision, "revision": model_args.model_revision,
"use_auth_token": True if model_args.use_auth_token else None, "token": model_args.token,
} }
if model_args.tokenizer_name: if model_args.tokenizer_name:
tokenizer = AutoTokenizer.from_pretrained(model_args.tokenizer_name, **tokenizer_kwargs) tokenizer = AutoTokenizer.from_pretrained(model_args.tokenizer_name, **tokenizer_kwargs)
...@@ -415,7 +428,7 @@ def main(): ...@@ -415,7 +428,7 @@ def main():
config=config, config=config,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
torch_dtype=torch_dtype, torch_dtype=torch_dtype,
low_cpu_mem_usage=model_args.low_cpu_mem_usage, low_cpu_mem_usage=model_args.low_cpu_mem_usage,
) )
......
...@@ -25,6 +25,7 @@ import logging ...@@ -25,6 +25,7 @@ import logging
import math import math
import os import os
import sys import sys
import warnings
from dataclasses import dataclass, field from dataclasses import dataclass, field
from itertools import chain from itertools import chain
from typing import Optional from typing import Optional
...@@ -107,15 +108,21 @@ class ModelArguments: ...@@ -107,15 +108,21 @@ class ModelArguments:
default="main", default="main",
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
low_cpu_mem_usage: bool = field( low_cpu_mem_usage: bool = field(
default=False, default=False,
metadata={ metadata={
...@@ -238,6 +245,12 @@ def main(): ...@@ -238,6 +245,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_mlm", model_args, data_args) send_example_telemetry("run_mlm", model_args, data_args)
...@@ -301,7 +314,7 @@ def main(): ...@@ -301,7 +314,7 @@ def main():
data_args.dataset_name, data_args.dataset_name,
data_args.dataset_config_name, data_args.dataset_config_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
streaming=data_args.streaming, streaming=data_args.streaming,
) )
if "validation" not in raw_datasets.keys(): if "validation" not in raw_datasets.keys():
...@@ -310,7 +323,7 @@ def main(): ...@@ -310,7 +323,7 @@ def main():
data_args.dataset_config_name, data_args.dataset_config_name,
split=f"train[:{data_args.validation_split_percentage}%]", split=f"train[:{data_args.validation_split_percentage}%]",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
streaming=data_args.streaming, streaming=data_args.streaming,
) )
raw_datasets["train"] = load_dataset( raw_datasets["train"] = load_dataset(
...@@ -318,7 +331,7 @@ def main(): ...@@ -318,7 +331,7 @@ def main():
data_args.dataset_config_name, data_args.dataset_config_name,
split=f"train[{data_args.validation_split_percentage}%:]", split=f"train[{data_args.validation_split_percentage}%:]",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
streaming=data_args.streaming, streaming=data_args.streaming,
) )
else: else:
...@@ -335,7 +348,7 @@ def main(): ...@@ -335,7 +348,7 @@ def main():
extension, extension,
data_files=data_files, data_files=data_files,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# If no validation data is there, validation_split_percentage will be used to divide the dataset. # If no validation data is there, validation_split_percentage will be used to divide the dataset.
...@@ -345,14 +358,14 @@ def main(): ...@@ -345,14 +358,14 @@ def main():
data_files=data_files, data_files=data_files,
split=f"train[:{data_args.validation_split_percentage}%]", split=f"train[:{data_args.validation_split_percentage}%]",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
raw_datasets["train"] = load_dataset( raw_datasets["train"] = load_dataset(
extension, extension,
data_files=data_files, data_files=data_files,
split=f"train[{data_args.validation_split_percentage}%:]", split=f"train[{data_args.validation_split_percentage}%:]",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
...@@ -366,7 +379,7 @@ def main(): ...@@ -366,7 +379,7 @@ def main():
config_kwargs = { config_kwargs = {
"cache_dir": model_args.cache_dir, "cache_dir": model_args.cache_dir,
"revision": model_args.model_revision, "revision": model_args.model_revision,
"use_auth_token": True if model_args.use_auth_token else None, "token": model_args.token,
} }
if model_args.config_name: if model_args.config_name:
config = AutoConfig.from_pretrained(model_args.config_name, **config_kwargs) config = AutoConfig.from_pretrained(model_args.config_name, **config_kwargs)
...@@ -384,7 +397,7 @@ def main(): ...@@ -384,7 +397,7 @@ def main():
"cache_dir": model_args.cache_dir, "cache_dir": model_args.cache_dir,
"use_fast": model_args.use_fast_tokenizer, "use_fast": model_args.use_fast_tokenizer,
"revision": model_args.model_revision, "revision": model_args.model_revision,
"use_auth_token": True if model_args.use_auth_token else None, "token": model_args.token,
} }
if model_args.tokenizer_name: if model_args.tokenizer_name:
tokenizer = AutoTokenizer.from_pretrained(model_args.tokenizer_name, **tokenizer_kwargs) tokenizer = AutoTokenizer.from_pretrained(model_args.tokenizer_name, **tokenizer_kwargs)
...@@ -403,7 +416,7 @@ def main(): ...@@ -403,7 +416,7 @@ def main():
config=config, config=config,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
low_cpu_mem_usage=model_args.low_cpu_mem_usage, low_cpu_mem_usage=model_args.low_cpu_mem_usage,
) )
else: else:
......
...@@ -22,6 +22,7 @@ import logging ...@@ -22,6 +22,7 @@ import logging
import math import math
import os import os
import sys import sys
import warnings
from dataclasses import dataclass, field from dataclasses import dataclass, field
from itertools import chain from itertools import chain
from typing import Optional from typing import Optional
...@@ -95,15 +96,21 @@ class ModelArguments: ...@@ -95,15 +96,21 @@ class ModelArguments:
default="main", default="main",
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
low_cpu_mem_usage: bool = field( low_cpu_mem_usage: bool = field(
default=False, default=False,
metadata={ metadata={
...@@ -229,6 +236,12 @@ def main(): ...@@ -229,6 +236,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_plm", model_args, data_args) send_example_telemetry("run_plm", model_args, data_args)
...@@ -291,7 +304,7 @@ def main(): ...@@ -291,7 +304,7 @@ def main():
data_args.dataset_name, data_args.dataset_name,
data_args.dataset_config_name, data_args.dataset_config_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
if "validation" not in raw_datasets.keys(): if "validation" not in raw_datasets.keys():
raw_datasets["validation"] = load_dataset( raw_datasets["validation"] = load_dataset(
...@@ -299,14 +312,14 @@ def main(): ...@@ -299,14 +312,14 @@ def main():
data_args.dataset_config_name, data_args.dataset_config_name,
split=f"train[:{data_args.validation_split_percentage}%]", split=f"train[:{data_args.validation_split_percentage}%]",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
raw_datasets["train"] = load_dataset( raw_datasets["train"] = load_dataset(
data_args.dataset_name, data_args.dataset_name,
data_args.dataset_config_name, data_args.dataset_config_name,
split=f"train[{data_args.validation_split_percentage}%:]", split=f"train[{data_args.validation_split_percentage}%:]",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
data_files = {} data_files = {}
...@@ -325,14 +338,14 @@ def main(): ...@@ -325,14 +338,14 @@ def main():
data_files=data_files, data_files=data_files,
split=f"train[:{data_args.validation_split_percentage}%]", split=f"train[:{data_args.validation_split_percentage}%]",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
raw_datasets["train"] = load_dataset( raw_datasets["train"] = load_dataset(
extension, extension,
data_files=data_files, data_files=data_files,
split=f"train[{data_args.validation_split_percentage}%:]", split=f"train[{data_args.validation_split_percentage}%:]",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
...@@ -346,7 +359,7 @@ def main(): ...@@ -346,7 +359,7 @@ def main():
config_kwargs = { config_kwargs = {
"cache_dir": model_args.cache_dir, "cache_dir": model_args.cache_dir,
"revision": model_args.model_revision, "revision": model_args.model_revision,
"use_auth_token": True if model_args.use_auth_token else None, "token": model_args.token,
} }
if model_args.config_name: if model_args.config_name:
config = AutoConfig.from_pretrained(model_args.config_name, **config_kwargs) config = AutoConfig.from_pretrained(model_args.config_name, **config_kwargs)
...@@ -364,7 +377,7 @@ def main(): ...@@ -364,7 +377,7 @@ def main():
"cache_dir": model_args.cache_dir, "cache_dir": model_args.cache_dir,
"use_fast": model_args.use_fast_tokenizer, "use_fast": model_args.use_fast_tokenizer,
"revision": model_args.model_revision, "revision": model_args.model_revision,
"use_auth_token": True if model_args.use_auth_token else None, "token": model_args.token,
} }
if model_args.tokenizer_name: if model_args.tokenizer_name:
tokenizer = AutoTokenizer.from_pretrained(model_args.tokenizer_name, **tokenizer_kwargs) tokenizer = AutoTokenizer.from_pretrained(model_args.tokenizer_name, **tokenizer_kwargs)
...@@ -383,7 +396,7 @@ def main(): ...@@ -383,7 +396,7 @@ def main():
config=config, config=config,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
low_cpu_mem_usage=model_args.low_cpu_mem_usage, low_cpu_mem_usage=model_args.low_cpu_mem_usage,
) )
else: else:
......
...@@ -21,6 +21,7 @@ Fine-tuning the library models for multiple choice. ...@@ -21,6 +21,7 @@ Fine-tuning the library models for multiple choice.
import logging import logging
import os import os
import sys import sys
import warnings
from dataclasses import dataclass, field from dataclasses import dataclass, field
from itertools import chain from itertools import chain
from typing import Optional, Union from typing import Optional, Union
...@@ -79,15 +80,21 @@ class ModelArguments: ...@@ -79,15 +80,21 @@ class ModelArguments:
default="main", default="main",
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
@dataclass @dataclass
...@@ -225,6 +232,12 @@ def main(): ...@@ -225,6 +232,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_swag", model_args, data_args) send_example_telemetry("run_swag", model_args, data_args)
...@@ -292,7 +305,7 @@ def main(): ...@@ -292,7 +305,7 @@ def main():
extension, extension,
data_files=data_files, data_files=data_files,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
# Downloading and loading the swag dataset from the hub. # Downloading and loading the swag dataset from the hub.
...@@ -300,7 +313,7 @@ def main(): ...@@ -300,7 +313,7 @@ def main():
"swag", "swag",
"regular", "regular",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
# https://huggingface.co/docs/datasets/loading_datasets.html. # https://huggingface.co/docs/datasets/loading_datasets.html.
...@@ -314,14 +327,14 @@ def main(): ...@@ -314,14 +327,14 @@ def main():
model_args.config_name if model_args.config_name else model_args.model_name_or_path, model_args.config_name if model_args.config_name else model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
tokenizer = AutoTokenizer.from_pretrained( tokenizer = AutoTokenizer.from_pretrained(
model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_fast=model_args.use_fast_tokenizer, use_fast=model_args.use_fast_tokenizer,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
model = AutoModelForMultipleChoice.from_pretrained( model = AutoModelForMultipleChoice.from_pretrained(
model_args.model_name_or_path, model_args.model_name_or_path,
...@@ -329,7 +342,7 @@ def main(): ...@@ -329,7 +342,7 @@ def main():
config=config, config=config,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# When using your own dataset or a different dataset from swag, you will probably need to change this. # When using your own dataset or a different dataset from swag, you will probably need to change this.
......
...@@ -21,6 +21,7 @@ Fine-tuning the library models for question answering using a slightly adapted v ...@@ -21,6 +21,7 @@ Fine-tuning the library models for question answering using a slightly adapted v
import logging import logging
import os import os
import sys import sys
import warnings
from dataclasses import dataclass, field from dataclasses import dataclass, field
from typing import Optional from typing import Optional
...@@ -79,15 +80,21 @@ class ModelArguments: ...@@ -79,15 +80,21 @@ class ModelArguments:
default="main", default="main",
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
) )
use_auth_token: bool = field( token: str = field(
default=False, default=None,
metadata={ metadata={
"help": ( "help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script " "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"with private models)." "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
) )
}, },
) )
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)
@dataclass @dataclass
...@@ -227,6 +234,12 @@ def main(): ...@@ -227,6 +234,12 @@ def main():
else: else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses() model_args, data_args, training_args = parser.parse_args_into_dataclasses()
if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions. # information sent is the one passed as arguments along with your Python/PyTorch versions.
send_example_telemetry("run_qa", model_args, data_args) send_example_telemetry("run_qa", model_args, data_args)
...@@ -289,7 +302,7 @@ def main(): ...@@ -289,7 +302,7 @@ def main():
data_args.dataset_name, data_args.dataset_name,
data_args.dataset_config_name, data_args.dataset_config_name,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
else: else:
data_files = {} data_files = {}
...@@ -308,7 +321,7 @@ def main(): ...@@ -308,7 +321,7 @@ def main():
data_files=data_files, data_files=data_files,
field="data", field="data",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
# https://huggingface.co/docs/datasets/loading_datasets.html. # https://huggingface.co/docs/datasets/loading_datasets.html.
...@@ -322,14 +335,14 @@ def main(): ...@@ -322,14 +335,14 @@ def main():
model_args.config_name if model_args.config_name else model_args.model_name_or_path, model_args.config_name if model_args.config_name else model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
tokenizer = AutoTokenizer.from_pretrained( tokenizer = AutoTokenizer.from_pretrained(
model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
use_fast=True, use_fast=True,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
model = AutoModelForQuestionAnswering.from_pretrained( model = AutoModelForQuestionAnswering.from_pretrained(
model_args.model_name_or_path, model_args.model_name_or_path,
...@@ -337,7 +350,7 @@ def main(): ...@@ -337,7 +350,7 @@ def main():
config=config, config=config,
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
revision=model_args.model_revision, revision=model_args.model_revision,
token=True if model_args.use_auth_token else None, token=model_args.token,
) )
# Tokenizer check: this script requires a fast tokenizer. # Tokenizer check: this script requires a fast tokenizer.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment