Unverified commit b97cab7e, authored by Arthur, committed by GitHub
Browse files

Remove-auth-token (#27060)

* don't use `use_auth_token` internally

* let's use token everywhere

* fixup
parent 8f577dca
......@@ -99,7 +99,7 @@ Define a `model_init` function and pass it to the [`Trainer`], as an example:
... config=config,
... cache_dir=model_args.cache_dir,
... revision=model_args.model_revision,
... use_auth_token=True if model_args.use_auth_token else None,
... token=True if model_args.use_auth_token else None,
... )
```
......
......@@ -118,9 +118,9 @@ See example below for a translation from romanian to german:
>>> from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
>>> tokenizer = AutoTokenizer.from_pretrained(
... "facebook/nllb-200-distilled-600M", use_auth_token=True, src_lang="ron_Latn"
... "facebook/nllb-200-distilled-600M", token=True, src_lang="ron_Latn"
... )
>>> model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-600M", use_auth_token=True)
>>> model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-600M", token=True)
>>> article = "Şeful ONU spune că nu există o soluţie militară în Siria"
>>> inputs = tokenizer(article, return_tensors="pt")
......
......@@ -105,7 +105,7 @@ Wandbについては、[object_parameter](https://docs.wandb.ai/guides/sweeps/co
... config=config,
... cache_dir=model_args.cache_dir,
... revision=model_args.model_revision,
... use_auth_token=True if model_args.use_auth_token else None,
... token=True if model_args.use_auth_token else None,
... )
```
......
......@@ -87,7 +87,7 @@ wandb의 경우, 해당 [object_parameter](https://docs.wandb.ai/guides/sweeps/c
... config=config,
... cache_dir=model_args.cache_dir,
... revision=model_args.model_revision,
... use_auth_token=True if model_args.use_auth_token else None,
... token=True if model_args.use_auth_token else None,
... )
```
......
......@@ -1117,7 +1117,7 @@ params = model.init(key2, x)
bytes_output = serialization.to_bytes(params)
repo = Repository("flax-model", clone_from="flax-community/flax-model-dummy", use_auth_token=True)
repo = Repository("flax-model", clone_from="flax-community/flax-model-dummy", token=True)
with repo.commit("My cool Flax model :)"):
with open("flax_model.msgpack", "wb") as f:
f.write(bytes_output)
......
......@@ -250,7 +250,7 @@ def main():
"nielsr/funsd-layoutlmv3",
data_args.dataset_config_name,
cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None,
token=True if model_args.use_auth_token else None,
)
elif data_args.dataset_name == "cord":
# Downloading and loading a dataset from the hub.
......@@ -258,7 +258,7 @@ def main():
"nielsr/cord-layoutlmv3",
data_args.dataset_config_name,
cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None,
token=True if model_args.use_auth_token else None,
)
else:
raise ValueError("This script only supports either FUNSD or CORD out-of-the-box.")
......@@ -313,7 +313,7 @@ def main():
finetuning_task=data_args.task_name,
cache_dir=model_args.cache_dir,
revision=model_args.model_revision,
use_auth_token=True if model_args.use_auth_token else None,
token=True if model_args.use_auth_token else None,
)
processor = AutoProcessor.from_pretrained(
......@@ -321,7 +321,7 @@ def main():
cache_dir=model_args.cache_dir,
use_fast=True,
revision=model_args.model_revision,
use_auth_token=True if model_args.use_auth_token else None,
token=True if model_args.use_auth_token else None,
add_prefix_space=True,
apply_ocr=False,
)
......@@ -332,7 +332,7 @@ def main():
config=config,
cache_dir=model_args.cache_dir,
revision=model_args.model_revision,
use_auth_token=True if model_args.use_auth_token else None,
token=True if model_args.use_auth_token else None,
)
# Set the correspondences label/ID inside the model config
......
......@@ -325,7 +325,7 @@ def main():
config=config,
cache_dir=model_args.cache_dir,
revision=model_args.model_revision,
use_auth_token=True if model_args.use_auth_token else None,
token=True if model_args.use_auth_token else None,
)
else:
logger.info("Training new model from scratch")
......
......@@ -322,14 +322,14 @@ def main():
model_args.config_name if model_args.config_name else model_args.model_name_or_path,
cache_dir=model_args.cache_dir,
revision=model_args.model_revision,
use_auth_token=True if model_args.use_auth_token else None,
token=True if model_args.use_auth_token else None,
)
tokenizer = AutoTokenizer.from_pretrained(
model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
cache_dir=model_args.cache_dir,
use_fast=True,
revision=model_args.model_revision,
use_auth_token=True if model_args.use_auth_token else None,
token=True if model_args.use_auth_token else None,
)
model = QDQBertForQuestionAnswering.from_pretrained(
model_args.model_name_or_path,
......@@ -337,7 +337,7 @@ def main():
config=config,
cache_dir=model_args.cache_dir,
revision=model_args.model_revision,
use_auth_token=True if model_args.use_auth_token else None,
token=True if model_args.use_auth_token else None,
)
# Tokenizer check: this script requires a fast tokenizer.
......
......@@ -65,7 +65,7 @@ def normalize_text(text: str) -> str:
def main(args):
# load dataset
dataset = load_dataset(args.dataset, args.config, split=args.split, use_auth_token=True)
dataset = load_dataset(args.dataset, args.config, split=args.split, token=True)
# for testing: only process the first two examples as a test
# dataset = dataset.select(range(10))
......
......@@ -418,7 +418,7 @@ def main():
data_args.dataset_name,
data_args.dataset_config_name,
split=data_args.train_split_name,
use_auth_token=data_args.use_auth_token,
token=data_args.use_auth_token,
)
if data_args.audio_column_name not in raw_datasets["train"].column_names:
......@@ -443,7 +443,7 @@ def main():
data_args.dataset_name,
data_args.dataset_config_name,
split=data_args.eval_split_name,
use_auth_token=data_args.use_auth_token,
token=data_args.use_auth_token,
)
if data_args.max_eval_samples is not None:
......@@ -481,7 +481,7 @@ def main():
# the tokenizer
# load config
config = AutoConfig.from_pretrained(
model_args.model_name_or_path, cache_dir=model_args.cache_dir, use_auth_token=data_args.use_auth_token
model_args.model_name_or_path, cache_dir=model_args.cache_dir, token=data_args.use_auth_token
)
# 4. Next, if no tokenizer file is defined,
......@@ -532,11 +532,11 @@ def main():
# load feature_extractor and tokenizer
tokenizer = AutoTokenizer.from_pretrained(
tokenizer_name_or_path,
use_auth_token=data_args.use_auth_token,
token=data_args.use_auth_token,
**tokenizer_kwargs,
)
feature_extractor = AutoFeatureExtractor.from_pretrained(
model_args.model_name_or_path, cache_dir=model_args.cache_dir, use_auth_token=data_args.use_auth_token
model_args.model_name_or_path, cache_dir=model_args.cache_dir, token=data_args.use_auth_token
)
# adapt config
......@@ -564,7 +564,7 @@ def main():
model_args.model_name_or_path,
cache_dir=model_args.cache_dir,
config=config,
use_auth_token=data_args.use_auth_token,
token=data_args.use_auth_token,
)
# freeze encoder
......
......@@ -395,7 +395,7 @@ def main():
# so we just need to set the correct target sampling rate and normalize the input
# via the `feature_extractor`
feature_extractor = AutoFeatureExtractor.from_pretrained(
model_args.model_name_or_path, cache_dir=model_args.cache_dir, use_auth_token=data_args.use_auth_token
model_args.model_name_or_path, cache_dir=model_args.cache_dir, token=data_args.use_auth_token
)
if training_args.do_train:
......@@ -403,7 +403,7 @@ def main():
path=data_args.dataset_name,
name=data_args.dataset_config_name,
split=data_args.train_split_name,
use_auth_token=data_args.use_auth_token,
token=data_args.use_auth_token,
streaming=True,
sampling_rate=feature_extractor.sampling_rate,
)
......@@ -431,7 +431,7 @@ def main():
path=data_args.dataset_name,
name=data_args.dataset_config_name,
split=data_args.eval_split_name,
use_auth_token=data_args.use_auth_token,
token=data_args.use_auth_token,
streaming=True,
sampling_rate=feature_extractor.sampling_rate,
)
......@@ -465,7 +465,7 @@ def main():
# 3. Next, let's load the config as we might need it to create
# the tokenizer
config = AutoConfig.from_pretrained(
model_args.model_name_or_path, cache_dir=model_args.cache_dir, use_auth_token=data_args.use_auth_token
model_args.model_name_or_path, cache_dir=model_args.cache_dir, token=data_args.use_auth_token
)
# 4. Now we can instantiate the tokenizer and model
......@@ -481,7 +481,7 @@ def main():
tokenizer = AutoTokenizer.from_pretrained(
tokenizer_name_or_path,
config=config,
use_auth_token=data_args.use_auth_token,
token=data_args.use_auth_token,
)
# adapt config
......@@ -509,7 +509,7 @@ def main():
model_args.model_name_or_path,
cache_dir=model_args.cache_dir,
config=config,
use_auth_token=data_args.use_auth_token,
token=data_args.use_auth_token,
)
# freeze encoder
......
......@@ -292,7 +292,7 @@ def main():
num_labels=num_labels,
cache_dir=model_args.cache_dir,
revision=model_args.model_revision,
use_auth_token=True if model_args.use_auth_token else None,
token=True if model_args.use_auth_token else None,
)
# load tapex tokenizer
tokenizer = TapexTokenizer.from_pretrained(
......@@ -300,7 +300,7 @@ def main():
cache_dir=model_args.cache_dir,
use_fast=model_args.use_fast_tokenizer,
revision=model_args.model_revision,
use_auth_token=True if model_args.use_auth_token else None,
token=True if model_args.use_auth_token else None,
add_prefix_space=True,
)
model = BartForSequenceClassification.from_pretrained(
......@@ -309,7 +309,7 @@ def main():
config=config,
cache_dir=model_args.cache_dir,
revision=model_args.model_revision,
use_auth_token=True if model_args.use_auth_token else None,
token=True if model_args.use_auth_token else None,
)
# Padding strategy
......
......@@ -329,7 +329,7 @@ def main():
model_args.config_name if model_args.config_name else model_args.model_name_or_path,
cache_dir=model_args.cache_dir,
revision=model_args.model_revision,
use_auth_token=True if model_args.use_auth_token else None,
token=True if model_args.use_auth_token else None,
)
# IMPORTANT: the initial BART model's decoding is penalized by no_repeat_ngram_size, and thus
......@@ -344,7 +344,7 @@ def main():
cache_dir=model_args.cache_dir,
use_fast=model_args.use_fast_tokenizer,
revision=model_args.model_revision,
use_auth_token=True if model_args.use_auth_token else None,
token=True if model_args.use_auth_token else None,
add_prefix_space=True,
)
......@@ -355,7 +355,7 @@ def main():
config=config,
cache_dir=model_args.cache_dir,
revision=model_args.model_revision,
use_auth_token=True if model_args.use_auth_token else None,
token=True if model_args.use_auth_token else None,
)
if model.config.decoder_start_token_id is None:
......
......@@ -327,7 +327,7 @@ def main():
model_args.config_name if model_args.config_name else model_args.model_name_or_path,
cache_dir=model_args.cache_dir,
revision=model_args.model_revision,
use_auth_token=True if model_args.use_auth_token else None,
token=True if model_args.use_auth_token else None,
)
# IMPORTANT: the initial BART model's decoding is penalized by no_repeat_ngram_size, and thus
......@@ -342,7 +342,7 @@ def main():
cache_dir=model_args.cache_dir,
use_fast=model_args.use_fast_tokenizer,
revision=model_args.model_revision,
use_auth_token=True if model_args.use_auth_token else None,
token=True if model_args.use_auth_token else None,
add_prefix_space=True,
)
......@@ -353,7 +353,7 @@ def main():
config=config,
cache_dir=model_args.cache_dir,
revision=model_args.model_revision,
use_auth_token=True if model_args.use_auth_token else None,
token=True if model_args.use_auth_token else None,
)
if model.config.decoder_start_token_id is None:
......
......@@ -502,7 +502,7 @@ def main():
data_args.dataset_name,
config_name,
split=data_args.train_split_name,
use_auth_token=data_args.use_auth_token,
token=data_args.use_auth_token,
cache_dir=model_args.cache_dir,
)
......@@ -528,7 +528,7 @@ def main():
data_args.dataset_name,
config_name,
split=data_args.eval_split_name,
use_auth_token=data_args.use_auth_token,
token=data_args.use_auth_token,
cache_dir=model_args.cache_dir,
)
......@@ -540,7 +540,7 @@ def main():
data_args.dataset_name,
config_name,
split=data_args.predict_split_name,
use_auth_token=data_args.use_auth_token,
token=data_args.use_auth_token,
cache_dir=model_args.cache_dir,
)
......@@ -595,7 +595,7 @@ def main():
# 3. Next, let's load the config as we might need it to create
# the tokenizer
config = AutoConfig.from_pretrained(
model_args.model_name_or_path, cache_dir=model_args.cache_dir, use_auth_token=data_args.use_auth_token
model_args.model_name_or_path, cache_dir=model_args.cache_dir, token=data_args.use_auth_token
)
if is_text_target:
......@@ -651,11 +651,11 @@ def main():
if is_text_target:
tokenizer = AutoTokenizer.from_pretrained(
tokenizer_name_or_path,
use_auth_token=data_args.use_auth_token,
token=data_args.use_auth_token,
**tokenizer_kwargs,
)
feature_extractor = AutoFeatureExtractor.from_pretrained(
model_args.model_name_or_path, cache_dir=model_args.cache_dir, use_auth_token=data_args.use_auth_token
model_args.model_name_or_path, cache_dir=model_args.cache_dir, token=data_args.use_auth_token
)
# adapt config
......@@ -694,14 +694,14 @@ def main():
model_args.model_name_or_path,
cache_dir=model_args.cache_dir,
config=config,
use_auth_token=data_args.use_auth_token,
token=data_args.use_auth_token,
)
elif config.is_encoder_decoder:
model = AutoModelForSpeechSeq2Seq.from_pretrained(
model_args.model_name_or_path,
cache_dir=model_args.cache_dir,
config=config,
use_auth_token=data_args.use_auth_token,
token=data_args.use_auth_token,
)
if model.config.decoder_start_token_id is None:
raise ValueError("Make sure that `config.decoder_start_token_id` is correctly defined")
......@@ -710,7 +710,7 @@ def main():
model_args.model_name_or_path,
cache_dir=model_args.cache_dir,
config=config,
use_auth_token=data_args.use_auth_token,
token=data_args.use_auth_token,
)
# freeze encoder
......
......@@ -716,7 +716,7 @@ class GenerationConfig(PushToHubMixin):
proxies=proxies,
resume_download=resume_download,
local_files_only=local_files_only,
use_auth_token=token,
token=token,
user_agent=user_agent,
revision=revision,
subfolder=subfolder,
......
......@@ -179,7 +179,7 @@ class PeftAdapterMixin:
peft_config = PeftConfig.from_pretrained(
peft_model_id,
use_auth_token=token,
token=token,
**adapter_kwargs,
)
......@@ -190,7 +190,7 @@ class PeftAdapterMixin:
self._hf_peft_config_loaded = True
if peft_model_id is not None:
adapter_state_dict = load_peft_weights(peft_model_id, use_auth_token=token, **adapter_kwargs)
adapter_state_dict = load_peft_weights(peft_model_id, token=token, **adapter_kwargs)
# We need to pre-process the state dict to remove unneeded prefixes - for backward compatibility
processed_adapter_state_dict = {}
......
......@@ -94,7 +94,7 @@ class BarkProcessor(ProcessorMixin):
proxies=kwargs.pop("proxies", None),
resume_download=kwargs.pop("resume_download", False),
local_files_only=kwargs.pop("local_files_only", False),
use_auth_token=kwargs.pop("use_auth_token", None),
token=kwargs.pop("use_auth_token", None),
revision=kwargs.pop("revision", None),
)
if speaker_embeddings_path is None:
......@@ -190,7 +190,7 @@ class BarkProcessor(ProcessorMixin):
proxies=kwargs.pop("proxies", None),
resume_download=kwargs.pop("resume_download", False),
local_files_only=kwargs.pop("local_files_only", False),
use_auth_token=kwargs.pop("use_auth_token", None),
token=kwargs.pop("use_auth_token", None),
revision=kwargs.pop("revision", None),
)
if path is None:
......
......@@ -226,7 +226,7 @@ class Tool:
resolved_config_file = cached_file(
repo_id,
TOOL_CONFIG_FILE,
use_auth_token=token,
token=token,
**hub_kwargs,
_raise_exceptions_for_missing_entries=False,
_raise_exceptions_for_connection_errors=False,
......@@ -236,7 +236,7 @@ class Tool:
resolved_config_file = cached_file(
repo_id,
CONFIG_NAME,
use_auth_token=token,
token=token,
**hub_kwargs,
_raise_exceptions_for_missing_entries=False,
_raise_exceptions_for_connection_errors=False,
......@@ -259,7 +259,7 @@ class Tool:
custom_tool = config
tool_class = custom_tool["tool_class"]
tool_class = get_class_from_dynamic_module(tool_class, repo_id, use_auth_token=token, **hub_kwargs)
tool_class = get_class_from_dynamic_module(tool_class, repo_id, token=token, **hub_kwargs)
if len(tool_class.name) == 0:
tool_class.name = custom_tool["name"]
......
......@@ -308,9 +308,7 @@ class ProcessorPushToHubTester(unittest.TestCase):
def test_push_to_hub(self):
processor = Wav2Vec2Processor.from_pretrained(SAMPLE_PROCESSOR_CONFIG_DIR)
with tempfile.TemporaryDirectory() as tmp_dir:
processor.save_pretrained(
os.path.join(tmp_dir, "test-processor"), push_to_hub=True, use_auth_token=self._token
)
processor.save_pretrained(os.path.join(tmp_dir, "test-processor"), push_to_hub=True, token=self._token)
new_processor = Wav2Vec2Processor.from_pretrained(f"{USER}/test-processor")
for k, v in processor.feature_extractor.__dict__.items():
......@@ -324,7 +322,7 @@ class ProcessorPushToHubTester(unittest.TestCase):
processor.save_pretrained(
os.path.join(tmp_dir, "test-processor-org"),
push_to_hub=True,
use_auth_token=self._token,
token=self._token,
organization="valid_org",
)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment