"examples/vscode:/vscode.git/clone" did not exist on "e808225f1859fbf47e778b73c64e82669d1c1e94"
Unverified commit 05e72aa0, authored by Sylvain Gugger and committed by GitHub
Browse files

Adapt repository creation to latest hf_hub (#21158)

* Adapt repository creation to latest hf_hub

* Update all examples

* Fix other tests, add Flax examples

* Address review comments
parent 32525428
...@@ -37,7 +37,7 @@ import transformers ...@@ -37,7 +37,7 @@ import transformers
from accelerate import Accelerator from accelerate import Accelerator
from accelerate.logging import get_logger from accelerate.logging import get_logger
from accelerate.utils import set_seed from accelerate.utils import set_seed
from huggingface_hub import Repository from huggingface_hub import Repository, create_repo
from transformers import ( from transformers import (
CONFIG_MAPPING, CONFIG_MAPPING,
MODEL_MAPPING, MODEL_MAPPING,
...@@ -298,7 +298,8 @@ def main(): ...@@ -298,7 +298,8 @@ def main():
repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token) repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
else: else:
repo_name = args.hub_model_id repo_name = args.hub_model_id
repo = Repository(args.output_dir, clone_from=repo_name) create_repo(repo_name, exist_ok=True, token=args.hub_token)
repo = Repository(args.output_dir, clone_from=repo_name, token=args.hub_token)
with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore: with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
if "step_*" not in gitignore: if "step_*" not in gitignore:
......
...@@ -38,7 +38,7 @@ import transformers ...@@ -38,7 +38,7 @@ import transformers
from accelerate import Accelerator from accelerate import Accelerator
from accelerate.logging import get_logger from accelerate.logging import get_logger
from accelerate.utils import set_seed from accelerate.utils import set_seed
from huggingface_hub import Repository from huggingface_hub import Repository, create_repo
from transformers import ( from transformers import (
CONFIG_MAPPING, CONFIG_MAPPING,
MODEL_MAPPING, MODEL_MAPPING,
...@@ -345,7 +345,8 @@ def main(): ...@@ -345,7 +345,8 @@ def main():
repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token) repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
else: else:
repo_name = args.hub_model_id repo_name = args.hub_model_id
repo = Repository(args.output_dir, clone_from=repo_name) create_repo(repo_name, exist_ok=True, token=args.hub_token)
repo = Repository(args.output_dir, clone_from=repo_name, token=args.hub_token)
with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore: with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
if "step_*" not in gitignore: if "step_*" not in gitignore:
......
...@@ -117,7 +117,7 @@ _deps = [ ...@@ -117,7 +117,7 @@ _deps = [
"fugashi>=1.0", "fugashi>=1.0",
"GitPython<3.1.19", "GitPython<3.1.19",
"hf-doc-builder>=0.3.0", "hf-doc-builder>=0.3.0",
"huggingface-hub>=0.10.0,<1.0", "huggingface-hub>=0.11.0,<1.0",
"importlib_metadata", "importlib_metadata",
"ipadic>=1.0.0,<2.0", "ipadic>=1.0.0,<2.0",
"isort>=5.5.4", "isort>=5.5.4",
......
...@@ -23,7 +23,7 @@ deps = { ...@@ -23,7 +23,7 @@ deps = {
"fugashi": "fugashi>=1.0", "fugashi": "fugashi>=1.0",
"GitPython": "GitPython<3.1.19", "GitPython": "GitPython<3.1.19",
"hf-doc-builder": "hf-doc-builder>=0.3.0", "hf-doc-builder": "hf-doc-builder>=0.3.0",
"huggingface-hub": "huggingface-hub>=0.10.0,<1.0", "huggingface-hub": "huggingface-hub>=0.11.0,<1.0",
"importlib_metadata": "importlib_metadata", "importlib_metadata": "importlib_metadata",
"ipadic": "ipadic>=1.0.0,<2.0", "ipadic": "ipadic>=1.0.0,<2.0",
"isort": "isort>=5.5.4", "isort": "isort>=5.5.4",
......
...@@ -340,11 +340,7 @@ class PushToHubCallback(Callback): ...@@ -340,11 +340,7 @@ class PushToHubCallback(Callback):
self.output_dir = output_dir self.output_dir = output_dir
self.hub_model_id = hub_model_id self.hub_model_id = hub_model_id
create_repo(self.hub_model_id, exist_ok=True) create_repo(self.hub_model_id, exist_ok=True)
self.repo = Repository( self.repo = Repository(str(self.output_dir), clone_from=self.hub_model_id, token=hub_token)
str(self.output_dir),
clone_from=self.hub_model_id,
use_auth_token=hub_token if hub_token else True,
)
self.tokenizer = tokenizer self.tokenizer = tokenizer
self.last_job = None self.last_job = None
......
...@@ -60,7 +60,7 @@ from torch import nn ...@@ -60,7 +60,7 @@ from torch import nn
from torch.utils.data import DataLoader, Dataset, RandomSampler, SequentialSampler from torch.utils.data import DataLoader, Dataset, RandomSampler, SequentialSampler
from torch.utils.data.distributed import DistributedSampler from torch.utils.data.distributed import DistributedSampler
from huggingface_hub import Repository from huggingface_hub import Repository, create_repo
from . import __version__ from . import __version__
from .configuration_utils import PretrainedConfig from .configuration_utils import PretrainedConfig
...@@ -3315,7 +3315,6 @@ class Trainer: ...@@ -3315,7 +3315,6 @@ class Trainer:
""" """
if not self.is_world_process_zero(): if not self.is_world_process_zero():
return return
use_auth_token = True if self.args.hub_token is None else self.args.hub_token
if self.args.hub_model_id is None: if self.args.hub_model_id is None:
repo_name = Path(self.args.output_dir).absolute().name repo_name = Path(self.args.output_dir).absolute().name
else: else:
...@@ -3323,22 +3322,15 @@ class Trainer: ...@@ -3323,22 +3322,15 @@ class Trainer:
if "/" not in repo_name: if "/" not in repo_name:
repo_name = get_full_repo_name(repo_name, token=self.args.hub_token) repo_name = get_full_repo_name(repo_name, token=self.args.hub_token)
# Make sure the repo exists.
create_repo(repo_name, token=self.args.hub_token, private=self.args.hub_private_repo, exist_ok=True)
try: try:
self.repo = Repository( self.repo = Repository(self.args.output_dir, clone_from=repo_name, token=self.args.hub_token)
self.args.output_dir,
clone_from=repo_name,
use_auth_token=use_auth_token,
private=self.args.hub_private_repo,
)
except EnvironmentError: except EnvironmentError:
if self.args.overwrite_output_dir and at_init: if self.args.overwrite_output_dir and at_init:
# Try again after wiping output_dir # Try again after wiping output_dir
shutil.rmtree(self.args.output_dir) shutil.rmtree(self.args.output_dir)
self.repo = Repository( self.repo = Repository(self.args.output_dir, clone_from=repo_name, token=self.args.hub_token)
self.args.output_dir,
clone_from=repo_name,
use_auth_token=use_auth_token,
)
else: else:
raise raise
......
...@@ -21,7 +21,7 @@ import unittest ...@@ -21,7 +21,7 @@ import unittest
from pathlib import Path from pathlib import Path
from shutil import copyfile from shutil import copyfile
from huggingface_hub import HfFolder, Repository, delete_repo, set_access_token from huggingface_hub import HfFolder, Repository, create_repo, delete_repo, set_access_token
from requests.exceptions import HTTPError from requests.exceptions import HTTPError
from transformers import ( from transformers import (
CONFIG_MAPPING, CONFIG_MAPPING,
...@@ -282,7 +282,8 @@ class ProcessorPushToHubTester(unittest.TestCase): ...@@ -282,7 +282,8 @@ class ProcessorPushToHubTester(unittest.TestCase):
processor = CustomProcessor(feature_extractor, tokenizer) processor = CustomProcessor(feature_extractor, tokenizer)
with tempfile.TemporaryDirectory() as tmp_dir: with tempfile.TemporaryDirectory() as tmp_dir:
repo = Repository(tmp_dir, clone_from=f"{USER}/test-dynamic-processor", use_auth_token=self._token) create_repo(f"{USER}/test-dynamic-processor", token=self._token)
repo = Repository(tmp_dir, clone_from=f"{USER}/test-dynamic-processor", token=self._token)
processor.save_pretrained(tmp_dir) processor.save_pretrained(tmp_dir)
# This has added the proper auto_map field to the feature extractor config # This has added the proper auto_map field to the feature extractor config
......
...@@ -29,7 +29,7 @@ from unittest import skipIf ...@@ -29,7 +29,7 @@ from unittest import skipIf
import datasets import datasets
import numpy as np import numpy as np
from huggingface_hub import HfFolder, Repository, delete_repo, set_access_token from huggingface_hub import HfFolder, Repository, create_repo, delete_repo, set_access_token
from requests.exceptions import HTTPError from requests.exceptions import HTTPError
from transformers import ( from transformers import (
FEATURE_EXTRACTOR_MAPPING, FEATURE_EXTRACTOR_MAPPING,
...@@ -1023,7 +1023,8 @@ class DynamicPipelineTester(unittest.TestCase): ...@@ -1023,7 +1023,8 @@ class DynamicPipelineTester(unittest.TestCase):
model = BertForSequenceClassification(config).eval() model = BertForSequenceClassification(config).eval()
with tempfile.TemporaryDirectory() as tmp_dir: with tempfile.TemporaryDirectory() as tmp_dir:
repo = Repository(tmp_dir, clone_from=f"{USER}/test-dynamic-pipeline", use_auth_token=self._token) create_repo(f"{USER}/test-dynamic-pipeline", token=self._token)
repo = Repository(tmp_dir, clone_from=f"{USER}/test-dynamic-pipeline", token=self._token)
vocab_file = os.path.join(tmp_dir, "vocab.txt") vocab_file = os.path.join(tmp_dir, "vocab.txt")
with open(vocab_file, "w", encoding="utf-8") as vocab_writer: with open(vocab_file, "w", encoding="utf-8") as vocab_writer:
......
...@@ -2079,7 +2079,7 @@ class TrainerIntegrationWithHubTester(unittest.TestCase): ...@@ -2079,7 +2079,7 @@ class TrainerIntegrationWithHubTester(unittest.TestCase):
time.sleep(0.5) time.sleep(0.5)
with tempfile.TemporaryDirectory() as tmp_dir: with tempfile.TemporaryDirectory() as tmp_dir:
_ = Repository(tmp_dir, clone_from=f"{USER}/test-trainer-epoch", use_auth_token=self._token) _ = Repository(tmp_dir, clone_from=f"{USER}/test-trainer-epoch", token=self._token)
commits = self.get_commit_history(tmp_dir) commits = self.get_commit_history(tmp_dir)
self.assertIn("initial commit", commits) self.assertIn("initial commit", commits)
# We can't test that epoch 2 and 3 are in the commits without being flaky as those might be skipped if # We can't test that epoch 2 and 3 are in the commits without being flaky as those might be skipped if
...@@ -2106,7 +2106,7 @@ class TrainerIntegrationWithHubTester(unittest.TestCase): ...@@ -2106,7 +2106,7 @@ class TrainerIntegrationWithHubTester(unittest.TestCase):
time.sleep(0.5) time.sleep(0.5)
with tempfile.TemporaryDirectory() as tmp_dir: with tempfile.TemporaryDirectory() as tmp_dir:
_ = Repository(tmp_dir, clone_from=f"{USER}/test-trainer-step", use_auth_token=self._token) _ = Repository(tmp_dir, clone_from=f"{USER}/test-trainer-step", token=self._token)
commits = self.get_commit_history(tmp_dir) commits = self.get_commit_history(tmp_dir)
self.assertIn("initial commit", commits) self.assertIn("initial commit", commits)
# We can't test that epoch 2 and 3 are in the commits without being flaky as those might be skipped if # We can't test that epoch 2 and 3 are in the commits without being flaky as those might be skipped if
......
...@@ -214,9 +214,7 @@ def update_metadata(token, commit_sha): ...@@ -214,9 +214,7 @@ def update_metadata(token, commit_sha):
Update the metadata for the Transformers repo. Update the metadata for the Transformers repo.
""" """
with tempfile.TemporaryDirectory() as tmp_dir: with tempfile.TemporaryDirectory() as tmp_dir:
repo = Repository( repo = Repository(tmp_dir, clone_from="huggingface/transformers-metadata", repo_type="dataset", token=token)
tmp_dir, clone_from="huggingface/transformers-metadata", repo_type="dataset", use_auth_token=token
)
frameworks_table = get_frameworks_table() frameworks_table = get_frameworks_table()
frameworks_dataset = Dataset.from_pandas(frameworks_table) frameworks_dataset = Dataset.from_pandas(frameworks_table)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment