Unverified Commit b6404866 authored by Hilco van der Wilk, committed by GitHub

Update legacy Repository usage in various example files (#29085)

* Update legacy Repository usage in `examples/pytorch/text-classification/run_glue_no_trainer.py`

`Repository` is marked for deprecation here: https://huggingface.co/docs/huggingface_hub/guides/upload#legacy-upload-files-with-git-lfs

* Fix import order

* Replace all example usage of deprecated Repository

* Fix remaining repo call and rename args variable

* Revert removing creation of gitignore files and don't change research examples
parent f1a565a3
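The change is the same in every touched example: instead of cloning the Hub repo locally with `Repository` and pushing through git, the scripts create the repo once with `HfApi.create_repo` and then push the output directory with `HfApi.upload_folder`. A minimal sketch of that pattern, assuming placeholder values for the script arguments (`output_dir`, `hub_token` are stand-ins, not taken from the diff):

```python
from pathlib import Path

from huggingface_hub import HfApi

output_dir = "my-finetuned-model"  # placeholder for args.output_dir
hub_token = None                   # placeholder for args.hub_token (None falls back to the cached login)

# Create the repo (idempotent thanks to exist_ok=True) and keep its canonical repo_id
api = HfApi()
repo_name = Path(output_dir).absolute().name
repo_id = api.create_repo(repo_name, exist_ok=True, token=hub_token).repo_id

# Push everything in output_dir as a single commit; no local git clone is involved
api.upload_folder(
    commit_message="End of training",
    folder_path=output_dir,
    repo_id=repo_id,
    repo_type="model",
    token=hub_token,
)
```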
@@ -36,7 +36,7 @@ from accelerate.logging import get_logger
 from accelerate.utils import set_seed
 from datasets import load_dataset
 from filelock import FileLock
-from huggingface_hub import Repository, create_repo
+from huggingface_hub import HfApi
 from torch.utils.data import DataLoader
 from tqdm.auto import tqdm
@@ -375,9 +375,8 @@ def main():
             if repo_name is None:
                 repo_name = Path(args.output_dir).absolute().name
             # Create repo and retrieve repo_id
-            repo_id = create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id
-            # Clone repo locally
-            repo = Repository(args.output_dir, clone_from=repo_id, token=args.hub_token)
+            api = HfApi()
+            repo_id = api.create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id
             with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
                 if "step_*" not in gitignore:
@@ -755,8 +754,12 @@ def main():
             )
             if accelerator.is_main_process:
                 tokenizer.save_pretrained(args.output_dir)
-                repo.push_to_hub(
-                    commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True
+                api.upload_folder(
+                    commit_message=f"Training in progress epoch {epoch}",
+                    folder_path=args.output_dir,
+                    repo_id=repo_id,
+                    repo_type="model",
+                    token=args.hub_token,
                 )
         if args.checkpointing_steps == "epoch":
@@ -774,7 +777,13 @@ def main():
         if accelerator.is_main_process:
             tokenizer.save_pretrained(args.output_dir)
             if args.push_to_hub:
-                repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True)
+                api.upload_folder(
+                    commit_message="End of training",
+                    folder_path=args.output_dir,
+                    repo_id=repo_id,
+                    repo_type="model",
+                    token=args.hub_token,
+                )
         all_results = {f"eval_{k}": v for k, v in result.items()}
         with open(os.path.join(args.output_dir, "all_results.json"), "w") as f:
...
@@ -28,7 +28,7 @@ from accelerate import Accelerator
 from accelerate.logging import get_logger
 from accelerate.utils import set_seed
 from datasets import load_dataset
-from huggingface_hub import Repository, create_repo
+from huggingface_hub import HfApi
 from torch.utils.data import DataLoader
 from tqdm.auto import tqdm
@@ -255,9 +255,8 @@ def main():
             if repo_name is None:
                 repo_name = Path(args.output_dir).absolute().name
             # Create repo and retrieve repo_id
-            repo_id = create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id
-            # Clone repo locally
-            repo = Repository(args.output_dir, clone_from=repo_id, token=args.hub_token)
+            api = HfApi()
+            repo_id = api.create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id
             with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
                 if "step_*" not in gitignore:
@@ -611,8 +610,12 @@ def main():
             )
             if accelerator.is_main_process:
                 tokenizer.save_pretrained(args.output_dir)
-                repo.push_to_hub(
-                    commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True
+                api.upload_folder(
+                    commit_message=f"Training in progress epoch {epoch}",
+                    folder_path=args.output_dir,
+                    repo_id=repo_id,
+                    repo_type="model",
+                    token=args.hub_token,
                 )
         if args.checkpointing_steps == "epoch":
@@ -633,7 +636,13 @@ def main():
         if accelerator.is_main_process:
             tokenizer.save_pretrained(args.output_dir)
             if args.push_to_hub:
-                repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True)
+                api.upload_folder(
+                    commit_message="End of training",
+                    folder_path=args.output_dir,
+                    repo_id=repo_id,
+                    repo_type="model",
+                    token=args.hub_token,
+                )
     if args.task_name == "mnli":
         # Final evaluation on mismatched validation set
...
@@ -34,7 +34,7 @@ from accelerate import Accelerator
 from accelerate.logging import get_logger
 from accelerate.utils import set_seed
 from datasets import ClassLabel, load_dataset
-from huggingface_hub import Repository, create_repo
+from huggingface_hub import HfApi
 from torch.utils.data import DataLoader
 from tqdm.auto import tqdm
@@ -310,9 +310,8 @@ def main():
             if repo_name is None:
                 repo_name = Path(args.output_dir).absolute().name
             # Create repo and retrieve repo_id
-            repo_id = create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id
-            # Clone repo locally
-            repo = Repository(args.output_dir, clone_from=repo_id, token=args.hub_token)
+            api = HfApi()
+            repo_id = api.create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id
             with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
                 if "step_*" not in gitignore:
@@ -776,8 +775,12 @@ def main():
             )
             if accelerator.is_main_process:
                 tokenizer.save_pretrained(args.output_dir)
-                repo.push_to_hub(
-                    commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True
+                api.upload_folder(
+                    commit_message=f"Training in progress epoch {epoch}",
+                    folder_path=args.output_dir,
+                    repo_id=repo_id,
+                    repo_type="model",
+                    token=args.hub_token,
                )
         if args.checkpointing_steps == "epoch":
@@ -798,7 +801,13 @@ def main():
         if accelerator.is_main_process:
             tokenizer.save_pretrained(args.output_dir)
             if args.push_to_hub:
-                repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True)
+                api.upload_folder(
+                    commit_message="End of training",
+                    folder_path=args.output_dir,
+                    repo_id=repo_id,
+                    repo_type="model",
+                    token=args.hub_token,
+                )
         all_results = {f"eval_{k}": v for k, v in eval_metric.items()}
         if args.with_tracking:
...
@@ -34,7 +34,7 @@ from accelerate import Accelerator
 from accelerate.logging import get_logger
 from accelerate.utils import set_seed
 from datasets import load_dataset
-from huggingface_hub import Repository, create_repo
+from huggingface_hub import HfApi
 from torch.utils.data import DataLoader
 from tqdm.auto import tqdm
@@ -355,9 +355,8 @@ def main():
             if repo_name is None:
                 repo_name = Path(args.output_dir).absolute().name
             # Create repo and retrieve repo_id
-            repo_id = create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id
-            # Clone repo locally
-            repo = Repository(args.output_dir, clone_from=repo_id, token=args.hub_token)
+            api = HfApi()
+            repo_id = api.create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id
             with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
                 if "step_*" not in gitignore:
@@ -743,8 +742,12 @@ def main():
             )
             if accelerator.is_main_process:
                 tokenizer.save_pretrained(args.output_dir)
-                repo.push_to_hub(
-                    commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True
+                api.upload_folder(
+                    commit_message=f"Training in progress epoch {epoch}",
+                    folder_path=args.output_dir,
+                    repo_id=repo_id,
+                    repo_type="model",
+                    token=args.hub_token,
                 )
         if args.checkpointing_steps == "epoch":
@@ -765,7 +768,13 @@ def main():
         if accelerator.is_main_process:
             tokenizer.save_pretrained(args.output_dir)
             if args.push_to_hub:
-                repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True)
+                api.upload_folder(
+                    commit_message="End of training",
+                    folder_path=args.output_dir,
+                    repo_id=repo_id,
+                    repo_type="model",
+                    token=args.hub_token,
+                )
         with open(os.path.join(args.output_dir, "all_results.json"), "w") as f:
             json.dump({"eval_bleu": eval_metric["score"]}, f)
...
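One behavioral difference worth flagging: the old per-epoch `repo.push_to_hub(..., blocking=False)` returned without waiting for the upload, while `upload_folder` blocks until the commit is created on the Hub. If the old fire-and-forget behavior is wanted, recent `huggingface_hub` releases let `HfApi` methods run in the background via `run_as_future=True`. A hypothetical variant (not part of this commit), reusing the placeholder names from the sketch above:

```python
# Hypothetical non-blocking variant of the per-epoch upload; not what this diff does.
epoch = 0  # placeholder for the training-loop variable
future = api.upload_folder(
    commit_message=f"Training in progress epoch {epoch}",
    folder_path=output_dir,
    repo_id=repo_id,
    repo_type="model",
    token=hub_token,
    run_as_future=True,  # returns a concurrent.futures.Future instead of blocking
)
# future.result() waits for the upload to finish if and when that is needed
```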