Unverified Commit 0eaef0c7 authored by Hafedh's avatar Hafedh Committed by GitHub
Browse files

add `push_to_hub` to pipeline (#29172)



* add `push_to_hub` to pipeline

* fix docs

* format with ruff

* update save_pretrained

* update save_pretrained

* remove unnecessary comment

* switch to push_to_hub method in DynamicPipelineTester

* remove unused imports

* update docs for add_new_pipeline

* fix docs for add_new_pipeline

* add comment

* fix Italian docs

* changes to token retrieval for pipelines

* Update src/transformers/pipelines/base.py
Co-authored-by: default avataramyeroberts <22614925+amyeroberts@users.noreply.github.com>

---------
Co-authored-by: default avataramyeroberts <22614925+amyeroberts@users.noreply.github.com>
parent 60dea593
...@@ -208,14 +208,10 @@ from transformers import pipeline ...@@ -208,14 +208,10 @@ from transformers import pipeline
classifier = pipeline("pair-classification", model="sgugger/finetuned-bert-mrpc") classifier = pipeline("pair-classification", model="sgugger/finetuned-bert-mrpc")
``` ```
Dann können wir sie auf dem Hub mit der Methode `save_pretrained` in einem `Repository` freigeben: Dann können wir sie auf dem Hub mit der Methode `push_to_hub` freigeben:
```py ```py
from huggingface_hub import Repository classifier.push_to_hub("test-dynamic-pipeline")
repo = Repository("test-dynamic-pipeline", clone_from="{your_username}/test-dynamic-pipeline")
classifier.save_pretrained("test-dynamic-pipeline")
repo.push_to_hub()
``` ```
Dadurch wird die Datei, in der Sie `PairClassificationPipeline` definiert haben, in den Ordner `"test-dynamic-pipeline"` kopiert, Dadurch wird die Datei, in der Sie `PairClassificationPipeline` definiert haben, in den Ordner `"test-dynamic-pipeline"` kopiert,
......
...@@ -208,14 +208,10 @@ from transformers import pipeline ...@@ -208,14 +208,10 @@ from transformers import pipeline
classifier = pipeline("pair-classification", model="sgugger/finetuned-bert-mrpc") classifier = pipeline("pair-classification", model="sgugger/finetuned-bert-mrpc")
``` ```
Then we can share it on the Hub by using the `save_pretrained` method in a `Repository`: Then we can share it on the Hub by using the `push_to_hub` method:
```py ```py
from huggingface_hub import Repository classifier.push_to_hub("test-dynamic-pipeline")
repo = Repository("test-dynamic-pipeline", clone_from="{your_username}/test-dynamic-pipeline")
classifier.save_pretrained("test-dynamic-pipeline")
repo.push_to_hub()
``` ```
This will copy the file where you defined `PairClassificationPipeline` inside the folder `"test-dynamic-pipeline"`, This will copy the file where you defined `PairClassificationPipeline` inside the folder `"test-dynamic-pipeline"`,
......
...@@ -212,14 +212,10 @@ from transformers import pipeline ...@@ -212,14 +212,10 @@ from transformers import pipeline
classifier = pipeline("pair-classification", model="sgugger/finetuned-bert-mrpc") classifier = pipeline("pair-classification", model="sgugger/finetuned-bert-mrpc")
``` ```
Ahora podemos compartirlo en el Hub usando el método `save_pretrained` (guardar pre-entrenado) en un `Repository`: Ahora podemos compartirlo en el Hub usando el método `push_to_hub`:
```py ```py
from huggingface_hub import Repository classifier.push_to_hub("test-dynamic-pipeline")
repo = Repository("test-dynamic-pipeline", clone_from="{your_username}/test-dynamic-pipeline")
classifier.save_pretrained("test-dynamic-pipeline")
repo.push_to_hub()
``` ```
Esto copiará el archivo donde definiste `PairClassificationPipeline` dentro de la carpeta `"test-dynamic-pipeline"`, Esto copiará el archivo donde definiste `PairClassificationPipeline` dentro de la carpeta `"test-dynamic-pipeline"`,
......
...@@ -202,14 +202,10 @@ from transformers import pipeline ...@@ -202,14 +202,10 @@ from transformers import pipeline
classifier = pipeline("pair-classification", model="sgugger/finetuned-bert-mrpc") classifier = pipeline("pair-classification", model="sgugger/finetuned-bert-mrpc")
``` ```
Successivamente possiamo condividerlo sull'Hub usando il metodo `save_pretrained` in un `Repository`: Successivamente possiamo condividerlo sull'Hub usando il metodo `push_to_hub`:
```py ```py
from huggingface_hub import Repository classifier.push_to_hub("test-dynamic-pipeline")
repo = Repository("test-dynamic-pipeline", clone_from="{your_username}/test-dynamic-pipeline")
classifier.save_pretrained("test-dynamic-pipeline")
repo.push_to_hub()
``` ```
Questo codice copierà il file dove è stato definito `PairClassificationPipeline` all'interno della cartella `"test-dynamic-pipeline"`, Questo codice copierà il file dove è stato definito `PairClassificationPipeline` all'interno della cartella `"test-dynamic-pipeline"`,
......
...@@ -203,14 +203,10 @@ from transformers import pipeline ...@@ -203,14 +203,10 @@ from transformers import pipeline
classifier = pipeline("pair-classification", model="sgugger/finetuned-bert-mrpc") classifier = pipeline("pair-classification", model="sgugger/finetuned-bert-mrpc")
``` ```
그런 다음 `Repository`의 `save_pretrained` 메소드를 사용하여 허브에 공유할 수 있습니다: 그런 다음 `push_to_hub` 메소드를 사용하여 허브에 공유할 수 있습니다:
```py ```py
from huggingface_hub import Repository classifier.push_to_hub("test-dynamic-pipeline")
repo = Repository("test-dynamic-pipeline", clone_from="{your_username}/test-dynamic-pipeline")
classifier.save_pretrained("test-dynamic-pipeline")
repo.push_to_hub()
``` ```
이렇게 하면 "test-dynamic-pipeline" 폴더 내에 `PairClassificationPipeline`을 정의한 파일이 복사되며, 파이프라인의 모델과 토크나이저도 저장한 후, `{your_username}/test-dynamic-pipeline` 저장소에 있는 모든 것을 푸시합니다. 이렇게 하면 "test-dynamic-pipeline" 폴더 내에 `PairClassificationPipeline`을 정의한 파일이 복사되며, 파이프라인의 모델과 토크나이저도 저장한 후, `{your_username}/test-dynamic-pipeline` 저장소에 있는 모든 것을 푸시합니다.
......
...@@ -36,7 +36,9 @@ from ..models.auto.configuration_auto import AutoConfig ...@@ -36,7 +36,9 @@ from ..models.auto.configuration_auto import AutoConfig
from ..tokenization_utils import PreTrainedTokenizer from ..tokenization_utils import PreTrainedTokenizer
from ..utils import ( from ..utils import (
ModelOutput, ModelOutput,
PushToHubMixin,
add_end_docstrings, add_end_docstrings,
copy_func,
infer_framework, infer_framework,
is_tf_available, is_tf_available,
is_torch_available, is_torch_available,
...@@ -781,7 +783,7 @@ if is_torch_available(): ...@@ -781,7 +783,7 @@ if is_torch_available():
@add_end_docstrings(build_pipeline_init_args(has_tokenizer=True, has_feature_extractor=True, has_image_processor=True)) @add_end_docstrings(build_pipeline_init_args(has_tokenizer=True, has_feature_extractor=True, has_image_processor=True))
class Pipeline(_ScikitCompat): class Pipeline(_ScikitCompat, PushToHubMixin):
""" """
The Pipeline class is the class from which all pipelines inherit. Refer to this class for methods shared across The Pipeline class is the class from which all pipelines inherit. Refer to this class for methods shared across
different pipelines. different pipelines.
...@@ -908,16 +910,36 @@ class Pipeline(_ScikitCompat): ...@@ -908,16 +910,36 @@ class Pipeline(_ScikitCompat):
# then we should keep working # then we should keep working
self.image_processor = self.feature_extractor self.image_processor = self.feature_extractor
def save_pretrained(self, save_directory: str, safe_serialization: bool = True): def save_pretrained(
self,
save_directory: Union[str, os.PathLike],
safe_serialization: bool = True,
**kwargs,
):
""" """
Save the pipeline's model and tokenizer. Save the pipeline's model and tokenizer.
Args: Args:
save_directory (`str`): save_directory (`str` or `os.PathLike`):
A path to the directory where the files will be saved. It will be created if it doesn't exist. A path to the directory where the files will be saved. It will be created if it doesn't exist.
safe_serialization (`bool`): safe_serialization (`bool`):
Whether to save the model using `safetensors` or the traditional way for PyTorch or Tensorflow. Whether to save the model using `safetensors` or the traditional way for PyTorch or Tensorflow.
kwargs (`Dict[str, Any]`, *optional*):
Additional keyword arguments passed along to the [`~utils.PushToHubMixin.push_to_hub`] method.
""" """
use_auth_token = kwargs.pop("use_auth_token", None)
if use_auth_token is not None:
warnings.warn(
"The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.",
FutureWarning,
)
if kwargs.get("token", None) is not None:
raise ValueError(
"`token` and `use_auth_token` are both specified. Please set only the argument `token`."
)
kwargs["token"] = use_auth_token
if os.path.isfile(save_directory): if os.path.isfile(save_directory):
logger.error(f"Provided path ({save_directory}) should be a directory, not a file") logger.error(f"Provided path ({save_directory}) should be a directory, not a file")
return return
...@@ -944,16 +966,17 @@ class Pipeline(_ScikitCompat): ...@@ -944,16 +966,17 @@ class Pipeline(_ScikitCompat):
# Save the pipeline custom code # Save the pipeline custom code
custom_object_save(self, save_directory) custom_object_save(self, save_directory)
self.model.save_pretrained(save_directory, safe_serialization=safe_serialization) kwargs["safe_serialization"] = safe_serialization
self.model.save_pretrained(save_directory, **kwargs)
if self.tokenizer is not None: if self.tokenizer is not None:
self.tokenizer.save_pretrained(save_directory) self.tokenizer.save_pretrained(save_directory, **kwargs)
if self.feature_extractor is not None: if self.feature_extractor is not None:
self.feature_extractor.save_pretrained(save_directory) self.feature_extractor.save_pretrained(save_directory, **kwargs)
if self.image_processor is not None: if self.image_processor is not None:
self.image_processor.save_pretrained(save_directory) self.image_processor.save_pretrained(save_directory, **kwargs)
if self.modelcard is not None: if self.modelcard is not None:
self.modelcard.save_pretrained(save_directory) self.modelcard.save_pretrained(save_directory)
...@@ -1234,6 +1257,13 @@ class Pipeline(_ScikitCompat): ...@@ -1234,6 +1257,13 @@ class Pipeline(_ScikitCompat):
yield self.run_single(input_, preprocess_params, forward_params, postprocess_params) yield self.run_single(input_, preprocess_params, forward_params, postprocess_params)
Pipeline.push_to_hub = copy_func(Pipeline.push_to_hub)
if Pipeline.push_to_hub.__doc__ is not None:
Pipeline.push_to_hub.__doc__ = Pipeline.push_to_hub.__doc__.format(
object="pipe", object_class="pipeline", object_files="pipeline file"
).replace(".from_pretrained", "")
class ChunkPipeline(Pipeline): class ChunkPipeline(Pipeline):
def run_single(self, inputs, preprocess_params, forward_params, postprocess_params): def run_single(self, inputs, preprocess_params, forward_params, postprocess_params):
all_outputs = [] all_outputs = []
......
...@@ -22,7 +22,7 @@ from pathlib import Path ...@@ -22,7 +22,7 @@ from pathlib import Path
import datasets import datasets
import numpy as np import numpy as np
from huggingface_hub import HfFolder, Repository, create_repo, delete_repo from huggingface_hub import HfFolder, delete_repo
from requests.exceptions import HTTPError from requests.exceptions import HTTPError
from transformers import ( from transformers import (
...@@ -846,9 +846,6 @@ class DynamicPipelineTester(unittest.TestCase): ...@@ -846,9 +846,6 @@ class DynamicPipelineTester(unittest.TestCase):
model = BertForSequenceClassification(config).eval() model = BertForSequenceClassification(config).eval()
with tempfile.TemporaryDirectory() as tmp_dir: with tempfile.TemporaryDirectory() as tmp_dir:
create_repo(f"{USER}/test-dynamic-pipeline", token=self._token)
repo = Repository(tmp_dir, clone_from=f"{USER}/test-dynamic-pipeline", token=self._token)
vocab_file = os.path.join(tmp_dir, "vocab.txt") vocab_file = os.path.join(tmp_dir, "vocab.txt")
with open(vocab_file, "w", encoding="utf-8") as vocab_writer: with open(vocab_file, "w", encoding="utf-8") as vocab_writer:
vocab_writer.write("".join([x + "\n" for x in self.vocab_tokens])) vocab_writer.write("".join([x + "\n" for x in self.vocab_tokens]))
...@@ -860,7 +857,7 @@ class DynamicPipelineTester(unittest.TestCase): ...@@ -860,7 +857,7 @@ class DynamicPipelineTester(unittest.TestCase):
del PIPELINE_REGISTRY.supported_tasks["pair-classification"] del PIPELINE_REGISTRY.supported_tasks["pair-classification"]
classifier.save_pretrained(tmp_dir) classifier.save_pretrained(tmp_dir)
# checks # checks if the configuration has been added after calling the save_pretrained method
self.assertDictEqual( self.assertDictEqual(
classifier.model.config.custom_pipelines, classifier.model.config.custom_pipelines,
{ {
...@@ -871,8 +868,8 @@ class DynamicPipelineTester(unittest.TestCase): ...@@ -871,8 +868,8 @@ class DynamicPipelineTester(unittest.TestCase):
} }
}, },
) )
# use push_to_hub method to push the pipeline
repo.push_to_hub() classifier.push_to_hub(f"{USER}/test-dynamic-pipeline", token=self._token)
# Fails if the user forgets to pass along `trust_remote_code=True` # Fails if the user forgets to pass along `trust_remote_code=True`
with self.assertRaises(ValueError): with self.assertRaises(ValueError):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment