Unverified Commit c11ac785 authored by Hafedh's avatar Hafedh Committed by GitHub
Browse files

fix for custom pipeline configuration (#29004)

* fix for custom pipeline configuration

* fix for custom pipelines

* remove extra exception

* added test for custom pipelines extra tag

* format with ruff

* limit extra tag for first time only

* format with ruff

* improve tests for custom pipelines
parent 7b4b4564
...@@ -32,6 +32,7 @@ from .utils import ( ...@@ -32,6 +32,7 @@ from .utils import (
CONFIG_NAME, CONFIG_NAME,
PushToHubMixin, PushToHubMixin,
add_model_info_to_auto_map, add_model_info_to_auto_map,
add_model_info_to_custom_pipelines,
cached_file, cached_file,
copy_func, copy_func,
download_url, download_url,
...@@ -736,6 +737,10 @@ class PretrainedConfig(PushToHubMixin): ...@@ -736,6 +737,10 @@ class PretrainedConfig(PushToHubMixin):
config_dict["auto_map"] = add_model_info_to_auto_map( config_dict["auto_map"] = add_model_info_to_auto_map(
config_dict["auto_map"], pretrained_model_name_or_path config_dict["auto_map"], pretrained_model_name_or_path
) )
if "custom_pipelines" in config_dict and not is_local:
config_dict["custom_pipelines"] = add_model_info_to_custom_pipelines(
config_dict["custom_pipelines"], pretrained_model_name_or_path
)
return config_dict, kwargs return config_dict, kwargs
@classmethod @classmethod
......
...@@ -31,6 +31,7 @@ from .utils import ( ...@@ -31,6 +31,7 @@ from .utils import (
PushToHubMixin, PushToHubMixin,
TensorType, TensorType,
add_model_info_to_auto_map, add_model_info_to_auto_map,
add_model_info_to_custom_pipelines,
cached_file, cached_file,
copy_func, copy_func,
download_url, download_url,
...@@ -539,10 +540,15 @@ class FeatureExtractionMixin(PushToHubMixin): ...@@ -539,10 +540,15 @@ class FeatureExtractionMixin(PushToHubMixin):
f"loading configuration file {feature_extractor_file} from cache at {resolved_feature_extractor_file}" f"loading configuration file {feature_extractor_file} from cache at {resolved_feature_extractor_file}"
) )
if "auto_map" in feature_extractor_dict and not is_local: if not is_local:
feature_extractor_dict["auto_map"] = add_model_info_to_auto_map( if "auto_map" in feature_extractor_dict:
feature_extractor_dict["auto_map"], pretrained_model_name_or_path feature_extractor_dict["auto_map"] = add_model_info_to_auto_map(
) feature_extractor_dict["auto_map"], pretrained_model_name_or_path
)
if "custom_pipelines" in feature_extractor_dict:
feature_extractor_dict["custom_pipelines"] = add_model_info_to_custom_pipelines(
feature_extractor_dict["custom_pipelines"], pretrained_model_name_or_path
)
return feature_extractor_dict, kwargs return feature_extractor_dict, kwargs
......
...@@ -31,6 +31,7 @@ from .utils import ( ...@@ -31,6 +31,7 @@ from .utils import (
IMAGE_PROCESSOR_NAME, IMAGE_PROCESSOR_NAME,
PushToHubMixin, PushToHubMixin,
add_model_info_to_auto_map, add_model_info_to_auto_map,
add_model_info_to_custom_pipelines,
cached_file, cached_file,
copy_func, copy_func,
download_url, download_url,
...@@ -375,11 +376,15 @@ class ImageProcessingMixin(PushToHubMixin): ...@@ -375,11 +376,15 @@ class ImageProcessingMixin(PushToHubMixin):
f"loading configuration file {image_processor_file} from cache at {resolved_image_processor_file}" f"loading configuration file {image_processor_file} from cache at {resolved_image_processor_file}"
) )
if "auto_map" in image_processor_dict and not is_local: if not is_local:
image_processor_dict["auto_map"] = add_model_info_to_auto_map( if "auto_map" in image_processor_dict:
image_processor_dict["auto_map"], pretrained_model_name_or_path image_processor_dict["auto_map"] = add_model_info_to_auto_map(
) image_processor_dict["auto_map"], pretrained_model_name_or_path
)
if "custom_pipelines" in image_processor_dict:
image_processor_dict["custom_pipelines"] = add_model_info_to_custom_pipelines(
image_processor_dict["custom_pipelines"], pretrained_model_name_or_path
)
return image_processor_dict, kwargs return image_processor_dict, kwargs
@classmethod @classmethod
......
...@@ -30,6 +30,7 @@ from .utils import ( ...@@ -30,6 +30,7 @@ from .utils import (
PROCESSOR_NAME, PROCESSOR_NAME,
PushToHubMixin, PushToHubMixin,
add_model_info_to_auto_map, add_model_info_to_auto_map,
add_model_info_to_custom_pipelines,
cached_file, cached_file,
copy_func, copy_func,
direct_transformers_import, direct_transformers_import,
...@@ -355,10 +356,15 @@ class ProcessorMixin(PushToHubMixin): ...@@ -355,10 +356,15 @@ class ProcessorMixin(PushToHubMixin):
else: else:
logger.info(f"loading configuration file {processor_file} from cache at {resolved_processor_file}") logger.info(f"loading configuration file {processor_file} from cache at {resolved_processor_file}")
if "auto_map" in processor_dict and not is_local: if not is_local:
processor_dict["auto_map"] = add_model_info_to_auto_map( if "auto_map" in processor_dict:
processor_dict["auto_map"], pretrained_model_name_or_path processor_dict["auto_map"] = add_model_info_to_auto_map(
) processor_dict["auto_map"], pretrained_model_name_or_path
)
if "custom_pipelines" in processor_dict:
processor_dict["custom_pipelines"] = add_model_info_to_custom_pipelines(
processor_dict["custom_pipelines"], pretrained_model_name_or_path
)
return processor_dict, kwargs return processor_dict, kwargs
......
...@@ -42,6 +42,7 @@ from .utils import ( ...@@ -42,6 +42,7 @@ from .utils import (
TensorType, TensorType,
add_end_docstrings, add_end_docstrings,
add_model_info_to_auto_map, add_model_info_to_auto_map,
add_model_info_to_custom_pipelines,
cached_file, cached_file,
copy_func, copy_func,
download_url, download_url,
...@@ -2177,13 +2178,18 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin): ...@@ -2177,13 +2178,18 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
config_tokenizer_class = None config_tokenizer_class = None
init_kwargs = init_configuration init_kwargs = init_configuration
if "auto_map" in init_kwargs and not _is_local: if not _is_local:
# For backward compatibility with odl format. if "auto_map" in init_kwargs:
if isinstance(init_kwargs["auto_map"], (tuple, list)): # For backward compatibility with odl format.
init_kwargs["auto_map"] = {"AutoTokenizer": init_kwargs["auto_map"]} if isinstance(init_kwargs["auto_map"], (tuple, list)):
init_kwargs["auto_map"] = add_model_info_to_auto_map( init_kwargs["auto_map"] = {"AutoTokenizer": init_kwargs["auto_map"]}
init_kwargs["auto_map"], pretrained_model_name_or_path init_kwargs["auto_map"] = add_model_info_to_auto_map(
) init_kwargs["auto_map"], pretrained_model_name_or_path
)
if "custom_pipelines" in init_kwargs:
init_kwargs["custom_pipelines"] = add_model_info_to_custom_pipelines(
init_kwargs["custom_pipelines"], pretrained_model_name_or_path
)
if config_tokenizer_class is None: if config_tokenizer_class is None:
# Matt: This entire block is only used to decide if the tokenizer class matches the class in the repo. # Matt: This entire block is only used to decide if the tokenizer class matches the class in the repo.
......
...@@ -37,6 +37,7 @@ from .generic import ( ...@@ -37,6 +37,7 @@ from .generic import (
PaddingStrategy, PaddingStrategy,
TensorType, TensorType,
add_model_info_to_auto_map, add_model_info_to_auto_map,
add_model_info_to_custom_pipelines,
cached_property, cached_property,
can_return_loss, can_return_loss,
expand_dims, expand_dims,
......
...@@ -721,6 +721,19 @@ def add_model_info_to_auto_map(auto_map, repo_id): ...@@ -721,6 +721,19 @@ def add_model_info_to_auto_map(auto_map, repo_id):
return auto_map return auto_map
def add_model_info_to_custom_pipelines(custom_pipeline, repo_id):
    """
    Prefix each custom pipeline implementation path with the repo id it was loaded from.

    Args:
        custom_pipeline (`dict`): Mapping of task name to pipeline metadata, e.g.
            `{task: {"impl": "path.to.module.ClassName", ...}, ...}`.
        repo_id (`str`): Repository id (e.g. `"user/model"`) to prepend to each `"impl"` path.

    Returns:
        `dict`: The same mapping, mutated in place so every `"impl"` entry reads
        `"{repo_id}--{module}"`. Entries whose path already contains `"--"` are left
        untouched so the tag is only applied the first time the config is loaded.
    """
    for task_info in custom_pipeline.values():
        impl = task_info.get("impl")
        # Skip tasks with no (or null) "impl", and paths already tagged with a repo id
        # (e.g. a model fine-tuned from a hub repo and pushed again).
        if impl is not None and "--" not in impl:
            task_info["impl"] = f"{repo_id}--{impl}"
    return custom_pipeline
def infer_framework(model_class): def infer_framework(model_class):
""" """
Infers the framework of a given model without using isinstance(), because we cannot guarantee that the relevant Infers the framework of a given model without using isinstance(), because we cannot guarantee that the relevant
......
...@@ -925,6 +925,24 @@ class DynamicPipelineTester(unittest.TestCase): ...@@ -925,6 +925,24 @@ class DynamicPipelineTester(unittest.TestCase):
# Can't make an isinstance check because the new_classifier is from the PairClassificationPipeline class of a # Can't make an isinstance check because the new_classifier is from the PairClassificationPipeline class of a
# dynamic module # dynamic module
self.assertEqual(new_classifier.__class__.__name__, "PairClassificationPipeline") self.assertEqual(new_classifier.__class__.__name__, "PairClassificationPipeline")
# check for tag existence; the tag needs to be added when calling a custom pipeline from the hub
# useful for cases such as finetuning
self.assertDictEqual(
new_classifier.model.config.custom_pipelines,
{
"pair-classification": {
"impl": f"{USER}/test-dynamic-pipeline--custom_pipeline.PairClassificationPipeline",
"pt": ("AutoModelForSequenceClassification",),
"tf": (),
}
},
)
# test if the pipeline still works after the model is finetuned
# (we are actually testing if the pipeline still works from the final repo)
# this is where the user/repo--module.class is used for
new_classifier.model.push_to_hub(repo_name=f"{USER}/test-pipeline-for-a-finetuned-model", token=self._token)
del new_classifier # free up memory
new_classifier = pipeline(model=f"{USER}/test-pipeline-for-a-finetuned-model", trust_remote_code=True)
results = classifier("I hate you", second_text="I love you") results = classifier("I hate you", second_text="I love you")
new_results = new_classifier("I hate you", second_text="I love you") new_results = new_classifier("I hate you", second_text="I love you")
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment