Unverified commit db9a7e9d, authored by Yih-Dar, committed by GitHub
Browse files

Don't save `processor_config.json` if a processor has no extra attribute (#28584)



* not save if empty

* fix

* fix

* fix

* fix

* fix

---------
Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
parent 772307be
...@@ -234,6 +234,9 @@ class ProcessorMixin(PushToHubMixin): ...@@ -234,6 +234,9 @@ class ProcessorMixin(PushToHubMixin):
# If we save using the predefined names, we can load using `from_pretrained` # If we save using the predefined names, we can load using `from_pretrained`
output_processor_file = os.path.join(save_directory, PROCESSOR_NAME) output_processor_file = os.path.join(save_directory, PROCESSOR_NAME)
# For now, let's not save to `processor_config.json` if the processor doesn't have extra attributes and
# `auto_map` is not specified.
if set(self.to_dict().keys()) != {"processor_class"}:
self.to_json_file(output_processor_file) self.to_json_file(output_processor_file)
logger.info(f"processor saved in {output_processor_file}") logger.info(f"processor saved in {output_processor_file}")
...@@ -246,6 +249,8 @@ class ProcessorMixin(PushToHubMixin): ...@@ -246,6 +249,8 @@ class ProcessorMixin(PushToHubMixin):
token=kwargs.get("token"), token=kwargs.get("token"),
) )
if set(self.to_dict().keys()) == {"processor_class"}:
return []
return [output_processor_file] return [output_processor_file]
@classmethod @classmethod
......
...@@ -101,6 +101,12 @@ class AutoFeatureExtractorTest(unittest.TestCase): ...@@ -101,6 +101,12 @@ class AutoFeatureExtractorTest(unittest.TestCase):
# save in new folder # save in new folder
processor.save_pretrained(tmpdirname) processor.save_pretrained(tmpdirname)
if not os.path.isfile(os.path.join(tmpdirname, PROCESSOR_NAME)):
# create one manually in order to perform this test's objective
config_dict = {"processor_class": "Wav2Vec2Processor"}
with open(os.path.join(tmpdirname, PROCESSOR_NAME), "w") as fp:
json.dump(config_dict, fp)
# drop `processor_class` in tokenizer config # drop `processor_class` in tokenizer config
with open(os.path.join(tmpdirname, TOKENIZER_CONFIG_FILE), "r") as f: with open(os.path.join(tmpdirname, TOKENIZER_CONFIG_FILE), "r") as f:
config_dict = json.load(f) config_dict = json.load(f)
...@@ -123,6 +129,7 @@ class AutoFeatureExtractorTest(unittest.TestCase): ...@@ -123,6 +129,7 @@ class AutoFeatureExtractorTest(unittest.TestCase):
# save in new folder # save in new folder
processor.save_pretrained(tmpdirname) processor.save_pretrained(tmpdirname)
if os.path.isfile(os.path.join(tmpdirname, PROCESSOR_NAME)):
# drop `processor_class` in processor # drop `processor_class` in processor
with open(os.path.join(tmpdirname, PROCESSOR_NAME), "r") as f: with open(os.path.join(tmpdirname, PROCESSOR_NAME), "r") as f:
config_dict = json.load(f) config_dict = json.load(f)
...@@ -153,6 +160,7 @@ class AutoFeatureExtractorTest(unittest.TestCase): ...@@ -153,6 +160,7 @@ class AutoFeatureExtractorTest(unittest.TestCase):
# save in new folder # save in new folder
processor.save_pretrained(tmpdirname) processor.save_pretrained(tmpdirname)
if os.path.isfile(os.path.join(tmpdirname, PROCESSOR_NAME)):
# drop `processor_class` in processor # drop `processor_class` in processor
with open(os.path.join(tmpdirname, PROCESSOR_NAME), "r") as f: with open(os.path.join(tmpdirname, PROCESSOR_NAME), "r") as f:
config_dict = json.load(f) config_dict = json.load(f)
......
...@@ -75,8 +75,9 @@ class ProcessorTesterMixin: ...@@ -75,8 +75,9 @@ class ProcessorTesterMixin:
processor_first = self.get_processor() processor_first = self.get_processor()
with tempfile.TemporaryDirectory() as tmpdirname: with tempfile.TemporaryDirectory() as tmpdirname:
saved_file = processor_first.save_pretrained(tmpdirname)[0] saved_files = processor_first.save_pretrained(tmpdirname)
check_json_file_has_correct_format(saved_file) if len(saved_files) > 0:
check_json_file_has_correct_format(saved_files[0])
processor_second = self.processor_class.from_pretrained(tmpdirname) processor_second = self.processor_class.from_pretrained(tmpdirname)
self.assertEqual(processor_second.to_dict(), processor_first.to_dict()) self.assertEqual(processor_second.to_dict(), processor_first.to_dict())
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment