Unverified Commit d9809298 authored by Nicolas Patry, committed by GitHub

Enabling `imageGPT` auto feature extractor. (#16871)

* Enabling `imageGPT` auto feature extractor.
Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>

* Small updates.

* Update after rebase to use `input_ids` instead of `pixel_values`.
Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
parent 31ee80d5
```diff
@@ -50,6 +50,7 @@ FEATURE_EXTRACTOR_MAPPING_NAMES = OrderedDict(
         ("flava", "FlavaFeatureExtractor"),
         ("glpn", "GLPNFeatureExtractor"),
         ("hubert", "Wav2Vec2FeatureExtractor"),
+        ("imagegpt", "ImageGPTFeatureExtractor"),
         ("layoutlmv2", "LayoutLMv2FeatureExtractor"),
         ("layoutlmv3", "LayoutLMv3FeatureExtractor"),
         ("maskformer", "MaskFormerFeatureExtractor"),
```
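With the mapping entry in place, the auto class can resolve an ImageGPT checkpoint to its dedicated feature extractor. A minimal sketch of what this enables (the `openai/imagegpt-small` checkpoint name is an assumption, not part of this diff):

```python
from transformers import AutoFeatureExtractor

# The new "imagegpt" entry lets the auto class map the checkpoint's config
# type to ImageGPTFeatureExtractor instead of raising an unknown-model error.
feature_extractor = AutoFeatureExtractor.from_pretrained("openai/imagegpt-small")
print(type(feature_extractor).__name__)  # ImageGPTFeatureExtractor
```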
```diff
@@ -75,14 +75,19 @@ def _pad(items, key, padding_value, padding_side):
         # Others include `attention_mask` etc...
         shape = items[0][key].shape
         dim = len(shape)
-        if dim == 4:
+        if key == "pixel_values":
             # This is probably an image, so padding shouldn't be necessary
             # B, C, H, W
             return torch.cat([item[key] for item in items], dim=0)
         max_length = max(item[key].shape[1] for item in items)
+        min_length = min(item[key].shape[1] for item in items)
         dtype = items[0][key].dtype
         if dim == 2:
+            if max_length == min_length:
+                # Bypass for `ImageGPT` which doesn't provide a padding value, yet
+                # we can consistently pad since the size should be matching
+                return torch.cat([item[key] for item in items], dim=0)
             tensor = torch.zeros((batch_size, max_length), dtype=dtype) + padding_value
         elif dim == 3:
             tensor = torch.zeros((batch_size, max_length, shape[-1]), dtype=dtype) + padding_value
```
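The new branch matters because ImageGPT's feature extractor emits `input_ids` of a fixed length (one token per pixel of a 32x32 image), so batches never actually need a padding value. A standalone toy run of that shortcut, with shapes chosen to mimic ImageGPT's fixed 1024-token encoding:

```python
import torch

# Four fake batch items, each a fixed-length sequence of color-cluster ids,
# mirroring what the ImageGPT feature extractor produces for 32x32 images.
items = [{"input_ids": torch.randint(0, 512, (1, 1024))} for _ in range(4)]

max_length = max(item["input_ids"].shape[1] for item in items)
min_length = min(item["input_ids"].shape[1] for item in items)
if max_length == min_length:
    # Same shortcut as in `_pad`: no padding value needed, just concatenate
    # along the batch dimension.
    batch = torch.cat([item["input_ids"] for item in items], dim=0)
print(batch.shape)  # torch.Size([4, 1024])
```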
```diff
@@ -146,7 +151,11 @@ def pad_collate_fn(tokenizer, feature_extractor):
         padded = {}
         for key in keys:
             if key in {"input_ids"}:
-                _padding_value = t_padding_value
+                # ImageGPT uses a feature extractor
+                if feature_extractor is not None:
+                    _padding_value = f_padding_value
+                else:
+                    _padding_value = t_padding_value
             elif key in {"input_values", "pixel_values", "input_features"}:
                 _padding_value = f_padding_value
             elif key in {"p_mask", "special_tokens_mask"}:
```
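The point of this change is that a tokenizer-less pipeline such as ImageGPT's still produces `input_ids`, so the collate function must fall back to the feature extractor's padding value when no tokenizer pad token exists. A condensed sketch of that decision (the helper name is hypothetical; variable names are taken from the diff, and keys not shown in this excerpt are omitted):

```python
def choose_padding_value(key, t_padding_value, f_padding_value, feature_extractor):
    """Pick the pad value for one batch key, condensed from the diff above."""
    if key == "input_ids":
        # ImageGPT emits `input_ids` from its feature extractor, so prefer the
        # feature extractor's padding value when one is available.
        return f_padding_value if feature_extractor is not None else t_padding_value
    if key in {"input_values", "pixel_values", "input_features"}:
        return f_padding_value
    raise ValueError(f"No padding rule shown for key {key!r} in this excerpt")
```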
```diff
@@ -171,6 +171,12 @@ class ImageGPTModelTester:
             reorder_and_upcast_attn=reorder_and_upcast_attn,
         )
 
+    def get_pipeline_config(self):
+        config = self.get_config()
+        config.vocab_size = 513
+        config.max_position_embeddings = 1024
+        return config
+
     def prepare_config_and_inputs_for_decoder(self):
         (
             config,
```
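These test values match ImageGPT's design rather than being arbitrary: the feature extractor quantizes each pixel to one of 512 color clusters, plus one start-of-sequence token, and a 32x32 input flattens to 1024 positions. A quick sanity check of that arithmetic (the variable names here are illustrative only):

```python
# ImageGPT vocabulary: 512 color clusters + 1 start-of-sequence token.
n_color_clusters = 512
n_special_tokens = 1
assert n_color_clusters + n_special_tokens == 513  # config.vocab_size

# One token per pixel of a 32x32 image.
image_side = 32
assert image_side * image_side == 1024  # config.max_position_embeddings
```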