"...resnet50_tensorflow.git" did not exist on "d4e1f97fd8b929deab5b65f8fd2d0523f89d5b44"
Commit 719cb373 authored by LysandreJik

Pruning for GPT and GPT-2

parent fc1fbae4
@@ -453,6 +453,12 @@ class GPT2Model(GPT2PreTrainedModel):
         self.h = nn.ModuleList([Block(config.n_ctx, config, scale=True) for _ in range(config.n_layer)])
         self.ln_f = LayerNorm(config.n_embd, eps=config.layer_norm_epsilon)
 
+        if hasattr(config, "pruned_heads"):
+            pruned_heads = config.pruned_heads.copy().items()
+            for layer, heads in pruned_heads:
+                if self.h[int(layer)].attn.n_head == config.n_head:
+                    self.prune_heads({int(layer): list(map(int, heads))})
+
         self.apply(self.init_weights)
 
     def _resize_token_embeddings(self, new_num_tokens):
...
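With this change, pruning recorded in the configuration survives a save/load round trip. A minimal sketch of the intended round trip (assuming prune_heads records its argument in config.pruned_heads, as the constructor above expects, and that config.n_head keeps the original head count; the head indices and directory are illustrative):

from pytorch_transformers import GPT2Model

model = GPT2Model.from_pretrained('gpt2')
# Prune heads 0 and 2 of layer 0, and head 1 of layer 5.
model.prune_heads({0: [0, 2], 5: [1]})
model.save_pretrained('./gpt2-pruned')  # config.pruned_heads is saved alongside the weights

# On reload, the constructor sees config.pruned_heads and re-applies the
# pruning before the (already pruned) weights are loaded.
reloaded = GPT2Model.from_pretrained('./gpt2-pruned')
assert reloaded.h[0].attn.n_head == reloaded.config.n_head - 2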
@@ -456,6 +456,12 @@ class OpenAIGPTModel(OpenAIGPTPreTrainedModel):
         self.drop = nn.Dropout(config.embd_pdrop)
         self.h = nn.ModuleList([Block(config.n_ctx, config, scale=True) for _ in range(config.n_layer)])
 
+        if hasattr(config, "pruned_heads"):
+            pruned_heads = config.pruned_heads.copy().items()
+            for layer, heads in pruned_heads:
+                if self.h[int(layer)].attn.n_head == config.n_head:
+                    self.prune_heads({int(layer): list(map(int, heads))})
+
         self.apply(self.init_weights)
 
     def _resize_token_embeddings(self, new_num_tokens):
...
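The int() casts in both constructors are needed because the configuration goes through JSON on save: integer dictionary keys come back as strings. A standalone illustration using only the standard library (the values are hypothetical):

import json

pruned_heads = {0: [0, 2], 5: [1]}               # recorded with int keys
restored = json.loads(json.dumps(pruned_heads))
print(restored)                                  # {'0': [0, 2], '5': [1]} -- keys are now strings

# The constructors normalize layer and head indices back to ints:
normalized = {int(layer): list(map(int, heads)) for layer, heads in restored.items()}
print(normalized)                                # {0: [0, 2], 5: [1]}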
@@ -213,13 +213,12 @@ class CommonTestCases:
             if not self.test_pruning:
                 return
 
-            config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-            if "head_mask" in inputs_dict:
-                del inputs_dict["head_mask"]
-
             for model_class in self.all_model_classes:
                 config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
+                if "head_mask" in inputs_dict:
+                    del inputs_dict["head_mask"]
+
                 config.output_attentions = True
                 config.output_hidden_states = False
                 model = model_class(config=config)
@@ -244,6 +243,10 @@ class CommonTestCases:
             for model_class in self.all_model_classes:
                 config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
+                if "head_mask" in inputs_dict:
+                    del inputs_dict["head_mask"]
+
                 config.output_attentions = True
                 config.output_hidden_states = False
                 model = model_class(config=config)
@@ -274,6 +277,10 @@ class CommonTestCases:
             for model_class in self.all_model_classes:
                 config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
+                if "head_mask" in inputs_dict:
+                    del inputs_dict["head_mask"]
+
                 config.output_attentions = True
                 config.output_hidden_states = False
...
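All three test hunks converge on the same per-class pattern: rebuild config and inputs_dict inside the loop so one model's pruning cannot leak into the next iteration, and drop head_mask from the shared inputs. The likely reason for dropping it (an inference from the shapes involved, not stated in the commit) is that a head mask is sized for the original head count and no longer broadcasts against a pruned layer's attention scores. A runnable illustration with hypothetical sizes:

import torch

n_layer, n_head = 2, 4
head_mask = torch.ones(n_layer, n_head)  # sized for the unpruned model

# Attention scores for a layer pruned from 4 heads down to 3:
scores = torch.randn(1, 3, 5, 5)         # (batch, heads_after_pruning, seq, seq)

try:
    scores * head_mask[0].view(1, -1, 1, 1)  # tries to broadcast 4 against 3
except RuntimeError as e:
    print("head_mask no longer fits the pruned layer:", e)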