"examples/vscode:/vscode.git/clone" did not exist on "c366ce10113e1024e7c5e61d56c635516909a3d8"
Unverified commit 785e52cd, authored by Patrick von Platen and committed by GitHub

improve templates (#9342)

parent 64103fb6
@@ -174,6 +174,7 @@ class {{cookiecutter.camelcase_modelname}}Config(PretrainedConfig):
         init_std=0.02,
         decoder_start_token_id=2,
         classifier_dropout=0.0,
+        scale_embedding=False,
         gradient_checkpointing=False,
         {% endif -%}
         pad_token_id=1,
@@ -226,6 +227,8 @@ class {{cookiecutter.camelcase_modelname}}Config(PretrainedConfig):
         self.use_cache = use_cache
         self.num_hidden_layers = encoder_layers
         self.gradient_checkpointing = gradient_checkpointing
+        self.scale_embedding = scale_embedding  # scale factor will be sqrt(d_model) if True
         {% endif -%}
     {% if cookiecutter.is_encoder_decoder_model == "False" %}
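The new `scale_embedding` flag defaults to `False` and is stored on the config so the encoder and decoder can decide whether to multiply token embeddings by `sqrt(d_model)`, the convention used by BART-style models to keep token embeddings on a scale comparable to the positional embeddings. As a hedged illustration of how a generated config is expected to expose the flag, the sketch below uses `BartConfig`, an existing encoder-decoder config that already carries the same option; a generated `{{cookiecutter.camelcase_modelname}}Config` should behave analogously.

```python
from transformers import BartConfig

# Illustrative only: BartConfig is an existing config with the same flag; a config
# generated from this template is expected to accept scale_embedding the same way.
config = BartConfig(d_model=512, scale_embedding=True)
print(config.scale_embedding)  # True -> token embeddings get scaled by sqrt(512) ≈ 22.63
```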
@@ -1893,6 +1893,8 @@ class TF{{cookiecutter.camelcase_modelname}}Encoder(tf.keras.layers.Layer):
         self.layerdrop = config.encoder_layerdrop
         self.padding_idx = config.pad_token_id
         self.max_source_positions = config.max_position_embeddings
+        self.embed_scale = math.sqrt(config.d_model) if config.scale_embedding else 1.0
         self.embed_tokens = embed_tokens
         self.embed_positions = TF{{cookiecutter.camelcase_modelname}}LearnedPositionalEmbedding(
@@ -1969,7 +1971,7 @@ class TF{{cookiecutter.camelcase_modelname}}Encoder(tf.keras.layers.Layer):
             raise ValueError("You have to specify either input_ids or inputs_embeds")
         if inputs["inputs_embeds"] is None:
-            inputs_embeds = self.embed_tokens(inputs["input_ids"])
+            inputs_embeds = self.embed_tokens(inputs["input_ids"]) * self.embed_scale
         else:
             inputs_embeds = inputs["inputs_embeds"]
@@ -2038,6 +2040,7 @@ class TF{{cookiecutter.camelcase_modelname}}Decoder(tf.keras.layers.Layer):
             self.padding_idx,
             name="embed_positions",
         )
+        self.embed_scale = math.sqrt(config.d_model) if config.scale_embedding else 1.0
         self.layers = [TF{{cookiecutter.camelcase_modelname}}DecoderLayer(config, name=f"layers.{i}") for i in range(config.decoder_layers)]
         self.layernorm_embedding = tf.keras.layers.LayerNormalization(epsilon=1e-5, name="layernorm_embedding")
@@ -2142,7 +2145,7 @@ class TF{{cookiecutter.camelcase_modelname}}Decoder(tf.keras.layers.Layer):
         positions = self.embed_positions(input_shape, past_key_values_length)
         if inputs["inputs_embeds"] is None:
-            inputs["inputs_embeds"] = self.embed_tokens(inputs["input_ids"])
+            inputs["inputs_embeds"] = self.embed_tokens(inputs["input_ids"]) * self.embed_scale
         hidden_states = inputs["inputs_embeds"]
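In the TF encoder and decoder, `embed_scale` is computed once in `__init__` and applied wherever token embeddings are looked up from `input_ids` (embeddings passed in directly via `inputs_embeds` are left untouched). A minimal standalone sketch of that pattern, using illustrative names rather than the template's classes:

```python
import math
import tensorflow as tf

# Assumed toy sizes for illustration; the template reads these from the config.
d_model, vocab_size, scale_embedding = 16, 100, True
embed_scale = math.sqrt(d_model) if scale_embedding else 1.0

embed_tokens = tf.keras.layers.Embedding(vocab_size, d_model)
input_ids = tf.constant([[5, 7, 9]])
# Token embeddings are scaled before positional embeddings are added.
inputs_embeds = embed_tokens(input_ids) * embed_scale  # shape (1, 3, d_model)
```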
@@ -2093,6 +2093,7 @@ class {{cookiecutter.camelcase_modelname}}Encoder({{cookiecutter.camelcase_modelname}}PreTrainedModel):
         embed_dim = config.d_model
         self.padding_idx = config.pad_token_id
         self.max_source_positions = config.max_position_embeddings
+        self.embed_scale = math.sqrt(config.d_model) if config.scale_embedding else 1.0
         if embed_tokens is not None:
             self.embed_tokens = embed_tokens
@@ -2167,7 +2168,7 @@ class {{cookiecutter.camelcase_modelname}}Encoder({{cookiecutter.camelcase_modelname}}PreTrainedModel):
             raise ValueError("You have to specify either input_ids or inputs_embeds")
         if inputs_embeds is None:
-            inputs_embeds = self.embed_tokens(input_ids)
+            inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale
         embed_pos = self.embed_positions(input_shape)
@@ -2236,6 +2237,7 @@ class {{cookiecutter.camelcase_modelname}}Decoder({{cookiecutter.camelcase_modelname}}PreTrainedModel):
         self.layerdrop = config.decoder_layerdrop
         self.padding_idx = config.pad_token_id
         self.max_target_positions = config.max_position_embeddings
+        self.embed_scale = math.sqrt(config.d_model) if config.scale_embedding else 1.0
         if embed_tokens is not None:
             self.embed_tokens = embed_tokens
@@ -2337,7 +2339,7 @@ class {{cookiecutter.camelcase_modelname}}Decoder({{cookiecutter.camelcase_modelname}}PreTrainedModel):
         past_key_values_length = past_key_values[0][0].shape[2] if past_key_values is not None else 0
         if inputs_embeds is None:
-            inputs_embeds = self.embed_tokens(input_ids)
+            inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale
         # create causal mask
         # [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
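The PyTorch encoder and decoder mirror the TF change. A small PyTorch sketch (again with illustrative names, not the template's classes) showing the effect of the flag on the forward pass:

```python
import math
import torch
from torch import nn

d_model, vocab_size, pad_token_id = 16, 100, 1  # toy sizes, assumed for illustration
embed_tokens = nn.Embedding(vocab_size, d_model, padding_idx=pad_token_id)
input_ids = torch.tensor([[5, 7, 9]])

for scale_embedding in (False, True):
    embed_scale = math.sqrt(d_model) if scale_embedding else 1.0
    inputs_embeds = embed_tokens(input_ids) * embed_scale
    # With scale_embedding=True the embedding norm grows by exactly sqrt(d_model).
    print(scale_embedding, round(inputs_embeds.norm().item(), 3))
```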