"benchmark/git@developer.sourcefind.cn:change/sglang.git" did not exist on "c4831e2fcfcf76215f5645029fc64b20f613a002"
Unverified commit f3d38632, authored by Shijie Wu, committed by GitHub

fix arg name in BLOOM testing and remove unused arg document (#18843)

parent 16242e1b
@@ -62,18 +62,12 @@ class BloomConfig(PretrainedConfig):
             Number of hidden layers in the Transformer encoder.
         n_head (`int`, *optional*, defaults to 12):
             Number of attention heads for each attention layer in the Transformer encoder.
-        attn_pdrop (`float`, *optional*, defaults to 0.1):
-            The dropout ratio for the attention.
         layer_norm_epsilon (`float`, *optional*, defaults to 1e-5):
             The epsilon to use in the layer normalization layers.
         initializer_range (`float`, *optional*, defaults to 0.02):
             The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
         apply_residual_connection_post_layernorm (`bool`, *optional*, defaults to `False`):
             If enabled, use the layer norm of the hidden states as the residual in the transformer blocks
-        skip_bias_add (`bool`, *optional*, defaults to `True`):
-            If set to `True`, it will skip bias add for each linear layer in the transformer blocks
-        skip_bias_add_qkv (`bool`, *optional*, defaults to `False`):
-            If set to `True`, it will skip bias add for the first linear layer in the transformer blocks
         hidden_dropout (`float`, *optional*, defaults to 0.1):
             Dropout rate of the dropout function on the bias dropout.
         attention_dropout (`float`, *optional*, defaults to 0.1):
@@ -124,7 +118,7 @@ class BloomConfig(PretrainedConfig):
         n_head=8,
         layer_norm_epsilon=1e-5,
         initializer_range=0.02,
-        use_cache=False,
+        use_cache=True,
         bos_token_id=1,
         eos_token_id=2,
         apply_residual_connection_post_layernorm=False,
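For context, a minimal sketch (not part of the commit) of constructing a BloomConfig with the keyword arguments the class actually accepts after this cleanup. The entries removed from the docstring above (attn_pdrop, skip_bias_add, skip_bias_add_qkv) never corresponded to constructor parameters; the real dropout arguments are hidden_dropout and attention_dropout.

# Minimal sketch, not part of the commit.
from transformers import BloomConfig

config = BloomConfig(
    hidden_size=64,
    n_layer=2,
    n_head=8,
    hidden_dropout=0.1,      # replaces the GPT-2 style resid_pdrop naming
    attention_dropout=0.1,   # replaces the GPT-2 style attn_pdrop naming
    layer_norm_epsilon=1e-5,
    initializer_range=0.02,
    use_cache=True,          # matches the new default set in the hunk above
)
print(config.attention_dropout)  # 0.1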
@@ -57,7 +57,7 @@ class BloomModelTester:
         intermediate_size=37,
         hidden_act="gelu",
         hidden_dropout_prob=0.1,
-        attention_probs_dropout_prob=0.1,
+        attention_dropout_prob=0.1,
         max_position_embeddings=512,
         type_vocab_size=16,
         type_sequence_label_size=2,
@@ -81,7 +81,7 @@ class BloomModelTester:
         self.intermediate_size = intermediate_size
         self.hidden_act = hidden_act
         self.hidden_dropout_prob = hidden_dropout_prob
-        self.attention_probs_dropout_prob = attention_probs_dropout_prob
+        self.attention_dropout_prob = attention_dropout_prob
         self.max_position_embeddings = max_position_embeddings
         self.type_vocab_size = type_vocab_size
         self.type_sequence_label_size = type_sequence_label_size
@@ -118,8 +118,8 @@ class BloomModelTester:
             hidden_size=self.hidden_size,
             n_layer=self.num_hidden_layers,
             n_head=self.num_attention_heads,
-            resid_pdrop=self.hidden_dropout_prob,
-            attn_pdrop=self.attention_probs_dropout_prob,
+            hidden_dropout=self.hidden_dropout_prob,
+            attention_dropout=self.attention_dropout_prob,
             n_positions=self.max_position_embeddings,
             type_vocab_size=self.type_vocab_size,
             initializer_range=self.initializer_range,
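As a hedged illustration (not part of the commit) of why the tester rename matters: BloomConfig, like other PretrainedConfig subclasses, stores unrecognized keyword arguments as plain attributes rather than raising an error, so the old GPT-2 style names silently left the model's dropout settings at their defaults.

# Hedged illustration, not from the commit.
from transformers import BloomConfig

old_style = BloomConfig(attn_pdrop=0.3, resid_pdrop=0.3)            # unknown kwargs, kept as attributes but unused
new_style = BloomConfig(attention_dropout=0.3, hidden_dropout=0.3)  # the fields the model actually reads

print(old_style.attention_dropout)  # library default (0.0); the intended 0.3 never took effect
print(new_style.attention_dropout)  # 0.3

This is why the tester attribute and the keyword it forwards to BloomConfig are renamed together in the hunks above.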