Commit ea2d13c2 authored by zhaoying1
Browse files

added llama_tencentpretrain_pytorch

parents
Pipeline #548 failed with stages in 0 seconds
{
"stream_0": {
"vocab_size": 21128,
"emb_size": 768,
"feedforward_size": 3072,
"hidden_size": 768,
"hidden_act": "gelu",
"heads_num": 12,
"layers_num": 12,
"max_seq_length": 512,
"dropout": 0.1,
"embedding": ["word", "pos", "seg"],
"encoder": "transformer",
"mask": "fully_visible"
},
"stream_1": {
"vocab_size": 21128,
"emb_size": 768,
"feedforward_size": 3072,
"hidden_size": 768,
"hidden_act": "gelu",
"heads_num": 12,
"layers_num": 12,
"max_seq_length": 512,
"dropout": 0.1,
"embedding": ["word", "pos", "seg"],
"encoder": "transformer",
"mask": "fully_visible"
},
"embedding": "dual",
"encoder": "dual",
"pooling": "first",
"tie_weights": true
}
\ No newline at end of file
{
"stream_0": {
"vocab_size": 21128,
"emb_size": 1024,
"feedforward_size": 4096,
"hidden_size": 1024,
"hidden_act": "gelu",
"heads_num": 16,
"layers_num": 24,
"max_seq_length": 512,
"dropout": 0.1,
"embedding": ["word", "pos", "seg"],
"encoder": "transformer",
"mask": "fully_visible"
},
"stream_1": {
"vocab_size": 21128,
"emb_size": 1024,
"feedforward_size": 4096,
"hidden_size": 1024,
"hidden_act": "gelu",
"heads_num": 16,
"layers_num": 24,
"max_seq_length": 512,
"dropout": 0.1,
"embedding": ["word", "pos", "seg"],
"encoder": "transformer",
"mask": "fully_visible"
},
"embedding": "dual",
"encoder": "dual",
"pooling": "first",
"tie_weights": true
}
\ No newline at end of file
{
"stream_0": {
"vocab_size": 21128,
"emb_size": 512,
"hidden_size": 512,
"layers_num": 2,
"dropout": 0.1,
"embedding": ["word"],
"encoder": "lstm"
},
"stream_1": {
"vocab_size": 21128,
"emb_size": 512,
"hidden_size": 512,
"layers_num": 2,
"dropout": 0.1,
"embedding": ["word"],
"encoder": "lstm"
},
"embedding": "dual",
"encoder": "dual",
"pooling": "mean",
"tie_weights": true
}
\ No newline at end of file
{
"pad_token": "[PAD]",
"unk_token": "[UNK]",
"cls_token": "[CLS]",
"sep_token": "[SEP]",
"mask_token": "[MASK]",
"sentinel_token": "<extra_id_0>"
}
{
"emb_size": 768,
"feedforward_size": 2048,
"hidden_size": 768,
"hidden_act": "gelu_fast",
"attention_head_size": 64,
"heads_num": 12,
"layers_num": 12,
"decoder_layers_num": 12,
"dropout": 0.0,
"data_processor": "t5",
"embedding": ["word"],
"relative_position_embedding": true,
"remove_embedding_layernorm": true,
"tgt_embedding": ["word"],
"share_embedding": true,
"encoder": "transformer",
"mask": "fully_visible",
"layernorm_positioning": "pre",
"feed_forward": "gated",
"remove_attention_scale": true,
"layernorm": "t5",
"remove_transformer_bias": true,
"decoder": "transformer",
"target": ["lm"]
}
\ No newline at end of file
{
"emb_size": 1024,
"feedforward_size": 2816,
"hidden_size": 1024,
"hidden_act": "gelu_fast",
"attention_head_size": 64,
"heads_num": 16,
"layers_num": 24,
"decoder_layers_num": 24,
"dropout": 0.0,
"data_processor": "t5",
"embedding": ["word"],
"relative_position_embedding": true,
"remove_embedding_layernorm": true,
"tgt_embedding": ["word"],
"share_embedding": true,
"encoder": "transformer",
"mask": "fully_visible",
"layernorm_positioning": "pre",
"feed_forward": "gated",
"remove_attention_scale": true,
"layernorm": "t5",
"remove_transformer_bias": true,
"decoder": "transformer",
"target": ["lm"]
}
\ No newline at end of file
{
"emb_size": 512,
"feedforward_size": 1024,
"hidden_size": 512,
"hidden_act": "gelu_fast",
"attention_head_size": 64,
"heads_num": 6,
"layers_num": 8,
"decoder_layers_num": 8,
"dropout": 0.0,
"data_processor": "t5",
"embedding": ["word"],
"relative_position_embedding": true,
"remove_embedding_layernorm": true,
"tgt_embedding": ["word"],
"share_embedding": true,
"encoder": "transformer",
"mask": "fully_visible",
"layernorm_positioning": "pre",
"feed_forward": "gated",
"remove_attention_scale": true,
"layernorm": "t5",
"remove_transformer_bias": true,
"decoder": "transformer",
"target": ["lm"]
}
\ No newline at end of file
{
"emb_size": 2048,
"feedforward_size": 5120,
"hidden_size": 2048,
"hidden_act": "gelu_fast",
"attention_head_size": 64,
"heads_num": 32,
"layers_num": 24,
"decoder_layers_num": 24,
"dropout": 0.0,
"data_processor": "t5",
"embedding": ["word"],
"relative_position_embedding": true,
"remove_embedding_layernorm": true,
"tgt_embedding": ["word"],
"share_embedding": true,
"encoder": "transformer",
"mask": "fully_visible",
"layernorm_positioning": "pre",
"feed_forward": "gated",
"remove_attention_scale": true,
"layernorm": "t5",
"remove_transformer_bias": true,
"decoder": "transformer",
"target": ["lm"]
}
\ No newline at end of file
{
"emb_size": 4096,
"feedforward_size": 10240,
"hidden_size": 4096,
"hidden_act": "gelu_fast",
"attention_head_size": 64,
"heads_num": 64,
"layers_num": 24,
"decoder_layers_num": 24,
"dropout": 0.0,
"data_processor": "t5",
"embedding": ["word"],
"relative_position_embedding": true,
"remove_embedding_layernorm": true,
"tgt_embedding": ["word"],
"share_embedding": true,
"encoder": "transformer",
"mask": "fully_visible",
"layernorm_positioning": "pre",
"feed_forward": "gated",
"remove_attention_scale": true,
"layernorm": "t5",
"remove_transformer_bias": true,
"decoder": "transformer",
"target": ["lm"]
}
\ No newline at end of file
{
"emb_size": 1024,
"feedforward_size": 65536,
"hidden_size": 1024,
"hidden_act": "relu",
"attention_head_size": 128,
"heads_num": 128,
"layers_num": 24,
"decoder_layers_num": 24,
"dropout": 0.1,
"data_processor": "t5",
"embedding": ["word"],
"relative_position_embedding": true,
"remove_embedding_layernorm": true,
"tgt_embedding": ["word"],
"share_embedding": true,
"encoder": "transformer",
"mask": "fully_visible",
"layernorm_positioning": "pre",
"remove_attention_scale": true,
"layernorm": "t5",
"remove_transformer_bias": true,
"decoder": "transformer",
"target": ["lm"],
"tie_weights": true
}
\ No newline at end of file
{
"emb_size": 1024,
"feedforward_size": 16384,
"hidden_size": 1024,
"hidden_act": "relu",
"attention_head_size": 128,
"heads_num": 32,
"layers_num": 24,
"decoder_layers_num": 24,
"dropout": 0.1,
"data_processor": "t5",
"embedding": ["word"],
"relative_position_embedding": true,
"remove_embedding_layernorm": true,
"tgt_embedding": ["word"],
"share_embedding": true,
"encoder": "transformer",
"mask": "fully_visible",
"layernorm_positioning": "pre",
"remove_attention_scale": true,
"layernorm": "t5",
"remove_transformer_bias": true,
"decoder": "transformer",
"target": ["lm"],
"tie_weights": true
}
\ No newline at end of file
{
"emb_size": 768,
"feedforward_size": 3072,
"hidden_size": 768,
"hidden_act": "relu",
"heads_num": 12,
"layers_num": 12,
"decoder_layers_num": 12,
"dropout": 0.1,
"data_processor": "t5",
"embedding": ["word"],
"relative_position_embedding": true,
"remove_embedding_layernorm": true,
"tgt_embedding": ["word"],
"share_embedding": true,
"encoder": "transformer",
"mask": "fully_visible",
"layernorm_positioning": "pre",
"remove_attention_scale": true,
"layernorm": "t5",
"remove_transformer_bias": true,
"decoder": "transformer",
"target": ["lm"],
"tie_weights": true
}
\ No newline at end of file
{
"emb_size": 1024,
"feedforward_size": 4096,
"hidden_size": 1024,
"hidden_act": "relu",
"heads_num": 16,
"layers_num": 24,
"decoder_layers_num": 24,
"dropout": 0.1,
"data_processor": "t5",
"embedding": ["word"],
"relative_position_embedding": true,
"remove_embedding_layernorm": true,
"tgt_embedding": ["word"],
"share_embedding": true,
"encoder": "transformer",
"mask": "fully_visible",
"layernorm_positioning": "pre",
"remove_attention_scale": true,
"layernorm": "t5",
"remove_transformer_bias": true,
"decoder": "transformer",
"target": ["lm"],
"tie_weights": true
}
\ No newline at end of file
{
"emb_size": 512,
"feedforward_size": 2048,
"hidden_size": 512,
"hidden_act": "relu",
"heads_num": 8,
"layers_num": 6,
"decoder_layers_num": 6,
"dropout": 0.1,
"data_processor": "t5",
"embedding": ["word"],
"relative_position_embedding": true,
"remove_embedding_layernorm": true,
"tgt_embedding": ["word"],
"share_embedding": true,
"encoder": "transformer",
"mask": "fully_visible",
"layernorm_positioning": "pre",
"remove_attention_scale": true,
"layernorm": "t5",
"remove_transformer_bias": true,
"decoder": "transformer",
"target": ["lm"],
"tie_weights": true
}
\ No newline at end of file
{
"emb_size": 768,
"feedforward_size": 3072,
"hidden_size": 768,
"hidden_act": "gelu_fast",
"heads_num": 12,
"layers_num": 12,
"dropout": 0.1,
"max_seq_length": 1024,
"embedding": ["word", "pos", "seg"],
"remove_embedding_layernorm": true,
"layernorm_positioning": "pre",
"encoder": "transformer",
"mask": "prefix",
"target": ["lm"],
"image_height": 256,
"image_width": 256,
"patch_size": 16,
"seq_length": 334,
"tokenizer": "text_image",
"image_tokenizer": {
"is_gumbel": false,
"is_transformer": true,
"image_vocab_size": 1024,
"frame_size": 16
}
}
\ No newline at end of file
{
"emb_size": 512,
"feedforward_size": 2048,
"hidden_size": 512,
"hidden_act": "relu",
"heads_num": 8,
"layers_num": 6,
"decoder_layers_num": 6,
"max_seq_length": 512,
"dropout": 0.1,
"data_processor": "mt",
"embedding": ["word", "sinusoidalpos"],
"tgt_embedding": ["word", "sinusoidalpos"],
"encoder": "transformer",
"decoder": "transformer",
"mask": "fully_visible",
"target": ["lm"],
"tie_weights": true
}
\ No newline at end of file
{
"emb_size": 1024,
"feedforward_size": 4096,
"hidden_size": 1024,
"hidden_act": "relu",
"heads_num": 16,
"layers_num": 6,
"decoder_layers_num": 6,
"max_seq_length": 512,
"dropout": 0.3,
"data_processor": "mt",
"embedding": ["word", "sinusoidalpos"],
"tgt_embedding": ["word", "sinusoidalpos"],
"encoder": "transformer",
"decoder": "transformer",
"mask": "fully_visible",
"target": ["lm"],
"tie_weights": true
}
\ No newline at end of file
{
"emb_size": 768,
"feedforward_size": 3072,
"hidden_size": 768,
"hidden_act": "gelu",
"heads_num": 12,
"layers_num": 12,
"max_seq_length": 512,
"dropout": 0.1,
"data_processor": "vilt",
"embedding": ["word_patch", "pos", "seg"],
"encoder": "transformer",
"mask": "fully_visible",
"target": ["mlm", "sp"],
"image_height": 256,
"image_width": 256,
"patch_size": 32
}
\ No newline at end of file
{
"emb_size": 768,
"feedforward_size": 3072,
"hidden_size": 768,
"hidden_act": "gelu",
"heads_num": 12,
"layers_num": 12,
"dropout": 0.1,
"max_seq_length": 197,
"data_processor": "vit",
"embedding": ["patch", "pos"],
"remove_embedding_layernorm": true,
"encoder": "transformer",
"mask": "fully_visible",
"layernorm_positioning": "pre",
"target": ["cls"],
"image_height": 224,
"image_width": 224,
"patch_size": 16
}
\ No newline at end of file
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment