Commit ea2d13c2 authored by zhaoying1's avatar zhaoying1
Browse files

added llama_tencentpretrain_pytorch

parents
Pipeline #548 failed with stages
in 0 seconds
{
"emb_size": 1600,
"feedforward_size": 6400,
"hidden_size": 1600,
"hidden_act": "gelu_fast",
"heads_num": 25,
"layers_num": 48,
"max_seq_length": 1024,
"dropout": 0.1,
"data_processor": "lm",
"embedding": ["word", "pos"],
"remove_embedding_layernorm": true,
"encoder": "transformer",
"mask": "causal",
"layernorm_positioning": "pre",
"target": ["lm"],
"tie_weights": true
}
\ No newline at end of file
This diff is collapsed.
This diff is collapsed.
{
"emb_size": 5120,
"feedforward_size": 13824,
"hidden_size": 5120,
"hidden_act": "silu",
"heads_num": 40,
"layers_num": 40,
"dropout": 0.0,
"data_processor": "lm",
"max_seq_length": 2048,
"embedding": ["word"],
"remove_transformer_bias": true,
"remove_embedding_layernorm": true,
"rotary_position_embedding": true,
"encoder": "transformer",
"feed_forward": "gated",
"mask": "causal",
"layernorm_positioning": "pre",
"layernorm": "rms",
"target": ["lm"]
}
\ No newline at end of file
{
"emb_size": 6656,
"feedforward_size": 17920,
"hidden_size": 6656,
"hidden_act": "silu",
"heads_num": 52,
"layers_num": 60,
"dropout": 0.0,
"data_processor": "lm",
"max_seq_length": 2048,
"embedding": ["word"],
"remove_transformer_bias": true,
"remove_embedding_layernorm": true,
"rotary_position_embedding": true,
"encoder": "transformer",
"feed_forward": "gated",
"mask": "causal",
"layernorm_positioning": "pre",
"layernorm": "rms",
"target": ["lm"]
}
\ No newline at end of file
{
"emb_size": 8192,
"feedforward_size": 22016,
"hidden_size": 8192,
"hidden_act": "silu",
"heads_num": 64,
"layers_num": 80,
"dropout": 0.0,
"data_processor": "lm",
"max_seq_length": 2048,
"embedding": ["word"],
"remove_transformer_bias": true,
"remove_embedding_layernorm": true,
"rotary_position_embedding": true,
"encoder": "transformer",
"feed_forward": "gated",
"mask": "causal",
"layernorm_positioning": "pre",
"layernorm": "rms",
"target": ["lm"]
}
\ No newline at end of file
{
"emb_size": 4096,
"feedforward_size": 11008,
"hidden_size": 4096,
"hidden_act": "silu",
"heads_num": 32,
"layers_num": 32,
"dropout": 0.0,
"data_processor": "lm",
"max_seq_length": 2048,
"embedding": ["word"],
"remove_transformer_bias": true,
"remove_embedding_layernorm": true,
"rotary_position_embedding": true,
"encoder": "transformer",
"feed_forward": "gated",
"mask": "causal",
"layernorm_positioning": "pre",
"layernorm": "rms",
"target": ["lm"]
}
\ No newline at end of file
{
"pad_token": "<0x00>",
"unk_token": "<unk>",
"cls_token": "<s>",
"sep_token": "</s>",
"mask_token": "<mask>"
}
{
"emb_size": 2560,
"feedforward_size": 10240,
"hidden_size": 2560,
"hidden_act": "gelu",
"heads_num": 40,
"layers_num": 48,
"max_seq_length": 512,
"dropout": 0.1,
"data_processor": "bert",
"embedding": ["word", "pos", "seg"],
"encoder": "transformer",
"mask": "fully_visible",
"target": ["mlm", "sp"]
}
\ No newline at end of file
{
"emb_size": 3072,
"feedforward_size": 12288,
"hidden_size": 3072,
"hidden_act": "gelu_fast",
"heads_num": 24,
"layers_num": 72,
"max_seq_length": 1024,
"dropout": 0.1,
"data_processor": "lm",
"embedding": ["word", "pos"],
"remove_embedding_layernorm": true,
"encoder": "transformer",
"mask": "causal",
"layernorm_positioning": "pre",
"target": ["lm"],
"tie_weights": true
}
\ No newline at end of file
{
"emb_size": 768,
"feedforward_size": 3072,
"hidden_size": 768,
"hidden_act": "relu",
"heads_num": 12,
"layers_num": 12,
"decoder_layers_num": 12,
"max_seq_length": 1024,
"dropout": 0.1,
"data_processor": "gsg",
"embedding": ["word", "sinusoidalpos"],
"remove_embedding_layernorm": true,
"tgt_embedding": ["word", "sinusoidalpos"],
"share_embedding": true,
"encoder": "transformer",
"mask": "fully_visible",
"layernorm_positioning": "pre",
"decoder": "transformer",
"target": ["lm"],
"has_lmtarget_bias": true,
"tie_weights": true
}
\ No newline at end of file
{
"emb_size": 1024,
"feedforward_size": 4096,
"hidden_size": 1024,
"hidden_act": "relu",
"heads_num": 16,
"layers_num": 16,
"decoder_layers_num": 16,
"max_seq_length": 1024,
"dropout": 0.1,
"data_processor": "gsg",
"embedding": ["word", "sinusoidalpos"],
"remove_embedding_layernorm": true,
"tgt_embedding": ["word", "sinusoidalpos"],
"share_embedding": true,
"encoder": "transformer",
"mask": "fully_visible",
"layernorm_positioning": "pre",
"decoder": "transformer",
"target": ["lm"],
"has_lmtarget_bias": true,
"tie_weights": true
}
\ No newline at end of file
{
"chnsenticorp_char": {
"template": "[TEXT_A],我[ANS]满意",
"answer_words": {"0": "不","1":"很"}
},
"chnsenticorp_word": {
"template": "[TEXT_A],我[ANS]",
"answer_words": {"0": "不满意","1":"很满意"}
}
}
\ No newline at end of file
[PAD]
[unused1]
[unused2]
[unused3]
[unused4]
[unused5]
[unused6]
[unused7]
[unused8]
[unused9]
[unused10]
[unused11]
[unused12]
[unused13]
[unused14]
[unused15]
[unused16]
[unused17]
[unused18]
[unused19]
[unused20]
[unused21]
[unused22]
[unused23]
[unused24]
[unused25]
[unused26]
[unused27]
[unused28]
[unused29]
[unused30]
[unused31]
[unused32]
[unused33]
[unused34]
[unused35]
[unused36]
[unused37]
[unused38]
[unused39]
[unused40]
[unused41]
[unused42]
[unused43]
[unused44]
[unused45]
[unused46]
[unused47]
[unused48]
[unused49]
[unused50]
[unused51]
[unused52]
[unused53]
[unused54]
[unused55]
[unused56]
[unused57]
[unused58]
[unused59]
[unused60]
[unused61]
[unused62]
[unused63]
[unused64]
[unused65]
[unused66]
[unused67]
[unused68]
[unused69]
[unused70]
[unused71]
[unused72]
[unused73]
[unused74]
[unused75]
[unused76]
[unused77]
[unused78]
[unused79]
[unused80]
[unused81]
[unused82]
[unused83]
[unused84]
[unused85]
[unused86]
[unused87]
[unused88]
[unused89]
[unused90]
[unused91]
[unused92]
[unused93]
[unused94]
[unused95]
[unused96]
[unused97]
[unused98]
[unused99]
[UNK]
[CLS]
[SEP]
[MASK]
{
"emb_size": 512,
"hidden_size": 1536,
"layers_num": 2,
"dropout": 0.1,
"max_seq_length": 1024,
"data_processor": "bilm",
"embedding": ["word"],
"remove_embedding_layernorm": true,
"encoder": "bilstm",
"target": ["bilm"]
}
\ No newline at end of file
{
"emb_size": 512,
"hidden_size": 512,
"layers_num": 2,
"max_seq_length": 1024,
"dropout": 0.1,
"data_processor": "lm",
"embedding": ["word"],
"remove_embedding_layernorm": true,
"encoder": "gru",
"target": ["lm"]
}
{
"emb_size": 512,
"hidden_size": 512,
"layers_num": 2,
"max_seq_length": 1024,
"dropout": 0.1,
"data_processor": "lm",
"embedding": ["word"],
"remove_embedding_layernorm": true,
"encoder": "lstm",
"target": ["lm"]
}
{
"emb_size": 1024,
"feedforward_size": 4096,
"hidden_size": 1024,
"hidden_act": "relu",
"heads_num": 16,
"layers_num": 12,
"decoder_layers_num": 6,
"max_audio_frames": 6000,
"dropout": 0.1,
"data_processor": "s2t",
"embedding": ["speech", "sinusoidalpos"],
"tgt_embedding": ["word", "sinusoidalpos"],
"encoder": "transformer",
"mask": "fully_visible",
"decoder": "transformer",
"target": ["lm"],
"has_lmtarget_bias": false,
"conv_channels": [1024, 2048],
"audio_feature_size": 80,
"conv_kernel_sizes": [5, 5],
"layernorm_positioning": "pre",
"remove_embedding_layernorm": true,
"tie_weights": true,
"optimizer": "adamw",
"scheduler": "inverse_sqrt",
"audio_preprocess": ["normalize_means", "normalize_vars", "ceptral_normalize"],
"specaugment":{
"freq_mask_F": 27,
"freq_mask_N": 2,
"time_mask_N": 2,
"time_mask_T": 100,
"time_mask_p": 1.0,
"time_wrap_W": 0
},
"label_smoothing": 0.1,
"ignore_index": true
}
\ No newline at end of file
{
"emb_size": 512,
"feedforward_size": 2048,
"hidden_size": 512,
"hidden_act": "relu",
"heads_num": 8,
"layers_num": 12,
"decoder_layers_num": 6,
"max_audio_frames": 6000,
"dropout": 0.1,
"data_processor": "s2t",
"embedding": ["speech", "sinusoidalpos"],
"tgt_embedding": ["word", "sinusoidalpos"],
"encoder": "transformer",
"mask": "fully_visible",
"decoder": "transformer",
"target": ["lm"],
"has_lmtarget_bias": false,
"conv_channels": [1024, 1024],
"audio_feature_size": 80,
"conv_kernel_sizes": [5, 5],
"layernorm_positioning": "pre",
"remove_embedding_layernorm": true,
"tie_weights": true,
"optimizer": "adamw",
"scheduler": "inverse_sqrt",
"audio_preprocess": ["normalize_means", "normalize_vars", "ceptral_normalize"],
"specaugment":{
"freq_mask_F": 27,
"freq_mask_N": 2,
"time_mask_N": 2,
"time_mask_T": 100,
"time_mask_p": 1.0,
"time_wrap_W": 0
},
"label_smoothing": 0.1,
"ignore_index": true
}
{
"emb_size": 256,
"feedforward_size": 2048,
"hidden_size": 256,
"hidden_act": "relu",
"heads_num": 4,
"layers_num": 12,
"decoder_layers_num": 6,
"max_audio_frames": 6000,
"dropout": 0.1,
"data_processor": "s2t",
"embedding": ["speech", "sinusoidalpos"],
"tgt_embedding": ["word", "sinusoidalpos"],
"encoder": "transformer",
"mask": "fully_visible",
"decoder": "transformer",
"target": ["lm"],
"has_lmtarget_bias": false,
"conv_channels": [1024, 512],
"audio_feature_size": 80,
"conv_kernel_sizes": [5, 5],
"layernorm_positioning": "pre",
"remove_embedding_layernorm": true,
"tie_weights": true,
"optimizer": "adamw",
"scheduler": "inverse_sqrt",
"audio_preprocess": ["normalize_means", "normalize_vars", "ceptral_normalize"],
"specaugment":{
"freq_mask_F": 27,
"freq_mask_N": 2,
"time_mask_N": 2,
"time_mask_T": 100,
"time_mask_p": 1.0,
"time_wrap_W": 0
},
"label_smoothing": 0.1,
"ignore_index": true
}
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment