Commit ea2d13c2 authored by zhaoying1

added llama_tencentpretrain_pytorch

Pipeline #548 failed with stages in 0 seconds
{
    "emb_size": 128,
    "feedforward_size": 512,
    "hidden_size": 128,
    "hidden_act": "gelu",
    "heads_num": 2,
    "layers_num": 2,
    "max_seq_length": 512,
    "dropout": 0.1,
    "data_processor": "bert",
    "embedding": ["word", "pos", "seg"],
    "encoder": "transformer",
    "mask": "fully_visible",
    "target": ["mlm", "sp"],
    "tie_weights": true
}
\ No newline at end of file
{
    "emb_size": 1536,
    "feedforward_size": 6144,
    "hidden_size": 1536,
    "hidden_act": "gelu",
    "heads_num": 24,
    "layers_num": 36,
    "max_seq_length": 512,
    "dropout": 0.1,
    "data_processor": "bert",
    "embedding": ["word", "pos", "seg"],
    "encoder": "transformer",
    "mask": "fully_visible",
    "target": ["mlm", "sp"],
    "tie_weights": true
}
\ No newline at end of file
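These two files use TencentPretrain's BERT-style config format: a tiny model (2 layers, hidden size 128) and a much larger one (36 layers, hidden size 1536), both trained with masked language modeling plus sentence prediction ("target": ["mlm", "sp"]). As a rough sanity check, the encoder parameter count can be estimated straight from the config fields; the sketch below is illustrative only, and vocab_size is an assumed placeholder (it comes from the tokenizer, not from these files):

import json

def approx_encoder_params(path, vocab_size=21128):   # vocab_size is an assumption
    """Rough parameter estimate from a TencentPretrain-style config
    (word embeddings + transformer layers; heads and pos/seg embeddings omitted)."""
    cfg = json.load(open(path))
    h, ff, layers = cfg["hidden_size"], cfg["feedforward_size"], cfg["layers_num"]
    emb = vocab_size * cfg["emb_size"]   # word embedding table
    attn = 4 * h * h + 4 * h             # Q, K, V, output projections + biases
    ffn = 2 * h * ff + ff + h            # two feed-forward matrices + biases
    ln = 2 * 2 * h                       # two layernorms per layer (gain + bias)
    return emb + layers * (attn + ffn + ln)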
{
    "stream_0": {
        "emb_size": 512,
        "feedforward_size": 2048,
        "hidden_size": 512,
        "hidden_act": "gelu",
        "heads_num": 8,
        "layers_num": 12,
        "max_seq_length": 512,
        "embedding": ["word", "pos"],
        "encoder": "transformer",
        "mask": "fully_visible",
        "remove_embedding_layernorm": false,
        "layernorm_positioning": "post",
        "pooling": "first"
    },
    "stream_1": {
        "emb_size": 768,
        "feedforward_size": 3072,
        "hidden_size": 768,
        "hidden_act": "gelu_fast",
        "heads_num": 12,
        "layers_num": 12,
        "max_seq_length": 197,
        "embedding": ["patch", "pos"],
        "encoder": "transformer",
        "mask": "fully_visible",
        "remove_embedding_layernorm": true,
        "layernorm_positioning": "pre",
        "pooling": "first"
    },
    "data_processor": "clip",
    "embedding": ["dual"],
    "encoder": "dual",
    "target": ["clr"],
    "image_height": 224,
    "image_width": 224,
    "patch_size": 16,
    "feature_size": 512,
    "projection": true,
    "tie_weights": false,
    "dropout": 0.0
}
\ No newline at end of file
{
    "stream_0": {
        "emb_size": 512,
        "feedforward_size": 2048,
        "hidden_size": 512,
        "hidden_act": "gelu",
        "heads_num": 8,
        "layers_num": 12,
        "max_seq_length": 512,
        "embedding": ["word", "pos"],
        "encoder": "transformer",
        "mask": "fully_visible",
        "remove_embedding_layernorm": false,
        "layernorm_positioning": "post",
        "pooling": "first"
    },
    "stream_1": {
        "emb_size": 768,
        "feedforward_size": 3072,
        "hidden_size": 768,
        "hidden_act": "gelu_fast",
        "heads_num": 12,
        "layers_num": 12,
        "max_seq_length": 50,
        "embedding": ["patch", "pos"],
        "encoder": "transformer",
        "mask": "fully_visible",
        "remove_embedding_layernorm": true,
        "layernorm_positioning": "pre",
        "pooling": "first"
    },
    "data_processor": "clip",
    "embedding": ["dual"],
    "encoder": "dual",
    "target": ["clr"],
    "image_height": 224,
    "image_width": 224,
    "patch_size": 32,
    "feature_size": 512,
    "projection": true,
    "tie_weights": false,
    "dropout": 0.0
}
\ No newline at end of file
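The two dual-stream CLIP configs differ mainly in patch size, which is what sets the vision stream's max_seq_length: the 224x224 image is cut into (224 / patch_size)^2 patches plus one class token, giving 197 for patch size 16 and 50 for patch size 32. A minimal check of that arithmetic:

def vit_seq_length(image_size, patch_size):
    """Patch count plus one [CLS] token for a square ViT input."""
    return (image_size // patch_size) ** 2 + 1

assert vit_seq_length(224, 16) == 197   # stream_1 max_seq_length in the patch-16 config
assert vit_seq_length(224, 32) == 50    # stream_1 max_seq_length in the patch-32 config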
{
    "emb_size": 128,
    "hidden_size": 1268,
    "layers_num": 13,
    "kernel_size": 4,
    "block_size": 2,
    "max_seq_length": 1024,
    "dropout": 0.1,
    "data_processor": "lm",
    "embedding": ["word"],
    "remove_embedding_layernorm": true,
    "encoder": "gatedcnn",
    "target": ["lm"]
}
\ No newline at end of file
{
    "emb_size": 128,
    "hidden_size": 807,
    "layers_num": 9,
    "kernel_size": 4,
    "block_size": 2,
    "max_seq_length": 1024,
    "dropout": 0.1,
    "data_processor": "lm",
    "embedding": ["word"],
    "remove_embedding_layernorm": true,
    "encoder": "gatedcnn",
    "target": ["lm"]
}
\ No newline at end of file
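The two gatedcnn configs describe gated convolutional language models in the style of Dauphin et al.'s gated linear units (GLU), at two depths (13 layers with hidden size 1268, and 9 layers with hidden size 807). As an illustration only, not TencentPretrain's actual implementation, a single causal gated convolution block looks roughly like this:

import torch
import torch.nn as nn

class GatedConvBlock(nn.Module):
    """Illustrative GLU block: output = conv(x) * sigmoid(gate(x))."""
    def __init__(self, hidden_size, kernel_size):
        super().__init__()
        # left-pad so the convolution only sees past positions (causal LM)
        self.pad = nn.ConstantPad1d((kernel_size - 1, 0), 0.0)
        self.conv = nn.Conv1d(hidden_size, hidden_size, kernel_size)
        self.gate = nn.Conv1d(hidden_size, hidden_size, kernel_size)

    def forward(self, x):          # x: (batch, hidden_size, seq_len)
        x = self.pad(x)
        return self.conv(x) * torch.sigmoid(self.gate(x))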
{
    "emb_size": 768,
    "feedforward_size": 3072,
    "hidden_size": 768,
    "hidden_act": "gelu",
    "heads_num": 12,
    "layers_num": 12,
    "dropout": 0.1,
    "data_processor": "dalle",
    "embedding": ["word", "pos", "seg"],
    "encoder": "transformer",
    "mask": "prefix",
    "target": ["lm"],
    "image_height": 256,
    "image_width": 256,
    "patch_size": 16,
    "tokenizer": "multimodal",
    "image_tokenizer": {
        "is_gumbel": false,
        "is_transformer": true,
        "image_vocab_size": 1024,
        "frame_size": 16
    }
}
\ No newline at end of file
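This DALL-E style config pairs a text prefix with discrete image tokens: the multimodal tokenizer maps each 256x256 image to a grid of codebook indices, and "mask": "prefix" keeps the text portion fully visible while the image tokens are modeled autoregressively. A reading of the arithmetic implied by the fields above (my interpretation of frame_size, not confirmed by the file itself):

# Illustrative arithmetic for the image side of the sequence:
image_vocab_size = 1024          # entries in the discrete image codebook
frame_size = 16                  # image is tokenized into a frame_size x frame_size grid
image_tokens = frame_size ** 2   # 256 image tokens follow the text prefix
print(image_tokens)              # 256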
{
    "gradient_accumulation_steps": 1,
    "train_micro_batch_size_per_gpu": 1,
    "steps_per_print": 100,
    "optimizer": {
        "type": "Adam",
        "params": {
            "lr": 2e-5,
            "weight_decay": 1e-2
        }
    },
    "flops_profiler": {
        "enabled": false,
        "profile_step": 1,
        "module_depth": -1,
        "top_modules": 3,
        "detailed": true
    },
    "fp16": {
        "enabled": true,
        "loss_scale": 0,
        "loss_scale_window": 1000,
        "hysteresis": 2,
        "min_loss_scale": 1
    },
    "zero_optimization": {
        "stage": 3,
        "offload_optimizer": {
            "device": "cpu",
            "pin_memory": true
        },
        "offload_param": {
            "device": "cpu",
            "pin_memory": true
        }
    },
    "activation_checkpointing": {
        "partition_activations": false,
        "contiguous_memory_optimization": false,
        "cpu_checkpointing": false
    },
    "wall_clock_breakdown": false,
    "zero_allow_untested_optimizer": true
}
\ No newline at end of file
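This is a standard DeepSpeed config: ZeRO stage 3 with both parameters and optimizer state offloaded to CPU, fp16 with dynamic loss scaling, and Adam at lr 2e-5. A config like this is consumed through deepspeed.initialize; the sketch below is a generic usage pattern under the deepspeed launcher, not TencentPretrain's actual training loop (the model, data, and "deepspeed_config.json" filename are placeholders):

import torch
import deepspeed

model = torch.nn.Linear(10, 1)   # placeholder model standing in for the real network

model_engine, optimizer, _, _ = deepspeed.initialize(
    model=model,
    model_parameters=model.parameters(),
    config="deepspeed_config.json",   # assumed filename for the config above
)

for _ in range(10):                                   # placeholder training loop
    x = torch.randn(1, 10).to(model_engine.device)    # micro-batch size 1, as configured
    loss = model_engine(x).mean()
    model_engine.backward(loss)                       # DeepSpeed applies fp16 loss scaling
    model_engine.step()                               # optimizer step + gradient zeroing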
{
    "gradient_accumulation_steps": 1,
    "train_micro_batch_size_per_gpu": 1,
    "steps_per_print": 10,
    "optimizer": {
        "type": "Adam",
        "params": {
            "lr": 5e-5,
            "weight_decay": 1e-2
        }
    },
    "flops_profiler": {
        "enabled": false,
        "profile_step": 1,
        "module_depth": -1,
        "top_modules": 3,
        "detailed": true
    },
    "fp16": {
        "enabled": true,
        "loss_scale": 0,
        "loss_scale_window": 1000,
        "hysteresis": 2,
        "min_loss_scale": 1e-10
    },
    "zero_optimization": {
        "stage": 3,
        "stage3_gather_16bit_weights_on_model_save": true,
        "offload_param": {
            "device": "cpu",
            "pin_memory": true
        },
        "offload_optimizer": {
            "device": "cpu",
            "pin_memory": true
        }
    },
    "activation_checkpointing": {
        "partition_activations": false,
        "contiguous_memory_optimization": false,
        "cpu_checkpointing": false
    },
    "wall_clock_breakdown": false,
    "zero_allow_untested_optimizer": true
}
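The second DeepSpeed config differs mainly in its learning rate (5e-5 vs. 2e-5), print interval, and a much smaller min_loss_scale; it also enables stage3_gather_16bit_weights_on_model_save, which lets the engine consolidate the ZeRO-3 partitioned parameters into a single fp16 checkpoint at save time. Assuming a model_engine obtained from deepspeed.initialize as in the sketch above:

# Gathers ZeRO-3 partitioned parameters and writes one consolidated fp16 state dict;
# requires "stage3_gather_16bit_weights_on_model_save": true in the config.
model_engine.save_16bit_model("output_dir", "pytorch_model.bin")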
(Six additional file diffs are collapsed and not shown here.)
{
    "emb_size": 768,
    "feedforward_size": 3072,
    "hidden_size": 768,
    "hidden_act": "gelu_fast",
    "heads_num": 12,
    "layers_num": 12,
    "max_seq_length": 1024,
    "dropout": 0.1,
    "data_processor": "lm",
    "embedding": ["word", "pos"],
    "remove_embedding_layernorm": true,
    "encoder": "transformer",
    "mask": "causal",
    "layernorm_positioning": "pre",
    "target": ["lm"],
    "tie_weights": true
}
\ No newline at end of file
{
    "emb_size": 768,
    "feedforward_size": 3072,
    "hidden_size": 768,
    "hidden_act": "gelu_fast",
    "heads_num": 12,
    "layers_num": 6,
    "max_seq_length": 1024,
    "dropout": 0.1,
    "data_processor": "lm",
    "embedding": ["word", "pos"],
    "remove_embedding_layernorm": true,
    "encoder": "transformer",
    "mask": "causal",
    "layernorm_positioning": "pre",
    "target": ["lm"],
    "tie_weights": true
}
\ No newline at end of file
{
    "emb_size": 1280,
    "feedforward_size": 5120,
    "hidden_size": 1280,
    "hidden_act": "gelu_fast",
    "heads_num": 20,
    "layers_num": 36,
    "max_seq_length": 1024,
    "dropout": 0.1,
    "data_processor": "lm",
    "embedding": ["word", "pos"],
    "remove_embedding_layernorm": true,
    "encoder": "transformer",
    "mask": "causal",
    "layernorm_positioning": "pre",
    "target": ["lm"],
    "tie_weights": true
}
\ No newline at end of file
{
    "emb_size": 1024,
    "feedforward_size": 4096,
    "hidden_size": 1024,
    "hidden_act": "gelu_fast",
    "heads_num": 16,
    "layers_num": 24,
    "max_seq_length": 1024,
    "dropout": 0.1,
    "data_processor": "lm",
    "embedding": ["word", "pos"],
    "remove_embedding_layernorm": true,
    "encoder": "transformer",
    "mask": "causal",
    "layernorm_positioning": "pre",
    "target": ["lm"],
    "tie_weights": true
}
\ No newline at end of file
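The four lm configs above match the usual GPT-2 family sizes: 12 layers x 768 hidden (base), 6 x 768 (a distilled variant), 36 x 1280 (large), and 24 x 1024 (medium), all with pre-layernorm, tied embeddings, and a causal attention mask. A minimal sketch of the causal mask these configs request (illustrative, not TencentPretrain's code):

import torch

def causal_mask(seq_len):
    """Lower-triangular mask: position i may attend only to positions <= i."""
    return torch.tril(torch.ones(seq_len, seq_len, dtype=torch.bool))

print(causal_mask(4))
# tensor([[ True, False, False, False],
#         [ True,  True, False, False],
#         [ True,  True,  True, False],
#         [ True,  True,  True,  True]])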