"sgl-kernel/git@developer.sourcefind.cn:change/sglang.git" did not exist on "930746d93c1b3ef96a510b6f88284583f8fdb766"
Commit 1fed7f94 authored by A. Unique TensorFlower
Browse files

Internal change

PiperOrigin-RevId: 377425143
parent 1ba20e04
...@@ -72,7 +72,7 @@ trainer: ...@@ -72,7 +72,7 @@ trainer:
type: 'cosine' type: 'cosine'
cosine: cosine:
initial_learning_rate: 0.6 # 0.3 × BatchSize / 256 initial_learning_rate: 0.6 # 0.3 × BatchSize / 256
decay_steps: 43200 # train_steps - warmup_steps decay_steps: 48000
warmup: warmup:
type: 'linear' type: 'linear'
linear: linear:
......
# ImageNet classification. # SimCLR Imagenet 10% finetuning.
runtime: runtime:
distribution_strategy: 'mirrored' distribution_strategy: 'mirrored'
mixed_precision_dtype: 'float16' mixed_precision_dtype: 'float16'
...@@ -55,7 +55,7 @@ trainer: ...@@ -55,7 +55,7 @@ trainer:
train_steps: 12500 # 100 epochs train_steps: 12500 # 100 epochs
validation_steps: 49 # NUM_EXAMPLES (50000) // global_batch_size validation_steps: 49 # NUM_EXAMPLES (50000) // global_batch_size
validation_interval: 125 validation_interval: 125
steps_per_loop: 125 # NUM_EXAMPLES (1281167) // global_batch_size steps_per_loop: 125 # NUM_EXAMPLES (128116) // global_batch_size
summary_interval: 125 summary_interval: 125
checkpoint_interval: 125 checkpoint_interval: 125
optimizer_config: optimizer_config:
......
# SimCLR ImageNet 10% finetuning.
#
# Execution settings: TPU distribution strategy with bfloat16 mixed precision.
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'bfloat16'
# Task definition for the finetune run: model architecture, loss, evaluation,
# the checkpoint to initialize from, and the train/validation input pipelines.
task:
  model:
    mode: 'finetune'
    input_size: [224, 224, 3]
    backbone:
      type: 'resnet'
      resnet:
        model_id: 50
    backbone_trainable: true
    projection_head:
      proj_output_dim: 128
      num_proj_layers: 3
      # NOTE(review): presumably the index of the projection layer that
      # finetuning starts from - confirm against the model code.
      ft_proj_idx: 1
    supervised_head:
      num_classes: 1001
      zero_init: true
    norm_activation:
      use_sync_bn: false
      norm_momentum: 0.9
      norm_epsilon: 0.00001
  loss:
    label_smoothing: 0.0
    one_hot: true
  evaluation:
    top_k: 5
    one_hot: true
  # Quoted for consistency with the other string values in this file.
  init_checkpoint: 'gs://tf_model_garden/vision/simclr/r50_1x'
  # NOTE(review): presumably selects which sub-modules are restored from
  # init_checkpoint - confirm against the task implementation.
  init_checkpoint_modules: 'backbone_projection'
  train_data:
    tfds_name: 'imagenet2012_subset/10pct'
    tfds_split: 'train'
    input_path: ''
    is_training: true
    global_batch_size: 1024
    dtype: 'bfloat16'
    parser:
      mode: 'finetune'
  validation_data:
    tfds_name: 'imagenet2012_subset/10pct'
    tfds_split: 'validation'
    input_path: ''
    is_training: false
    global_batch_size: 1024
    dtype: 'bfloat16'
    drop_remainder: false
    parser:
      mode: 'finetune'
# Training loop schedule and optimizer for the finetune run. Step counts are
# derived from the global batch size of 1024 configured under task.
trainer:
  train_steps: 12500  # 100 epochs * 125 steps per epoch
  # 50000 / 1024 is about 48.8, rounded up to 49; validation_data keeps the
  # final partial batch (drop_remainder: false).
  validation_steps: 49
  validation_interval: 125
  steps_per_loop: 125  # NUM_EXAMPLES (128116) // global_batch_size
  summary_interval: 125
  checkpoint_interval: 125
  optimizer_config:
    optimizer:
      type: 'lars'
      lars:
        momentum: 0.9
        weight_decay_rate: 0.0
        exclude_from_weight_decay: ['batch_normalization', 'bias']
    learning_rate:
      type: 'cosine'
      cosine:
        # 0.04 = 0.01 * BatchSize / 256 at BatchSize 1024.
        # NOTE(review): this comment previously read "/ 512", which would give
        # 0.02 - confirm which learning rate is intended.
        initial_learning_rate: 0.04
        decay_steps: 12500  # train_steps
# ImageNet classification. # SimCLR Imagenet pretraining.
runtime: runtime:
distribution_strategy: 'mirrored' distribution_strategy: 'mirrored'
mixed_precision_dtype: 'float16' mixed_precision_dtype: 'float16'
...@@ -49,12 +49,12 @@ task: ...@@ -49,12 +49,12 @@ task:
decoder: decoder:
decode_label: true decode_label: true
trainer: trainer:
train_steps: 187200 # 300 epochs train_steps: 500000 # 800 epochs
validation_steps: 24 # NUM_EXAMPLES (50000) // global_batch_size validation_steps: 24 # NUM_EXAMPLES (50000) // global_batch_size
validation_interval: 624 validation_interval: 625
steps_per_loop: 624 # NUM_EXAMPLES (1281167) // global_batch_size steps_per_loop: 625 # NUM_EXAMPLES (1281167) // global_batch_size
summary_interval: 624 summary_interval: 625
checkpoint_interval: 624 checkpoint_interval: 625
optimizer_config: optimizer_config:
optimizer: optimizer:
type: 'lars' type: 'lars'
...@@ -66,8 +66,8 @@ trainer: ...@@ -66,8 +66,8 @@ trainer:
type: 'cosine' type: 'cosine'
cosine: cosine:
initial_learning_rate: 1.6 # 0.2 * BatchSize / 256 initial_learning_rate: 1.6 # 0.2 * BatchSize / 256
decay_steps: 177840 # train_steps - warmup_steps decay_steps: 500000
warmup: warmup:
type: 'linear' type: 'linear'
linear: linear:
warmup_steps: 9360 # 5% of total epochs warmup_steps: 25000 # 5% of total epochs
# SimCLR ImageNet pretraining.
#
# Execution settings: TPU distribution strategy with bfloat16 mixed precision.
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'bfloat16'
# Task definition for the pretrain run: model architecture, contrastive loss
# settings, evaluation, and the train/validation input pipelines.
task:
  model:
    mode: 'pretrain'
    input_size: [224, 224, 3]
    backbone:
      type: 'resnet'
      resnet:
        model_id: 50
    backbone_trainable: true
    projection_head:
      proj_output_dim: 128
      num_proj_layers: 3
      ft_proj_idx: 0
    supervised_head:
      num_classes: 1001
    norm_activation:
      use_sync_bn: true
      norm_momentum: 0.9
      norm_epsilon: 0.00001
  loss:
    projection_norm: true
    temperature: 0.1
  evaluation:
    top_k: 5
    one_hot: true
  train_data:
    # NOTE(review): absolute, environment-specific path - confirm it resolves
    # wherever this config is run, or override it at launch time.
    input_path: '/readahead/200M/placer/prod/home/distbelief/imagenet-tensorflow/imagenet-2012-tfrecord/train*'
    is_training: true
    global_batch_size: 2048
    dtype: 'bfloat16'
    parser:
      mode: 'pretrain'
    decoder:
      decode_label: true
  validation_data:
    # NOTE(review): absolute, environment-specific path - see train_data.
    input_path: '/readahead/200M/placer/prod/home/distbelief/imagenet-tensorflow/imagenet-2012-tfrecord/valid*'
    is_training: false
    global_batch_size: 2048
    dtype: 'bfloat16'
    drop_remainder: false
    parser:
      mode: 'pretrain'
    decoder:
      decode_label: true
# Training loop schedule and optimizer for the pretrain run. Step counts are
# derived from the global batch size of 2048 configured under task.
trainer:
  train_steps: 500000  # 800 epochs * 625 steps per epoch
  # NUM_EXAMPLES (50000) // global_batch_size.
  # NOTE(review): floor division leaves 50000 - 24 * 2048 = 848 examples
  # unevaluated even though validation_data sets drop_remainder: false -
  # confirm whether 25 was intended.
  validation_steps: 24
  validation_interval: 625
  steps_per_loop: 625  # NUM_EXAMPLES (1281167) // global_batch_size
  summary_interval: 625
  checkpoint_interval: 625
  optimizer_config:
    optimizer:
      type: 'lars'
      lars:
        momentum: 0.9
        weight_decay_rate: 0.000001
        exclude_from_weight_decay: ['batch_normalization', 'bias']
    learning_rate:
      type: 'cosine'
      cosine:
        initial_learning_rate: 1.6  # 0.2 * BatchSize / 256
        decay_steps: 500000  # equal to train_steps
    warmup:
      type: 'linear'
      linear:
        warmup_steps: 25000  # 5% of train_steps
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment