---
# Experiment configuration: image classification with a darknet53 backbone
# on the TFDS dataset `imagenet2012` (256x256x3 inputs, 1001 classes),
# trained with SGD + momentum, a polynomial learning-rate decay and a
# linear warmup.
#
# Layout: `runtime` (execution environment), `task` (model, losses, data)
# and `trainer` (optimization schedule, checkpointing, evaluation cadence).
# Keys inside each mapping are kept in alphabetical order.
# NOTE(review): the key set looks like a TensorFlow Model Garden style
# experiment config - confirm against the consuming framework.

# Execution environment. `null` values leave the choice to the consumer.
runtime:
  all_reduce_alg: null
  batchnorm_spatial_persistent: false
  dataset_num_private_threads: null
  default_shard_dim: -1
  distribution_strategy: mirrored
  enable_xla: false
  gpu_thread_mode: null
  loss_scale: null
  # NOTE(review): compute dtype is float32 while both data pipelines below
  # request `dtype: float16` - confirm this mismatch is intended.
  mixed_precision_dtype: float32
  num_cores_per_replica: 1
  num_gpus: 0
  num_packs: 1
  per_gpu_thread_count: 0
  run_eagerly: false
  task_index: -1
  tpu: null
  worker_hosts: null

# Model, loss and input-pipeline definition.
task:
  gradient_clip_norm: 0.0
  init_checkpoint: ''
  logging_dir: null
  losses:
    l2_weight_decay: 0.0005
    label_smoothing: 0.0
    one_hot: true
  model:
    add_head_batch_norm: false
    backbone:
      darknet:
        model_id: darknet53
      type: darknet
    dropout_rate: 0.0
    # Short leaf list kept in flow style: three integers (256, 256, 3).
    input_size: [256, 256, 3]
    norm_activation:
      activation: relu
      norm_epsilon: 0.001
      norm_momentum: 0.99
      use_sync_bn: false
    num_classes: 1001
  # Training split of imagenet2012, read through TFDS (`input_path` empty).
  train_data:
    block_length: 1
    cache: false
    cycle_length: 10
    deterministic: null
    drop_remainder: true
    dtype: float16
    enable_tf_data_service: false
    global_batch_size: 128
    input_path: ''
    is_training: true
    sharding: true
    shuffle_buffer_size: 10000
    tf_data_service_address: null
    tf_data_service_job_name: null
    tfds_as_supervised: false
    # Quoted: a plain scalar starting with `~` is easy to confuse with the
    # YAML null indicator. YAML itself does not expand `~`.
    tfds_data_dir: '~/tensorflow_datasets/'
    tfds_download: true
    tfds_name: imagenet2012
    tfds_skip_decoding_feature: ''
    tfds_split: train
  # Validation split of imagenet2012. Differs from `train_data` in
  # `drop_remainder`, `is_training` and `tfds_split`.
  validation_data:
    block_length: 1
    cache: false
    cycle_length: 10
    deterministic: null
    drop_remainder: false
    dtype: float16
    enable_tf_data_service: false
    global_batch_size: 128
    input_path: ''
    # NOTE(review): this was `true`, identical to `train_data`, which looks
    # like a copy-paste slip for an evaluation split. Set to `false` so the
    # validation pipeline is not flagged as a training one - confirm against
    # the consuming input reader.
    is_training: false
    sharding: true
    shuffle_buffer_size: 10000
    tf_data_service_address: null
    tf_data_service_job_name: null
    tfds_as_supervised: false
    tfds_data_dir: '~/tensorflow_datasets/'
    tfds_download: true
    tfds_name: imagenet2012
    tfds_skip_decoding_feature: ''
    tfds_split: validation

# Training loop, optimizer schedule, checkpointing and evaluation cadence.
trainer:
  allow_tpu_summary: false
  best_checkpoint_eval_metric: ''
  best_checkpoint_export_subdir: ''
  best_checkpoint_metric_comp: higher
  checkpoint_interval: 10000
  continuous_eval_timeout: 3600
  eval_tf_function: true
  max_to_keep: 5
  optimizer_config:
    ema: null
    learning_rate:
      polynomial:
        cycle: false
        # 799000 = train_steps (800000) - warmup_steps (1000).
        # NOTE(review): presumably chosen so the decay ends with training;
        # keep the three values in sync when changing any of them.
        decay_steps: 799000
        end_learning_rate: 0.0001
        initial_learning_rate: 0.1
        name: PolynomialDecay
        power: 4.0
      type: polynomial
    optimizer:
      sgd:
        clipnorm: null
        clipvalue: null
        decay: 0.0
        momentum: 0.9
        name: SGD
        nesterov: false
      type: sgd
    warmup:
      linear:
        name: linear
        warmup_learning_rate: 0
        warmup_steps: 1000
      type: linear
  # Checkpoint, summary, validation and loop intervals all share 10000.
  steps_per_loop: 10000
  summary_interval: 10000
  train_steps: 800000
  train_tf_function: true
  train_tf_while_loop: true
  validation_interval: 10000
  validation_steps: 400