# tcn_default.yml — default TCN training configuration.
---
#  These configs are the defaults we used for both the pouring and pose
#  experiments.

# Train on TPU?
use_tpu: false # Default is to run without TPU locally.
tpu:
  num_shards: 1 # Number of TPU shards to train across.
  iterations: 100 # NOTE(review): presumably steps per TPU loop — confirm against trainer.

# SGD / general learning hyperparameters.
learning:
  max_step: 1000000 # Total number of training steps.
  learning_rate: 0.001 # Initial learning rate.
  decay_steps: 10000 # Steps between learning-rate decay applications.
  decay_factor: 1.00 # Multiplicative decay; a factor of 1.00 leaves the rate unchanged.
  l2_reg_weight: 0.000001 # L2 regularization strength.
  optimizer: 'adam'

# Default metric learning loss hyperparameters.
# Each stanza configures one candidate metric-learning loss; `embedding_l2`
# controls whether embeddings are L2-normalized before the loss is applied.
triplet_semihard:
  embedding_l2: true # Suggestion from Hyun Oh Song's slides.
  # Leading zero added (was `.2`): same float under YAML 1.1 / 1.2-core
  # resolution, but strict JSON-schema loaders misread `.2` as a string.
  margin: 0.2 # Default value for Facenet.
npairs:
  embedding_l2: false # Suggestion from Hyun Oh Song's slides.
clustering_loss:
  embedding_l2: true # Suggestion from Hyun Oh Song's slides.
  margin: 1.0 # Default in deep_metric_learning.
lifted_struct:
  embedding_l2: false # Suggestion from Hyun Oh Song's slides.
  margin: 1.0
contrastive:
  embedding_l2: true # Suggestion from Hyun Oh Song's slides.
  margin: 1.0

# Which method to use to train the embedding.
# Options are "mvtcn", "svtcn".
# NOTE(review): mvtcn/svtcn hyperparameters live in the stanzas of the same
# names further down this file.
training_strategy: 'mvtcn'

# Which embedder architecture to use.
# Options are 'inception_conv_ss_fc' (used in pouring / pose experiments),
# 'resnet'.
embedder_strategy: 'inception_conv_ss_fc'

# Size of the TCN embedding.
embedding_size: 32

# Default hyperparameters for the different embedder architectures.
inception_conv_ss_fc:
  # Pretrained Inception base cut at `pretrained_layer`, followed by extra
  # conv layers and fully-connected layers.
  pretrained_checkpoint: 'pretrained_checkpoints/inception/inception_v3.ckpt'
  pretrained_layer: 'Mixed_5d' # InceptionV3 endpoint to take features from.
  additional_conv_sizes: [512, 512] # Channel sizes of the extra conv layers.
  fc_hidden_sizes: [2048] # Hidden sizes of the FC layers before the embedding.
  finetune: false # If true, also update the pretrained weights.
  dropout:
    # Dropout keep probabilities; 1.0 disables dropout.
    keep_pretrained: 1.0
    keep_conv: 1.0
    keep_fc: 1.0

resnet:
  pretrained_checkpoint: 'pretrained_checkpoints/resnet/resnet_v2_50.ckpt'
  pretrained_layer: 4 # NOTE(review): presumably a ResNet block index — confirm.
  finetune: false # If true, also update the pretrained weights.
  # NOTE(review): format looks like '<channels>_<kernel>' pairs joined by
  # '-' — confirm against the embedder code.
  adaptation_blocks: '512_3-512_3'
  emb_connection: 'conv'
  fc_hidden_sizes: 'None' # Literal string 'None' — presumably parsed by the trainer.
  dropout:
    keep_pretrained: 1.0 # Dropout keep probability; 1.0 disables dropout.

# Loss hyperparameters.
mvtcn:
  # Size of the window in timesteps to get random anchor-positive pairs for
  # training.
  window: 580 # 29fps * 20 seconds.

svtcn:
  # Radii (in timesteps) defining the positive / negative ranges around an
  # anchor frame.
  pos_radius: 6  # 0.2 seconds * 29fps ~ 6 timesteps.
  neg_radius: 12 # 2.0 * pos_radius.

# Data configs.
data:
  height: 299 # Input image height in pixels.
  width: 299 # Input image width in pixels.
  preprocessing:
    # Strategy to use when cropping images at inference time.
    # See preprocessing.py for options.
    eval_cropping: 'crop_center'
  # Training scale, color augmentation hyperparameters.
  augmentation:
    # See preprocessing.py for a discussion of how to use these parameters.
    minscale: 1.0
    maxscale: 1.0
    proportion_scaled_up: 0.5
    color: true # Enable color augmentation.
    fast_mode: true
  num_parallel_calls: 12 # Parallelism of the input pipeline.
  sequence_prefetch_size: 12
  batch_prefetch_size: 12
  batch_size: 36 # Training batch size.
  eval_batch_size: 36 # Batch size used during evaluation.
  embed_batch_size: 128 # Batch size when computing embeddings.

# Validation configs.
val:
  recall_at_k_list: [1] # K values for recall@K evaluation.
  num_eval_samples: 1000 # Number of samples per evaluation pass.
  eval_interval_secs: 300 # Seconds between evaluation runs.

# Summary / checkpoint logging configs.
logging:
  summary:
    image_summaries: false # If true, also write image summaries.
    save_summaries_steps: 100 # Steps between summary writes.
    flush_secs: 600 # Seconds between summary-writer flushes.
  checkpoint:
    num_to_keep: 0 # Keep all checkpoints.
    save_checkpoints_steps: 1000 # Steps between checkpoint saves.
    secs: 1800 # NOTE(review): presumably seconds between saves — confirm.