# apply scaled sigmoid on mask logits for memory encoder, and directly feed input mask as output mask
sigmoid_scale_for_mem_enc:20.0
sigmoid_bias_for_mem_enc:-10.0
use_mask_input_as_output_without_sam:true
# Memory
directly_add_no_mem_embed:true
no_obj_embed_spatial:true
# use high-resolution feature map in the SAM mask decoder
use_high_res_features_in_sam:true
# output 3 masks on the first click on initial conditioning frames
multimask_output_in_sam:true
# SAM heads
iou_prediction_use_sigmoid:True
# cross-attend to object pointers from other frames (based on SAM output tokens) in the encoder
use_obj_ptrs_in_encoder:true
add_tpos_enc_to_obj_ptrs:true
proj_tpos_enc_in_obj_ptrs:true
use_signed_tpos_enc_to_obj_ptrs:true
only_obj_ptrs_in_the_past_for_eval:true
# object occlusion prediction
pred_obj_scores:true
pred_obj_scores_mlp:true
fixed_no_obj_ptr:true
# multimask tracking settings
multimask_output_for_tracking:true
use_multimask_token_for_obj_ptr:true
multimask_min_pt_num:0
multimask_max_pt_num:1
use_mlp_for_obj_ptr_proj:true
# Compilation flag
# compile_image_encoder: False
####### Training specific params #######
# box/point input and corrections
prob_to_use_pt_input_for_train:0.5
prob_to_use_pt_input_for_eval:0.0
prob_to_use_box_input_for_train:0.5# 0.5*0.5 = 0.25 prob to use box instead of points
prob_to_use_box_input_for_eval:0.0
prob_to_sample_from_gt_for_train:0.1# with a small prob, sampling correction points from GT mask instead of prediction errors
num_frames_to_correct_for_train:2# iteratively sample on random 1~2 frames (always include the first frame)
num_frames_to_correct_for_eval:1# only iteratively sample on first frame
rand_frames_to_correct_for_train:True# random #init-cond-frame ~ 2
add_all_frames_to_correct_as_cond:True# when a frame receives a correction click, it becomes a conditioning frame (even if it's not initially a conditioning frame)
# maximum 2 initial conditioning frames
num_init_cond_frames_for_train:2
rand_init_cond_frames_for_train:True# random 1~2
num_correction_pt_per_frame:7
use_act_ckpt_iterative_pt_sampling:false
num_init_cond_frames_for_eval:1# only mask on the first frame