# Training/evaluation configuration: "Baseline" meta-architecture with the
# `build_vit_backbone` backbone, trained and tested on Market1501.
# Results are written to OUTPUT_DIR (see bottom of file).
#
# NOTE(review): the parenthesised values below (e.g. `(16, 16)`,
# `("Market1501",)`) are plain YAML strings, not sequences; presumably the
# config loader evaluates them into tuples -- confirm against the loader.

MODEL:
  META_ARCHITECTURE: Baseline
  # Per-channel input normalisation constants.
  PIXEL_MEAN: [127.5, 127.5, 127.5]
  PIXEL_STD: [127.5, 127.5, 127.5]

  BACKBONE:
    NAME: build_vit_backbone
    DEPTH: base
    FEAT_DIM: 768
    PRETRAIN: True
    # NOTE(review): absolute path inside one user's home directory; it has to
    # be adjusted on any other machine.
    PRETRAIN_PATH: /export/home/lxy/.cache/torch/checkpoints/jx_vit_base_p16_224-80ecf9dd.pth
    STRIDE_SIZE: (16, 16)
    DROP_PATH_RATIO: 0.1
    DROP_RATIO: 0.0
    ATT_DROP_RATE: 0.0

  HEADS:
    NAME: EmbeddingHead
    NORM: BN
    WITH_BNNECK: True
    POOL_LAYER: Identity
    NECK_FEAT: before
    CLS_LAYER: Linear

  LOSSES:
    # Two losses are enabled; their settings live in CE and TRI below.
    NAME: ("CrossEntropyLoss", "TripletLoss",)

    CE:
      EPSILON: 0.  # no smoothing
      SCALE: 1.

    TRI:
      MARGIN: 0.0
      HARD_MINING: True
      NORM_FEAT: False
      SCALE: 1.

INPUT:
  # NOTE(review): presumably [height, width] -- confirm against the loader.
  SIZE_TRAIN: [256, 128]
  SIZE_TEST: [256, 128]

  REA:
    ENABLED: True
    PROB: 0.5

  FLIP:
    ENABLED: True

  PADDING:
    ENABLED: True

DATALOADER:
  SAMPLER_TRAIN: NaiveIdentitySampler
  NUM_INSTANCE: 4
  NUM_WORKERS: 8

SOLVER:
  AMP:
    ENABLED: False

  OPT: SGD
  MAX_EPOCH: 120
  BASE_LR: 0.008
  WEIGHT_DECAY: 0.0001
  IMS_PER_BATCH: 64

  SCHED: CosineAnnealingLR
  ETA_MIN_LR: 0.000016

  WARMUP_FACTOR: 0.01
  WARMUP_ITERS: 1000

  CLIP_GRADIENTS:
    ENABLED: True

  CHECKPOINT_PERIOD: 30

TEST:
  EVAL_PERIOD: 5
  IMS_PER_BATCH: 128

CUDNN_BENCHMARK: True

DATASETS:
  NAMES: ("Market1501",)
  TESTS: ("Market1501",)

OUTPUT_DIR: logs/market1501/sbs_vit_base