# Copyright (c) 2022, salesforce.com, inc.
# All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
# For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause

# Configuration for the `image_text_pretrain` task using the `blip_pretrain`
# architecture. Three top-level sections: `model`, `datasets`, and `run`.

model:
  arch: blip_pretrain

  model_type: base
  load_pretrained: false

  # NOTE(review): 57600 is an exact multiple of run.batch_size_train
  # (75 * 768) -- presumably intentional; confirm before changing either value.
  queue_size: 57600
  alpha: 0.4

# Every dataset below is configured with the same train-split processors:
# "blip_image_train" at image_size 224 for images, "blip_caption" for text.
datasets:
  coco_caption:  # name of the dataset builder
    vis_processor:
      train:
        name: "blip_image_train"
        image_size: 224
    text_processor:
      train:
        name: "blip_caption"

  conceptual_caption_3m:  # name of the dataset builder
    vis_processor:
      train:
        name: "blip_image_train"
        image_size: 224
    text_processor:
      train:
        name: "blip_caption"

  conceptual_caption_12m:  # name of the dataset builder
    vis_processor:
      train:
        name: "blip_image_train"
        image_size: 224
    text_processor:
      train:
        name: "blip_caption"

  vg_caption:  # name of the dataset builder
    vis_processor:
      train:
        name: "blip_image_train"
        image_size: 224
    text_processor:
      train:
        name: "blip_caption"

  sbu_caption:  # name of the dataset builder
    vis_processor:
      train:
        name: "blip_image_train"
        image_size: 224
    text_processor:
      train:
        name: "blip_caption"

run:
  task: image_text_pretrain

  # optimizer
  lr_sched: "linear_warmup_step_lr"
  # lr_sched: "linear_warmup_cosine_lr"

  # Exponent-form floats carry an explicit decimal point so that YAML 1.1
  # loaders (e.g. PyYAML) resolve them as floats rather than strings.
  init_lr: 3.0e-4
  min_lr: 1.0e-6
  warmup_lr: 1.0e-6
  lr_decay_rate: 0.9

  weight_decay: 0.05
  max_epoch: 20
  batch_size_train: 75
  batch_size_eval: 75
  num_workers: 4
  warmup_steps: 3000

  seed: 42
  output_dir: "output/BLIP/Pretrain"

  amp: false
  resume_ckpt_path: null

  evaluate: false
  train_splits: ["train"]

  # device / distributed settings
  device: "cuda"
  world_size: 1
  dist_url: "env://"
  distributed: true