# Inference job configuration: which model to load, how prompts are
# formatted, generation parameters, pipeline I/O, and cluster task settings.

model:
  # full fine tune
  name_or_path: weka://oe-data-default/jakep/Qwen_Qwen2-VL-7B-Instruct-e4ecf8-01JAH8GMWHTJ376S2N7ETXRXH4/best_bf16/
  # name_or_path: s3://ai2-oe-data/jakep/experiments/qwen2vl-pdf/v1/models/jakep/Qwen_Qwen2-VL-7B-Instruct-e4ecf8-01JAH8GMWHTJ376S2N7ETXRXH4/checkpoint-9500/bf16/
  vlm: true

  # necessary to prevent random crashes, until vllm fixes some bugs
  # NOTE(review): nesting under `model` is reconstructed from key order —
  # confirm against the consumer's schema.
  num_scheduler_steps: 1

format:
  add_generation_prompt: true

generate:
  # The model's max context length is 8096, but around 1500 tokens are
  # reserved for the image itself
  max_context_length: 6500
  temperature: 0.8
  top_p: 1.0
  drop_long_outputs: false

pipeline:
  sqs_queue_name: jake-pdf
  num_workers: 3
  generation_batch_size: 256
  tokenization_batch_size: 64
  output_serializer: default
  target_bucket: ai2-oe-data
  # Placeholder: replace "[your username]" before running. Quoted because a
  # plain scalar starting with "[" would be parsed as a flow sequence.
  target_object_prefix: '[your username]/pdfworkspaces/s2orc_3200k_v2/inference_outputs'
  allowed_restarts_per_predictor: 10

task:
  budget: ai2/oe-data
  workspace: ai2/oe-data-model-based-cleanup
  name: qwen2vl-schedsteps-bg
  replicas: 128
  priority: LOW
  gpu_count: 1
  cluster:
    - ai2/jupiter-cirrascale-2
    - ai2/saturn-cirrascale