# Settings shared by the DeepSeek-OCR run scripts (run_dpsk_ocr_*.py).
# NOTE: importing this module also loads the tokenizer for MODEL_PATH
# (see the last statement), so the import is not side-effect free.

from transformers import AutoTokenizer

# --- Image sizing / cropping -------------------------------------------------
# NOTE(review): the exact semantics are not visible in this file; presumably
# BASE_SIZE / IMAGE_SIZE are pixel edge lengths and CROP_MODE toggles tiling
# of the input image -- confirm against the preprocessing code.
BASE_SIZE = 1024
IMAGE_SIZE = 768
CROP_MODE = True
MIN_CROPS = 2
MAX_CROPS = 6  # 6 is the largest allowed value

# --- Throughput --------------------------------------------------------------
MAX_CONCURRENCY = 100  # reduce this when GPU memory is limited
NUM_WORKERS = 64  # workers used for image pre-processing (resize / padding)

# --- Behaviour flags ---------------------------------------------------------
PRINT_NUM_VIS_TOKENS = False
SKIP_REPEAT = True

# --- Model -------------------------------------------------------------------
MODEL_PATH = 'deepseek-ai/DeepSeek-OCR-2'  # replace with your own model path

# --- Input / output ----------------------------------------------------------
# TODO: point INPUT_PATH at your own data. The script to run depends on it:
#   .pdf                       -> run_dpsk_ocr_pdf.py
#   .jpg / .png / .jpeg        -> run_dpsk_ocr_image.py
#   OmniDocBench images path   -> run_dpsk_ocr_eval_batch.py
INPUT_PATH = 'doc/docstructbench_dianzishu_zhongwenzaixian-o.O-63686436.pdf_57.jpg'
OUTPUT_PATH = 'output/image/'

# --- Prompt ------------------------------------------------------------------
# Commonly used prompts (not exhaustive); assign one of them to PROMPT:
#   document:            '\n<|grounding|>Convert the document to markdown.'
#   other image:         '\n<|grounding|>OCR this image.'
#   without layouts:     '\nFree OCR.'
#   figures in document: '\nParse the figure.'
#   general:             '\nDescribe this image in detail.'
#   rec:                 '\nLocate <|ref|>xxxx<|/ref|> in the image.'
PROMPT = '\n<|grounding|>Convert the document to markdown.'

# --- Tokenizer ---------------------------------------------------------------
# NOTE(review): trust_remote_code=True permits transformers to run custom code
# shipped with the model repository -- only use a MODEL_PATH you trust.
TOKENIZER = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True)