"vscode:/vscode.git/clone" did not exist on "fa023f32fbf24a0cac4f733ff876188ccf193a8f"
Unverified commit 68486481, authored by Xinyuan Tong, committed by GitHub

Feat: deepseek-ocr logits processor (#12415)


Co-authored-by: xinyuant <xinyuant@usc.edu>
parent 410225b7
@@ -15,6 +15,10 @@ from sglang.srt.multimodal.customized_mm_processor_utils import (
    register_customized_processor,
)
from sglang.srt.sampling.custom_logit_processor import (
DeepseekOCRNoRepeatNGramLogitProcessor,
)
BASE_SIZE = 1024
IMAGE_SIZE = 640
CROP_MODE = True
@@ -26,6 +30,24 @@ PRINT_NUM_VIS_TOKENS = False
SKIP_REPEAT = True
MODEL_PATH = "deepseek-ai/DeepSeek-OCR"  # change to your model path
NGRAM_NO_REPEAT_SIZE = 30
NGRAM_NO_REPEAT_WINDOW = 90
# Whitelist `<td>` and `</td>` token ids to allow table structures.
NGRAM_NO_REPEAT_WHITELIST = (128821, 128822)
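# Serialized form of the processor so it can be attached to generation requests.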
DEFAULT_CUSTOM_LOGIT_PROCESSOR = DeepseekOCRNoRepeatNGramLogitProcessor.to_str()
def get_default_ngram_custom_params() -> Dict[str, Any]:
"""Return default custom params for the DeepSeek-OCR n-gram no repeat processor."""
return {
"ngram_size": NGRAM_NO_REPEAT_SIZE,
"window_size": NGRAM_NO_REPEAT_WINDOW,
"whitelist_token_ids": list(NGRAM_NO_REPEAT_WHITELIST),
}
PROMPT = "<image>\n<|grounding|>Convert the document to markdown." PROMPT = "<image>\n<|grounding|>Convert the document to markdown."
...
import json
from abc import ABC, abstractmethod
from functools import lru_cache
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set
import dill
import orjson
@@ -126,3 +126,69 @@ class DeepSeekR1ThinkingBudgetLogitProcessor(ThinkingBudgetLogitProcessor):
    THINKING_START_TOKEN_ID: int = 128798
    THINKING_END_TOKEN_ID: int = 128799
    NEW_LINE_TOKEN_ID: int = 201
# Adapted from DeepSeek's implementation: https://github.com/deepseek-ai/DeepSeek-OCR/blob/main/DeepSeek-OCR-master/DeepSeek-OCR-vllm/process/ngram_norepeat.py
class DeepseekOCRNoRepeatNGramLogitProcessor(CustomLogitProcessor):
"""Block n-gram repetitions within a sliding window for DeepSeek-OCR outputs."""
def __call__(
self,
logits: torch.Tensor,
custom_param_list: Optional[List[Dict[str, Any]]] = None,
) -> torch.Tensor:
if not custom_param_list:
return logits
for batch_idx, params in enumerate(custom_param_list):
if not params:
continue
req = params.get("__req__")
if req is None:
continue
try:
ngram_size = int(params.get("ngram_size") or 0)
window_size = int(params.get("window_size") or 0)
except (TypeError, ValueError):
continue
if ngram_size <= 0 or window_size <= 0:
continue
sequence: List[int] = req.origin_input_ids + req.output_ids
if len(sequence) < ngram_size:
continue
search_start = max(0, len(sequence) - window_size)
search_end = len(sequence) - ngram_size + 1
if search_end <= search_start:
continue
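            # The trailing (ngram_size - 1) tokens are the current prefix; any token
            # that would complete an n-gram already seen with this prefix is banned.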
if ngram_size > 1:
current_prefix = tuple(sequence[-(ngram_size - 1) :])
else:
current_prefix = tuple()
banned_tokens: Set[int] = set()
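            # Scan the recent window for earlier n-grams sharing this prefix and
            # collect their final tokens as forbidden continuations.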
for idx in range(search_start, search_end):
ngram = sequence[idx : idx + ngram_size]
if ngram_size == 1 or tuple(ngram[:-1]) == current_prefix:
banned_tokens.add(ngram[-1])
whitelist_ids = params.get("whitelist_token_ids") or []
try:
whitelist = {int(token_id) for token_id in whitelist_ids}
except (TypeError, ValueError):
whitelist = set()
banned_tokens.difference_update(whitelist)
if not banned_tokens:
continue
indices = list(banned_tokens)
logits[batch_idx, indices] = -float("inf")
return logits
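For intuition about what the processor masks, here is a small standalone illustration (not part of this diff) that re-implements just the banning rule on a toy sequence; the helper name banned_next_tokens is made up for the example, and nothing beyond PyTorch is assumed.

from typing import List, Set

import torch

def banned_next_tokens(sequence: List[int], ngram_size: int, window_size: int) -> Set[int]:
    """Tokens whose emission would repeat an n-gram seen in the trailing window."""
    if ngram_size <= 0 or window_size <= 0 or len(sequence) < ngram_size:
        return set()
    prefix = tuple(sequence[-(ngram_size - 1):]) if ngram_size > 1 else tuple()
    start = max(0, len(sequence) - window_size)
    banned: Set[int] = set()
    for idx in range(start, len(sequence) - ngram_size + 1):
        ngram = sequence[idx : idx + ngram_size]
        if ngram_size == 1 or tuple(ngram[:-1]) == prefix:
            banned.add(ngram[-1])
    return banned

seq = [5, 6, 7, 9, 5, 6]  # the 3-gram (5, 6, 7) already occurred; the current prefix is (5, 6)
banned = banned_next_tokens(seq, ngram_size=3, window_size=90)
print(banned)  # {7}: emitting token 7 would repeat (5, 6, 7)

logits = torch.zeros(1, 10)
logits[0, list(banned)] = -float("inf")  # same masking step the processor applies per request
print(logits[0])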