Unverified Commit 734ae27b authored by Xiaomeng Zhao's avatar Xiaomeng Zhao Committed by GitHub
Browse files

Merge pull request #1897 from myhloli/dev

perf(inference): optimize batch processing for different GPU memory sizes
parents a19089a9 6116488d
......@@ -165,12 +165,14 @@ def doc_analyze(
import torch_npu
if torch_npu.npu.is_available():
npu_support = True
torch.npu.set_compile_mode(jit_compile=False)
if torch.cuda.is_available() and device != 'cpu' or npu_support:
gpu_memory = int(os.getenv("VIRTUAL_VRAM_SIZE", round(get_vram(device))))
if gpu_memory is not None and gpu_memory >= 8:
if gpu_memory >= 16:
if gpu_memory >= 20:
batch_ratio = 16
elif gpu_memory >= 15:
batch_ratio = 8
elif gpu_memory >= 10:
batch_ratio = 4
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment