Commit 6116488d authored by myhloli

perf(inference): optimize batch processing for different GPU memory sizes

- Set jit_compile to False via torch.npu.set_compile_mode for better performance on NPU
- Adjust batch ratio thresholds: add a 16x tier for >= 20 GB VRAM and lower the 8x tier boundary from 16 GB to 15 GB
parent 7a856804
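
The jit_compile change corresponds to the single added line in the hunk below. Read in isolation, the NPU branch looks roughly like the sketch that follows; the try/except import guard is an addition of this example (not the repo's code) so it also runs on machines without the Ascend stack:

    import torch

    npu_support = False
    try:
        import torch_npu  # Ascend adapter; importing it patches the torch.npu namespace
        if torch_npu.npu.is_available():
            npu_support = True
            # Disable JIT graph compilation on NPU; per the commit message,
            # eager mode performs better for this inference workload.
            torch.npu.set_compile_mode(jit_compile=False)
    except ImportError:
        pass  # no torch_npu installed; the CUDA/CPU paths are used instead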
@@ -165,12 +165,14 @@ def doc_analyze(
         import torch_npu
         if torch_npu.npu.is_available():
             npu_support = True
+            torch.npu.set_compile_mode(jit_compile=False)
     if torch.cuda.is_available() and device != 'cpu' or npu_support:
         gpu_memory = int(os.getenv("VIRTUAL_VRAM_SIZE", round(get_vram(device))))
         if gpu_memory is not None and gpu_memory >= 8:
-            if gpu_memory >= 16:
+            if gpu_memory >= 20:
+                batch_ratio = 16
+            elif gpu_memory >= 15:
                 batch_ratio = 8
             elif gpu_memory >= 10:
                 batch_ratio = 4
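
After this change, the batch-ratio ladder can be read as a standalone function. Below is a minimal sketch, assuming a hypothetical query_vram_gb stand-in for the repo's get_vram and a fallback ratio of 1 for the branch the hunk truncates:

    import os

    def query_vram_gb(device: str) -> float:
        # Hypothetical stand-in for the repo's get_vram(device); reports VRAM in GB.
        return 16.0

    def select_batch_ratio(device: str = 'cuda') -> int:
        # VIRTUAL_VRAM_SIZE overrides the detected VRAM, as in the hunk above.
        gpu_memory = int(os.getenv('VIRTUAL_VRAM_SIZE', round(query_vram_gb(device))))
        batch_ratio = 1  # assumed fallback; the hunk cuts off before any else-branch
        if gpu_memory >= 8:
            if gpu_memory >= 20:
                batch_ratio = 16  # new top tier added by this commit
            elif gpu_memory >= 15:
                batch_ratio = 8   # tier boundary lowered from 16 GB to 15 GB
            elif gpu_memory >= 10:
                batch_ratio = 4
        return batch_ratio

    print(select_batch_ratio())  # the 16 GB placeholder lands in the 8x tier -> 8

Lowering the 8x boundary to 15 GB presumably keeps cards that report slightly under 16 GB (for example, after driver reservations) in the larger-batch tier.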