refactor(ocr): remove unused OCR dictionaries and update model configurations

- Remove unused OCR dictionaries for Arabic, Belarusian, Bulgarian and Armenian languages
- Update model configurations in arch_config.yaml:
  - Comment out 'out_channels' for various language models
  - Rename Arabic, Korean, Japanese, Tamil and Devanagari model configurations to use 'v3' instead of 'v4'
- Delete ar_dict.txt, be_dict.txt and bg_dict.txt files
- Update arabic_dict.txt to remove blank line at the start

refactor(ocr): remove unused OCR dictionaries and update model configurations
- Remove unused OCR dictionaries for Arabic, Belarusian, Bulgarian and Armenian languages
- Update model configurations in arch_config.yaml:
  - Comment out 'out_channels' for various language models
  - Rename Arabic, Korean, Japanese, Tamil and Devanagari model configurations to use 'v3' instead of 'v4'
- Delete ar_dict.txt, be_dict.txt and bg_dict.txt files
- Update arabic_dict.txt to remove blank line at the start
41f1fb8a · myhloli · b3d6785d · 41f1fb8a · 41f1fb8a · 41f1fb8a
Commit 41f1fb8a authored Apr 01, 2025 by myhloli
20 changed files
--- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/models_config.yml
+++ b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/models_config.yml
+lang:
+  ch:
+    det: ch_PP-OCRv4_det_infer.pth
+    rec: ch_PP-OCRv4_rec_infer.pth
+    dict: ppocr_keys_v1.txt
+  en:
+    det: en_PP-OCRv3_det_infer.pth
+    rec: en_PP-OCRv4_rec_infer.pth
+    dict: en_dict.txt
+  korean:
+    det: Multilingual_PP-OCRv3_det_infer.pth
+    rec: korean_PP-OCRv3_rec_infer.pth
+    dict: korean_dict.txt
+  japan:
+    det: Multilingual_PP-OCRv3_det_infer.pth
+    rec: japan_PP-OCRv3_rec_infer.pth
+    dict: japan_dict.txt
+  chinese_cht:
+    det: Multilingual_PP-OCRv3_det_infer.pth
+    rec: chinese_cht_PP-OCRv3_rec_infer.pth
+    dict: chinese_cht_dict.txt
+  ta:
+    det: Multilingual_PP-OCRv3_det_infer.pth
+    rec: ta_PP-OCRv3_rec_infer.pth
+    dict: ta_dict.txt
+  te:
+    det: Multilingual_PP-OCRv3_det_infer.pth
+    rec: te_PP-OCRv3_rec_infer.pth
+    dict: te_dict.txt
+  ka:
+    det: Multilingual_PP-OCRv3_det_infer.pth
+    rec: ka_PP-OCRv3_rec_infer.pth
+    dict: ka_dict.txt
+  latin:
+    det: en_PP-OCRv3_det_infer.pth
+    rec: latin_PP-OCRv3_rec_infer.pth
+    dict: latin_dict.txt
+  arabic:
+    det: Multilingual_PP-OCRv3_det_infer.pth
+    rec: arabic_PP-OCRv3_rec_infer.pth
+    dict: arabic_dict.txt
+  cyrillic:
+    det: Multilingual_PP-OCRv3_det_infer.pth
+    rec: cyrillic_PP-OCRv3_rec_infer.pth
+    dict: cyrillic_dict.txt
+  devanagari:
+    det: Multilingual_PP-OCRv3_det_infer.pth
+    rec: devanagari_PP-OCRv3_rec_infer.pth
+    dict: devanagari_dict.txt
\ No newline at end of file
--- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorch_paddle.py
+++ b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorch_paddle.py
 # Copyright (c) Opendatalab. All rights reserved.
 import copy
+import os.path
+from pathlib import Path
 import cv2
 import numpy as np
+import yaml
 from loguru import logger
-from magic_pdf.libs.config_reader import get_device
+from magic_pdf.libs.config_reader import get_device, get_local_models_dir
 from .ocr_utils import check_img, preprocess_image, sorted_boxes, merge_det_boxes, update_det_boxes, get_rotate_crop_image
 from .tools.infer.predict_system import TextSystem
 from .tools.infer import pytorchocr_utility as utility
 import argparse
+# Language-code groups. PytorchPaddleOCR.__init__ collapses any code listed
+# here onto the group's key in models_config.yml, so every member of a group
+# resolves to the same det/rec/dict entry.
+
+# Codes that are rewritten to 'latin'.
+latin_lang = [
+        'af', 'az', 'bs', 'cs', 'cy', 'da', 'de', 'es', 'et', 'fr', 'ga', 'hr',  # noqa: E126
+        'hu', 'id', 'is', 'it', 'ku', 'la', 'lt', 'lv', 'mi', 'ms', 'mt', 'nl',
+        'no', 'oc', 'pi', 'pl', 'pt', 'ro', 'rs_latin', 'sk', 'sl', 'sq', 'sv',
+        'sw', 'tl', 'tr', 'uz', 'vi', 'french', 'german'
+]
+# Codes that are rewritten to 'arabic'.
+arabic_lang = ['ar', 'fa', 'ug', 'ur']
+# Codes that are rewritten to 'cyrillic'.
+cyrillic_lang = [
+        'ru', 'rs_cyrillic', 'be', 'bg', 'uk', 'mn', 'abq', 'ady', 'kbd', 'ava',  # noqa: E126
+        'dar', 'inh', 'che', 'lbe', 'lez', 'tab'
+]
+# Codes that are rewritten to 'devanagari'.
+devanagari_lang = [
+        'hi', 'mr', 'ne', 'bh', 'mai', 'ang', 'bho', 'mah', 'sck', 'new', 'gom',  # noqa: E126
+        'sa', 'bgc'
+]
+def get_model_params(lang, config):
+    """Look up the model files configured for an OCR language.
+
+    Args:
+        lang: Language key to look up under ``config['lang']``.
+        config: Parsed model configuration; must contain a top-level
+            ``'lang'`` mapping of language key -> settings mapping.
+
+    Returns:
+        A ``(det, rec, dict_file)`` tuple taken from the ``'det'``, ``'rec'``
+        and ``'dict'`` entries of the language's settings. An entry that is
+        absent from the settings is returned as ``None``.
+
+    Raises:
+        ValueError: If ``lang`` has no entry under ``config['lang']``.
+    """
+    languages = config['lang']
+    if lang not in languages:
+        # ValueError is a subclass of Exception, so callers that caught the
+        # previous bare Exception keep working while new callers can be
+        # specific about an unsupported language.
+        raise ValueError(f'Language {lang} not supported')
+    params = languages[lang]
+    return params.get('det'), params.get('rec'), params.get('dict')
+# Directory containing this module; models_config.yml and the bundled
+# character dictionaries are located relative to it.
+root_dir = Path(__file__).resolve().parent
 class PytorchPaddleOCR(TextSystem):
    def __init__(self, *args, **kwargs):
        parser = utility.init_args()
        args = parser.parse_args(args)
        self.lang = kwargs.get('lang', 'ch')
+        if self.lang in latin_lang:
+            self.lang = 'latin'
+        elif self.lang in arabic_lang:
+            self.lang = 'arabic'
+        elif self.lang in cyrillic_lang:
+            self.lang = 'cyrillic'
+        elif self.lang in devanagari_lang:
+            self.lang = 'devanagari'
+        else:
+            pass
-        if self.lang == 'ch':
+        models_config_path = os.path.join(root_dir, 'models_config.yml')
-            kwargs['det_model_path'] = "/Users/myhloli/Downloads/ch_ptocr_v4_det_infer.pth"
+        with open(models_config_path) as file:
-            kwargs['rec_model_path'] = "/Users/myhloli/Downloads/ch_ptocr_v4_rec_infer.pth"
+            config = yaml.safe_load(file)
+            det, rec, dict_file = get_model_params(self.lang, config)
+        ocr_models_dir = os.path.join(get_local_models_dir(), 'OCR', 'paddleocr_torch')
+        kwargs['det_model_path'] = os.path.join(ocr_models_dir, det)
+        kwargs['rec_model_path'] = os.path.join(ocr_models_dir, rec)
+        kwargs['rec_char_dict_path'] = os.path.join(root_dir, 'pytorchocr', 'utils', 'dict', dict_file)
        kwargs['device'] = get_device()

--- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/arch_config.yaml
+++ b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/arch_config.yaml
@@ -172,7 +172,7 @@ chinese_cht_PP-OCRv3_rec_infer:
    use_guide: True
  Head:
    name: CTCHead
-    out_channels: 8423
+#    out_channels: 8423
    fc_decay: 0.00001
 latin_PP-OCRv3_rec_infer:
@@ -193,7 +193,7 @@ latin_PP-OCRv3_rec_infer:
    use_guide: True
  Head:
    name: CTCHead
-    out_channels: 187
+#    out_channels: 187
    fc_decay: 0.00001
 cyrillic_PP-OCRv3_rec_infer:
@@ -214,10 +214,10 @@ cyrillic_PP-OCRv3_rec_infer:
    use_guide: True
  Head:
    name: CTCHead
-    out_channels: 165
+#    out_channels: 165
    fc_decay: 0.00001
-arabic_PP-OCRv4_rec_infer:
+arabic_PP-OCRv3_rec_infer:
  model_type: rec
  algorithm: SVTR
  Transform:
@@ -235,10 +235,10 @@ arabic_PP-OCRv4_rec_infer:
    use_guide: True
  Head:
    name: CTCHead
-    out_channels: 164
+#    out_channels: 164
    fc_decay: 0.00001
-korean_PP-OCRv4_rec_infer:
+korean_PP-OCRv3_rec_infer:
  model_type: rec
  algorithm: SVTR
  Transform:
@@ -256,10 +256,10 @@ korean_PP-OCRv4_rec_infer:
    use_guide: True
  Head:
    name: CTCHead
-    out_channels: 3690
+#    out_channels: 3690
    fc_decay: 0.00001
-japan_PP-OCRv4_rec_infer:
+japan_PP-OCRv3_rec_infer:
  model_type: rec
  algorithm: SVTR
  Transform:
@@ -277,10 +277,10 @@ japan_PP-OCRv4_rec_infer:
    use_guide: True
  Head:
    name: CTCHead
-    out_channels: 4401
+#    out_channels: 4401
    fc_decay: 0.00001
-ta_PP-OCRv4_rec_infer:
+ta_PP-OCRv3_rec_infer:
  model_type: rec
  algorithm: SVTR
  Transform:
@@ -298,10 +298,10 @@ ta_PP-OCRv4_rec_infer:
    use_guide: True
  Head:
    name: CTCHead
-    out_channels: 130
+#    out_channels: 130
    fc_decay: 0.00001
-te_PP-OCRv4_rec_infer:
+te_PP-OCRv3_rec_infer:
  model_type: rec
  algorithm: SVTR
  Transform:
@@ -319,10 +319,10 @@ te_PP-OCRv4_rec_infer:
    use_guide: True
  Head:
    name: CTCHead
-    out_channels: 153
+#    out_channels: 153
    fc_decay: 0.00001
-ka_PP-OCRv4_rec_infer:
+ka_PP-OCRv3_rec_infer:
  model_type: rec
  algorithm: SVTR
  Transform:
@@ -340,10 +340,10 @@ ka_PP-OCRv4_rec_infer:
    use_guide: True
  Head:
    name: CTCHead
-    out_channels: 155
+#    out_channels: 155
    fc_decay: 0.00001
-devanagari_PP-OCRv4_rec_infer:
+devanagari_PP-OCRv3_rec_infer:
  model_type: rec
  algorithm: SVTR
  Transform:
@@ -361,6 +361,6 @@ devanagari_PP-OCRv4_rec_infer:
    use_guide: True
  Head:
    name: CTCHead
-    out_channels: 169
+#    out_channels: 169
    fc_decay: 0.00001
--- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/EN_symbol_dict.txt
+++ b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/EN_symbol_dict.txt
-0
-1
-2
-3
-4
-5
-6
-7
-8
-9
-a
-b
-c
-d
-e
-f
-g
-h
-i
-j
-k
-l
-m
-n
-o
-p
-q
-r
-s
-t
-u
-v
-w
-x
-y
-z
-A
-B
-C
-D
-E
-F
-G
-H
-I
-J
-K
-L
-M
-N
-O
-P
-Q
-R
-S
-T
-U
-V
-W
-X
-Y
-Z
-!
-"
-#
-$
-%
-&
-'
-(
-)
-*
-+
-,
-
-.
-/
-:
-;
-<
-=
->
-?
-@
-[
-\
-]
-^
-_
-`
-{
-|
-}
-~
\ No newline at end of file
--- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/dict/ar_dict.txt
+++ b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/dict/ar_dict.txt
-a
-r
-b
-i
-c
-_
-m
-g
-/
-1
-0
-I
-L
-S
-V
-R
-C
-2
-v
-l
-6
-3
-9
-.
-j
-p	
-ا
-ل
-م
-ر
-ج
-و
-ح
-ي
-ة
-5
-8
-7
-أ
-ب
-ض
-4
-ك
-س
-ه
-ث
-ن
-ط
-ع
-ت
-غ
-خ
-ف
-ئ
-ز
-إ
-د
-ص
-ظ
-ذ
-ش
-ى
-ق
-ؤ
-آ
-ء
-s
-e
-n
-w
-t
-u
-z
-d
-A
-N
-G
-h
-o
-E
-T
-H
-O
-B
-y
-F
-U
-J
-X
-W
-P
-Z
-M
-k
-q
-Y
-Q
-D
-f
-K
-x
-'
-%
-
-#
-@
-!
-&
-$
-,
-:
-é
-?
-+
-É
-(
--- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/dict/arabic_dict.txt
+++ b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/dict/arabic_dict.txt
 !
 #
 $

--- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/dict/be_dict.txt
+++ b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/dict/be_dict.txt
-b
-e
-_
-i
-m
-g
-/
-2
-0
-I
-L
-S
-V
-R
-C
-1
-v
-a
-l
-6
-9
-4
-3
-.
-j
-p	
-п
-а
-з
-б
-у
-г
-н
-ц
-ь
-8
-м
-л
-і
-о
-ў
-ы
-7
-5
-М
-х
-с
-р
-ф
-я
-е
-д
-ж
-ю
-ч
-й
-к
-Д
-в
-Б
-т
-І
-ш
-ё
-э
-К
-Л
-Н
-А
-Ж
-Г
-В
-П
-З
-Е
-О
-Р
-С
-У
-Ё
-Й
-Т
-Ч
-Э
-Ц
-Ю
-Ш
-Ф
-Х
-Я
-Ь
-Ы
-Ў
-s
-c
-n
-w
-M
-o
-t
-T
-E
-A
-B
-u
-h
-y
-k
-r
-H
-d
-Y
-O
-U
-F
-f
-x
-D
-G
-N
-K
-P
-z
-J
-X
-W
-Z
-Q
-%
-
-q
-@
-'
-!
-#
-&
-,
-:
-$
-(
-?
-é
-+
-É
--- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/dict/bg_dict.txt
+++ b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/dict/bg_dict.txt
-!
-#
-$
-%
-&
-'
-(
-+
-,
-
-.
-/
-0
-1
-2
-3
-4
-5
-6
-7
-8
-9
-:
-?
-@
-A
-B
-C
-D
-E
-F
-G
-H
-I
-J
-K
-L
-M
-N
-O
-P
-Q
-R
-S
-T
-U
-V
-W
-X
-Y
-Z
-_
-a
-b
-c
-d
-e
-f
-g
-h
-i
-j
-k
-l
-m
-n
-o
-p
-q
-r
-s
-t
-u
-v
-w
-x
-y
-z
-É
-é
-А
-Б
-В
-Г
-Д
-Е
-Ж
-З
-И
-Й
-К
-Л
-М
-Н
-О
-П
-Р
-С
-Т
-У
-Ф
-Х
-Ц
-Ч
-Ш
-Щ
-Ъ
-Ю
-Я
-а
-б
-в
-г
-д
-е
-ж
-з
-и
-й
-к
-л
-м
-н
-о
-п
-р
-с
-т
-у
-ф
-х
-ц
-ч
-ш
-щ
-ъ
-ь
-ю
-я
--- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/dict/ch_tra_dict.txt
+++ b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/dict/ch_tra_dict.txt
--- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/dict/chinese_cht_dict.txt
+++ b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/dict/chinese_cht_dict.txt
--- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/dict/cyrillic_dict.txt
+++ b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/dict/cyrillic_dict.txt
--- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/dict/devanagari_dict.txt
+++ b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/dict/devanagari_dict.txt
--- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/dict/en_dict.txt
+++ b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/dict/en_dict.txt
@@ -8,32 +8,13 @@
 7
 8
 9
-a
+:
-b
+;
-c
+<
-d
+=
-e
+>
-f
+?
-g
+@
-h
-i
-j
-k
-l
-m
-n
-o
-p
-q
-r
-s
-t
-u
-v
-w
-x
-y
-z
 A
 B
 C
@@ -60,4 +41,55 @@ W
 X
 Y
 Z
+[
+\
+]
+^
+_
+`
+a
+b
+c
+d
+e
+f
+g
+h
+i
+j
+k
+l
+m
+n
+o
+p
+q
+r
+s
+t
+u
+v
+w
+x
+y
+z
+{
+|
+}
+~
+!
+"
+#
+$
+%
+&
+'
+(
+)
+*
+
+,
+-
+.
+/
--- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/dict/es_dict.txt
+++ b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/dict/es_dict.txt
-x
-i
-_
-m
-g
-/
-1
-0
-I
-L
-S
-V
-R
-C
-2
-v
-a
-l
-3
-6
-4
-5
-.
-j
-p
-Q
-u
-e
-r
-o
-8
-7
-n
-c
-9
-t
-b
-é
-q
-d
-ó
-y
-F
-s
-,
-O
-í
-T
-f
-"
-U
-M
-h
-:
-P
-H
-A
-E
-D
-z
-N
-á
-ñ
-ú
-%
-;
-è
-+
-Y
-
-B
-G
-(
-)
-¿
-?
-w
-¡
-!
-X
-É
-K
-k
-Á
-ü
-Ú
-«
-»
-J
-'
-ö
-W
-Z
-º
-Ö
-[
-]
-Ç
-ç
-à
-ä
-û
-ò
-Í
-ê
-ô
-ø
-ª
--- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/dict/fa_dict.txt
+++ b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/dict/fa_dict.txt
-f
-a
-_
-i
-m
-g
-/
-1
-3
-I
-L
-S
-V
-R
-C
-2
-0
-v
-l
-6
-8
-5
-.
-j
-p	
-و
-د
-ر
-ك
-ن
-ش
-ه
-ا
-4
-9
-ی
-ج
-ِ
-7
-غ
-ل
-س
-ز
-ّ
-ت
-ک
-گ
-ي
-م
-ب
-ف
-چ
-خ
-ق
-ژ
-آ
-ص
-پ
-َ
-ع
-ئ
-ح
-ٔ
-ض
-ُ
-ذ
-أ
-ى
-ط
-ظ
-ث
-ة
-ً
-ء
-ؤ
-ْ
-ۀ
-إ
-ٍ
-ٌ
-ٰ
-ٓ
-ٱ
-s
-c
-e
-n
-w
-N
-E
-W
-Y
-D
-O
-H
-A
-d
-z
-r
-T
-G
-o
-t
-x
-h
-b
-B
-M
-Z
-u
-P
-F
-y
-q
-U
-K
-k
-J
-Q
-'
-X
-#
-?
-%
-$
-,
-:
-&
-!
-
-(
-É
-@
-é
-+
--- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/dict/french_dict.txt
+++ b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/dict/french_dict.txt
-f
-e
-n
-c
-h
-_
-i
-m
-g
-/
-r
-v
-a
-l
-t
-w
-o
-d
-6
-1
-.
-p
-B
-u
-2
-à
-3
-R
-y
-4
-U
-E
-A
-5
-P
-O
-S
-T
-D
-7
-Z
-8
-I
-N
-L
-G
-M
-H
-0
-J
-K
-
-9
-F
-C
-V
-é
-X
-'
-s
-Q
-:
-è
-x
-b
-Y
-Œ
-É
-z
-W
-Ç
-È
-k
-Ô
-ô
-€
-À
-Ê
-q
-ù
-°
-ê
-î
-*
-Â
-j
-"
-,
-â
-%
-û
-ç
-ü
-?
-!
-;
-ö
-(
-)
-ï
-º
-ó
-ø
-å
-+
-™
-á
-Ë
-<
-²
-Á
-Î
-&
-@
-œ
-ε
-Ü
-ë
-[
-]
-í
-ò
-Ö
-ä
-ß
-«
-»
-ú
-ñ
-æ
-µ
-³
-Å
-$
-#
--- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/dict/german_dict.txt
+++ b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/dict/german_dict.txt
-!
-"
-#
-$
-%
-&
-'
-(
-)
-*
-+
-,
-
-.
-/
-0
-1
-2
-3
-4
-5
-6
-7
-8
-9
-:
-;
-=
->
-?
-@
-A
-B
-C
-D
-E
-F
-G
-H
-I
-J
-K
-L
-M
-N
-O
-P
-Q
-R
-S
-T
-U
-V
-W
-X
-Y
-Z
-[
-]
-_
-a
-b
-c
-d
-e
-f
-g
-h
-i
-j
-k
-l
-m
-n
-o
-p	
-q
-r
-s
-t
-u
-v
-w
-x
-y
-z
-£
-§
-°
-´
-µ
-·
-º
-¿
-Á
-Ä
-Å
-É
-Ï
-Ô
-Ö
-Ü
-ß
-à
-á
-â
-ã
-ä
-å
-æ
-ç
-è
-é
-ê
-ë
-í
-ï
-ñ
-ò
-ó
-ô
-ö
-ø
-ù
-ú
-û
-ü
-ō
-Š
-Ÿ
-ʒ
-β
-δ
-з
-Ṡ
-‘
-€
-©
-ª
-«
-¬
--- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/dict/hi_dict.txt
+++ b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/dict/hi_dict.txt
-!
-#
-$
-%
-&
-'
-(
-+
-,
-
-.
-/
-0
-1
-2
-3
-4
-5
-6
-7
-8
-9
-:
-?
-@
-A
-B
-C
-D
-E
-F
-G
-H
-I
-J
-K
-L
-M
-N
-O
-P
-Q
-R
-S
-T
-U
-V
-W
-X
-Y
-Z
-_
-a
-b
-c
-d
-e
-f
-g
-h
-i
-j
-k
-l
-m
-n
-o
-p
-q
-r
-s
-t
-u
-v
-w
-x
-y
-z
-É
-é
-ँ
-ं
-ः
-अ
-आ
-इ
-ई
-उ
-ऊ
-ऋ
-ए
-ऐ
-ऑ
-ओ
-औ
-क
-ख
-ग
-घ
-ङ
-च
-छ
-ज
-झ
-ञ
-ट
-ठ
-ड
-ढ
-ण
-त
-थ
-द
-ध
-न
-प
-फ
-ब
-भ
-म
-य
-र
-ल
-ळ
-व
-श
-ष
-स
-ह
-़
-ा
-ि
-ी
-ु
-ू
-ृ
-ॅ
-े
-ै
-ॉ
-ो
-ौ
-्
-क़
-ख़
-ग़
-ज़
-ड़
-ढ़
-फ़
-०
-१
-२
-३
-४
-५
-६
-७
-८
-९
-॰
--- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/dict/it_dict.txt
+++ b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/dict/it_dict.txt
-i
-t
-_
-m
-g
-/
-5
-I
-L
-S
-V
-R
-C
-2
-0
-1
-v
-a
-l
-7
-8
-9
-6
-.
-j
-p
-e
-r
-o
-d
-s
-n
-3
-4
-P
-u
-c
-A
-
-,
-"
-z
-h
-f
-b
-q
-ì
-'
-à
-O
-è
-G
-ù
-é
-ò
-;
-F
-E
-B
-N
-H
-k
-:
-U
-T
-X
-D
-K
-?
-[
-M
-x
-y
-(
-)
-W
-ö
-º
-w
-]
-Q
-J
-+
-ü
-!
-È
-á
-%
-=
-»
-ñ
-Ö
-Y
-ä
-í
-Z
-«
-@
-ó
-ø
-ï
-ú
-ê
-ç
-Á
-É
-Å
-ß
-{
-}
-&
-`
-û
-î
-#
-$
--- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/dict/japan_dict.txt
+++ b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/dict/japan_dict.txt