Commit 43f21a77 authored by myhloli
Browse files

feat: add support for Korean and Latin configurations in OCR model processing

parent 86391acf
...@@ -133,15 +133,16 @@ latin_lang = [ ...@@ -133,15 +133,16 @@ latin_lang = [
] ]
arabic_lang = ['ar', 'fa', 'ug', 'ur'] arabic_lang = ['ar', 'fa', 'ug', 'ur']
cyrillic_lang = [ cyrillic_lang = [
'ru', 'rs_cyrillic', 'be', 'bg', 'uk', 'mn', 'abq', 'ady', 'kbd', 'ava', # noqa: E126 'rs_cyrillic', 'bg', 'mn', 'abq', 'ady', 'kbd', 'ava', # noqa: E126
'dar', 'inh', 'che', 'lbe', 'lez', 'tab' 'dar', 'inh', 'che', 'lbe', 'lez', 'tab'
] ]
east_slavic_lang = ["ru", "be", "uk"]
devanagari_lang = [ devanagari_lang = [
'hi', 'mr', 'ne', 'bh', 'mai', 'ang', 'bho', 'mah', 'sck', 'new', 'gom', # noqa: E126 'hi', 'mr', 'ne', 'bh', 'mai', 'ang', 'bho', 'mah', 'sck', 'new', 'gom', # noqa: E126
'sa', 'bgc' 'sa', 'bgc'
] ]
other_lang = ['ch', 'ch_lite', 'ch_server', 'en', 'korean', 'japan', 'chinese_cht', 'ta', 'te', 'ka'] other_lang = ['ch', 'ch_lite', 'ch_server', 'en', 'korean', 'japan', 'chinese_cht', 'ta', 'te', 'ka']
add_lang = ['latin', 'arabic', 'cyrillic', 'devanagari'] add_lang = ['latin', 'arabic', 'east_slavic', 'cyrillic', 'devanagari']
# all_lang = ['', 'auto'] # all_lang = ['', 'auto']
all_lang = [] all_lang = []
......
...@@ -26,9 +26,10 @@ latin_lang = [ ...@@ -26,9 +26,10 @@ latin_lang = [
] ]
arabic_lang = ['ar', 'fa', 'ug', 'ur'] arabic_lang = ['ar', 'fa', 'ug', 'ur']
cyrillic_lang = [ cyrillic_lang = [
'ru', 'rs_cyrillic', 'be', 'bg', 'uk', 'mn', 'abq', 'ady', 'kbd', 'ava', # noqa: E126 'rs_cyrillic', 'bg', 'mn', 'abq', 'ady', 'kbd', 'ava', # noqa: E126
'dar', 'inh', 'che', 'lbe', 'lez', 'tab' 'dar', 'inh', 'che', 'lbe', 'lez', 'tab'
] ]
east_slavic_lang = ["ru", "be", "uk"]
devanagari_lang = [ devanagari_lang = [
'hi', 'mr', 'ne', 'bh', 'mai', 'ang', 'bho', 'mah', 'sck', 'new', 'gom', # noqa: E126 'hi', 'mr', 'ne', 'bh', 'mai', 'ang', 'bho', 'mah', 'sck', 'new', 'gom', # noqa: E126
'sa', 'bgc' 'sa', 'bgc'
...@@ -69,6 +70,8 @@ class PytorchPaddleOCR(TextSystem): ...@@ -69,6 +70,8 @@ class PytorchPaddleOCR(TextSystem):
self.lang = 'cyrillic' self.lang = 'cyrillic'
elif self.lang in devanagari_lang: elif self.lang in devanagari_lang:
self.lang = 'devanagari' self.lang = 'devanagari'
elif self.lang in east_slavic_lang:
self.lang = 'east_slavic'
else: else:
pass pass
......
...@@ -490,3 +490,82 @@ devanagari_PP-OCRv3_rec_infer: ...@@ -490,3 +490,82 @@ devanagari_PP-OCRv3_rec_infer:
# out_channels: 169 # out_channels: 169
fc_decay: 0.00001 fc_decay: 0.00001
# Architecture entry for the Korean PP-OCRv5 text-recognition checkpoint.
# NOTE(review): nesting was reconstructed from a flattened diff view; confirm
# against the sibling *_PP-OCRv5_rec_infer entries in the same file.
korean_PP-OCRv5_rec_infer:
  model_type: rec
  algorithm: SVTR_HGNet
  Transform:
  Backbone:
    name: PPLCNetV3
    scale: 0.95
  Head:
    name: MultiHead
    out_channels_list:
      # presumably the CTC class count for the Korean dictionary; verify
      # against the dict file paired with this model
      CTCLabelDecode: 11947
    head_list:
      - CTCHead:
          Neck:
            name: svtr
            dims: 120
            depth: 2
            hidden_dims: 120
            kernel_size: [ 1, 3 ]
            use_guide: True
          Head:
            fc_decay: 0.00001
      - NRTRHead:
          nrtr_dim: 384
          max_text_length: 25
# Architecture entry for the Latin-script PP-OCRv5 text-recognition checkpoint.
# NOTE(review): nesting was reconstructed from a flattened diff view; confirm
# against the sibling *_PP-OCRv5_rec_infer entries in the same file.
latin_PP-OCRv5_rec_infer:
  model_type: rec
  algorithm: SVTR_HGNet
  Transform:
  Backbone:
    name: PPLCNetV3
    scale: 0.95
  Head:
    name: MultiHead
    out_channels_list:
      # presumably the CTC class count for the Latin dictionary; verify
      # against the dict file paired with this model
      CTCLabelDecode: 504
    head_list:
      - CTCHead:
          Neck:
            name: svtr
            dims: 120
            depth: 2
            hidden_dims: 120
            kernel_size: [ 1, 3 ]
            use_guide: True
          Head:
            fc_decay: 0.00001
      - NRTRHead:
          nrtr_dim: 384
          max_text_length: 25
# Architecture entry for the East Slavic PP-OCRv5 text-recognition checkpoint.
# NOTE(review): nesting was reconstructed from a flattened diff view; confirm
# against the sibling *_PP-OCRv5_rec_infer entries in the same file.
eslav_PP-OCRv5_rec_infer:
  model_type: rec
  algorithm: SVTR_HGNet
  Transform:
  Backbone:
    name: PPLCNetV3
    scale: 0.95
  Head:
    name: MultiHead
    out_channels_list:
      # presumably the CTC class count for the East Slavic dictionary; verify
      # against the dict file paired with this model
      CTCLabelDecode: 519
    head_list:
      - CTCHead:
          Neck:
            name: svtr
            dims: 120
            depth: 2
            hidden_dims: 120
            kernel_size: [ 1, 3 ]
            use_guide: True
          Head:
            fc_decay: 0.00001
      - NRTRHead:
          nrtr_dim: 384
          max_text_length: 25
!
"
#
$
%
&
'
(
)
*
+
,
-
.
/
0
1
2
3
4
5
6
7
8
9
:
;
<
=
>
?
A
B
C
D
E
F
G
H
I
J
K
L
M
N
O
P
Q
R
S
T
U
V
W
X
Y
Z
[
]
_
`
a
b
c
d
e
f
g
h
i
j
k
l
m
n
o
p
q
r
s
t
u
v
w
x
y
z
©
{
}
\
|
@
^
~
÷
·
±
®
Ω
¢
£
¥
𝑢
𝜓
ƒ
À
Á
Â
Ã
Ä
Å
Æ
Ç
È
É
Ê
Ë
Ì
Í
Î
Ï
Ð
Ñ
Ò
Ó
Ô
Õ
Ö
Ø
Ù
Ú
Û
Ü
Ý
Þ
à
á
â
ã
ä
å
æ
ç
è
é
ê
ë
ì
í
î
ï
ð
ñ
ò
ó
ô
õ
ö
ø
ù
ú
û
ü
ý
þ
ÿ
¡
¤
¦
§
¨
ª
«
¬
¯
°
²
³
´
µ
¸
¹
º
»
¼
½
¾
¿
×
Α
α
Β
β
Γ
γ
Δ
δ
Ε
ε
Ζ
ζ
Η
η
Θ
θ
Ι
ι
Κ
κ
Λ
λ
Μ
μ
Ν
ν
Ξ
ξ
Ο
ο
Π
π
Ρ
ρ
Σ
σ
ς
Τ
τ
Υ
υ
Φ
φ
Χ
χ
Ψ
ψ
ω
А
Б
В
Г
Ґ
Д
Е
Ё
Є
Ж
З
И
І
Ї
Й
К
Л
М
Н
О
П
Р
С
Т
У
Ў
Ф
Х
Ц
Ч
Ш
Щ
Ъ
Ы
Ь
Э
Ю
Я
а
б
в
г
ґ
д
е
ё
є
ж
з
и
і
ї
й
к
л
м
н
о
п
р
с
т
у
ў
ф
х
ц
ч
ш
щ
ъ
ы
ь
э
ю
я
!
"
#
$
%
&
'
(
)
*
+
,
-
.
/
0
1
2
3
4
5
6
7
8
9
:
;
<
=
>
?
@
A
B
C
D
E
F
G
H
I
J
K
L
M
N
O
P
Q
R
S
T
U
V
W
X
Y
Z
[
\
]
^
_
`
a
b
c
d
e
f
g
h
i
j
k
l
m
n
o
p
q
r
s
t
u
v
w
x
y
z
{
|
}
~
¡
¢
£
¤
¥
¦
§
¨
©
ª
«
¬
­
®
¯
°
±
²
³
´
µ
·
¸
¹
º
»
¼
½
¾
¿
À
Á
Â
Ã
Ä
Å
Æ
Ç
È
É
Ê
Ë
Ì
Í
Î
Ï
Ð
Ñ
Ò
Ó
Ô
Õ
Ö
×
Ø
Ù
Ú
Û
Ü
Ý
Þ
ß
à
á
â
ã
ä
å
æ
ç
è
é
ê
ë
ì
í
î
ï
ð
ñ
ò
ó
ô
õ
ö
÷
ø
ù
ú
û
ü
ý
þ
ÿ
Ą
ą
Ć
ć
Č
č
Ď
ď
Đ
đ
Ė
ė
Ę
ę
Ě
ě
Ğ
ğ
Į
į
İ
ı
Ĺ
ĺ
Ľ
ľ
Ł
ł
Ń
ń
Ň
ň
ō
Ő
ő
Œ
œ
Ŕ
ŕ
Ř
ř
Ś
ś
Ş
ş
Š
š
Ť
ť
Ū
ū
Ů
ů
Ű
ű
Ų
ų
Ÿ
Ź
ź
Ż
ż
Ž
ž
ƒ
ʒ
Ω
α
β
γ
δ
ε
ζ
η
θ
ι
κ
λ
μ
ν
ξ
ο
π
ρ
ς
σ
τ
υ
φ
χ
ψ
ω
з
𝑢
𝜓
...@@ -24,9 +24,9 @@ lang: ...@@ -24,9 +24,9 @@ lang:
rec: en_PP-OCRv4_rec_infer.pth rec: en_PP-OCRv4_rec_infer.pth
dict: en_dict.txt dict: en_dict.txt
korean: korean:
det: Multilingual_PP-OCRv3_det_infer.pth det: ch_PP-OCRv5_det_infer.pth
rec: korean_PP-OCRv3_rec_infer.pth rec: korean_PP-OCRv5_rec_infer.pth
dict: korean_dict.txt dict: ppocrv5_korean_dict.txt
japan: japan:
det: ch_PP-OCRv5_det_infer.pth det: ch_PP-OCRv5_det_infer.pth
rec: ch_PP-OCRv5_rec_server_infer.pth rec: ch_PP-OCRv5_rec_server_infer.pth
...@@ -48,9 +48,9 @@ lang: ...@@ -48,9 +48,9 @@ lang:
rec: ka_PP-OCRv3_rec_infer.pth rec: ka_PP-OCRv3_rec_infer.pth
dict: ka_dict.txt dict: ka_dict.txt
latin: latin:
det: en_PP-OCRv3_det_infer.pth det: ch_PP-OCRv5_det_infer.pth
rec: latin_PP-OCRv3_rec_infer.pth rec: latin_PP-OCRv5_rec_infer.pth
dict: latin_dict.txt dict: ppocrv5_latin_dict.txt
arabic: arabic:
det: Multilingual_PP-OCRv3_det_infer.pth det: Multilingual_PP-OCRv3_det_infer.pth
rec: arabic_PP-OCRv3_rec_infer.pth rec: arabic_PP-OCRv3_rec_infer.pth
...@@ -62,4 +62,8 @@ lang: ...@@ -62,4 +62,8 @@ lang:
devanagari: devanagari:
det: Multilingual_PP-OCRv3_det_infer.pth det: Multilingual_PP-OCRv3_det_infer.pth
rec: devanagari_PP-OCRv3_rec_infer.pth rec: devanagari_PP-OCRv3_rec_infer.pth
dict: devanagari_dict.txt dict: devanagari_dict.txt
\ No newline at end of file east_slavic:
det: ch_PP-OCRv5_det_infer.pth
rec: eslav_PP-OCRv5_rec_infer.pth
dict: ppocrv5_eslav_dict.txt
\ No newline at end of file
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment