wangsen / paddle_dbnet · Commit c1d19ce2

Unverified commit, authored Aug 13, 2020 by zhoujun; committed via GitHub on Aug 13, 2020.

    Merge pull request #2 from PaddlePaddle/develop

    mergepaddleocr

Parents: 56c6c3ae, bad9f6cd
Changes: 363
Showing 20 changed files with 797 additions and 97 deletions (+797 −97).
ppocr/data/rec/dataset_traversal.py           +155  −63
ppocr/data/rec/img_tools.py                   +294  −3
ppocr/modeling/architectures/__init__.py      +13   −0
ppocr/modeling/architectures/det_model.py     +14   −4
ppocr/modeling/architectures/rec_model.py     +21   −3
ppocr/modeling/backbones/__init__.py          +13   −0
ppocr/modeling/backbones/rec_mobilenet_v3.py  +23   −11
ppocr/modeling/heads/__init__.py              +13   −0
ppocr/modeling/heads/det_db_head.py           +1    −2
ppocr/modeling/heads/det_east_head.py         +2    −1
ppocr/modeling/heads/rec_attention_head.py    +9    −4
ppocr/modeling/heads/rec_ctc_head.py          +2    −1
ppocr/modeling/losses/__init__.py             +13   −0
ppocr/modeling/stns/__init__.py               +13   −0
ppocr/optimizer.py                            +31   −0
ppocr/postprocess/db_postprocess.py           +10   −4
ppocr/postprocess/east_postprocess.py         +16   −1
ppocr/postprocess/lanms/.gitignore            +1    −0
ppocr/postprocess/lanms/.ycm_extra_conf.py    +140  −0
ppocr/postprocess/lanms/Makefile              +13   −0
Too many changes to show: to preserve performance, only the 20 files above are rendered for this 363-file commit.
ppocr/data/rec/dataset_traversal.py

@@ -13,6 +13,7 @@
#limitations under the License.
import os
import sys
import math
import random
import numpy as np

@@ -22,6 +23,7 @@ import string
import lmdb
from ppocr.utils.utility import initial_logger
from ppocr.utils.utility import get_image_file_list
logger = initial_logger()
from .img_tools import process_image, get_img_data

@@ -39,10 +41,25 @@ class LMDBReader(object):
        self.loss_type = params['loss_type']
        self.max_text_length = params['max_text_length']
        self.mode = params['mode']
        self.drop_last = False
        self.use_tps = False
        if "tps" in params:
            self.use_tps = True  # the committed code reads "self.ues_tps", a typo
        self.use_distort = False
        if "distort" in params:
            self.use_distort = params['distort'] and params['use_gpu']
            if not params['use_gpu']:
                logger.info(
                    "Distort operation can only support in GPU. "
                    "Distort will be set to False.")
        if params['mode'] == 'train':
            self.batch_size = params['train_batch_size_per_card']
            self.drop_last = True
        else:
            self.batch_size = params['test_batch_size_per_card']
            self.drop_last = False
            self.use_distort = False
            self.infer_img = params['infer_img']

    def load_hierarchical_lmdb_dataset(self):
        lmdb_sets = {}

@@ -96,33 +113,52 @@ class LMDBReader(object):
            process_id = 0

        def sample_iter_reader():
-            lmdb_sets = self.load_hierarchical_lmdb_dataset()
-            if process_id == 0:
-                self.print_lmdb_sets_info(lmdb_sets)
-            cur_index_sets = [1 + process_id] * len(lmdb_sets)
-            while True:
-                finish_read_num = 0
-                for dataset_idx in range(len(lmdb_sets)):
-                    cur_index = cur_index_sets[dataset_idx]
-                    if cur_index > lmdb_sets[dataset_idx]['num_samples']:
-                        finish_read_num += 1
-                    else:
-                        sample_info = self.get_lmdb_sample_info(
-                            lmdb_sets[dataset_idx]['txn'], cur_index)
-                        cur_index_sets[dataset_idx] += self.num_workers
-                        if sample_info is None:
-                            continue
-                        img, label = sample_info
-                        outs = process_image(img, self.image_shape, label,
-                                             self.char_ops, self.loss_type,
-                                             self.max_text_length)
-                        if outs is None:
-                            continue
-                        yield outs
-                if finish_read_num == len(lmdb_sets):
-                    break
-            self.close_lmdb_dataset(lmdb_sets)
+            if self.mode != 'train' and self.infer_img is not None:
+                image_file_list = get_image_file_list(self.infer_img)
+                for single_img in image_file_list:
+                    img = cv2.imread(single_img)
+                    if img.shape[-1] == 1 or len(list(img.shape)) == 2:
+                        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
+                    norm_img = process_image(
+                        img=img,
+                        image_shape=self.image_shape,
+                        char_ops=self.char_ops,
+                        tps=self.use_tps,
+                        infer_mode=True)
+                    yield norm_img
+            else:
+                lmdb_sets = self.load_hierarchical_lmdb_dataset()
+                if process_id == 0:
+                    self.print_lmdb_sets_info(lmdb_sets)
+                cur_index_sets = [1 + process_id] * len(lmdb_sets)
+                while True:
+                    finish_read_num = 0
+                    for dataset_idx in range(len(lmdb_sets)):
+                        cur_index = cur_index_sets[dataset_idx]
+                        if cur_index > lmdb_sets[dataset_idx]['num_samples']:
+                            finish_read_num += 1
+                        else:
+                            sample_info = self.get_lmdb_sample_info(
+                                lmdb_sets[dataset_idx]['txn'], cur_index)
+                            cur_index_sets[dataset_idx] += self.num_workers
+                            if sample_info is None:
+                                continue
+                            img, label = sample_info
+                            outs = process_image(
+                                img=img,
+                                image_shape=self.image_shape,
+                                label=label,
+                                char_ops=self.char_ops,
+                                loss_type=self.loss_type,
+                                max_text_length=self.max_text_length,
+                                distort=self.use_distort)
+                            if outs is None:
+                                continue
+                            yield outs
+                    if finish_read_num == len(lmdb_sets):
+                        break
+                self.close_lmdb_dataset(lmdb_sets)

        def batch_iter_reader():
            batch_outs = []

@@ -131,10 +167,13 @@ class LMDBReader(object):
                if len(batch_outs) == self.batch_size:
                    yield batch_outs
                    batch_outs = []
-            if len(batch_outs) != 0:
-                yield batch_outs
+            if not self.drop_last:
+                if len(batch_outs) != 0:
+                    yield batch_outs

-        return batch_iter_reader
+        if self.infer_img is None:
+            return batch_iter_reader
+        return sample_iter_reader


class SimpleReader(object):

@@ -143,50 +182,100 @@ class SimpleReader(object):
            self.num_workers = 1
        else:
            self.num_workers = params['num_workers']
-        self.img_set_dir = params['img_set_dir']
-        self.label_file_path = params['label_file_path']
+        if params['mode'] != 'test':
+            self.img_set_dir = params['img_set_dir']
+            self.label_file_path = params['label_file_path']
+        self.use_gpu = params['use_gpu']
        self.char_ops = params['char_ops']
        self.image_shape = params['image_shape']
        self.loss_type = params['loss_type']
        self.max_text_length = params['max_text_length']
        self.mode = params['mode']
        self.infer_img = params['infer_img']
        self.use_tps = False
        if "tps" in params:
            self.use_tps = True
        self.use_distort = False
        if "distort" in params:
            self.use_distort = params['distort'] and params['use_gpu']
            if not params['use_gpu']:
                logger.info(
                    "Distort operation can only support in GPU. "
                    "Distort will be set to False.")
        if params['mode'] == 'train':
            self.batch_size = params['train_batch_size_per_card']
-        elif params['mode'] == 'eval':
-            self.batch_size = params['test_batch_size_per_card']
+            self.drop_last = True
        else:
-            self.batch_size = 1
-            self.infer_img = params['infer_img']
+            self.batch_size = params['test_batch_size_per_card']
+            self.drop_last = False
+            self.use_distort = False

    def __call__(self, process_id):
        if self.mode != 'train':
            process_id = 0

        def get_device_num():
            if self.use_gpu:
                # note: the fallback 1 is an int; .split assumes the env var is set
                gpus = os.environ.get("CUDA_VISIBLE_DEVICES", 1)
                gpu_num = len(gpus.split(','))
                return gpu_num
            else:
                cpu_num = os.environ.get("CPU_NUM", 1)
                return int(cpu_num)

        def sample_iter_reader():
-            if self.mode == 'test':
-                print("infer_img:", self.infer_img)
-                img = cv2.imread(self.infer_img)
-                norm_img = process_image(img, self.image_shape)
-                yield norm_img
-            with open(self.label_file_path, "rb") as fin:
-                label_infor_list = fin.readlines()
-            img_num = len(label_infor_list)
-            img_id_list = list(range(img_num))
-            random.shuffle(img_id_list)
-            for img_id in range(process_id, img_num, self.num_workers):
-                label_infor = label_infor_list[img_id_list[img_id]]
-                substr = label_infor.decode('utf-8').strip("\n").split("\t")
-                img_path = self.img_set_dir + "/" + substr[0]
-                img = cv2.imread(img_path)
-                if img is None:
-                    continue
-                label = substr[1]
-                outs = process_image(img, self.image_shape, label,
-                                     self.char_ops, self.loss_type,
-                                     self.max_text_length)
-                if outs is None:
-                    continue
-                yield outs
+            if self.mode != 'train' and self.infer_img is not None:
+                image_file_list = get_image_file_list(self.infer_img)
+                for single_img in image_file_list:
+                    img = cv2.imread(single_img)
+                    if img.shape[-1] == 1 or len(list(img.shape)) == 2:
+                        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
+                    norm_img = process_image(
+                        img=img,
+                        image_shape=self.image_shape,
+                        char_ops=self.char_ops,
+                        tps=self.use_tps,
+                        infer_mode=True)
+                    yield norm_img
+            else:
+                with open(self.label_file_path, "rb") as fin:
+                    label_infor_list = fin.readlines()
+                img_num = len(label_infor_list)
+                img_id_list = list(range(img_num))
+                random.shuffle(img_id_list)
+                if sys.platform == "win32" and self.num_workers != 1:
+                    print("multiprocess is not fully compatible with Windows. "
+                          "num_workers will be 1.")
+                    self.num_workers = 1
+                if self.batch_size * get_device_num() * self.num_workers > img_num:
+                    raise Exception(
+                        "The number of the whole data ({}) is smaller than the "
+                        "batch_size * devices_num * num_workers ({})".format(
+                            img_num,
+                            self.batch_size * get_device_num() * self.num_workers))
+                for img_id in range(process_id, img_num, self.num_workers):
+                    label_infor = label_infor_list[img_id_list[img_id]]
+                    substr = label_infor.decode('utf-8').strip("\n").split("\t")
+                    img_path = self.img_set_dir + "/" + substr[0]
+                    img = cv2.imread(img_path)
+                    if img is None:
+                        logger.info("{} does not exist!".format(img_path))
+                        continue
+                    if img.shape[-1] == 1 or len(list(img.shape)) == 2:
+                        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
+                    label = substr[1]
+                    outs = process_image(
+                        img=img,
+                        image_shape=self.image_shape,
+                        label=label,
+                        char_ops=self.char_ops,
+                        loss_type=self.loss_type,
+                        max_text_length=self.max_text_length,
+                        distort=self.use_distort)
+                    if outs is None:
+                        continue
+                    yield outs

        def batch_iter_reader():
            batch_outs = []

@@ -195,7 +284,10 @@ class SimpleReader(object):
                if len(batch_outs) == self.batch_size:
                    yield batch_outs
                    batch_outs = []
-            if len(batch_outs) != 0:
-                yield batch_outs
+            if not self.drop_last:
+                if len(batch_outs) != 0:
+                    yield batch_outs

-        return batch_iter_reader
+        if self.infer_img is None:
+            return batch_iter_reader
+        return sample_iter_reader
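The new drop_last flag changes what happens to the final partial batch: training discards it, evaluation keeps it. A minimal standalone sketch of the same rule (names are illustrative, not from the diff):

def batch_iter(samples, batch_size, drop_last):
    # Group a sample stream into fixed-size batches; the tail batch that is
    # smaller than batch_size is yielded only when drop_last is False.
    batch = []
    for sample in samples:
        batch.append(sample)
        if len(batch) == batch_size:
            yield batch
            batch = []
    if not drop_last and len(batch) != 0:
        yield batch

# 7 samples, batch_size 3: training (drop_last=True) yields 2 batches,
# evaluation (drop_last=False) yields 3, the last holding a single sample.
assert len(list(batch_iter(range(7), 3, True))) == 2
assert len(list(batch_iter(range(7), 3, False))) == 3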
ppocr/data/rec/img_tools.py

@@ -15,6 +15,9 @@
import math
import cv2
import numpy as np
import random

from ppocr.utils.utility import initial_logger
logger = initial_logger()


def get_bounding_box_rect(pos):

@@ -48,6 +51,32 @@ def resize_norm_img(img, image_shape):
    return padding_im


def resize_norm_img_chinese(img, image_shape):
    imgC, imgH, imgW = image_shape
    # todo: change to 0 and modified image shape
    max_wh_ratio = 0
    h, w = img.shape[0], img.shape[1]
    ratio = w * 1.0 / h
    max_wh_ratio = max(max_wh_ratio, ratio)
    imgW = int(32 * max_wh_ratio)
    if math.ceil(imgH * ratio) > imgW:
        resized_w = imgW
    else:
        resized_w = int(math.ceil(imgH * ratio))
    resized_image = cv2.resize(img, (resized_w, imgH))
    resized_image = resized_image.astype('float32')
    if image_shape[0] == 1:
        resized_image = resized_image / 255
        resized_image = resized_image[np.newaxis, :]
    else:
        resized_image = resized_image.transpose((2, 0, 1)) / 255
    resized_image -= 0.5
    resized_image /= 0.5
    padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32)
    padding_im[:, :, 0:resized_w] = resized_image
    return padding_im


def get_img_data(value):
    """get_img_data"""
    if not value:

@@ -61,18 +90,280 @@ def get_img_data(value):
    return imgori


def flag():
    """
    flag
    """
    return 1 if random.random() > 0.5000001 else -1


def cvtColor(img):
    """
    cvtColor
    """
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    delta = 0.001 * random.random() * flag()
    hsv[:, :, 2] = hsv[:, :, 2] * (1 + delta)
    new_img = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
    return new_img


def blur(img):
    """
    blur
    """
    h, w, _ = img.shape
    if h > 10 and w > 10:
        return cv2.GaussianBlur(img, (5, 5), 1)
    else:
        return img


def jitter(img):
    """
    jitter
    """
    w, h, _ = img.shape
    if h > 10 and w > 10:
        thres = min(w, h)
        s = int(random.random() * thres * 0.01)
        src_img = img.copy()
        for i in range(s):
            img[i:, i:, :] = src_img[:w - i, :h - i, :]
        return img
    else:
        return img


def add_gasuss_noise(image, mean=0, var=0.1):
    """
    Gasuss noise
    """
    noise = np.random.normal(mean, var**0.5, image.shape)
    out = image + 0.5 * noise
    out = np.clip(out, 0, 255)
    out = np.uint8(out)
    return out


def get_crop(image):
    """
    random crop
    """
    h, w, _ = image.shape
    top_min = 1
    top_max = 8
    top_crop = int(random.randint(top_min, top_max))
    top_crop = min(top_crop, h - 1)
    crop_img = image.copy()
    ratio = random.randint(0, 1)
    if ratio:
        crop_img = crop_img[top_crop:h, :, :]
    else:
        crop_img = crop_img[0:h - top_crop, :, :]
    return crop_img


class Config:
    """
    Config
    """

    def __init__(self, ):
        self.anglex = random.random() * 30
        self.angley = random.random() * 15
        self.anglez = random.random() * 10
        self.fov = 42
        self.r = 0
        self.shearx = random.random() * 0.3
        self.sheary = random.random() * 0.05
        self.borderMode = cv2.BORDER_REPLICATE

    def make(self, w, h, ang):
        """
        make
        """
        self.anglex = random.random() * 5 * flag()
        self.angley = random.random() * 5 * flag()
        self.anglez = -1 * random.random() * int(ang) * flag()
        self.fov = 42
        self.r = 0
        self.shearx = 0
        self.sheary = 0
        self.borderMode = cv2.BORDER_REPLICATE
        self.w = w
        self.h = h

        self.perspective = True
        self.crop = True
        self.affine = False
        self.reverse = True
        self.noise = True
        self.jitter = True
        self.blur = True
        self.color = True


def rad(x):
    """
    rad
    """
    return x * np.pi / 180


def get_warpR(config):
    """
    get_warpR
    """
    anglex, angley, anglez, fov, w, h, r = \
        config.anglex, config.angley, config.anglez, config.fov, \
        config.w, config.h, config.r
    if w > 69 and w < 112:
        anglex = anglex * 1.5

    z = np.sqrt(w**2 + h**2) / 2 / np.tan(rad(fov / 2))
    # Homogeneous coordinate transformation matrix
    rx = np.array([[1, 0, 0, 0],
                   [0, np.cos(rad(anglex)), -np.sin(rad(anglex)), 0],
                   [0, -np.sin(rad(anglex)), np.cos(rad(anglex)), 0, ],
                   [0, 0, 0, 1]], np.float32)
    ry = np.array([[np.cos(rad(angley)), 0, np.sin(rad(angley)), 0],
                   [0, 1, 0, 0],
                   [-np.sin(rad(angley)), 0, np.cos(rad(angley)), 0, ],
                   [0, 0, 0, 1]], np.float32)
    rz = np.array([[np.cos(rad(anglez)), np.sin(rad(anglez)), 0, 0],
                   [-np.sin(rad(anglez)), np.cos(rad(anglez)), 0, 0],
                   [0, 0, 1, 0],
                   [0, 0, 0, 1]], np.float32)
    r = rx.dot(ry).dot(rz)
    # generate 4 points
    pcenter = np.array([h / 2, w / 2, 0, 0], np.float32)
    p1 = np.array([0, 0, 0, 0], np.float32) - pcenter
    p2 = np.array([w, 0, 0, 0], np.float32) - pcenter
    p3 = np.array([0, h, 0, 0], np.float32) - pcenter
    p4 = np.array([w, h, 0, 0], np.float32) - pcenter
    dst1 = r.dot(p1)
    dst2 = r.dot(p2)
    dst3 = r.dot(p3)
    dst4 = r.dot(p4)
    list_dst = np.array([dst1, dst2, dst3, dst4])
    org = np.array([[0, 0], [w, 0], [0, h], [w, h]], np.float32)
    dst = np.zeros((4, 2), np.float32)
    # Project onto the image plane
    dst[:, 0] = list_dst[:, 0] * z / (z - list_dst[:, 2]) + pcenter[0]
    dst[:, 1] = list_dst[:, 1] * z / (z - list_dst[:, 2]) + pcenter[1]

    warpR = cv2.getPerspectiveTransform(org, dst)

    dst1, dst2, dst3, dst4 = dst
    r1 = int(min(dst1[1], dst2[1]))
    r2 = int(max(dst3[1], dst4[1]))
    c1 = int(min(dst1[0], dst3[0]))
    c2 = int(max(dst2[0], dst4[0]))

    try:
        ratio = min(1.0 * h / (r2 - r1), 1.0 * w / (c2 - c1))

        dx = -c1
        dy = -r1
        T1 = np.float32([[1., 0, dx], [0, 1., dy], [0, 0, 1.0 / ratio]])
        ret = T1.dot(warpR)
    except:
        ratio = 1.0
        T1 = np.float32([[1., 0, 0], [0, 1., 0], [0, 0, 1.]])
        ret = T1
    return ret, (-r1, -c1), ratio, dst


def get_warpAffine(config):
    """
    get_warpAffine
    """
    anglez = config.anglez
    rz = np.array([[np.cos(rad(anglez)), np.sin(rad(anglez)), 0],
                   [-np.sin(rad(anglez)), np.cos(rad(anglez)), 0]], np.float32)
    return rz


def warp(img, ang):
    """
    warp
    """
    h, w, _ = img.shape
    config = Config()
    config.make(w, h, ang)
    new_img = img

    if config.perspective:
        tp = random.randint(1, 100)
        if tp >= 50:
            warpR, (r1, c1), ratio, dst = get_warpR(config)
            new_w = int(np.max(dst[:, 0])) - int(np.min(dst[:, 0]))
            new_img = cv2.warpPerspective(
                new_img, warpR, (int(new_w * ratio), h),
                borderMode=config.borderMode)
    if config.crop:
        img_height, img_width = img.shape[0:2]
        tp = random.randint(1, 100)
        if tp >= 50 and img_height >= 20 and img_width >= 20:
            new_img = get_crop(new_img)
    if config.affine:
        warpT = get_warpAffine(config)
        new_img = cv2.warpAffine(
            new_img, warpT, (w, h), borderMode=config.borderMode)
    if config.blur:
        tp = random.randint(1, 100)
        if tp >= 50:
            new_img = blur(new_img)
    if config.color:
        tp = random.randint(1, 100)
        if tp >= 50:
            new_img = cvtColor(new_img)
    if config.jitter:
        new_img = jitter(new_img)
    if config.noise:
        tp = random.randint(1, 100)
        if tp >= 50:
            new_img = add_gasuss_noise(new_img)
    if config.reverse:
        tp = random.randint(1, 100)
        if tp >= 50:
            new_img = 255 - new_img
    return new_img


def process_image(img,
                  image_shape,
                  label=None,
                  char_ops=None,
                  loss_type=None,
-                  max_text_length=None):
-    norm_img = resize_norm_img(img, image_shape)
+                  max_text_length=None,
+                  tps=None,
+                  infer_mode=False,
+                  distort=False):
+    if distort:
+        img = warp(img, 10)
+    if infer_mode and char_ops.character_type == "ch" and not tps:
+        norm_img = resize_norm_img_chinese(img, image_shape)
+    else:
+        norm_img = resize_norm_img(img, image_shape)
    norm_img = norm_img[np.newaxis, :]
    if label is not None:
-        char_num = char_ops.get_char_num()
+        # char_num = char_ops.get_char_num()
        text = char_ops.encode(label)
        if len(text) == 0 or len(text) > max_text_length:
            logger.info(
                "Warning in ppocr/data/rec/img_tools.py: Wrong data type. "
                "Expected string with length between 1 and {}, but "
                "got '{}'. Label is '{}'".format(max_text_length,
                                                 len(text), label))
            return None
        else:
            if loss_type == "ctc":
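The distort pipeline above is driven entirely by random draws, so each call to warp produces a different augmentation. A quick way to eyeball it, assuming the module is importable from a PaddleOCR checkout (the file path is the one shown in this diff):

import cv2
import numpy as np
from ppocr.data.rec.img_tools import warp

# A synthetic 32x100 BGR crop stands in for a real text-line image.
img = (np.random.rand(32, 100, 3) * 255).astype(np.uint8)

# Apply the random perspective/crop/blur/color/jitter/noise/reverse chain;
# 10 is the maximum rotation angle, the same value process_image passes.
for i in range(4):
    distorted = warp(img.copy(), 10)
    cv2.imwrite("distort_{}.jpg".format(i), distorted)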
ppocr/modeling/architectures/__init__.py (new file, mode 100755)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
ppocr/modeling/architectures/det_model.py

@@ -59,16 +59,23 @@ class DetModel(object):
        return: (image, corresponding label, dataloader)
        """
        image_shape = deepcopy(self.image_shape)
        if image_shape[1] % 4 != 0 or image_shape[2] % 4 != 0:
            raise Exception("The size of the image must be divisible by 4, "
                            "received image shape is {}, please reset the "
                            "Global.image_shape in the yml file".format(
                                image_shape))
        image = fluid.layers.data(
            name='image', shape=image_shape, dtype='float32')
        if mode == "train":
            if self.algorithm == "EAST":
                h, w = int(image_shape[1] // 4), int(image_shape[2] // 4)
                score = fluid.layers.data(
-                    name='score', shape=[1, 128, 128], dtype='float32')
+                    name='score', shape=[1, h, w], dtype='float32')
                geo = fluid.layers.data(
-                    name='geo', shape=[9, 128, 128], dtype='float32')
+                    name='geo', shape=[9, h, w], dtype='float32')
                mask = fluid.layers.data(
-                    name='mask', shape=[1, 128, 128], dtype='float32')
+                    name='mask', shape=[1, h, w], dtype='float32')
                feed_list = [image, score, geo, mask]
                labels = {'score': score, 'geo': geo, 'mask': mask}
            elif self.algorithm == "DB":

@@ -109,7 +116,10 @@ class DetModel(object):
        """
        image, labels, loader = self.create_feed(mode)
        conv_feas = self.backbone(image)
-        predicts = self.head(conv_feas)
+        if self.algorithm == "DB":
+            predicts = self.head(conv_feas, mode)
+        else:
+            predicts = self.head(conv_feas)
        if mode == "train":
            losses = self.loss(predicts, labels)
            return loader, losses
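The EAST branch now derives the score/geo/mask label sizes from the configured image shape instead of hard-coding 128. A worked check (not code from the diff):

# Global.image_shape is [C, H, W]; EAST labels live at 1/4 resolution.
image_shape = [3, 512, 512]
h, w = image_shape[1] // 4, image_shape[2] // 4
print(h, w)  # 128 128 -- the old hard-coded value

image_shape = [3, 640, 960]
h, w = image_shape[1] // 4, image_shape[2] // 4
print(h, w)  # 160 240 -- non-square shapes now get matching labels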
ppocr/modeling/architectures/rec_model.py

@@ -30,6 +30,8 @@ class RecModel(object):
        global_params = params['Global']
        char_num = global_params['char_ops'].get_char_num()
        global_params['char_num'] = char_num
        self.char_type = global_params['character_type']
        self.infer_img = global_params['infer_img']
        if "TPS" in params:
            tps_params = deepcopy(params["TPS"])
            tps_params.update(global_params)

@@ -60,8 +62,8 @@ class RecModel(object):
    def create_feed(self, mode):
        image_shape = deepcopy(self.image_shape)
        image_shape.insert(0, -1)
-        image = fluid.data(name='image', shape=image_shape, dtype='float32')
        if mode == "train":
+            image = fluid.data(name='image', shape=image_shape, dtype='float32')
            if self.loss_type == "attention":
                label_in = fluid.data(
                    name='label_in',

@@ -86,6 +88,16 @@ class RecModel(object):
                use_double_buffer=True,
                iterable=False)
        else:
            if self.char_type == "ch" and self.infer_img:
                image_shape[-1] = -1
                if self.tps != None:
                    logger.info(
                        "WARNING!!!\n"
                        "TPS does not support variable shape in chinese! "
                        "We set img_shape to be the same, it may affect the "
                        "inference effect")
                    image_shape = deepcopy(self.image_shape)
            image = fluid.data(name='image', shape=image_shape, dtype='float32')
            labels = None
            loader = None
        return image, labels, loader

@@ -109,6 +121,12 @@ class RecModel(object):
                    'decoded_out': decoded_out,
                    'label': label
                }
            return loader, outputs
        elif mode == "export":
-            return [image, {'decoded_out': decoded_out}]
+            predict = predicts['predict']
+            if self.loss_type == "ctc":
+                predict = fluid.layers.softmax(predict)
+            return [image, {'decoded_out': decoded_out, 'predicts': predict}]
        else:
-            return loader, {'decoded_out': decoded_out}
+            predict = predicts['predict']
+            if self.loss_type == "ctc":
+                predict = fluid.layers.softmax(predict)
+            return loader, {'decoded_out': decoded_out, 'predicts': predict}
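For CTC models the exported program now fetches softmax probabilities alongside the decoded ids, so callers can read a confidence per step. The transform itself is the ordinary row-wise softmax; a numpy sketch of what fluid.layers.softmax contributes to the fetch list:

import numpy as np

def softmax(logits, axis=-1):
    # Numerically stable softmax over the character dimension.
    shifted = logits - logits.max(axis=axis, keepdims=True)
    exp = np.exp(shifted)
    return exp / exp.sum(axis=axis, keepdims=True)

# One time step over a toy 5-character alphabet (+1 for the CTC blank).
logits = np.array([[2.0, 0.5, 0.1, -1.0, 0.3, 0.0]])
probs = softmax(logits)
print(probs.argmax(), probs.max())  # decoded id and its confidence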
ppocr/modeling/backbones/__init__.py (new file, mode 100755)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
ppocr/modeling/backbones/rec_mobilenet_v3.py

@@ -31,16 +31,28 @@ __all__ = [
class MobileNetV3():
    def __init__(self, params):
-        self.scale = params['scale']
-        model_name = params['model_name']
+        self.scale = params.get("scale", 0.5)
+        model_name = params.get("model_name", "small")
+        large_stride = params.get("large_stride", [1, 2, 2, 2])
+        small_stride = params.get("small_stride", [2, 2, 2, 2])
+
+        assert isinstance(large_stride, list), "large_stride type must " \
+            "be list but got {}".format(type(large_stride))
+        assert isinstance(small_stride, list), "small_stride type must " \
+            "be list but got {}".format(type(small_stride))
+        assert len(large_stride) == 4, "large_stride length must be " \
+            "4 but got {}".format(len(large_stride))
+        assert len(small_stride) == 4, "small_stride length must be " \
+            "4 but got {}".format(len(small_stride))
+
        self.inplanes = 16
        if model_name == "large":
            self.cfg = [
                # k, exp, c, se, nl, s,
-                [3, 16, 16, False, 'relu', 1],
-                [3, 64, 24, False, 'relu', (2, 1)],
+                [3, 16, 16, False, 'relu', large_stride[0]],
+                [3, 64, 24, False, 'relu', (large_stride[1], 1)],
                [3, 72, 24, False, 'relu', 1],
-                [5, 72, 40, True, 'relu', (2, 1)],
+                [5, 72, 40, True, 'relu', (large_stride[2], 1)],
                [5, 120, 40, True, 'relu', 1],
                [5, 120, 40, True, 'relu', 1],
                [3, 240, 80, False, 'hard_swish', 1],

@@ -49,7 +61,7 @@ class MobileNetV3():
                [3, 184, 80, False, 'hard_swish', 1],
                [3, 480, 112, True, 'hard_swish', 1],
                [3, 672, 112, True, 'hard_swish', 1],
-                [5, 672, 160, True, 'hard_swish', (2, 1)],
+                [5, 672, 160, True, 'hard_swish', (large_stride[3], 1)],
                [5, 960, 160, True, 'hard_swish', 1],
                [5, 960, 160, True, 'hard_swish', 1],
            ]

@@ -58,15 +70,15 @@ class MobileNetV3():
        elif model_name == "small":
            self.cfg = [
                # k, exp, c, se, nl, s,
-                [3, 16, 16, True, 'relu', (2, 1)],
-                [3, 72, 24, False, 'relu', (2, 1)],
+                [3, 16, 16, True, 'relu', (small_stride[0], 1)],
+                [3, 72, 24, False, 'relu', (small_stride[1], 1)],
                [3, 88, 24, False, 'relu', 1],
-                [5, 96, 40, True, 'hard_swish', (2, 1)],
+                [5, 96, 40, True, 'hard_swish', (small_stride[2], 1)],
                [5, 240, 40, True, 'hard_swish', 1],
                [5, 240, 40, True, 'hard_swish', 1],
                [5, 120, 48, True, 'hard_swish', 1],
                [5, 144, 48, True, 'hard_swish', 1],
-                [5, 288, 96, True, 'hard_swish', (2, 1)],
+                [5, 288, 96, True, 'hard_swish', (small_stride[3], 1)],
                [5, 576, 96, True, 'hard_swish', 1],
                [5, 576, 96, True, 'hard_swish', 1],
            ]

@@ -78,7 +90,7 @@ class MobileNetV3():
        supported_scale = [0.35, 0.5, 0.75, 1.0, 1.25]
        assert self.scale in supported_scale, \
-            "supported scale are {} but input scale is {}".format(
-                supported_scale, scale)
+            "supported scales are {} but input scale is {}".format(
+                supported_scale, self.scale)

    def __call__(self, input):
        scale = self.scale
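The backbone now reads its knobs with params.get(...) so a config may omit them, and the per-stage strides are validated up front. A condensed sketch of the same pattern on a hypothetical config dict:

params = {"model_name": "large"}          # 'scale' and strides omitted

scale = params.get("scale", 0.5)          # falls back to 0.5
model_name = params.get("model_name", "small")
large_stride = params.get("large_stride", [1, 2, 2, 2])

assert isinstance(large_stride, list), \
    "large_stride type must be list but got {}".format(type(large_stride))
assert len(large_stride) == 4, \
    "large_stride length must be 4 but got {}".format(len(large_stride))
print(scale, model_name, large_stride)    # 0.5 large [1, 2, 2, 2]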
ppocr/modeling/heads/__init__.py (new file, mode 100755)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
ppocr/modeling/heads/det_db_head.py

@@ -115,7 +115,6 @@ class DBHead(object):
        initializer = fluid.initializer.Uniform(-stdv, stdv)
        bias_attr = fluid.ParamAttr(
            regularizer=regularizer,
-            gradient_clip=gradient_clip,
            initializer=initializer,
            name=name + "_b_attr")
        return bias_attr

@@ -196,7 +195,7 @@ class DBHead(object):
        fuse = fluid.layers.concat(input=[p5, p4, p3, p2], axis=1)
        shrink_maps = self.binarize(fuse)
        if mode != "train":
-            return shrink_maps
+            return {"maps": shrink_maps}
        threshold_maps = self.thresh(fuse)
        binary_maps = self.step_function(shrink_maps, threshold_maps)
        y = fluid.layers.concat(
ppocr/modeling/heads/det_east_head.py

@@ -18,6 +18,7 @@ from __future__ import print_function
import paddle.fluid as fluid
from ..common_functions import conv_bn_layer, deconv_bn_layer
from collections import OrderedDict


class EASTHead(object):

@@ -110,7 +111,7 @@ class EASTHead(object):
    def __call__(self, inputs):
        f_common = self.unet_fusion(inputs)
        f_score, f_geo = self.detector_header(f_common)
-        predicts = {}
+        predicts = OrderedDict()
        predicts['f_score'] = f_score
        predicts['f_geo'] = f_geo
        return predicts
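Switching predicts from a plain dict to an OrderedDict pins the key order, which matters when the dict is later flattened into a fetch list; on the Python 2 and early Python 3 interpreters Paddle supported at the time, a plain dict made no ordering guarantee. An illustration (not from the diff):

from collections import OrderedDict

predicts = OrderedDict()
predicts['f_score'] = "score tensor"
predicts['f_geo'] = "geometry tensor"

# Iteration order is guaranteed to be insertion order, so downstream code
# that does list(predicts.values()) always sees [f_score, f_geo].
print(list(predicts.keys()))  # ['f_score', 'f_geo']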
ppocr/modeling/heads/rec_attention_head.py

@@ -123,6 +123,8 @@ class AttentionPredict(object):
        full_ids = fluid.layers.fill_constant_batch_size_like(
            input=init_state, shape=[-1, 1], dtype='int64', value=1)
        full_scores = fluid.layers.fill_constant_batch_size_like(
            input=init_state, shape=[-1, 1], dtype='float32', value=1)

        cond = layers.less_than(x=counter, y=array_len)
        while_op = layers.While(cond=cond)

@@ -171,6 +173,9 @@ class AttentionPredict(object):
            new_ids = fluid.layers.concat([full_ids, topk_indices], axis=1)
            fluid.layers.assign(new_ids, full_ids)
            new_scores = fluid.layers.concat(
                [full_scores, topk_scores], axis=1)
            fluid.layers.assign(new_scores, full_scores)
            layers.increment(x=counter, value=1, in_place=True)

            # update the memories

@@ -184,7 +189,7 @@ class AttentionPredict(object):
            length_cond = layers.less_than(x=counter, y=array_len)
            finish_cond = layers.logical_not(layers.is_empty(x=topk_indices))
            layers.logical_and(x=length_cond, y=finish_cond, out=cond)
-        return full_ids
+        return full_ids, full_scores

    def __call__(self, inputs, labels=None, mode=None):
        encoder_features = self.encoder(inputs)

@@ -223,10 +228,10 @@ class AttentionPredict(object):
                decoder_size, char_num)
            _, decoded_out = layers.topk(input=predict, k=1)
            decoded_out = layers.lod_reset(decoded_out, y=label_out)
            predicts = {'predict': predict, 'decoded_out': decoded_out}
        else:
-            ids = self.gru_attention_infer(
+            ids, predict = self.gru_attention_infer(
                decoder_boot, self.max_length, char_num, word_vector_dim,
                encoded_vector, encoded_proj, decoder_size)
-            predicts = {'decoded_out': ids}
+            predicts = {'predict': predict, 'decoded_out': ids}
        return predicts
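The inference loop now tracks the top-1 score of every decoding step next to the ids by concatenating one column per step. The growth pattern, sketched in numpy with made-up values:

import numpy as np

batch = 2
full_ids = np.ones((batch, 1), dtype=np.int64)       # start-token column
full_scores = np.ones((batch, 1), dtype=np.float32)

for step_ids, step_scores in [([3, 7], [0.9, 0.8]), ([5, 2], [0.7, 0.95])]:
    col_ids = np.array(step_ids, dtype=np.int64).reshape(batch, 1)
    col_scores = np.array(step_scores, dtype=np.float32).reshape(batch, 1)
    full_ids = np.concatenate([full_ids, col_ids], axis=1)
    full_scores = np.concatenate([full_scores, col_scores], axis=1)

print(full_ids.shape, full_scores.shape)  # (2, 3) (2, 3) after two steps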
ppocr/modeling/heads/rec_ctc_head.py

@@ -32,6 +32,7 @@ class CTCPredict(object):
        self.char_num = params['char_num']
        self.encoder = SequenceEncoder(params)
        self.encoder_type = params['encoder_type']
        self.fc_decay = params.get("fc_decay", 0.0004)

    def __call__(self, inputs, labels=None, mode=None):
        encoder_features = self.encoder(inputs)

@@ -39,7 +40,7 @@ class CTCPredict(object):
            encoder_features = fluid.layers.concat(encoder_features, axis=1)
        name = "ctc_fc"
        para_attr, bias_attr = get_para_bias_attr(
-            l2_decay=0.0004, k=encoder_features.shape[1], name=name)
+            l2_decay=self.fc_decay, k=encoder_features.shape[1], name=name)
        predict = fluid.layers.fc(input=encoder_features,
                                  size=self.char_num + 1,
                                  param_attr=para_attr,
ppocr/modeling/losses/__init__.py (new file, mode 100755)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
ppocr/modeling/stns/__init__.py (new file, mode 100755)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
ppocr/optimizer.py

@@ -15,6 +15,11 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle.fluid as fluid
from paddle.fluid.regularizer import L2Decay

from ppocr.utils.utility import initial_logger

logger = initial_logger()


def AdamDecay(params, parameter_list=None):

@@ -28,9 +33,35 @@ def AdamDecay(params, parameter_list=None):
    base_lr = params['base_lr']
    beta1 = params['beta1']
    beta2 = params['beta2']
    l2_decay = params.get("l2_decay", 0.0)
    if 'decay' in params:
        supported_decay_mode = ["cosine_decay", "piecewise_decay"]
        params = params['decay']
        decay_mode = params['function']
        assert decay_mode in supported_decay_mode, \
            "Supported decay mode is {}, but got {}".format(
                supported_decay_mode, decay_mode)
        if decay_mode == "cosine_decay":
            step_each_epoch = params['step_each_epoch']
            total_epoch = params['total_epoch']
            base_lr = fluid.layers.cosine_decay(
                learning_rate=base_lr,
                step_each_epoch=step_each_epoch,
                epochs=total_epoch)
        elif decay_mode == "piecewise_decay":
            boundaries = params["boundaries"]
            decay_rate = params["decay_rate"]
            values = [
                base_lr * decay_rate**idx
                for idx in range(len(boundaries) + 1)
            ]
            base_lr = fluid.layers.piecewise_decay(boundaries, values)
    optimizer = fluid.optimizer.Adam(
        learning_rate=base_lr,
        beta1=beta1,
        beta2=beta2,
        regularization=L2Decay(regularization_coeff=l2_decay),
        parameter_list=parameter_list)
    return optimizer
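For piecewise_decay the learning-rate schedule is built from the boundaries plus a geometric decay_rate: one value per interval. Worked numbers with hypothetical config values:

base_lr = 0.001
boundaries = [10000, 20000]   # iteration counts where the rate drops
decay_rate = 0.1

values = [base_lr * decay_rate**idx for idx in range(len(boundaries) + 1)]
print(values)  # [0.001, 0.0001, 1e-05] (up to float rounding)
# iterations [0, 10000) run at 0.001, [10000, 20000) at 0.0001, then 1e-05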
ppocr/postprocess/db_postprocess.py

@@ -35,6 +35,7 @@ class DBPostProcess(object):
        self.thresh = params['thresh']
        self.box_thresh = params['box_thresh']
        self.max_candidates = params['max_candidates']
        self.unclip_ratio = params['unclip_ratio']
        self.min_size = 3

    def boxes_from_bitmap(self, pred, _bitmap, dest_width, dest_height):

@@ -46,9 +47,12 @@ class DBPostProcess(object):
        bitmap = _bitmap
        height, width = bitmap.shape

-        # img, contours, _ = cv2.findContours((bitmap * 255).astype(np.uint8), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
-        contours, _ = cv2.findContours((bitmap * 255).astype(np.uint8),
-                                       cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
+        outs = cv2.findContours((bitmap * 255).astype(np.uint8),
+                                cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
+        if len(outs) == 3:
+            img, contours, _ = outs[0], outs[1], outs[2]
+        elif len(outs) == 2:
+            contours, _ = outs[0], outs[1]

        num_contours = min(len(contours), self.max_candidates)
        boxes = np.zeros((num_contours, 4, 2), dtype=np.int16)

@@ -81,7 +85,8 @@ class DBPostProcess(object):
            scores[index] = score
        return boxes, scores

-    def unclip(self, box, unclip_ratio=1.5):
+    def unclip(self, box):
+        unclip_ratio = self.unclip_ratio
        poly = Polygon(box)
        distance = poly.area * unclip_ratio / poly.length
        offset = pyclipper.PyclipperOffset()

@@ -128,6 +133,7 @@ class DBPostProcess(object):
    def __call__(self, outs_dict, ratio_list):
        pred = outs_dict['maps']
        pred = pred[:, 0, :, :]
        segmentation = pred > self.thresh
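The findContours change is an OpenCV compatibility shim: OpenCV 3.x returns (image, contours, hierarchy) while 4.x returns (contours, hierarchy), so the code unpacks by arity. The same guard as a standalone helper (illustrative only):

import cv2
import numpy as np

def find_contours_compat(binary_img):
    # cv2.findContours returns 3 values on OpenCV 3.x and 2 on 4.x;
    # dispatch on the tuple length so either version works.
    outs = cv2.findContours(binary_img, cv2.RETR_LIST,
                            cv2.CHAIN_APPROX_SIMPLE)
    if len(outs) == 3:
        _, contours, _ = outs
    else:
        contours, _ = outs
    return contours

bitmap = np.zeros((100, 100), dtype=np.uint8)
cv2.rectangle(bitmap, (20, 20), (80, 80), 255, -1)  # one filled blob
print(len(find_contours_compat(bitmap)))            # 1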
ppocr/postprocess/east_postprocess.py

@@ -20,6 +20,12 @@ import numpy as np
from .locality_aware_nms import nms_locality
import cv2
import os
import sys

__dir__ = os.path.dirname(__file__)
sys.path.append(__dir__)
sys.path.append(os.path.join(__dir__, '..'))


class EASTPostPocess(object):
    """

@@ -30,6 +36,11 @@ class EASTPostPocess(object):
        self.score_thresh = params['score_thresh']
        self.cover_thresh = params['cover_thresh']
        self.nms_thresh = params['nms_thresh']
        # c++ la-nms is faster, but only support python 3.5
        self.is_python35 = False
        if sys.version_info.major == 3 and sys.version_info.minor == 5:
            self.is_python35 = True

    def restore_rectangle_quad(self, origin, geometry):
        """

@@ -66,7 +77,11 @@ class EASTPostPocess(object):
        boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32)
        boxes[:, :8] = text_box_restored.reshape((-1, 8))
        boxes[:, 8] = score_map[xy_text[:, 0], xy_text[:, 1]]
-        boxes = nms_locality(boxes.astype(np.float64), nms_thresh)
+        if self.is_python35:
+            import lanms
+            boxes = lanms.merge_quadrangle_n9(boxes, nms_thresh)
+        else:
+            boxes = nms_locality(boxes.astype(np.float64), nms_thresh)
        if boxes.shape[0] == 0:
            return []

        # Here we filter some low score boxes by the average score map,
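The version gate mirrors how the compiled lanms extension is selected at runtime; a standalone sketch of the same check:

import sys

# The C++ locality-aware NMS binding is only built for Python 3.5
# (see the lanms/ Makefile below); every other interpreter falls back
# to the pure-Python nms_locality implementation.
is_python35 = sys.version_info.major == 3 and sys.version_info.minor == 5
print("using", "lanms.merge_quadrangle_n9" if is_python35 else "nms_locality")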
ppocr/postprocess/lanms/.gitignore (new file, mode 100644)

adaptor.so
ppocr/postprocess/lanms/.ycm_extra_conf.py (new file, mode 100644)

#!/usr/bin/env python
#
# Copyright (C) 2014 Google Inc.
#
# This file is part of YouCompleteMe.
#
# YouCompleteMe is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# YouCompleteMe is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with YouCompleteMe. If not, see <http://www.gnu.org/licenses/>.

import os
import sys
import glob
import ycm_core

# These are the compilation flags that will be used in case there's no
# compilation database set (by default, one is not set).
# CHANGE THIS LIST OF FLAGS. YES, THIS IS THE DROID YOU HAVE BEEN LOOKING FOR.
sys.path.append(os.path.dirname(__file__))
BASE_DIR = os.path.dirname(os.path.realpath(__file__))

from plumbum.cmd import python_config

flags = [
    '-Wall',
    '-Wextra',
    '-Wnon-virtual-dtor',
    '-Winvalid-pch',
    '-Wno-unused-local-typedefs',
    '-std=c++11',
    '-x', 'c++',
    '-Iinclude',
] + python_config('--cflags').split()

# Set this to the absolute path to the folder (NOT the file!) containing the
# compile_commands.json file to use that instead of 'flags'. See here for
# more details: http://clang.llvm.org/docs/JSONCompilationDatabase.html
#
# Most projects will NOT need to set this to anything; you can just change the
# 'flags' list of compilation flags.
compilation_database_folder = ''

if os.path.exists(compilation_database_folder):
    database = ycm_core.CompilationDatabase(compilation_database_folder)
else:
    database = None

SOURCE_EXTENSIONS = ['.cpp', '.cxx', '.cc', '.c', '.m', '.mm']


def DirectoryOfThisScript():
    return os.path.dirname(os.path.abspath(__file__))


def MakeRelativePathsInFlagsAbsolute(flags, working_directory):
    if not working_directory:
        return list(flags)
    new_flags = []
    make_next_absolute = False
    path_flags = ['-isystem', '-I', '-iquote', '--sysroot=']
    for flag in flags:
        new_flag = flag

        if make_next_absolute:
            make_next_absolute = False
            if not flag.startswith('/'):
                new_flag = os.path.join(working_directory, flag)

        for path_flag in path_flags:
            if flag == path_flag:
                make_next_absolute = True
                break

            if flag.startswith(path_flag):
                path = flag[len(path_flag):]
                new_flag = path_flag + os.path.join(working_directory, path)
                break

        if new_flag:
            new_flags.append(new_flag)
    return new_flags


def IsHeaderFile(filename):
    extension = os.path.splitext(filename)[1]
    return extension in ['.h', '.hxx', '.hpp', '.hh']


def GetCompilationInfoForFile(filename):
    # The compilation_commands.json file generated by CMake does not have entries
    # for header files. So we do our best by asking the db for flags for a
    # corresponding source file, if any. If one exists, the flags for that file
    # should be good enough.
    if IsHeaderFile(filename):
        basename = os.path.splitext(filename)[0]
        for extension in SOURCE_EXTENSIONS:
            replacement_file = basename + extension
            if os.path.exists(replacement_file):
                compilation_info = database.GetCompilationInfoForFile(
                    replacement_file)
                if compilation_info.compiler_flags_:
                    return compilation_info
        return None
    return database.GetCompilationInfoForFile(filename)


# This is the entry point; this function is called by ycmd to produce flags for
# a file.
def FlagsForFile(filename, **kwargs):
    if database:
        # Bear in mind that compilation_info.compiler_flags_ does NOT return a
        # python list, but a "list-like" StringVec object
        compilation_info = GetCompilationInfoForFile(filename)
        if not compilation_info:
            return None

        final_flags = MakeRelativePathsInFlagsAbsolute(
            compilation_info.compiler_flags_,
            compilation_info.compiler_working_dir_)
    else:
        relative_to = DirectoryOfThisScript()
        final_flags = MakeRelativePathsInFlagsAbsolute(flags, relative_to)

    return {'flags': final_flags, 'do_cache': True}
ppocr/postprocess/lanms/Makefile (new file, mode 100644)

CXXFLAGS = -I include -std=c++11 -O3 $(shell python3-config --cflags)
LDFLAGS = $(shell python3-config --ldflags)

DEPS = lanms.h $(shell find include -xtype f)
CXX_SOURCES = adaptor.cpp include/clipper/clipper.cpp

LIB_SO = adaptor.so

$(LIB_SO): $(CXX_SOURCES) $(DEPS)
	$(CXX) -o $@ $(CXXFLAGS) $(LDFLAGS) $(CXX_SOURCES) --shared -fPIC

clean:
	rm -rf $(LIB_SO)
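Running make in ppocr/postprocess/lanms should produce adaptor.so next to the sources. A quick import check from Python, assuming the build toolchain (make, a C++11 compiler, python3-config) is available and the repository root is the working directory:

import subprocess
import sys

# Build the extension, then confirm the resulting adaptor.so is importable.
subprocess.check_call(["make"], cwd="ppocr/postprocess/lanms")
sys.path.insert(0, "ppocr/postprocess/lanms")
import adaptor  # the module name matches LIB_SO = adaptor.so
print("lanms adaptor built and importable")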