Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
crnn_pytorch
Commits
39b62820
Commit
39b62820
authored
Oct 09, 2024
by
dengjb
Browse files
Update create_dataset.py
parent
323a2615
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
8 additions
and
6 deletions
+8
-6
create_dataset.py
create_dataset.py
+8
-6
No files found.
create_dataset.py
View file @
39b62820
...
...
@@ -37,6 +37,8 @@ def createDataset(outputPath, imagePathList, labelList, lexiconList=None, checkV
checkValid : if true, check the validity of every image
"""
assert
(
len
(
imagePathList
)
==
len
(
labelList
))
if
not
os
.
path
.
exists
(
outputPath
):
os
.
makedirs
(
outputPath
)
nSamples
=
len
(
imagePathList
)
env
=
lmdb
.
open
(
outputPath
,
map_size
=
1099511627776
)
cache
=
{}
...
...
@@ -84,14 +86,14 @@ def parse_labels(path):
return
labels
,
image_path
if __name__ == '__main__':
    # Script entry point: parse the annotation lists with parse_labels() and
    # hand the resulting image/label lists to createDataset().
    train_labels, train_images = parse_labels("90kDICT32px/annotation_train.txt")

    # Read the lexicon, one entry per line, stripping only newline characters.
    # NOTE(review): lexicon_list is not used further down in the visible code.
    with open("90kDICT32px/lexicon.txt", 'r') as f:
        lexicon_list = [line.strip("\n") for line in f]

    output_path = "./synth90k"
    dataset_path = "90kDICT32px/"
    train_labels, train_images = parse_labels(dataset_path + "annotation_train.txt")
    # Fix: this call was spelled `paese_labels`, an undefined name that raised
    # NameError before any dataset could be written.
    val_labels, val_images = parse_labels(dataset_path + "annotation_val.txt")
    #print(train_labels)
    print("=" * 50)
    #print(train_images)
    createDataset("./output_dataset", train_images, train_labels)
    createDataset(output_path + '/train', train_images, train_labels)
    # Fix: the validation dataset was built from the training lists; use the
    # validation lists parsed above so the two splits actually differ.
    createDataset(output_path + "/val", val_images, val_labels)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment