Commit 39b62820 authored by dengjb

Update create_dataset.py

parent 323a2615
@@ -37,6 +37,8 @@ def createDataset(outputPath, imagePathList, labelList, lexiconList=None, checkV
         checkValid : if true, check the validity of every image
     """
     assert(len(imagePathList) == len(labelList))
+    if not os.path.exists(outputPath):
+        os.makedirs(outputPath)
     nSamples = len(imagePathList)
     env = lmdb.open(outputPath, map_size=1099511627776)
     cache = {}
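The added lines create the LMDB output directory before lmdb.open is called. A minimal equivalent sketch, assuming Python 3.2 or later, uses exist_ok, which also sidesteps the window between the os.path.exists() check and os.makedirs(); the helper name here is only illustrative:

import os

def ensure_output_dir(output_path):
    # Same effect as the added check-then-create pair: exist_ok=True simply
    # does nothing if the directory already exists.
    os.makedirs(output_path, exist_ok=True)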
@@ -84,14 +86,14 @@ def parse_labels(path):
     return labels, image_path
 
 if __name__ == '__main__':
-    train_labels, train_images = parse_labels("90kDICT32px/annotation_train.txt")
-    with open("90kDICT32px/lexicon.txt",'r') as f:
-        lexicon_list = []
-        for line in f.readlines():
-            lexicon_list.append(line.strip("\n"))
+    output_path = "./synth90k"
+    dataset_path = "90kDICT32px/"
+    train_labels, train_images = parse_labels(dataset_path + "annotation_train.txt")
+    val_labels, val_images = parse_labels(dataset_path + "annotation_val.txt")
     #print(train_labels)
     print("="*50)
     #print(train_images)
-    createDataset("./output_dataset",train_images,train_labels)
+    createDataset(output_path+'/train',train_images,train_labels)
+    createDataset(output_path+"/val",val_images,val_labels)