Update create_dataset.py

39b62820 · dengjb · 323a2615 · 39b62820
Commit 39b62820 authored Oct 09, 2024 by dengjb
Hide whitespace changes
Inline Side-by-side

Showing with 8 additions and 6 deletions

create_dataset.py create_dataset.py +8 -6

No files found.
--- a/create_dataset.py
+++ b/create_dataset.py
@@ -37,6 +37,8 @@ def createDataset(outputPath, imagePathList, labelList, lexiconList=None, checkV
        checkValid    : if true, check the validity of every image
    """
    assert(len(imagePathList) == len(labelList))
+    if not os.path.exists(outputPath):
+        os.makedirs(outputPath)
    nSamples = len(imagePathList)
    env = lmdb.open(outputPath, map_size=1099511627776)
    cache = {}
@@ -84,14 +86,14 @@ def parse_labels(path):
    return labels, image_path

 if __name__ == '__main__':
-    train_labels, train_images = parse_labels("90kDICT32px/annotation_train.txt")
-    with open("90kDICT32px/lexicon.txt",'r') as f:
-        lexicon_list = []
-        for line in f.readlines():
-            lexicon_list.append(line.strip("\n"))
+    output_path = "./synth90k"  # root directory under which the train/val LMDB datasets are written
+    dataset_path = "90kDICT32px/"  # directory holding the annotation files; the trailing slash is needed by the concatenations below
+    train_labels, train_images = parse_labels(dataset_path + "annotation_train.txt")
+    val_labels, val_images = parse_labels(dataset_path + "annotation_val.txt")
   
     #print(train_labels)
     print("="*50)
     #print(train_images)
-    createDataset("./output_dataset",train_images,train_labels)
+    createDataset(output_path + '/train', train_images, train_labels)
+    createDataset(output_path + "/val", val_images, val_labels)  # build the validation set from the validation split, not the training data