"src/diffusers/models/attention.py" did not exist on "d8287fcd1d94f33df55b54e2e1c140c2ab15b444"
Commit 39b62820 authored by dengjb
Browse files

Update create_dataset.py

parent 323a2615
...@@ -37,6 +37,8 @@ def createDataset(outputPath, imagePathList, labelList, lexiconList=None, checkV ...@@ -37,6 +37,8 @@ def createDataset(outputPath, imagePathList, labelList, lexiconList=None, checkV
checkValid : if true, check the validity of every image checkValid : if true, check the validity of every image
""" """
assert(len(imagePathList) == len(labelList)) assert(len(imagePathList) == len(labelList))
if not os.path.exists(outputPath):
os.makedirs(outputPath)
nSamples = len(imagePathList) nSamples = len(imagePathList)
env = lmdb.open(outputPath, map_size=1099511627776) env = lmdb.open(outputPath, map_size=1099511627776)
cache = {} cache = {}
...@@ -84,14 +86,14 @@ def parse_labels(path): ...@@ -84,14 +86,14 @@ def parse_labels(path):
return labels, image_path return labels, image_path
if __name__ == '__main__': if __name__ == '__main__':
train_labels, train_images = parse_labels("90kDICT32px/annotation_train.txt") output_path = "./synth90k"
with open("90kDICT32px/lexicon.txt",'r') as f: dataset_path = "90kDICT32px/"
lexicon_list = [] train_labels, train_images = parse_labels(dataset_path + "annotation_train.txt")
for line in f.readlines(): val_labels,val_images = paese_labels(dataset_path + "annotation_val.txt")
lexicon_list.append(line.strip("\n"))
#print(train_labels) #print(train_labels)
print("="*50) print("="*50)
#print(train_images) #print(train_images)
createDataset("./output_dataset",train_images,train_labels) createDataset(output_path+'/train',train_images,train_labels)
createDataset(output_path+"/val",train_images,train_labels)
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment