Commit 0cd2527c authored by WenmuZhou's avatar WenmuZhou
Browse files

Merge branch 'dygraph' of https://github.com/PaddlePaddle/PaddleOCR into update_requirements

parents df05d1fd 479b7672
...@@ -4,4 +4,5 @@ include README.md ...@@ -4,4 +4,5 @@ include README.md
recursive-include ppocr/utils *.txt utility.py logging.py recursive-include ppocr/utils *.txt utility.py logging.py
recursive-include ppocr/data/ *.py recursive-include ppocr/data/ *.py
recursive-include ppocr/postprocess *.py recursive-include ppocr/postprocess *.py
recursive-include tools/infer *.py recursive-include tools/infer *.py
\ No newline at end of file recursive-include ppocr/utils/e2e_utils/ *.py
\ No newline at end of file
...@@ -206,7 +206,7 @@ class MainWindow(QMainWindow, WindowMixin): ...@@ -206,7 +206,7 @@ class MainWindow(QMainWindow, WindowMixin):
self.labelList = EditInList() self.labelList = EditInList()
labelListContainer = QWidget() labelListContainer = QWidget()
labelListContainer.setLayout(listLayout) labelListContainer.setLayout(listLayout)
self.labelList.itemActivated.connect(self.labelSelectionChanged) #self.labelList.itemActivated.connect(self.labelSelectionChanged)
self.labelList.itemSelectionChanged.connect(self.labelSelectionChanged) self.labelList.itemSelectionChanged.connect(self.labelSelectionChanged)
self.labelList.clicked.connect(self.labelList.item_clicked) self.labelList.clicked.connect(self.labelList.item_clicked)
# Connect to itemChanged to detect checkbox changes. # Connect to itemChanged to detect checkbox changes.
...@@ -219,7 +219,7 @@ class MainWindow(QMainWindow, WindowMixin): ...@@ -219,7 +219,7 @@ class MainWindow(QMainWindow, WindowMixin):
################## detection box #################### ################## detection box ####################
self.BoxList = QListWidget() self.BoxList = QListWidget()
self.BoxList.itemActivated.connect(self.boxSelectionChanged) #self.BoxList.itemActivated.connect(self.boxSelectionChanged)
self.BoxList.itemSelectionChanged.connect(self.boxSelectionChanged) self.BoxList.itemSelectionChanged.connect(self.boxSelectionChanged)
self.BoxList.itemDoubleClicked.connect(self.editBox) self.BoxList.itemDoubleClicked.connect(self.editBox)
# Connect to itemChanged to detect checkbox changes. # Connect to itemChanged to detect checkbox changes.
...@@ -435,7 +435,7 @@ class MainWindow(QMainWindow, WindowMixin): ...@@ -435,7 +435,7 @@ class MainWindow(QMainWindow, WindowMixin):
######## New actions ####### ######## New actions #######
AutoRec = action(getStr('autoRecognition'), self.autoRecognition, AutoRec = action(getStr('autoRecognition'), self.autoRecognition,
'Ctrl+Shift+A', 'Auto', getStr('autoRecognition'), enabled=False) '', 'Auto', getStr('autoRecognition'), enabled=False)
reRec = action(getStr('reRecognition'), self.reRecognition, reRec = action(getStr('reRecognition'), self.reRecognition,
'Ctrl+Shift+R', 'reRec', getStr('reRecognition'), enabled=False) 'Ctrl+Shift+R', 'reRec', getStr('reRecognition'), enabled=False)
...@@ -444,7 +444,7 @@ class MainWindow(QMainWindow, WindowMixin): ...@@ -444,7 +444,7 @@ class MainWindow(QMainWindow, WindowMixin):
'Ctrl+R', 'reRec', getStr('singleRe'), enabled=False) 'Ctrl+R', 'reRec', getStr('singleRe'), enabled=False)
createpoly = action(getStr('creatPolygon'), self.createPolygon, createpoly = action(getStr('creatPolygon'), self.createPolygon,
'q', 'new', 'Creat Polygon', enabled=True) 'q', 'new', getStr('creatPolygon'), enabled=True)
saveRec = action(getStr('saveRec'), self.saveRecResult, saveRec = action(getStr('saveRec'), self.saveRecResult,
'', 'save', getStr('saveRec'), enabled=False) '', 'save', getStr('saveRec'), enabled=False)
...@@ -452,6 +452,12 @@ class MainWindow(QMainWindow, WindowMixin): ...@@ -452,6 +452,12 @@ class MainWindow(QMainWindow, WindowMixin):
saveLabel = action(getStr('saveLabel'), self.saveLabelFile, # saveLabel = action(getStr('saveLabel'), self.saveLabelFile, #
'Ctrl+S', 'save', getStr('saveLabel'), enabled=False) 'Ctrl+S', 'save', getStr('saveLabel'), enabled=False)
undoLastPoint = action(getStr("undoLastPoint"), self.canvas.undoLastPoint,
'Ctrl+Z', "undo", getStr("undoLastPoint"), enabled=False)
undo = action(getStr("undo"), self.undoShapeEdit,
'Ctrl+Z', "undo", getStr("undo"), enabled=False)
self.editButton.setDefaultAction(edit) self.editButton.setDefaultAction(edit)
self.newButton.setDefaultAction(create) self.newButton.setDefaultAction(create)
self.DelButton.setDefaultAction(deleteImg) self.DelButton.setDefaultAction(deleteImg)
...@@ -512,10 +518,11 @@ class MainWindow(QMainWindow, WindowMixin): ...@@ -512,10 +518,11 @@ class MainWindow(QMainWindow, WindowMixin):
zoom=zoom, zoomIn=zoomIn, zoomOut=zoomOut, zoomOrg=zoomOrg, zoom=zoom, zoomIn=zoomIn, zoomOut=zoomOut, zoomOrg=zoomOrg,
fitWindow=fitWindow, fitWidth=fitWidth, fitWindow=fitWindow, fitWidth=fitWidth,
zoomActions=zoomActions, saveLabel=saveLabel, zoomActions=zoomActions, saveLabel=saveLabel,
undo=undo, undoLastPoint=undoLastPoint,
fileMenuActions=( fileMenuActions=(
opendir, saveLabel, resetAll, quit), opendir, saveLabel, resetAll, quit),
beginner=(), advanced=(), beginner=(), advanced=(),
editMenu=(createpoly, edit, copy, delete,singleRere, editMenu=(createpoly, edit, copy, delete,singleRere,None, undo, undoLastPoint,
None, color1, self.drawSquaresOption), None, color1, self.drawSquaresOption),
beginnerContext=(create, edit, copy, delete, singleRere), beginnerContext=(create, edit, copy, delete, singleRere),
advancedContext=(createMode, editMode, edit, copy, advancedContext=(createMode, editMode, edit, copy,
...@@ -549,8 +556,13 @@ class MainWindow(QMainWindow, WindowMixin): ...@@ -549,8 +556,13 @@ class MainWindow(QMainWindow, WindowMixin):
self.labelDialogOption.setChecked(settings.get(SETTING_PAINT_LABEL, False)) self.labelDialogOption.setChecked(settings.get(SETTING_PAINT_LABEL, False))
self.labelDialogOption.triggered.connect(self.speedChoose) self.labelDialogOption.triggered.connect(self.speedChoose)
self.autoSaveOption = QAction(getStr('autoSaveMode'), self)
self.autoSaveOption.setCheckable(True)
self.autoSaveOption.setChecked(settings.get(SETTING_PAINT_LABEL, False))
self.autoSaveOption.triggered.connect(self.autoSaveFunc)
addActions(self.menus.file, addActions(self.menus.file,
(opendir, None, saveLabel, saveRec, None, resetAll, deleteImg, quit)) (opendir, None, saveLabel, saveRec, self.autoSaveOption, None, resetAll, deleteImg, quit))
addActions(self.menus.help, (showSteps, showInfo)) addActions(self.menus.help, (showSteps, showInfo))
addActions(self.menus.view, ( addActions(self.menus.view, (
...@@ -566,9 +578,9 @@ class MainWindow(QMainWindow, WindowMixin): ...@@ -566,9 +578,9 @@ class MainWindow(QMainWindow, WindowMixin):
# Custom context menu for the canvas widget: # Custom context menu for the canvas widget:
addActions(self.canvas.menus[0], self.actions.beginnerContext) addActions(self.canvas.menus[0], self.actions.beginnerContext)
addActions(self.canvas.menus[1], ( #addActions(self.canvas.menus[1], (
action('&Copy here', self.copyShape), # action('&Copy here', self.copyShape),
action('&Move here', self.moveShape))) # action('&Move here', self.moveShape)))
self.statusBar().showMessage('%s started.' % __appname__) self.statusBar().showMessage('%s started.' % __appname__)
...@@ -758,6 +770,7 @@ class MainWindow(QMainWindow, WindowMixin): ...@@ -758,6 +770,7 @@ class MainWindow(QMainWindow, WindowMixin):
self.canvas.setEditing(False) self.canvas.setEditing(False)
self.canvas.fourpoint = True self.canvas.fourpoint = True
self.actions.create.setEnabled(False) self.actions.create.setEnabled(False)
self.actions.undoLastPoint.setEnabled(True)
def toggleDrawingSensitive(self, drawing=True): def toggleDrawingSensitive(self, drawing=True):
"""In the middle of drawing, toggling between modes should be disabled.""" """In the middle of drawing, toggling between modes should be disabled."""
...@@ -866,10 +879,11 @@ class MainWindow(QMainWindow, WindowMixin): ...@@ -866,10 +879,11 @@ class MainWindow(QMainWindow, WindowMixin):
self.updateComboBox() self.updateComboBox()
def updateBoxlist(self): def updateBoxlist(self):
shape = self.canvas.selectedShape for shape in self.canvas.selectedShapes+[self.canvas.hShape]:
item = self.shapesToItemsbox[shape] # listitem item = self.shapesToItemsbox[shape] # listitem
text = [(int(p.x()), int(p.y())) for p in shape.points] text = [(int(p.x()), int(p.y())) for p in shape.points]
item.setText(str(text)) item.setText(str(text))
self.actions.undo.setEnabled(True)
self.setDirty() self.setDirty()
def indexTo5Files(self, currIndex): def indexTo5Files(self, currIndex):
...@@ -902,23 +916,27 @@ class MainWindow(QMainWindow, WindowMixin): ...@@ -902,23 +916,27 @@ class MainWindow(QMainWindow, WindowMixin):
if len(self.mImgList) > 0: if len(self.mImgList) > 0:
self.zoomWidget.setValue(self.zoomWidgetValue + self.imgsplider.value()) self.zoomWidget.setValue(self.zoomWidgetValue + self.imgsplider.value())
# React to canvas signals.
def shapeSelectionChanged(self, selected=False): def shapeSelectionChanged(self, selected_shapes):
if self._noSelectionSlot: self._noSelectionSlot = True
self._noSelectionSlot = False for shape in self.canvas.selectedShapes:
else: shape.selected = False
shape = self.canvas.selectedShape self.labelList.clearSelection()
if shape: self.canvas.selectedShapes = selected_shapes
self.shapesToItems[shape].setSelected(True) for shape in self.canvas.selectedShapes:
self.shapesToItemsbox[shape].setSelected(True) # ADD shape.selected = True
else: self.shapesToItems[shape].setSelected(True)
self.labelList.clearSelection() self.shapesToItemsbox[shape].setSelected(True)
self.actions.delete.setEnabled(selected)
self.actions.copy.setEnabled(selected) self.labelList.scrollToItem(self.currentItem()) # QAbstractItemView.EnsureVisible
self.actions.edit.setEnabled(selected) self.BoxList.scrollToItem(self.currentBox())
self.actions.shapeLineColor.setEnabled(selected)
self.actions.shapeFillColor.setEnabled(selected) self._noSelectionSlot = False
self.actions.singleRere.setEnabled(selected) n_selected = len(selected_shapes)
self.actions.singleRere.setEnabled(n_selected)
self.actions.delete.setEnabled(n_selected)
self.actions.copy.setEnabled(n_selected)
self.actions.edit.setEnabled(n_selected == 1)
def addLabel(self, shape): def addLabel(self, shape):
shape.paintLabel = self.displayLabelOption.isChecked() shape.paintLabel = self.displayLabelOption.isChecked()
...@@ -941,22 +959,23 @@ class MainWindow(QMainWindow, WindowMixin): ...@@ -941,22 +959,23 @@ class MainWindow(QMainWindow, WindowMixin):
action.setEnabled(True) action.setEnabled(True)
self.updateComboBox() self.updateComboBox()
def remLabel(self, shape): def remLabels(self, shapes):
if shape is None: if shapes is None:
# print('rm empty label') # print('rm empty label')
return return
item = self.shapesToItems[shape] for shape in shapes:
self.labelList.takeItem(self.labelList.row(item)) item = self.shapesToItems[shape]
del self.shapesToItems[shape] self.labelList.takeItem(self.labelList.row(item))
del self.itemsToShapes[item] del self.shapesToItems[shape]
self.updateComboBox() del self.itemsToShapes[item]
self.updateComboBox()
# ADD: # ADD:
item = self.shapesToItemsbox[shape] item = self.shapesToItemsbox[shape]
self.BoxList.takeItem(self.BoxList.row(item)) self.BoxList.takeItem(self.BoxList.row(item))
del self.shapesToItemsbox[shape] del self.shapesToItemsbox[shape]
del self.itemsToShapesbox[item] del self.itemsToShapesbox[item]
self.updateComboBox() self.updateComboBox()
def loadLabels(self, shapes): def loadLabels(self, shapes):
s = [] s = []
...@@ -1001,7 +1020,7 @@ class MainWindow(QMainWindow, WindowMixin): ...@@ -1001,7 +1020,7 @@ class MainWindow(QMainWindow, WindowMixin):
item.setText(str([(int(p.x()), int(p.y())) for p in shape.points])) item.setText(str([(int(p.x()), int(p.y())) for p in shape.points]))
self.updateComboBox() self.updateComboBox()
def updateComboBox(self): def updateComboBox(self): # TODO:貌似没用
# Get the unique labels and add them to the Combobox. # Get the unique labels and add them to the Combobox.
itemsTextList = [str(self.labelList.item(i).text()) for i in range(self.labelList.count())] itemsTextList = [str(self.labelList.item(i).text()) for i in range(self.labelList.count())]
...@@ -1054,26 +1073,38 @@ class MainWindow(QMainWindow, WindowMixin): ...@@ -1054,26 +1073,38 @@ class MainWindow(QMainWindow, WindowMixin):
return False return False
def copySelectedShape(self): def copySelectedShape(self):
self.addLabel(self.canvas.copySelectedShape()) for shape in self.canvas.copySelectedShape():
self.addLabel(shape)
# fix copy and delete # fix copy and delete
self.shapeSelectionChanged(True) #self.shapeSelectionChanged(True)
def labelSelectionChanged(self): def labelSelectionChanged(self):
item = self.currentItem() if self._noSelectionSlot:
self.labelList.scrollToItem(item, QAbstractItemView.EnsureVisible) return
if item and self.canvas.editing(): if self.canvas.editing():
self._noSelectionSlot = True selected_shapes = []
self.canvas.selectShape(self.itemsToShapes[item]) for item in self.labelList.selectedItems():
shape = self.itemsToShapes[item] selected_shapes.append(self.itemsToShapes[item])
if selected_shapes:
self.canvas.selectShapes(selected_shapes)
else:
self.canvas.deSelectShape()
def boxSelectionChanged(self): def boxSelectionChanged(self):
item = self.currentBox() if self._noSelectionSlot:
self.BoxList.scrollToItem(item, QAbstractItemView.EnsureVisible) #self.BoxList.scrollToItem(self.currentBox(), QAbstractItemView.PositionAtCenter)
if item and self.canvas.editing(): return
self._noSelectionSlot = True if self.canvas.editing():
self.canvas.selectShape(self.itemsToShapesbox[item]) selected_shapes = []
shape = self.itemsToShapesbox[item] for item in self.BoxList.selectedItems():
selected_shapes.append(self.itemsToShapesbox[item])
if selected_shapes:
self.canvas.selectShapes(selected_shapes)
else:
self.canvas.deSelectShape()
def labelItemChanged(self, item): def labelItemChanged(self, item):
shape = self.itemsToShapes[item] shape = self.itemsToShapes[item]
...@@ -1113,6 +1144,8 @@ class MainWindow(QMainWindow, WindowMixin): ...@@ -1113,6 +1144,8 @@ class MainWindow(QMainWindow, WindowMixin):
if self.beginner(): # Switch to edit mode. if self.beginner(): # Switch to edit mode.
self.canvas.setEditing(True) self.canvas.setEditing(True)
self.actions.create.setEnabled(True) self.actions.create.setEnabled(True)
self.actions.undoLastPoint.setEnabled(False)
self.actions.undo.setEnabled(True)
else: else:
self.actions.editMode.setEnabled(True) self.actions.editMode.setEnabled(True)
self.setDirty() self.setDirty()
...@@ -1548,6 +1581,7 @@ class MainWindow(QMainWindow, WindowMixin): ...@@ -1548,6 +1581,7 @@ class MainWindow(QMainWindow, WindowMixin):
self.fileListWidget.insertItem(int(currIndex), item) self.fileListWidget.insertItem(int(currIndex), item)
self.openNextImg() self.openNextImg()
self.actions.saveRec.setEnabled(True) self.actions.saveRec.setEnabled(True)
self.actions.saveLabel.setEnabled(True)
elif mode == 'Auto': elif mode == 'Auto':
if annotationFilePath and self.saveLabels(annotationFilePath, mode=mode): if annotationFilePath and self.saveLabels(annotationFilePath, mode=mode):
...@@ -1643,7 +1677,8 @@ class MainWindow(QMainWindow, WindowMixin): ...@@ -1643,7 +1677,8 @@ class MainWindow(QMainWindow, WindowMixin):
self.setDirty() self.setDirty()
def deleteSelectedShape(self): def deleteSelectedShape(self):
self.remLabel(self.canvas.deleteSelected()) self.remLabels(self.canvas.deleteSelected())
self.actions.undo.setEnabled(True)
self.setDirty() self.setDirty()
if self.noShapes(): if self.noShapes():
for action in self.actions.onShapesPresent: for action in self.actions.onShapesPresent:
...@@ -1653,7 +1688,7 @@ class MainWindow(QMainWindow, WindowMixin): ...@@ -1653,7 +1688,7 @@ class MainWindow(QMainWindow, WindowMixin):
color = self.colorDialog.getColor(self.lineColor, u'Choose line color', color = self.colorDialog.getColor(self.lineColor, u'Choose line color',
default=DEFAULT_LINE_COLOR) default=DEFAULT_LINE_COLOR)
if color: if color:
self.canvas.selectedShape.line_color = color for shape in self.canvas.selectedShapes: shape.line_color = color
self.canvas.update() self.canvas.update()
self.setDirty() self.setDirty()
...@@ -1661,7 +1696,7 @@ class MainWindow(QMainWindow, WindowMixin): ...@@ -1661,7 +1696,7 @@ class MainWindow(QMainWindow, WindowMixin):
color = self.colorDialog.getColor(self.fillColor, u'Choose fill color', color = self.colorDialog.getColor(self.fillColor, u'Choose fill color',
default=DEFAULT_FILL_COLOR) default=DEFAULT_FILL_COLOR)
if color: if color:
self.canvas.selectedShape.fill_color = color for shape in self.canvas.selectedShapes: shape.fill_color = color
self.canvas.update() self.canvas.update()
self.setDirty() self.setDirty()
...@@ -1785,25 +1820,25 @@ class MainWindow(QMainWindow, WindowMixin): ...@@ -1785,25 +1820,25 @@ class MainWindow(QMainWindow, WindowMixin):
def singleRerecognition(self): def singleRerecognition(self):
img = cv2.imread(self.filePath) img = cv2.imread(self.filePath)
shape = self.canvas.selectedShape for shape in self.canvas.selectedShapes:
box = [[int(p.x()), int(p.y())] for p in shape.points] box = [[int(p.x()), int(p.y())] for p in shape.points]
assert len(box) == 4 assert len(box) == 4
img_crop = get_rotate_crop_image(img, np.array(box, np.float32)) img_crop = get_rotate_crop_image(img, np.array(box, np.float32))
if img_crop is None: if img_crop is None:
msg = 'Can not recognise the detection box in ' + self.filePath + '. Please change manually' msg = 'Can not recognise the detection box in ' + self.filePath + '. Please change manually'
QMessageBox.information(self, "Information", msg) QMessageBox.information(self, "Information", msg)
return return
result = self.ocr.ocr(img_crop, cls=True, det=False) result = self.ocr.ocr(img_crop, cls=True, det=False)
if result[0][0] != '': if result[0][0] != '':
result.insert(0, box) result.insert(0, box)
print('result in reRec is ', result) print('result in reRec is ', result)
if result[1][0] == shape.label: if result[1][0] == shape.label:
print('label no change') print('label no change')
else: else:
shape.label = result[1][0] shape.label = result[1][0]
self.singleLabel(shape) self.singleLabel(shape)
self.setDirty() self.setDirty()
print(box) print(box)
def autolcm(self): def autolcm(self):
vbox = QVBoxLayout() vbox = QVBoxLayout()
...@@ -1914,8 +1949,8 @@ class MainWindow(QMainWindow, WindowMixin): ...@@ -1914,8 +1949,8 @@ class MainWindow(QMainWindow, WindowMixin):
self.savePPlabel() self.savePPlabel()
def saveRecResult(self): def saveRecResult(self):
if None in [self.PPlabelpath, self.PPlabel, self.fileStatedict]: if {} in [self.PPlabelpath, self.PPlabel, self.fileStatedict]:
QMessageBox.information(self, "Information", "Save file first") QMessageBox.information(self, "Information", "Check the image first")
return return
rec_gt_dir = os.path.dirname(self.PPlabelpath) + '/rec_gt.txt' rec_gt_dir = os.path.dirname(self.PPlabelpath) + '/rec_gt.txt'
...@@ -1953,6 +1988,33 @@ class MainWindow(QMainWindow, WindowMixin): ...@@ -1953,6 +1988,33 @@ class MainWindow(QMainWindow, WindowMixin):
self.canvas.newShape.disconnect() self.canvas.newShape.disconnect()
self.canvas.newShape.connect(partial(self.newShape, False)) self.canvas.newShape.connect(partial(self.newShape, False))
def autoSaveFunc(self):
if self.autoSaveOption.isChecked():
self.autoSaveNum = 1 # Real auto_Save
try:
self.saveLabelFile()
except:
pass
print('The program will automatically save once after confirming an image')
else:
self.autoSaveNum = 5 # Used for backup
print('The program will automatically save once after confirming 5 images (default)')
def undoShapeEdit(self):
self.canvas.restoreShape()
self.labelList.clear()
self.BoxList.clear()
self.loadShapes(self.canvas.shapes)
self.actions.undo.setEnabled(self.canvas.isShapeRestorable)
def loadShapes(self, shapes, replace=True):
self._noSelectionSlot = True
for shape in shapes:
self.addLabel(shape)
self.labelList.clearSelection()
self._noSelectionSlot = False
self.canvas.loadShapes(shapes, replace=replace)
def inverted(color): def inverted(color):
return QColor(*[255 - v for v in color.getRgb()]) return QColor(*[255 - v for v in color.getRgb()])
......
...@@ -8,6 +8,10 @@ PPOCRLabel is a semi-automatic graphic annotation tool suitable for OCR field, w ...@@ -8,6 +8,10 @@ PPOCRLabel is a semi-automatic graphic annotation tool suitable for OCR field, w
### Recent Update ### Recent Update
- 2021.2.5: New batch processing and undo functions (by [Evezerest](https://github.com/Evezerest)):
- Batch processing function: Press and hold the Ctrl key to select the box, you can move, copy, and delete in batches.
- Undo function: In the process of drawing a four-point label box or after editing the box, press Ctrl+Z to undo the previous operation.
- Fix image rotation and size problems, optimize the process of editing the mark frame (by [ninetailskim](https://github.com/ninetailskim)[edencfc](https://github.com/edencfc)).
- 2021.1.11: Optimize the labeling experience (by [edencfc](https://github.com/edencfc)), - 2021.1.11: Optimize the labeling experience (by [edencfc](https://github.com/edencfc)),
- Users can choose whether to pop up the label input dialog after drawing the detection box in "View - Pop-up Label Input Dialog". - Users can choose whether to pop up the label input dialog after drawing the detection box in "View - Pop-up Label Input Dialog".
- The recognition result scrolls synchronously when users click related detection box. - The recognition result scrolls synchronously when users click related detection box.
...@@ -16,7 +20,6 @@ PPOCRLabel is a semi-automatic graphic annotation tool suitable for OCR field, w ...@@ -16,7 +20,6 @@ PPOCRLabel is a semi-automatic graphic annotation tool suitable for OCR field, w
### TODO: ### TODO:
- Lock box mode: For the same scene data, the size and position of the locked detection box can be transferred between different pictures. - Lock box mode: For the same scene data, the size and position of the locked detection box can be transferred between different pictures.
- Experience optimization: Add undo, batch operation include move, copy, delete and so on, optimize the annotation process.
## Installation ## Installation
...@@ -76,12 +79,11 @@ python3 PPOCRLabel.py ...@@ -76,12 +79,11 @@ python3 PPOCRLabel.py
7. Double click the result in 'recognition result' list to manually change inaccurate recognition results. 7. Double click the result in 'recognition result' list to manually change inaccurate recognition results.
8. Click "Check", the image status will switch to "√",then the program automatically jump to the next(The results will not be written directly to the file at this time). 8. Click "Check", the image status will switch to "√",then the program automatically jump to the next.
9. Click "Delete Image" and the image will be deleted to the recycle bin. 9. Click "Delete Image" and the image will be deleted to the recycle bin.
10. Labeling result: the user can save manually through the menu "File - Save Label", while the program will also save automatically after every 5 images confirmed by the user.the manually checked label will be stored in *Label.txt* under the opened picture folder. 10. Labeling result: the user can save manually through the menu "File - Save Label", while the program will also save automatically if "File - Auto Save Label Mode" is selected. The manually checked label will be stored in *Label.txt* under the opened picture folder. Click "PaddleOCR"-"Save Recognition Results" in the menu bar, the recognition training data of such pictures will be saved in the *crop_img* folder, and the recognition label will be saved in *rec_gt.txt*<sup>[4]</sup>.
Click "PaddleOCR"-"Save Recognition Results" in the menu bar, the recognition training data of such pictures will be saved in the *crop_img* folder, and the recognition label will be saved in *rec_gt.txt*<sup>[4]</sup>.
### Note ### Note
...@@ -89,8 +91,7 @@ python3 PPOCRLabel.py ...@@ -89,8 +91,7 @@ python3 PPOCRLabel.py
[2] The image status indicates whether the user has saved the image manually. If it has not been saved manually it is "X", otherwise it is "√", PPOCRLabel will not relabel pictures with a status of "√". [2] The image status indicates whether the user has saved the image manually. If it has not been saved manually it is "X", otherwise it is "√", PPOCRLabel will not relabel pictures with a status of "√".
[3] After clicking "Re-recognize", the model will overwrite ALL recognition results in the picture. [3] After clicking "Re-recognize", the model will overwrite ALL recognition results in the picture. Therefore, if the recognition result has been manually changed before, it may change after re-recognition.
Therefore, if the recognition result has been manually changed before, it may change after re-recognition.
[4] The files produced by PPOCRLabel can be found under the opened picture folder including the following, please do not manually change the contents, otherwise it will cause the program to be abnormal. [4] The files produced by PPOCRLabel can be found under the opened picture folder including the following, please do not manually change the contents, otherwise it will cause the program to be abnormal.
...@@ -106,22 +107,23 @@ Therefore, if the recognition result has been manually changed before, it may ch ...@@ -106,22 +107,23 @@ Therefore, if the recognition result has been manually changed before, it may ch
### Shortcut keys ### Shortcut keys
| Shortcut keys | Description | | Shortcut keys | Description |
| ---------------- | ------------------------------------------------ | | ------------------------ | ------------------------------------------------ |
| Ctrl + shift + A | Automatically label all unchecked images | | Ctrl + Shift + R | Re-recognize all the labels of the current image |
| Ctrl + shift + R | Re-recognize all the labels of the current image | | W | Create a rect box |
| W | Create a rect box | | Q | Create a four-points box |
| Q | Create a four-points box | | Ctrl + E | Edit label of the selected box |
| Ctrl + E | Edit label of the selected box | | Ctrl + R | Re-recognize the selected box |
| Ctrl + R | Re-recognize the selected box | | Ctrl + C | Copy and paste the selected box |
| Backspace | Delete the selected box | | Ctrl + Left Mouse Button | Multi select the label box |
| Ctrl + V | Check image | | Backspace | Delete the selected box |
| Ctrl + Shift + d | Delete image | | Ctrl + V | Check image |
| D | Next image | | Ctrl + Shift + d | Delete image |
| A | Previous image | | D | Next image |
| Ctrl++ | Zoom in | | A | Previous image |
| Ctrl-- | Zoom out | | Ctrl++ | Zoom in |
| ↑→↓← | Move selected box | | Ctrl-- | Zoom out |
| ↑→↓← | Move selected box |
### Built-in Model ### Built-in Model
...@@ -136,7 +138,7 @@ Therefore, if the recognition result has been manually changed before, it may ch ...@@ -136,7 +138,7 @@ Therefore, if the recognition result has been manually changed before, it may ch
PPOCRLabel supports three ways to save Label.txt PPOCRLabel supports three ways to save Label.txt
- Automatically save: When it detects that the user has manually checked 5 pictures, the program automatically writes the annotations into Label.txt. The user can change the value of ``self.autoSaveNum`` in ``PPOCRLabel.py`` to set the number of images to be automatically saved after confirmation. - Automatically save: After selecting "File - Auto Save Label Mode", the program will automatically write the annotations into Label.txt every time the user confirms an image. If this option is not turned on, it will be automatically saved after detecting that the user has manually checked 5 images.
- Manual save: Click "File-Save Marking Results" to manually save the label. - Manual save: Click "File-Save Marking Results" to manually save the label.
- Close application save - Close application save
...@@ -167,4 +169,4 @@ For some data that are difficult to recognize, the recognition results will not ...@@ -167,4 +169,4 @@ For some data that are difficult to recognize, the recognition results will not
### Related ### Related
1.[Tzutalin. LabelImg. Git code (2015)](https://github.com/tzutalin/labelImg) 1.[Tzutalin. LabelImg. Git code (2015)](https://github.com/tzutalin/labelImg)
\ No newline at end of file
...@@ -8,6 +8,10 @@ PPOCRLabel是一款适用于OCR领域的半自动化图形标注工具,内置P ...@@ -8,6 +8,10 @@ PPOCRLabel是一款适用于OCR领域的半自动化图形标注工具,内置P
#### 近期更新 #### 近期更新
- 2021.2.5:新增批处理与撤销功能(by [Evezerest](https://github.com/Evezerest))
- 批处理功能:按住Ctrl键选择标记框后可批量移动、复制、删除。
- 撤销功能:在绘制四点标注框过程中或对框进行编辑操作后,按下Ctrl+Z可撤销上一部操作。
- 修复图像旋转和尺寸问题、优化编辑标记框过程(by [ninetailskim](https://github.com/ninetailskim)[edencfc](https://github.com/edencfc)
- 2021.1.11:优化标注体验(by [edencfc](https://github.com/edencfc)): - 2021.1.11:优化标注体验(by [edencfc](https://github.com/edencfc)):
- 用户可在“视图 - 弹出标记输入框”选择在画完检测框后标记输入框是否弹出。 - 用户可在“视图 - 弹出标记输入框”选择在画完检测框后标记输入框是否弹出。
- 识别结果与检测框同步滚动。 - 识别结果与检测框同步滚动。
...@@ -17,9 +21,8 @@ PPOCRLabel是一款适用于OCR领域的半自动化图形标注工具,内置P ...@@ -17,9 +21,8 @@ PPOCRLabel是一款适用于OCR领域的半自动化图形标注工具,内置P
#### 尽请期待 #### 尽请期待
- 锁定框模式:针对同一场景数据,被锁定的检测框的大小与位置能在不同图片之间传递。 - 锁定框模式:针对同一场景数据,被锁定的检测框的大小与位置能在不同图片之间传递。
- 体验优化:增加撤销操作,批量移动、复制、删除等功能。优化标注流程。
如果您对以上内容感兴趣或对完善工具有不一样的想法,欢迎加入我们的队伍与我们共同开发 如果您对以上内容感兴趣或对完善工具有不一样的想法,欢迎加入我们的SIG队伍与我们共同开发。可以在[此处](https://github.com/PaddlePaddle/PaddleOCR/issues/1728)完成问卷和前置任务,经过我们确认相关内容后即可正式加入,享受SIG福利,共同为OCR开源事业贡献(特别说明:针对PPOCRLabel的改进也属于PaddleOCR前置任务)
## 安装 ## 安装
...@@ -65,9 +68,9 @@ python3 PPOCRLabel.py --lang ch ...@@ -65,9 +68,9 @@ python3 PPOCRLabel.py --lang ch
5. 标记框绘制完成后,用户点击 “确认”,检测框会先被预分配一个 “待识别” 标签。 5. 标记框绘制完成后,用户点击 “确认”,检测框会先被预分配一个 “待识别” 标签。
6. 重新识别:将图片中的所有检测画绘制/调整完成后,点击 “重新识别”,PPOCR模型会对当前图片中的**所有检测框**重新识别<sup>[3]</sup> 6. 重新识别:将图片中的所有检测画绘制/调整完成后,点击 “重新识别”,PPOCR模型会对当前图片中的**所有检测框**重新识别<sup>[3]</sup>
7. 内容更改:双击识别结果,对不准确的识别结果进行手动更改。 7. 内容更改:双击识别结果,对不准确的识别结果进行手动更改。
8. 确认标记:点击 “确认”,图片状态切换为 “√”,跳转至下一张(此时不会直接将结果写入文件) 8. **确认标记**:点击 “确认”,图片状态切换为 “√”,跳转至下一张。
9. 删除:点击 “删除图像”,图片将会被删除至回收站。 9. 删除:点击 “删除图像”,图片将会被删除至回收站。
10. 保存结果:用户可以通过菜单中“文件-保存标记结果”手动保存,同时程序也会在用户每确认5张图片后自动保存一次。手动确认过的标记将会被存放在所打开图片文件夹下的*Label.txt*中。在菜单栏点击 “文件” - "保存识别结果"后,会将此类图片的识别训练数据保存在*crop_img*文件夹下,识别标签保存在*rec_gt.txt*<sup>[4]</sup> 10. 保存结果:用户可以通过菜单中“文件-保存标记结果”手动保存,同时也可以点击“文件 - 自动保存标记结果”开启自动保存。手动确认过的标记将会被存放在所打开图片文件夹下的*Label.txt*中。在菜单栏点击 “文件” - "保存识别结果"后,会将此类图片的识别训练数据保存在*crop_img*文件夹下,识别标签保存在*rec_gt.txt*<sup>[4]</sup>
### 注意 ### 注意
...@@ -93,12 +96,13 @@ python3 PPOCRLabel.py --lang ch ...@@ -93,12 +96,13 @@ python3 PPOCRLabel.py --lang ch
| 快捷键 | 说明 | | 快捷键 | 说明 |
| ---------------- | ---------------------------- | | ---------------- | ---------------------------- |
| Ctrl + shift + A | 自动标注所有未确认过的图片 |
| Ctrl + shift + R | 对当前图片的所有标记重新识别 | | Ctrl + shift + R | 对当前图片的所有标记重新识别 |
| W | 新建矩形框 | | W | 新建矩形框 |
| Q | 新建四点框 | | Q | 新建四点框 |
| Ctrl + E | 编辑所选框标签 | | Ctrl + E | 编辑所选框标签 |
| Ctrl + R | 重新识别所选标记 | | Ctrl + R | 重新识别所选标记 |
| Ctrl + C | 复制并粘贴选中的标记框 |
| Ctrl + 鼠标左键 | 多选标记框 |
| Backspace | 删除所选框 | | Backspace | 删除所选框 |
| Ctrl + V | 确认本张图片标记 | | Ctrl + V | 确认本张图片标记 |
| Ctrl + Shift + d | 删除本张图片 | | Ctrl + Shift + d | 删除本张图片 |
...@@ -120,7 +124,7 @@ python3 PPOCRLabel.py --lang ch ...@@ -120,7 +124,7 @@ python3 PPOCRLabel.py --lang ch
PPOCRLabel支持三种保存方式: PPOCRLabel支持三种保存方式:
- 程序自动保存:当检测到用户手动确认过5张图片,程序自动将标记结果写入Label.txt中。其中用户可通过更改```PPOCRLabel.py```中的```self.autoSaveNum```的数值设置确认几张图片后进行自动保存。 - 自动保存:点击“文件 - 自动保存标记结果”后,用户每确认过张图片,程序自动将标记结果写入Label.txt中。若未开启此选项,则检测到用户手动确认过5张图片后进行自动保存。
- 手动保存:点击“文件 - 保存标记结果”手动保存标记。 - 手动保存:点击“文件 - 保存标记结果”手动保存标记。
- 关闭应用程序保存 - 关闭应用程序保存
......
This diff is collapsed.
This diff is collapsed.
...@@ -82,7 +82,7 @@ class Shape(object): ...@@ -82,7 +82,7 @@ class Shape(object):
return False return False
def addPoint(self, point): def addPoint(self, point):
if not self.reachMaxPoints(): if not self.reachMaxPoints(): # 4个点时发出close信号
self.points.append(point) self.points.append(point)
def popPoint(self): def popPoint(self):
......
...@@ -96,4 +96,7 @@ hideBox=隐藏所有标注 ...@@ -96,4 +96,7 @@ hideBox=隐藏所有标注
showBox=显示所有标注 showBox=显示所有标注
saveLabel=保存标记结果 saveLabel=保存标记结果
singleRe=重识别此区块 singleRe=重识别此区块
labelDialogOption=弹出标记输入框 labelDialogOption=弹出标记输入框
\ No newline at end of file undo=撤销
undoLastPoint=撤销上个点
autoSaveMode=自动保存标记结果
\ No newline at end of file
...@@ -96,4 +96,7 @@ hideBox=Hide All Box ...@@ -96,4 +96,7 @@ hideBox=Hide All Box
showBox=Show All Box showBox=Show All Box
saveLabel=Save Label saveLabel=Save Label
singleRe=Re-recognition RectBox singleRe=Re-recognition RectBox
labelDialogOption=Pop-up Label Input Dialog labelDialogOption=Pop-up Label Input Dialog
\ No newline at end of file undo=Undo
undoLastPoint=Undo Last Point
autoSaveMode=Auto Save Label Mode
\ No newline at end of file
...@@ -42,7 +42,7 @@ The above pictures are the visualizations of the general ppocr_server model. For ...@@ -42,7 +42,7 @@ The above pictures are the visualizations of the general ppocr_server model. For
- Scan the QR code below with your Wechat, you can access to official technical exchange group. Look forward to your participation. - Scan the QR code below with your Wechat, you can access to official technical exchange group. Look forward to your participation.
<div align="center"> <div align="center">
<img src="./doc/joinus.PNG" width = "200" height = "200" /> <img src="https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/release/2.0/doc/joinus.PNG" width = "200" height = "200" />
</div> </div>
...@@ -93,7 +93,7 @@ For a new language request, please refer to [Guideline for new language_requests ...@@ -93,7 +93,7 @@ For a new language request, please refer to [Guideline for new language_requests
- [Quick Inference Based on PIP](./doc/doc_en/whl_en.md) - [Quick Inference Based on PIP](./doc/doc_en/whl_en.md)
- [Python Inference](./doc/doc_en/inference_en.md) - [Python Inference](./doc/doc_en/inference_en.md)
- [C++ Inference](./deploy/cpp_infer/readme_en.md) - [C++ Inference](./deploy/cpp_infer/readme_en.md)
- [Serving](./deploy/hubserving/readme_en.md) - [Serving](./deploy/pdserving/README.md)
- [Mobile](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/deploy/lite/readme_en.md) - [Mobile](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/deploy/lite/readme_en.md)
- [Benchmark](./doc/doc_en/benchmark_en.md) - [Benchmark](./doc/doc_en/benchmark_en.md)
- Data Annotation and Synthesis - Data Annotation and Synthesis
......
...@@ -8,9 +8,10 @@ PaddleOCR同时支持动态图与静态图两种编程范式 ...@@ -8,9 +8,10 @@ PaddleOCR同时支持动态图与静态图两种编程范式
- 静态图版本:develop分支 - 静态图版本:develop分支
**近期更新** **近期更新**
- 【预告】 PaddleOCR研发团队对最新发版内容技术深入解读,4月13日晚上19:00,[直播地址](https://live.bilibili.com/21689802)
- 2021.4.8 release 2.1版本,新增AAAI 2021论文[端到端识别算法PGNet](./doc/doc_ch/pgnet.md)开源,[多语言模型](./doc/doc_ch/multi_languages.md)支持种类增加到80+。
- 2021.2.1 [FAQ](./doc/doc_ch/FAQ.md)新增5个高频问题,总数162个,每周一都会更新,欢迎大家持续关注。 - 2021.2.1 [FAQ](./doc/doc_ch/FAQ.md)新增5个高频问题,总数162个,每周一都会更新,欢迎大家持续关注。
- 2021.1.26,28,29 PaddleOCR官方研发团队带来技术深入解读三日直播课,1月26日、28日、29日晚上19:30,[直播地址](https://live.bilibili.com/21689802) - 2021.1.21 更新多语言识别模型,目前支持语种超过27种,包括中文简体、中文繁体、英文、法文、德文、韩文、日文、意大利文、西班牙文、葡萄牙文、俄罗斯文、阿拉伯文等,后续计划可以参考[多语言研发计划](https://github.com/PaddlePaddle/PaddleOCR/issues/1048)
- 2021.1.21 更新多语言识别模型,目前支持语种超过27种,[多语言模型下载](./doc/doc_ch/models_list.md),包括中文简体、中文繁体、英文、法文、德文、韩文、日文、意大利文、西班牙文、葡萄牙文、俄罗斯文、阿拉伯文等,后续计划可以参考[多语言研发计划](https://github.com/PaddlePaddle/PaddleOCR/issues/1048)
- 2020.12.15 更新数据合成工具[Style-Text](./StyleText/README_ch.md),可以批量合成大量与目标场景类似的图像,在多个场景验证,效果明显提升。 - 2020.12.15 更新数据合成工具[Style-Text](./StyleText/README_ch.md),可以批量合成大量与目标场景类似的图像,在多个场景验证,效果明显提升。
- 2020.11.25 更新半自动标注工具[PPOCRLabel](./PPOCRLabel/README_ch.md),辅助开发者高效完成标注任务,输出格式与PP-OCR训练任务完美衔接。 - 2020.11.25 更新半自动标注工具[PPOCRLabel](./PPOCRLabel/README_ch.md),辅助开发者高效完成标注任务,输出格式与PP-OCR训练任务完美衔接。
- 2020.9.22 更新PP-OCR技术文章,https://arxiv.org/abs/2009.09941 - 2020.9.22 更新PP-OCR技术文章,https://arxiv.org/abs/2009.09941
...@@ -46,7 +47,7 @@ PaddleOCR同时支持动态图与静态图两种编程范式 ...@@ -46,7 +47,7 @@ PaddleOCR同时支持动态图与静态图两种编程范式
- 微信扫描二维码加入官方交流群,获得更高效的问题答疑,与各行各业开发者充分交流,期待您的加入。 - 微信扫描二维码加入官方交流群,获得更高效的问题答疑,与各行各业开发者充分交流,期待您的加入。
<div align="center"> <div align="center">
<img src="./doc/joinus.PNG" width = "200" height = "200" /> <img src="https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/release/2.0/doc/joinus.PNG" width = "200" height = "200" />
</div> </div>
## 快速体验 ## 快速体验
...@@ -74,11 +75,13 @@ PaddleOCR同时支持动态图与静态图两种编程范式 ...@@ -74,11 +75,13 @@ PaddleOCR同时支持动态图与静态图两种编程范式
## 文档教程 ## 文档教程
- [快速安装](./doc/doc_ch/installation.md) - [快速安装](./doc/doc_ch/installation.md)
- [中文OCR模型快速使用](./doc/doc_ch/quickstart.md) - [中文OCR模型快速使用](./doc/doc_ch/quickstart.md)
- [多语言OCR模型快速使用](./doc/doc_ch/multi_languages.md)
- [代码组织结构](./doc/doc_ch/tree.md) - [代码组织结构](./doc/doc_ch/tree.md)
- 算法介绍 - 算法介绍
- [文本检测](./doc/doc_ch/algorithm_overview.md) - [文本检测](./doc/doc_ch/algorithm_overview.md)
- [文本识别](./doc/doc_ch/algorithm_overview.md) - [文本识别](./doc/doc_ch/algorithm_overview.md)
- [PP-OCR Pipline](#PP-OCR) - [PP-OCR Pipline](#PP-OCR)
- [端到端PGNet算法](./doc/doc_ch/pgnet.md)
- 模型训练/评估 - 模型训练/评估
- [文本检测](./doc/doc_ch/detection.md) - [文本检测](./doc/doc_ch/detection.md)
- [文本识别](./doc/doc_ch/recognition.md) - [文本识别](./doc/doc_ch/recognition.md)
...@@ -88,7 +91,7 @@ PaddleOCR同时支持动态图与静态图两种编程范式 ...@@ -88,7 +91,7 @@ PaddleOCR同时支持动态图与静态图两种编程范式
- [基于pip安装whl包快速推理](./doc/doc_ch/whl.md) - [基于pip安装whl包快速推理](./doc/doc_ch/whl.md)
- [基于Python脚本预测引擎推理](./doc/doc_ch/inference.md) - [基于Python脚本预测引擎推理](./doc/doc_ch/inference.md)
- [基于C++预测引擎推理](./deploy/cpp_infer/readme.md) - [基于C++预测引擎推理](./deploy/cpp_infer/readme.md)
- [服务化部署](./deploy/hubserving/readme.md) - [服务化部署](./deploy/pdserving/README_CN.md)
- [端侧部署](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/deploy/lite/readme.md) - [端侧部署](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/deploy/lite/readme.md)
- [Benchmark](./doc/doc_ch/benchmark.md) - [Benchmark](./doc/doc_ch/benchmark.md)
- 数据集 - 数据集
......
...@@ -38,7 +38,15 @@ class StyleTextRecPredictor(object): ...@@ -38,7 +38,15 @@ class StyleTextRecPredictor(object):
self.std = config["Predictor"]["std"] self.std = config["Predictor"]["std"]
self.expand_result = config["Predictor"]["expand_result"] self.expand_result = config["Predictor"]["expand_result"]
def predict(self, style_input, text_input): def reshape_to_same_height(self, img_list):
h = img_list[0].shape[0]
for idx in range(1, len(img_list)):
new_w = round(1.0 * img_list[idx].shape[1] /
img_list[idx].shape[0] * h)
img_list[idx] = cv2.resize(img_list[idx], (new_w, h))
return img_list
def predict_single_image(self, style_input, text_input):
style_input = self.rep_style_input(style_input, text_input) style_input = self.rep_style_input(style_input, text_input)
tensor_style_input = self.preprocess(style_input) tensor_style_input = self.preprocess(style_input)
tensor_text_input = self.preprocess(text_input) tensor_text_input = self.preprocess(text_input)
...@@ -64,6 +72,21 @@ class StyleTextRecPredictor(object): ...@@ -64,6 +72,21 @@ class StyleTextRecPredictor(object):
"fake_bg": fake_bg, "fake_bg": fake_bg,
} }
def predict(self, style_input, text_input_list):
if not isinstance(text_input_list, (tuple, list)):
return self.predict_single_image(style_input, text_input_list)
synth_result_list = []
for text_input in text_input_list:
synth_result = self.predict_single_image(style_input, text_input)
synth_result_list.append(synth_result)
for key in synth_result:
res = [r[key] for r in synth_result_list]
res = self.reshape_to_same_height(res)
synth_result[key] = np.concatenate(res, axis=1)
return synth_result
def preprocess(self, img): def preprocess(self, img):
img = (img.astype('float32') * self.scale - self.mean) / self.std img = (img.astype('float32') * self.scale - self.mean) / self.std
img_height, img_width, channel = img.shape img_height, img_width, channel = img.shape
......
...@@ -12,6 +12,8 @@ ...@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import os import os
import numpy as np
import cv2
from utils.config import ArgsParser, load_config, override_config from utils.config import ArgsParser, load_config, override_config
from utils.logging import get_logger from utils.logging import get_logger
...@@ -36,8 +38,9 @@ class ImageSynthesiser(object): ...@@ -36,8 +38,9 @@ class ImageSynthesiser(object):
self.predictor = getattr(predictors, predictor_method)(self.config) self.predictor = getattr(predictors, predictor_method)(self.config)
def synth_image(self, corpus, style_input, language="en"): def synth_image(self, corpus, style_input, language="en"):
corpus, text_input = self.text_drawer.draw_text(corpus, language) corpus_list, text_input_list = self.text_drawer.draw_text(
synth_result = self.predictor.predict(style_input, text_input) corpus, language, style_input_width=style_input.shape[1])
synth_result = self.predictor.predict(style_input, text_input_list)
return synth_result return synth_result
...@@ -59,12 +62,15 @@ class DatasetSynthesiser(ImageSynthesiser): ...@@ -59,12 +62,15 @@ class DatasetSynthesiser(ImageSynthesiser):
for i in range(self.output_num): for i in range(self.output_num):
style_data = self.style_sampler.sample() style_data = self.style_sampler.sample()
style_input = style_data["image"] style_input = style_data["image"]
corpus_language, text_input_label = self.corpus_generator.generate( corpus_language, text_input_label = self.corpus_generator.generate()
) text_input_label_list, text_input_list = self.text_drawer.draw_text(
text_input_label, text_input = self.text_drawer.draw_text( text_input_label,
text_input_label, corpus_language) corpus_language,
style_input_width=style_input.shape[1])
synth_result = self.predictor.predict(style_input, text_input) text_input_label = "".join(text_input_label_list)
synth_result = self.predictor.predict(style_input, text_input_list)
fake_fusion = synth_result["fake_fusion"] fake_fusion = synth_result["fake_fusion"]
self.writer.save_image(fake_fusion, text_input_label) self.writer.save_image(fake_fusion, text_input_label)
self.writer.save_label() self.writer.save_label()
......
from PIL import Image, ImageDraw, ImageFont from PIL import Image, ImageDraw, ImageFont
import numpy as np import numpy as np
import cv2
from utils.logging import get_logger from utils.logging import get_logger
...@@ -28,7 +29,11 @@ class StdTextDrawer(object): ...@@ -28,7 +29,11 @@ class StdTextDrawer(object):
else: else:
return int((self.height - 4)**2 / font_height) return int((self.height - 4)**2 / font_height)
def draw_text(self, corpus, language="en", crop=True): def draw_text(self,
corpus,
language="en",
crop=True,
style_input_width=None):
if language not in self.support_languages: if language not in self.support_languages:
self.logger.warning( self.logger.warning(
"language {} not supported, use en instead.".format(language)) "language {} not supported, use en instead.".format(language))
...@@ -37,21 +42,43 @@ class StdTextDrawer(object): ...@@ -37,21 +42,43 @@ class StdTextDrawer(object):
width = min(self.max_width, len(corpus) * self.height) + 4 width = min(self.max_width, len(corpus) * self.height) + 4
else: else:
width = len(corpus) * self.height + 4 width = len(corpus) * self.height + 4
bg = Image.new("RGB", (width, self.height), color=(127, 127, 127))
draw = ImageDraw.Draw(bg) if style_input_width is not None:
width = min(width, style_input_width)
char_x = 2
font = self.font_dict[language] corpus_list = []
for i, char_i in enumerate(corpus): text_input_list = []
char_size = font.getsize(char_i)[0]
draw.text((char_x, 2), char_i, fill=(0, 0, 0), font=font) while len(corpus) != 0:
char_x += char_size bg = Image.new("RGB", (width, self.height), color=(127, 127, 127))
if char_x >= width: draw = ImageDraw.Draw(bg)
corpus = corpus[0:i + 1] char_x = 2
self.logger.warning("corpus length exceed limit: {}".format( font = self.font_dict[language]
corpus)) i = 0
while i < len(corpus):
char_i = corpus[i]
char_size = font.getsize(char_i)[0]
# split when char_x exceeds char size and index is not 0 (at least 1 char should be wroten on the image)
if char_x + char_size >= width and i != 0:
text_input = np.array(bg).astype(np.uint8)
text_input = text_input[:, 0:char_x, :]
corpus_list.append(corpus[0:i])
text_input_list.append(text_input)
corpus = corpus[i:]
break
draw.text((char_x, 2), char_i, fill=(0, 0, 0), font=font)
char_x += char_size
i += 1
# the whole text is shorter than style input
if i == len(corpus):
text_input = np.array(bg).astype(np.uint8)
text_input = text_input[:, 0:char_x, :]
corpus_list.append(corpus[0:i])
text_input_list.append(text_input)
corpus = corpus[i:]
break break
text_input = np.array(bg).astype(np.uint8) return corpus_list, text_input_list
text_input = text_input[:, 0:char_x, :]
return corpus, text_input
...@@ -14,12 +14,13 @@ Global: ...@@ -14,12 +14,13 @@ Global:
load_static_weights: True load_static_weights: True
cal_metric_during_train: False cal_metric_during_train: False
pretrained_model: ./pretrain_models/ResNet50_vd_ssld_pretrained/ pretrained_model: ./pretrain_models/ResNet50_vd_ssld_pretrained/
checkpoints: checkpoints:
save_inference_dir: save_inference_dir:
use_visualdl: False use_visualdl: False
infer_img: infer_img:
save_res_path: ./output/sast_r50_vd_ic15/predicts_sast.txt save_res_path: ./output/sast_r50_vd_ic15/predicts_sast.txt
Architecture: Architecture:
model_type: det model_type: det
algorithm: SAST algorithm: SAST
......
Global:
use_gpu: True
epoch_num: 600
log_smooth_window: 20
print_batch_step: 10
save_model_dir: ./output/pgnet_r50_vd_totaltext/
save_epoch_step: 10
# evaluation is run every 1000 iterations after the 0th iteration
eval_batch_step: [ 0, 1000 ]
# 1. If pretrained_model is saved in static mode, such as classification pretrained model
# from static branch, load_static_weights must be set as True.
# 2. If you want to finetune the pretrained models we provide in the docs,
# you should set load_static_weights as False.
load_static_weights: False
cal_metric_during_train: False
pretrained_model:
checkpoints:
save_inference_dir:
use_visualdl: False
infer_img:
  valid_set: totaltext # two modes: totaltext for curved words, partvgg for non-curved words
save_res_path: ./output/pgnet_r50_vd_totaltext/predicts_pgnet.txt
character_dict_path: ppocr/utils/ic15_dict.txt
character_type: EN
max_text_length: 50 # the max length in seq
max_text_nums: 30 # the max seq nums in a pic
tcl_len: 64
Architecture:
model_type: e2e
algorithm: PGNet
Transform:
Backbone:
name: ResNet
layers: 50
Neck:
name: PGFPN
Head:
name: PGHead
Loss:
name: PGLoss
tcl_bs: 64
max_text_length: 50 # the same as Global: max_text_length
max_text_nums: 30 # the same as Global:max_text_nums
pad_num: 36 # the length of dict for pad
Optimizer:
name: Adam
beta1: 0.9
beta2: 0.999
lr:
learning_rate: 0.001
regularizer:
name: 'L2'
factor: 0
PostProcess:
name: PGPostProcess
score_thresh: 0.5
  mode: fast # two modes: fast or slow
Metric:
name: E2EMetric
gt_mat_dir: # the dir of gt_mat
character_dict_path: ppocr/utils/ic15_dict.txt
main_indicator: f_score_e2e
Train:
dataset:
name: PGDataSet
label_file_list: [.././train_data/total_text/train/]
ratio_list: [1.0]
    data_format: icdar # two data formats: icdar/textnet
transforms:
- DecodeImage: # load image
img_mode: BGR
channel_first: False
- PGProcessTrain:
batch_size: 14 # same as loader: batch_size_per_card
min_crop_size: 24
min_text_size: 4
max_text_size: 512
- KeepKeys:
keep_keys: [ 'images', 'tcl_maps', 'tcl_label_maps', 'border_maps','direction_maps', 'training_masks', 'label_list', 'pos_list', 'pos_mask' ] # dataloader will return list in this order
loader:
shuffle: True
drop_last: True
batch_size_per_card: 14
num_workers: 16
Eval:
dataset:
name: PGDataSet
data_dir: ./train_data/
label_file_list: [./train_data/total_text/test/]
transforms:
- DecodeImage: # load image
img_mode: RGB
channel_first: False
- E2ELabelEncode:
- E2EResizeForTest:
max_side_len: 768
- NormalizeImage:
scale: 1./255.
mean: [ 0.485, 0.456, 0.406 ]
std: [ 0.229, 0.224, 0.225 ]
order: 'hwc'
- ToCHWImage:
- KeepKeys:
keep_keys: [ 'image', 'shape', 'polys', 'strs', 'tags', 'img_id']
loader:
shuffle: False
drop_last: False
batch_size_per_card: 1 # must be 1
num_workers: 2
\ No newline at end of file
...@@ -19,21 +19,56 @@ import logging ...@@ -19,21 +19,56 @@ import logging
logging.basicConfig(level=logging.INFO) logging.basicConfig(level=logging.INFO)
support_list = { support_list = {
'it':'italian', 'xi':'spanish', 'pu':'portuguese', 'ru':'russian', 'ar':'arabic', 'it': 'italian',
'ta':'tamil', 'ug':'uyghur', 'fa':'persian', 'ur':'urdu', 'rs':'serbian latin', 'xi': 'spanish',
'oc':'occitan', 'rsc':'serbian cyrillic', 'bg':'bulgarian', 'uk':'ukranian', 'be':'belarusian', 'pu': 'portuguese',
'te':'telugu', 'ka':'kannada', 'chinese_cht':'chinese tradition','hi':'hindi','mr':'marathi', 'ru': 'russian',
'ne':'nepali', 'ar': 'arabic',
'ta': 'tamil',
'ug': 'uyghur',
'fa': 'persian',
'ur': 'urdu',
'rs': 'serbian latin',
'oc': 'occitan',
'rsc': 'serbian cyrillic',
'bg': 'bulgarian',
'uk': 'ukranian',
'be': 'belarusian',
'te': 'telugu',
'ka': 'kannada',
'chinese_cht': 'chinese tradition',
'hi': 'hindi',
'mr': 'marathi',
'ne': 'nepali',
} }
assert(
os.path.isfile("./rec_multi_language_lite_train.yml") latin_lang = [
),"Loss basic configuration file rec_multi_language_lite_train.yml.\ 'af', 'az', 'bs', 'cs', 'cy', 'da', 'de', 'es', 'et', 'fr', 'ga', 'hr',
'hu', 'id', 'is', 'it', 'ku', 'la', 'lt', 'lv', 'mi', 'ms', 'mt', 'nl',
'no', 'oc', 'pi', 'pl', 'pt', 'ro', 'rs_latin', 'sk', 'sl', 'sq', 'sv',
'sw', 'tl', 'tr', 'uz', 'vi', 'latin'
]
arabic_lang = ['ar', 'fa', 'ug', 'ur']
cyrillic_lang = [
'ru', 'rs_cyrillic', 'be', 'bg', 'uk', 'mn', 'abq', 'ady', 'kbd', 'ava',
'dar', 'inh', 'che', 'lbe', 'lez', 'tab', 'cyrillic'
]
devanagari_lang = [
'hi', 'mr', 'ne', 'bh', 'mai', 'ang', 'bho', 'mah', 'sck', 'new', 'gom',
'sa', 'bgc', 'devanagari'
]
multi_lang = latin_lang + arabic_lang + cyrillic_lang + devanagari_lang
assert (os.path.isfile("./rec_multi_language_lite_train.yml")
), "Loss basic configuration file rec_multi_language_lite_train.yml.\
You can download it from \ You can download it from \
https://github.com/PaddlePaddle/PaddleOCR/tree/dygraph/configs/rec/multi_language/" https://github.com/PaddlePaddle/PaddleOCR/tree/dygraph/configs/rec/multi_language/"
global_config = yaml.load(open("./rec_multi_language_lite_train.yml", 'rb'), Loader=yaml.Loader) global_config = yaml.load(
open("./rec_multi_language_lite_train.yml", 'rb'), Loader=yaml.Loader)
project_path = os.path.abspath(os.path.join(os.getcwd(), "../../../")) project_path = os.path.abspath(os.path.join(os.getcwd(), "../../../"))
class ArgsParser(ArgumentParser): class ArgsParser(ArgumentParser):
def __init__(self): def __init__(self):
super(ArgsParser, self).__init__( super(ArgsParser, self).__init__(
...@@ -41,15 +76,30 @@ class ArgsParser(ArgumentParser): ...@@ -41,15 +76,30 @@ class ArgsParser(ArgumentParser):
self.add_argument( self.add_argument(
"-o", "--opt", nargs='+', help="set configuration options") "-o", "--opt", nargs='+', help="set configuration options")
self.add_argument( self.add_argument(
"-l", "--language", nargs='+', help="set language type, support {}".format(support_list)) "-l",
"--language",
nargs='+',
help="set language type, support {}".format(support_list))
self.add_argument( self.add_argument(
"--train",type=str,help="you can use this command to change the train dataset default path") "--train",
type=str,
help="you can use this command to change the train dataset default path"
)
self.add_argument( self.add_argument(
"--val",type=str,help="you can use this command to change the eval dataset default path") "--val",
type=str,
help="you can use this command to change the eval dataset default path"
)
self.add_argument( self.add_argument(
"--dict",type=str,help="you can use this command to change the dictionary default path") "--dict",
type=str,
help="you can use this command to change the dictionary default path"
)
self.add_argument( self.add_argument(
"--data_dir",type=str,help="you can use this command to change the dataset default root path") "--data_dir",
type=str,
help="you can use this command to change the dataset default root path"
)
def parse_args(self, argv=None): def parse_args(self, argv=None):
args = super(ArgsParser, self).parse_args(argv) args = super(ArgsParser, self).parse_args(argv)
...@@ -68,21 +118,38 @@ class ArgsParser(ArgumentParser): ...@@ -68,21 +118,38 @@ class ArgsParser(ArgumentParser):
return config return config
def _set_language(self, type): def _set_language(self, type):
assert(type),"please use -l or --language to choose language type" print("type:", type)
lang = type[0]
assert (type), "please use -l or --language to choose language type"
assert( assert(
type[0] in support_list.keys() lang in support_list.keys() or lang in multi_lang
),"the sub_keys(-l or --language) can only be one of support list: \n{},\nbut get: {}, " \ ),"the sub_keys(-l or --language) can only be one of support list: \n{},\nbut get: {}, " \
"please check your running command".format(support_list, type) "please check your running command".format(multi_lang, type)
global_config['Global']['character_dict_path'] = 'ppocr/utils/dict/{}_dict.txt'.format(type[0]) if lang in latin_lang:
global_config['Global']['save_model_dir'] = './output/rec_{}_lite'.format(type[0]) lang = "latin"
global_config['Train']['dataset']['label_file_list'] = ["train_data/{}_train.txt".format(type[0])] elif lang in arabic_lang:
global_config['Eval']['dataset']['label_file_list'] = ["train_data/{}_val.txt".format(type[0])] lang = "arabic"
global_config['Global']['character_type'] = type[0] elif lang in cyrillic_lang:
assert( lang = "cyrillic"
os.path.isfile(os.path.join(project_path,global_config['Global']['character_dict_path'])) elif lang in devanagari_lang:
),"Loss default dictionary file {}_dict.txt.You can download it from \ lang = "devanagari"
https://github.com/PaddlePaddle/PaddleOCR/tree/dygraph/ppocr/utils/dict/".format(type[0]) global_config['Global'][
return type[0] 'character_dict_path'] = 'ppocr/utils/dict/{}_dict.txt'.format(lang)
global_config['Global'][
'save_model_dir'] = './output/rec_{}_lite'.format(lang)
global_config['Train']['dataset'][
'label_file_list'] = ["train_data/{}_train.txt".format(lang)]
global_config['Eval']['dataset'][
'label_file_list'] = ["train_data/{}_val.txt".format(lang)]
global_config['Global']['character_type'] = lang
assert (
os.path.isfile(
os.path.join(project_path, global_config['Global'][
'character_dict_path']))
), "Loss default dictionary file {}_dict.txt.You can download it from \
https://github.com/PaddlePaddle/PaddleOCR/tree/dygraph/ppocr/utils/dict/".format(
lang)
return lang
def merge_config(config): def merge_config(config):
...@@ -110,43 +177,51 @@ def merge_config(config): ...@@ -110,43 +177,51 @@ def merge_config(config):
cur[sub_key] = value cur[sub_key] = value
else: else:
cur = cur[sub_key] cur = cur[sub_key]
def loss_file(path): def loss_file(path):
assert( assert (
os.path.exists(path) os.path.exists(path)
),"There is no such file:{},Please do not forget to put in the specified file".format(path) ), "There is no such file:{},Please do not forget to put in the specified file".format(
path)
if __name__ == '__main__': if __name__ == '__main__':
FLAGS = ArgsParser().parse_args() FLAGS = ArgsParser().parse_args()
merge_config(FLAGS.opt) merge_config(FLAGS.opt)
save_file_path = 'rec_{}_lite_train.yml'.format(FLAGS.language) save_file_path = 'rec_{}_lite_train.yml'.format(FLAGS.language)
if os.path.isfile(save_file_path): if os.path.isfile(save_file_path):
os.remove(save_file_path) os.remove(save_file_path)
if FLAGS.train: if FLAGS.train:
global_config['Train']['dataset']['label_file_list'] = [FLAGS.train] global_config['Train']['dataset']['label_file_list'] = [FLAGS.train]
train_label_path = os.path.join(project_path,FLAGS.train) train_label_path = os.path.join(project_path, FLAGS.train)
loss_file(train_label_path) loss_file(train_label_path)
if FLAGS.val: if FLAGS.val:
global_config['Eval']['dataset']['label_file_list'] = [FLAGS.val] global_config['Eval']['dataset']['label_file_list'] = [FLAGS.val]
eval_label_path = os.path.join(project_path,FLAGS.val) eval_label_path = os.path.join(project_path, FLAGS.val)
loss_file(Eval_label_path) loss_file(eval_label_path)
if FLAGS.dict: if FLAGS.dict:
global_config['Global']['character_dict_path'] = FLAGS.dict global_config['Global']['character_dict_path'] = FLAGS.dict
dict_path = os.path.join(project_path,FLAGS.dict) dict_path = os.path.join(project_path, FLAGS.dict)
loss_file(dict_path) loss_file(dict_path)
if FLAGS.data_dir: if FLAGS.data_dir:
global_config['Eval']['dataset']['data_dir'] = FLAGS.data_dir global_config['Eval']['dataset']['data_dir'] = FLAGS.data_dir
global_config['Train']['dataset']['data_dir'] = FLAGS.data_dir global_config['Train']['dataset']['data_dir'] = FLAGS.data_dir
data_dir = os.path.join(project_path,FLAGS.data_dir) data_dir = os.path.join(project_path, FLAGS.data_dir)
loss_file(data_dir) loss_file(data_dir)
with open(save_file_path, 'w') as f: with open(save_file_path, 'w') as f:
yaml.dump(dict(global_config), f, default_flow_style=False, sort_keys=False) yaml.dump(
dict(global_config), f, default_flow_style=False, sort_keys=False)
logging.info("Project path is :{}".format(project_path)) logging.info("Project path is :{}".format(project_path))
logging.info("Train list path set to :{}".format(global_config['Train']['dataset']['label_file_list'][0])) logging.info("Train list path set to :{}".format(global_config['Train'][
logging.info("Eval list path set to :{}".format(global_config['Eval']['dataset']['label_file_list'][0])) 'dataset']['label_file_list'][0]))
logging.info("Dataset root path set to :{}".format(global_config['Eval']['dataset']['data_dir'])) logging.info("Eval list path set to :{}".format(global_config['Eval'][
logging.info("Dict path set to :{}".format(global_config['Global']['character_dict_path'])) 'dataset']['label_file_list'][0]))
logging.info("Config file set to :configs/rec/multi_language/{}".format(save_file_path)) logging.info("Dataset root path set to :{}".format(global_config['Eval'][
'dataset']['data_dir']))
logging.info("Dict path set to :{}".format(global_config['Global'][
'character_dict_path']))
logging.info("Config file set to :configs/rec/multi_language/{}".
format(save_file_path))
Global:
use_gpu: true
epoch_num: 500
log_smooth_window: 20
print_batch_step: 10
save_model_dir: ./output/rec_arabic_lite
save_epoch_step: 3
eval_batch_step:
- 0
- 2000
cal_metric_during_train: true
pretrained_model: null
checkpoints: null
save_inference_dir: null
use_visualdl: false
infer_img: null
character_dict_path: ppocr/utils/dict/arabic_dict.txt
character_type: arabic
max_text_length: 25
infer_mode: false
use_space_char: true
Optimizer:
name: Adam
beta1: 0.9
beta2: 0.999
lr:
name: Cosine
learning_rate: 0.001
regularizer:
name: L2
factor: 1.0e-05
Architecture:
model_type: rec
algorithm: CRNN
Transform: null
Backbone:
name: MobileNetV3
scale: 0.5
model_name: small
small_stride:
- 1
- 2
- 2
- 2
Neck:
name: SequenceEncoder
encoder_type: rnn
hidden_size: 48
Head:
name: CTCHead
fc_decay: 1.0e-05
Loss:
name: CTCLoss
PostProcess:
name: CTCLabelDecode
Metric:
name: RecMetric
main_indicator: acc
Train:
dataset:
name: SimpleDataSet
data_dir: train_data/
label_file_list:
- train_data/arabic_train.txt
transforms:
- DecodeImage:
img_mode: BGR
channel_first: false
- RecAug: null
- CTCLabelEncode: null
- RecResizeImg:
image_shape:
- 3
- 32
- 320
- KeepKeys:
keep_keys:
- image
- label
- length
loader:
shuffle: true
batch_size_per_card: 256
drop_last: true
num_workers: 8
Eval:
dataset:
name: SimpleDataSet
data_dir: train_data/
label_file_list:
- train_data/arabic_val.txt
transforms:
- DecodeImage:
img_mode: BGR
channel_first: false
- CTCLabelEncode: null
- RecResizeImg:
image_shape:
- 3
- 32
- 320
- KeepKeys:
keep_keys:
- image
- label
- length
loader:
shuffle: false
drop_last: false
batch_size_per_card: 256
num_workers: 8
Global:
use_gpu: true
epoch_num: 500
log_smooth_window: 20
print_batch_step: 10
save_model_dir: ./output/rec_cyrillic_lite
save_epoch_step: 3
eval_batch_step:
- 0
- 2000
cal_metric_during_train: true
pretrained_model: null
checkpoints: null
save_inference_dir: null
use_visualdl: false
infer_img: null
character_dict_path: ppocr/utils/dict/cyrillic_dict.txt
character_type: cyrillic
max_text_length: 25
infer_mode: false
use_space_char: true
Optimizer:
name: Adam
beta1: 0.9
beta2: 0.999
lr:
name: Cosine
learning_rate: 0.001
regularizer:
name: L2
factor: 1.0e-05
Architecture:
model_type: rec
algorithm: CRNN
Transform: null
Backbone:
name: MobileNetV3
scale: 0.5
model_name: small
small_stride:
- 1
- 2
- 2
- 2
Neck:
name: SequenceEncoder
encoder_type: rnn
hidden_size: 48
Head:
name: CTCHead
fc_decay: 1.0e-05
Loss:
name: CTCLoss
PostProcess:
name: CTCLabelDecode
Metric:
name: RecMetric
main_indicator: acc
Train:
dataset:
name: SimpleDataSet
data_dir: train_data/
label_file_list:
- train_data/cyrillic_train.txt
transforms:
- DecodeImage:
img_mode: BGR
channel_first: false
- RecAug: null
- CTCLabelEncode: null
- RecResizeImg:
image_shape:
- 3
- 32
- 320
- KeepKeys:
keep_keys:
- image
- label
- length
loader:
shuffle: true
batch_size_per_card: 256
drop_last: true
num_workers: 8
Eval:
dataset:
name: SimpleDataSet
data_dir: train_data/
label_file_list:
- train_data/cyrillic_val.txt
transforms:
- DecodeImage:
img_mode: BGR
channel_first: false
- CTCLabelEncode: null
- RecResizeImg:
image_shape:
- 3
- 32
- 320
- KeepKeys:
keep_keys:
- image
- label
- length
loader:
shuffle: false
drop_last: false
batch_size_per_card: 256
num_workers: 8
Global:
use_gpu: true
epoch_num: 500
log_smooth_window: 20
print_batch_step: 10
save_model_dir: ./output/rec_devanagari_lite
save_epoch_step: 3
eval_batch_step:
- 0
- 2000
cal_metric_during_train: true
pretrained_model: null
checkpoints: null
save_inference_dir: null
use_visualdl: false
infer_img: null
character_dict_path: ppocr/utils/dict/devanagari_dict.txt
character_type: devanagari
max_text_length: 25
infer_mode: false
use_space_char: true
Optimizer:
name: Adam
beta1: 0.9
beta2: 0.999
lr:
name: Cosine
learning_rate: 0.001
regularizer:
name: L2
factor: 1.0e-05
Architecture:
model_type: rec
algorithm: CRNN
Transform: null
Backbone:
name: MobileNetV3
scale: 0.5
model_name: small
small_stride:
- 1
- 2
- 2
- 2
Neck:
name: SequenceEncoder
encoder_type: rnn
hidden_size: 48
Head:
name: CTCHead
fc_decay: 1.0e-05
Loss:
name: CTCLoss
PostProcess:
name: CTCLabelDecode
Metric:
name: RecMetric
main_indicator: acc
Train:
dataset:
name: SimpleDataSet
data_dir: train_data/
label_file_list:
- train_data/devanagari_train.txt
transforms:
- DecodeImage:
img_mode: BGR
channel_first: false
- RecAug: null
- CTCLabelEncode: null
- RecResizeImg:
image_shape:
- 3
- 32
- 320
- KeepKeys:
keep_keys:
- image
- label
- length
loader:
shuffle: true
batch_size_per_card: 256
drop_last: true
num_workers: 8
Eval:
dataset:
name: SimpleDataSet
data_dir: train_data/
label_file_list:
- train_data/devanagari_val.txt
transforms:
- DecodeImage:
img_mode: BGR
channel_first: false
- CTCLabelEncode: null
- RecResizeImg:
image_shape:
- 3
- 32
- 320
- KeepKeys:
keep_keys:
- image
- label
- length
loader:
shuffle: false
drop_last: false
batch_size_per_card: 256
num_workers: 8
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment