Commit 0d7ee968 authored by WenmuZhou's avatar WenmuZhou
Browse files

Merge branch 'dygraph' of https://github.com/PaddlePaddle/PaddleOCR into fix_vqa

parents a323fce6 a11fbc0f
...@@ -29,3 +29,5 @@ paddleocr.egg-info/ ...@@ -29,3 +29,5 @@ paddleocr.egg-info/
/deploy/android_demo/app/PaddleLite/ /deploy/android_demo/app/PaddleLite/
/deploy/android_demo/app/.cxx/ /deploy/android_demo/app/.cxx/
/deploy/android_demo/app/cache/ /deploy/android_demo/app/cache/
test_tipc/web/models/
test_tipc/web/node_modules/
...@@ -61,7 +61,7 @@ from combobox import ComboBox ...@@ -61,7 +61,7 @@ from combobox import ComboBox
from libs.constants import * from libs.constants import *
from libs.utils import * from libs.utils import *
from libs.settings import Settings from libs.settings import Settings
from libs.shape import Shape, DEFAULT_LINE_COLOR, DEFAULT_FILL_COLOR from libs.shape import Shape, DEFAULT_LINE_COLOR, DEFAULT_FILL_COLOR,DEFAULT_LOCK_COLOR
from libs.stringBundle import StringBundle from libs.stringBundle import StringBundle
from libs.canvas import Canvas from libs.canvas import Canvas
from libs.zoomWidget import ZoomWidget from libs.zoomWidget import ZoomWidget
...@@ -126,7 +126,7 @@ class MainWindow(QMainWindow, WindowMixin): ...@@ -126,7 +126,7 @@ class MainWindow(QMainWindow, WindowMixin):
self.labelHist = [] self.labelHist = []
self.lastOpenDir = None self.lastOpenDir = None
self.result_dic = [] self.result_dic = []
self.result_dic_locked = []
self.changeFileFolder = False self.changeFileFolder = False
self.haveAutoReced = False self.haveAutoReced = False
self.labelFile = None self.labelFile = None
...@@ -395,6 +395,7 @@ class MainWindow(QMainWindow, WindowMixin): ...@@ -395,6 +395,7 @@ class MainWindow(QMainWindow, WindowMixin):
delete = action(getStr('delBox'), self.deleteSelectedShape, delete = action(getStr('delBox'), self.deleteSelectedShape,
'backspace', 'delete', getStr('delBoxDetail'), enabled=False) 'backspace', 'delete', getStr('delBoxDetail'), enabled=False)
copy = action(getStr('dupBox'), self.copySelectedShape, copy = action(getStr('dupBox'), self.copySelectedShape,
'Ctrl+C', 'copy', getStr('dupBoxDetail'), 'Ctrl+C', 'copy', getStr('dupBoxDetail'),
enabled=False) enabled=False)
...@@ -405,6 +406,7 @@ class MainWindow(QMainWindow, WindowMixin): ...@@ -405,6 +406,7 @@ class MainWindow(QMainWindow, WindowMixin):
showAll = action(getStr('showBox'), partial(self.togglePolygons, True), showAll = action(getStr('showBox'), partial(self.togglePolygons, True),
'Ctrl+A', 'hide', getStr('showAllBoxDetail'), 'Ctrl+A', 'hide', getStr('showAllBoxDetail'),
enabled=False) enabled=False)
help = action(getStr('tutorial'), self.showTutorialDialog, None, 'help', getStr('tutorialDetail')) help = action(getStr('tutorial'), self.showTutorialDialog, None, 'help', getStr('tutorialDetail'))
showInfo = action(getStr('info'), self.showInfoDialog, None, 'help', getStr('info')) showInfo = action(getStr('info'), self.showInfoDialog, None, 'help', getStr('info'))
...@@ -476,6 +478,10 @@ class MainWindow(QMainWindow, WindowMixin): ...@@ -476,6 +478,10 @@ class MainWindow(QMainWindow, WindowMixin):
undo = action(getStr("undo"), self.undoShapeEdit, undo = action(getStr("undo"), self.undoShapeEdit,
'Ctrl+Z', "undo", getStr("undo"), enabled=False) 'Ctrl+Z', "undo", getStr("undo"), enabled=False)
lock = action(getStr("lockBox"), self.lockSelectedShape,
None, "lock", getStr("lockBoxDetail"),
enabled=False)
self.editButton.setDefaultAction(edit) self.editButton.setDefaultAction(edit)
self.newButton.setDefaultAction(create) self.newButton.setDefaultAction(create)
...@@ -538,13 +544,13 @@ class MainWindow(QMainWindow, WindowMixin): ...@@ -538,13 +544,13 @@ class MainWindow(QMainWindow, WindowMixin):
fitWindow=fitWindow, fitWidth=fitWidth, fitWindow=fitWindow, fitWidth=fitWidth,
zoomActions=zoomActions, saveLabel=saveLabel, zoomActions=zoomActions, saveLabel=saveLabel,
undo=undo, undoLastPoint=undoLastPoint,open_dataset_dir=open_dataset_dir, undo=undo, undoLastPoint=undoLastPoint,open_dataset_dir=open_dataset_dir,
rotateLeft=rotateLeft,rotateRight=rotateRight, rotateLeft=rotateLeft,rotateRight=rotateRight,lock=lock,
fileMenuActions=( fileMenuActions=(
opendir, open_dataset_dir, saveLabel, resetAll, quit), opendir, open_dataset_dir, saveLabel, resetAll, quit),
beginner=(), advanced=(), beginner=(), advanced=(),
editMenu=(createpoly, edit, copy, delete,singleRere,None, undo, undoLastPoint, editMenu=(createpoly, edit, copy, delete,singleRere,None, undo, undoLastPoint,
None, rotateLeft, rotateRight, None, color1, self.drawSquaresOption), None, rotateLeft, rotateRight, None, color1, self.drawSquaresOption,lock),
beginnerContext=(create, edit, copy, delete, singleRere, rotateLeft, rotateRight,), beginnerContext=(create, edit, copy, delete, singleRere, rotateLeft, rotateRight,lock),
advancedContext=(createMode, editMode, edit, copy, advancedContext=(createMode, editMode, edit, copy,
delete, shapeLineColor, shapeFillColor), delete, shapeLineColor, shapeFillColor),
onLoadActive=( onLoadActive=(
...@@ -998,6 +1004,7 @@ class MainWindow(QMainWindow, WindowMixin): ...@@ -998,6 +1004,7 @@ class MainWindow(QMainWindow, WindowMixin):
self.actions.delete.setEnabled(n_selected) self.actions.delete.setEnabled(n_selected)
self.actions.copy.setEnabled(n_selected) self.actions.copy.setEnabled(n_selected)
self.actions.edit.setEnabled(n_selected == 1) self.actions.edit.setEnabled(n_selected == 1)
self.actions.lock.setEnabled(n_selected)
def addLabel(self, shape): def addLabel(self, shape):
shape.paintLabel = self.displayLabelOption.isChecked() shape.paintLabel = self.displayLabelOption.isChecked()
...@@ -1041,7 +1048,7 @@ class MainWindow(QMainWindow, WindowMixin): ...@@ -1041,7 +1048,7 @@ class MainWindow(QMainWindow, WindowMixin):
def loadLabels(self, shapes): def loadLabels(self, shapes):
s = [] s = []
for label, points, line_color, fill_color, difficult in shapes: for label, points, line_color, fill_color, difficult in shapes:
shape = Shape(label=label) shape = Shape(label=label,line_color=line_color)
for x, y in points: for x, y in points:
# Ensure the labels are within the bounds of the image. If not, fix them. # Ensure the labels are within the bounds of the image. If not, fix them.
...@@ -1051,6 +1058,7 @@ class MainWindow(QMainWindow, WindowMixin): ...@@ -1051,6 +1058,7 @@ class MainWindow(QMainWindow, WindowMixin):
shape.addPoint(QPointF(x, y)) shape.addPoint(QPointF(x, y))
shape.difficult = difficult shape.difficult = difficult
#shape.locked = False
shape.close() shape.close()
s.append(shape) s.append(shape)
...@@ -1063,10 +1071,12 @@ class MainWindow(QMainWindow, WindowMixin): ...@@ -1063,10 +1071,12 @@ class MainWindow(QMainWindow, WindowMixin):
# shape.fill_color = QColor(*fill_color) # shape.fill_color = QColor(*fill_color)
# else: # else:
# shape.fill_color = generateColorByText(label) # shape.fill_color = generateColorByText(label)
self.addLabel(shape) self.addLabel(shape)
self.updateComboBox() self.updateComboBox()
self.canvas.loadShapes(s) self.canvas.loadShapes(s)
def singleLabel(self, shape): def singleLabel(self, shape):
if shape is None: if shape is None:
...@@ -1106,10 +1116,9 @@ class MainWindow(QMainWindow, WindowMixin): ...@@ -1106,10 +1116,9 @@ class MainWindow(QMainWindow, WindowMixin):
difficult=s.difficult) # bool difficult=s.difficult) # bool
shapes = [] if mode == 'Auto' else \ shapes = [] if mode == 'Auto' else \
[format_shape(shape) for shape in self.canvas.shapes] [format_shape(shape) for shape in self.canvas.shapes if shape.line_color != DEFAULT_LOCK_COLOR]
# Can add differrent annotation formats here # Can add differrent annotation formats here
for box in self.result_dic :
for box in self.result_dic:
trans_dic = {"label": box[1][0], "points": box[0], 'difficult': False} trans_dic = {"label": box[1][0], "points": box[0], 'difficult': False}
if trans_dic["label"] == "" and mode == 'Auto': if trans_dic["label"] == "" and mode == 'Auto':
continue continue
...@@ -1120,7 +1129,6 @@ class MainWindow(QMainWindow, WindowMixin): ...@@ -1120,7 +1129,6 @@ class MainWindow(QMainWindow, WindowMixin):
for box in shapes: for box in shapes:
trans_dic.append({"transcription": box['label'], "points": box['points'], 'difficult': box['difficult']}) trans_dic.append({"transcription": box['label'], "points": box['points'], 'difficult': box['difficult']})
self.PPlabel[annotationFilePath] = trans_dic self.PPlabel[annotationFilePath] = trans_dic
if mode == 'Auto': if mode == 'Auto':
self.Cachelabel[annotationFilePath] = trans_dic self.Cachelabel[annotationFilePath] = trans_dic
...@@ -1313,6 +1321,7 @@ class MainWindow(QMainWindow, WindowMixin): ...@@ -1313,6 +1321,7 @@ class MainWindow(QMainWindow, WindowMixin):
# unicodeFilePath = os.path.abspath(unicodeFilePath) # unicodeFilePath = os.path.abspath(unicodeFilePath)
# Tzutalin 20160906 : Add file list and dock to move faster # Tzutalin 20160906 : Add file list and dock to move faster
# Highlight the file item # Highlight the file item
if unicodeFilePath and self.fileListWidget.count() > 0: if unicodeFilePath and self.fileListWidget.count() > 0:
if unicodeFilePath in self.mImgList: if unicodeFilePath in self.mImgList:
index = self.mImgList.index(unicodeFilePath) index = self.mImgList.index(unicodeFilePath)
...@@ -1322,6 +1331,7 @@ class MainWindow(QMainWindow, WindowMixin): ...@@ -1322,6 +1331,7 @@ class MainWindow(QMainWindow, WindowMixin):
### ###
self.iconlist.clear() self.iconlist.clear()
self.additems5(None) self.additems5(None)
for i in range(5): for i in range(5):
item_tooltip = self.iconlist.item(i).toolTip() item_tooltip = self.iconlist.item(i).toolTip()
# print(i,"---",item_tooltip) # print(i,"---",item_tooltip)
...@@ -1340,7 +1350,6 @@ class MainWindow(QMainWindow, WindowMixin): ...@@ -1340,7 +1350,6 @@ class MainWindow(QMainWindow, WindowMixin):
if unicodeFilePath and os.path.exists(unicodeFilePath): if unicodeFilePath and os.path.exists(unicodeFilePath):
self.canvas.verified = False self.canvas.verified = False
cvimg = cv2.imdecode(np.fromfile(unicodeFilePath, dtype=np.uint8), 1) cvimg = cv2.imdecode(np.fromfile(unicodeFilePath, dtype=np.uint8), 1)
height, width, depth = cvimg.shape height, width, depth = cvimg.shape
cvimg = cv2.cvtColor(cvimg, cv2.COLOR_BGR2RGB) cvimg = cv2.cvtColor(cvimg, cv2.COLOR_BGR2RGB)
...@@ -1361,16 +1370,19 @@ class MainWindow(QMainWindow, WindowMixin): ...@@ -1361,16 +1370,19 @@ class MainWindow(QMainWindow, WindowMixin):
else: else:
self.dirty = False self.dirty = False
self.actions.save.setEnabled(True) self.actions.save.setEnabled(True)
if len(self.canvas.lockedShapes) != 0:
self.actions.save.setEnabled(True)
self.setDirty()
self.canvas.setEnabled(True) self.canvas.setEnabled(True)
self.adjustScale(initial=True) self.adjustScale(initial=True)
self.paintCanvas() self.paintCanvas()
self.addRecentFile(self.filePath) self.addRecentFile(self.filePath)
self.toggleActions(True) self.toggleActions(True)
self.showBoundingBoxFromPPlabel(filePath) self.showBoundingBoxFromPPlabel(filePath)
self.setWindowTitle(__appname__ + ' ' + filePath) self.setWindowTitle(__appname__ + ' ' + filePath)
# Default : select last item if there is at least one item # Default : select last item if there is at least one item
if self.labelList.count(): if self.labelList.count():
self.labelList.setCurrentItem(self.labelList.item(self.labelList.count() - 1)) self.labelList.setCurrentItem(self.labelList.item(self.labelList.count() - 1))
...@@ -1380,15 +1392,23 @@ class MainWindow(QMainWindow, WindowMixin): ...@@ -1380,15 +1392,23 @@ class MainWindow(QMainWindow, WindowMixin):
return True return True
return False return False
def showBoundingBoxFromPPlabel(self, filePath): def showBoundingBoxFromPPlabel(self, filePath):
width, height = self.image.width(), self.image.height()
imgidx = self.getImglabelidx(filePath) imgidx = self.getImglabelidx(filePath)
if imgidx not in self.PPlabel.keys(): shapes =[]
return #box['ratio'] of the shapes saved in lockedShapes contains the ratio of the
shapes = [] # four corner coordinates of the shapes to the height and width of the image
for box in self.PPlabel[imgidx]: for box in self.canvas.lockedShapes:
shapes.append((box['transcription'], box['points'], None, None, box['difficult'])) if self.canvas.isInTheSameImage:
shapes.append((box['transcription'], [[s[0]*width,s[1]*height]for s in box['ratio']],
DEFAULT_LOCK_COLOR, None, box['difficult']))
else:
shapes.append(('锁定框:待检测', [[s[0]*width,s[1]*height]for s in box['ratio']],
DEFAULT_LOCK_COLOR, None, box['difficult']))
if imgidx in self.PPlabel.keys():
for box in self.PPlabel[imgidx]:
shapes.append((box['transcription'], box['points'], None, None, box['difficult']))
self.loadLabels(shapes) self.loadLabels(shapes)
self.canvas.verified = False self.canvas.verified = False
...@@ -1646,9 +1666,37 @@ class MainWindow(QMainWindow, WindowMixin): ...@@ -1646,9 +1666,37 @@ class MainWindow(QMainWindow, WindowMixin):
else: else:
return fullFilePath return fullFilePath
return '' return ''
def saveLockedShapes(self):
self.canvas.lockedShapes = []
self.canvas.selectedShapes = []
for s in self.canvas.shapes:
if s.line_color == DEFAULT_LOCK_COLOR:
self.canvas.selectedShapes.append(s)
self.lockSelectedShape()
for s in self.canvas.shapes:
if s.line_color == DEFAULT_LOCK_COLOR:
self.canvas.selectedShapes.remove(s)
self.canvas.shapes.remove(s)
def _saveFile(self, annotationFilePath, mode='Manual'): def _saveFile(self, annotationFilePath, mode='Manual'):
if len(self.canvas.lockedShapes) != 0:
self.saveLockedShapes()
if mode == 'Manual': if mode == 'Manual':
self.result_dic_locked = []
img = cv2.imread(self.filePath)
width, height = self.image.width(), self.image.height()
for shape in self.canvas.lockedShapes:
box = [[int(p[0]*width), int(p[1]*height)] for p in shape['ratio']]
assert len(box) == 4
result = [(shape['transcription'],1)]
result.insert(0, box)
self.result_dic_locked.append(result)
self.result_dic += self.result_dic_locked
self.result_dic_locked = []
if annotationFilePath and self.saveLabels(annotationFilePath, mode=mode): if annotationFilePath and self.saveLabels(annotationFilePath, mode=mode):
self.setClean() self.setClean()
self.statusBar().showMessage('Saved to %s' % annotationFilePath) self.statusBar().showMessage('Saved to %s' % annotationFilePath)
...@@ -1663,13 +1711,13 @@ class MainWindow(QMainWindow, WindowMixin): ...@@ -1663,13 +1711,13 @@ class MainWindow(QMainWindow, WindowMixin):
self.savePPlabel(mode='Auto') self.savePPlabel(mode='Auto')
self.fileListWidget.insertItem(int(currIndex), item) self.fileListWidget.insertItem(int(currIndex), item)
self.openNextImg() if not self.canvas.isInTheSameImage:
self.openNextImg()
self.actions.saveRec.setEnabled(True) self.actions.saveRec.setEnabled(True)
self.actions.saveLabel.setEnabled(True) self.actions.saveLabel.setEnabled(True)
elif mode == 'Auto': elif mode == 'Auto':
if annotationFilePath and self.saveLabels(annotationFilePath, mode=mode): if annotationFilePath and self.saveLabels(annotationFilePath, mode=mode):
self.setClean() self.setClean()
self.statusBar().showMessage('Saved to %s' % annotationFilePath) self.statusBar().showMessage('Saved to %s' % annotationFilePath)
self.statusBar().show() self.statusBar().show()
...@@ -1733,7 +1781,9 @@ class MainWindow(QMainWindow, WindowMixin): ...@@ -1733,7 +1781,9 @@ class MainWindow(QMainWindow, WindowMixin):
if discardChanges == QMessageBox.No: if discardChanges == QMessageBox.No:
return True return True
elif discardChanges == QMessageBox.Yes: elif discardChanges == QMessageBox.Yes:
self.canvas.isInTheSameImage = True
self.saveFile() self.saveFile()
self.canvas.isInTheSameImage = False
return True return True
else: else:
return False return False
...@@ -1872,6 +1922,7 @@ class MainWindow(QMainWindow, WindowMixin): ...@@ -1872,6 +1922,7 @@ class MainWindow(QMainWindow, WindowMixin):
# org_box = [dic['points'] for dic in self.PPlabel[self.getImglabelidx(self.filePath)]] # org_box = [dic['points'] for dic in self.PPlabel[self.getImglabelidx(self.filePath)]]
if self.canvas.shapes: if self.canvas.shapes:
self.result_dic = [] self.result_dic = []
self.result_dic_locked = [] # result_dic_locked stores the ocr result of self.canvas.lockedShapes
rec_flag = 0 rec_flag = 0
for shape in self.canvas.shapes: for shape in self.canvas.shapes:
box = [[int(p.x()), int(p.y())] for p in shape.points] box = [[int(p.x()), int(p.y())] for p in shape.points]
...@@ -1883,21 +1934,32 @@ class MainWindow(QMainWindow, WindowMixin): ...@@ -1883,21 +1934,32 @@ class MainWindow(QMainWindow, WindowMixin):
return return
result = self.ocr.ocr(img_crop, cls=True, det=False) result = self.ocr.ocr(img_crop, cls=True, det=False)
if result[0][0] != '': if result[0][0] != '':
result.insert(0, box) if shape.line_color == DEFAULT_LOCK_COLOR:
print('result in reRec is ', result) shape.label = result[0][0]
self.result_dic.append(result) result.insert(0, box)
self.result_dic_locked.append(result)
else:
result.insert(0, box)
self.result_dic.append(result)
else: else:
print('Can not recognise the box') print('Can not recognise the box')
self.result_dic.append([box,(self.noLabelText,0)]) if shape.line_color == DEFAULT_LOCK_COLOR:
shape.label = result[0][0]
if self.noLabelText == shape.label or result[1][0] == shape.label: self.result_dic_locked.append([box,(self.noLabelText,0)])
print('label no change') else:
else: self.result_dic.append([box,(self.noLabelText,0)])
rec_flag += 1 try:
if self.noLabelText == shape.label or result[1][0] == shape.label:
if len(self.result_dic) > 0 and rec_flag > 0: print('label no change')
else:
rec_flag += 1
except IndexError as e:
print('Can not recognise the box')
if (len(self.result_dic) > 0 and rec_flag > 0)or self.canvas.lockedShapes:
self.canvas.isInTheSameImage = True
self.saveFile(mode='Auto') self.saveFile(mode='Auto')
self.loadFile(self.filePath) self.loadFile(self.filePath)
self.canvas.isInTheSameImage = False
self.setDirty() self.setDirty()
elif len(self.result_dic) == len(self.canvas.shapes) and rec_flag == 0: elif len(self.result_dic) == len(self.canvas.shapes) and rec_flag == 0:
QMessageBox.information(self, "Information", "The recognition result remains unchanged!") QMessageBox.information(self, "Information", "The recognition result remains unchanged!")
...@@ -2107,6 +2169,44 @@ class MainWindow(QMainWindow, WindowMixin): ...@@ -2107,6 +2169,44 @@ class MainWindow(QMainWindow, WindowMixin):
self.labelList.clearSelection() self.labelList.clearSelection()
self._noSelectionSlot = False self._noSelectionSlot = False
self.canvas.loadShapes(shapes, replace=replace) self.canvas.loadShapes(shapes, replace=replace)
print("loadShapes")#1
def lockSelectedShape(self):
"""lock the selsected shapes.
Add self.selectedShapes to lock self.canvas.lockedShapes,
which holds the ratio of the four coordinates of the locked shapes
to the width and height of the image
"""
width, height = self.image.width(), self.image.height()
def format_shape(s):
return dict(label=s.label, # str
line_color=s.line_color.getRgb(),
fill_color=s.fill_color.getRgb(),
ratio=[[int(p.x())/width, int(p.y())/height] for p in s.points], # QPonitF
# add chris
difficult=s.difficult) # bool
#lock
if len(self.canvas.lockedShapes) == 0:
for s in self.canvas.selectedShapes:
s.line_color = DEFAULT_LOCK_COLOR
s.locked = True
shapes = [format_shape(shape) for shape in self.canvas.selectedShapes]
trans_dic = []
for box in shapes:
trans_dic.append({"transcription": box['label'], "ratio": box['ratio'], 'difficult': box['difficult']})
self.canvas.lockedShapes = trans_dic
self.actions.save.setEnabled(True)
#unlock
else:
for s in self.canvas.shapes:
s.line_color = DEFAULT_LINE_COLOR
self.canvas.lockedShapes = []
self.result_dic_locked = []
self.setDirty()
self.actions.save.setEnabled(True)
def inverted(color): def inverted(color):
......
...@@ -78,14 +78,14 @@ PPOCRLabel # run ...@@ -78,14 +78,14 @@ PPOCRLabel # run
```bash ```bash
cd PaddleOCR/PPOCRLabel cd PaddleOCR/PPOCRLabel
python3 setup.py bdist_wheel python3 setup.py bdist_wheel
pip3 install dist/PPOCRLabel-1.0.0-py2.py3-none-any.whl pip3 install dist/PPOCRLabel-1.0.2-py2.py3-none-any.whl
``` ```
#### 1.2.3 Run PPOCRLabel by Python Script #### 1.2.3 Run PPOCRLabel by Python Script
```bash ```bash
cd ./PPOCRLabel # Switch to the PPOCRLabel directory cd ./PPOCRLabel # Switch to the PPOCRLabel directory
python PPOCRLabel.py --lang ch python PPOCRLabel.py
``` ```
......
...@@ -78,7 +78,7 @@ PPOCRLabel --lang ch # 启动 ...@@ -78,7 +78,7 @@ PPOCRLabel --lang ch # 启动
```bash ```bash
cd PaddleOCR/PPOCRLabel cd PaddleOCR/PPOCRLabel
python3 setup.py bdist_wheel python3 setup.py bdist_wheel
pip3 install dist/PPOCRLabel-1.0.0-py2.py3-none-any.whl -i https://mirror.baidu.com/pypi/simple pip3 install dist/PPOCRLabel-1.0.2-py2.py3-none-any.whl -i https://mirror.baidu.com/pypi/simple
``` ```
#### 1.2.3 通过Python脚本运行PPOCRLabel #### 1.2.3 通过Python脚本运行PPOCRLabel
......
...@@ -87,6 +87,10 @@ class Canvas(QWidget): ...@@ -87,6 +87,10 @@ class Canvas(QWidget):
#initialisation for panning #initialisation for panning
self.pan_initial_pos = QPoint() self.pan_initial_pos = QPoint()
#lockedshapes related
self.lockedShapes = []
self.isInTheSameImage = False
def setDrawingColor(self, qColor): def setDrawingColor(self, qColor):
self.drawingLineColor = qColor self.drawingLineColor = qColor
self.drawingRectColor = qColor self.drawingRectColor = qColor
......
This diff is collapsed.
...@@ -30,6 +30,7 @@ DEFAULT_SELECT_LINE_COLOR = QColor(255, 255, 255) ...@@ -30,6 +30,7 @@ DEFAULT_SELECT_LINE_COLOR = QColor(255, 255, 255)
DEFAULT_SELECT_FILL_COLOR = QColor(0, 128, 255, 155) DEFAULT_SELECT_FILL_COLOR = QColor(0, 128, 255, 155)
DEFAULT_VERTEX_FILL_COLOR = QColor(0, 255, 0, 255) DEFAULT_VERTEX_FILL_COLOR = QColor(0, 255, 0, 255)
DEFAULT_HVERTEX_FILL_COLOR = QColor(255, 0, 0) DEFAULT_HVERTEX_FILL_COLOR = QColor(255, 0, 0)
DEFAULT_LOCK_COLOR = QColor(255, 0, 255)
MIN_Y_LABEL = 10 MIN_Y_LABEL = 10
...@@ -57,7 +58,7 @@ class Shape(object): ...@@ -57,7 +58,7 @@ class Shape(object):
self.selected = False self.selected = False
self.difficult = difficult self.difficult = difficult
self.paintLabel = paintLabel self.paintLabel = paintLabel
self.locked = False
self._highlightIndex = None self._highlightIndex = None
self._highlightMode = self.NEAR_VERTEX self._highlightMode = self.NEAR_VERTEX
self._highlightSettings = { self._highlightSettings = {
......
...@@ -60,7 +60,7 @@ class StringBundle: ...@@ -60,7 +60,7 @@ class StringBundle:
def __createLookupFallbackList(self, localeStr): def __createLookupFallbackList(self, localeStr):
resultPaths = [] resultPaths = []
basePath = "\strings" if os.name == 'nt' else ":/strings" basePath = "\strings" if os.name == 'nt' else "/strings"
resultPaths.append(basePath) resultPaths.append(basePath)
if localeStr is not None: if localeStr is not None:
# Don't follow standard BCP47. Simple fallback # Don't follow standard BCP47. Simple fallback
......
...@@ -104,4 +104,6 @@ singleRe=Re-recognition RectBox ...@@ -104,4 +104,6 @@ singleRe=Re-recognition RectBox
labelDialogOption=Pop-up Label Input Dialog labelDialogOption=Pop-up Label Input Dialog
undo=Undo undo=Undo
undoLastPoint=Undo Last Point undoLastPoint=Undo Last Point
autoSaveMode=Auto Export Label Mode autoSaveMode=Auto Export Label Mode
\ No newline at end of file lockBox=Lock selected box/Unlock all box
lockBoxDetail=Lock selected box/Unlock all box
\ No newline at end of file
...@@ -104,4 +104,6 @@ singleRe=重识别此区块 ...@@ -104,4 +104,6 @@ singleRe=重识别此区块
labelDialogOption=弹出标记输入框 labelDialogOption=弹出标记输入框
undo=撤销 undo=撤销
undoLastPoint=撤销上个点 undoLastPoint=撤销上个点
autoSaveMode=自动导出标记结果 autoSaveMode=自动导出标记结果
\ No newline at end of file lockBox=锁定框/解除锁定框
lockBoxDetail=若当前没有框处于锁定状态则锁定选中的框,若存在锁定框则解除所有锁定框的锁定状态
...@@ -33,7 +33,7 @@ setup( ...@@ -33,7 +33,7 @@ setup(
package_dir={'PPOCRLabel': ''}, package_dir={'PPOCRLabel': ''},
include_package_data=True, include_package_data=True,
entry_points={"console_scripts": ["PPOCRLabel= PPOCRLabel.PPOCRLabel:main"]}, entry_points={"console_scripts": ["PPOCRLabel= PPOCRLabel.PPOCRLabel:main"]},
version='1.0.0', version='1.0.2',
install_requires=requirements, install_requires=requirements,
license='Apache License 2.0', license='Apache License 2.0',
description='PPOCRLabel is a semi-automatic graphic annotation tool suitable for OCR field, with built-in PPOCR model to automatically detect and re-recognize data. It is written in python3 and pyqt5, supporting rectangular box annotation and four-point annotation modes. Annotations can be directly used for the training of PPOCR detection and recognition models', description='PPOCRLabel is a semi-automatic graphic annotation tool suitable for OCR field, with built-in PPOCR model to automatically detect and re-recognize data. It is written in python3 and pyqt5, supporting rectangular box annotation and four-point annotation modes. Annotations can be directly used for the training of PPOCR detection and recognition models',
......
...@@ -24,7 +24,7 @@ PaddleOCR aims to create multilingual, awesome, leading, and practical OCR tools ...@@ -24,7 +24,7 @@ PaddleOCR aims to create multilingual, awesome, leading, and practical OCR tools
**Recent updates** **Recent updates**
- 2021.12.21 OCR open source online course starts. The lesson starts at 8:30 every night and lasts for ten days. Free registration: https://aistudio.baidu.com/aistudio/course/introduce/25207 - 2021.12.21 OCR open source online course starts. The lesson starts at 8:30 every night and lasts for ten days. Free registration: https://aistudio.baidu.com/aistudio/course/introduce/25207
- 2021.12.21 release PaddleOCR v2.4, release 1 text detection algorithm (PSENet), 3 text recognition algorithms (NRTR、SEED、SAR), 1 key information extraction algorithm (SDMGR) and 3 DocVQA algorithms (LayoutLMLayoutLMv2LayoutXLM). - 2021.12.21 release PaddleOCR v2.4, release 1 text detection algorithm (PSENet), 3 text recognition algorithms (NRTR、SEED、SAR), 1 key information extraction algorithm (SDMGR, [tutorial](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.4/ppstructure/docs/kie.md)) and 3 DocVQA algorithms (LayoutLM, LayoutLMv2, LayoutXLM, [tutorial](https://github.com/PaddlePaddle/PaddleOCR/tree/release/2.4/ppstructure/vqa)).
- PaddleOCR R&D team would like to share the key points of PP-OCRv2, at 20:15 pm on September 8th, [Course Address](https://aistudio.baidu.com/aistudio/education/group/info/6758). - PaddleOCR R&D team would like to share the key points of PP-OCRv2, at 20:15 pm on September 8th, [Course Address](https://aistudio.baidu.com/aistudio/education/group/info/6758).
- 2021.9.7 release PaddleOCR v2.3, [PP-OCRv2](#PP-OCRv2) is proposed. The inference speed of PP-OCRv2 is 220% higher than that of PP-OCR server in CPU device. The F-score of PP-OCRv2 is 7% higher than that of PP-OCR mobile. - 2021.9.7 release PaddleOCR v2.3, [PP-OCRv2](#PP-OCRv2) is proposed. The inference speed of PP-OCRv2 is 220% higher than that of PP-OCR server in CPU device. The F-score of PP-OCRv2 is 7% higher than that of PP-OCR mobile.
- 2021.8.3 released PaddleOCR v2.2, add a new structured documents analysis toolkit, i.e., [PP-Structure](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.2/ppstructure/README.md), support layout analysis and table recognition (One-key to export chart images to Excel files). - 2021.8.3 released PaddleOCR v2.2, add a new structured documents analysis toolkit, i.e., [PP-Structure](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.2/ppstructure/README.md), support layout analysis and table recognition (One-key to export chart images to Excel files).
...@@ -39,7 +39,7 @@ PaddleOCR aims to create multilingual, awesome, leading, and practical OCR tools ...@@ -39,7 +39,7 @@ PaddleOCR aims to create multilingual, awesome, leading, and practical OCR tools
- General PP-OCR server series models: detection (47.1M) + direction classifier (1.4M) + recognition (94.9M) = 143.4M - General PP-OCR server series models: detection (47.1M) + direction classifier (1.4M) + recognition (94.9M) = 143.4M
- Support Chinese, English, and digit recognition, vertical text recognition, and long text recognition - Support Chinese, English, and digit recognition, vertical text recognition, and long text recognition
- Support multi-language recognition: about 80 languages like Korean, Japanese, German, French, etc - Support multi-language recognition: about 80 languages like Korean, Japanese, German, French, etc
- document structurize system PP-Structure - PP-Structure: a document structurize system
- support layout analysis and table recognition (support export to Excel) - support layout analysis and table recognition (support export to Excel)
- support key information extraction - support key information extraction
- support DocVQA - support DocVQA
...@@ -90,7 +90,7 @@ Mobile DEMO experience (based on EasyEdge and Paddle-Lite, supports iOS and Andr ...@@ -90,7 +90,7 @@ Mobile DEMO experience (based on EasyEdge and Paddle-Lite, supports iOS and Andr
| Model introduction | Model name | Recommended scene | Detection model | Direction classifier | Recognition model | | Model introduction | Model name | Recommended scene | Detection model | Direction classifier | Recognition model |
| ------------------------------------------------------------ | ---------------------------- | ----------------- | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | | ------------------------------------------------------------ | ---------------------------- | ----------------- | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ |
| Chinese and English ultra-lightweight PP-OCRv2 model(11.6M) | ch_PP-OCRv2_xx |Mobile & Server|[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_distill_train.tar)| [inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) |[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv2/ch/ch_PP-OCRv2_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_train.tar)| | Chinese and English ultra-lightweight PP-OCRv2 model(11.6M) | ch_PP-OCRv2_xx |Mobile & Server|[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_distill_train.tar)| [inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) |[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_train.tar)|
| Chinese and English ultra-lightweight PP-OCR model (9.4M) | ch_ppocr_mobile_v2.0_xx | Mobile & server |[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_train.tar)|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) |[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_train.tar) | | Chinese and English ultra-lightweight PP-OCR model (9.4M) | ch_ppocr_mobile_v2.0_xx | Mobile & server |[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_train.tar)|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) |[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_train.tar) |
| Chinese and English general PP-OCR model (143.4M) | ch_ppocr_server_v2.0_xx | Server |[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_train.tar) |[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_traingit.tar) |[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_train.tar) | | Chinese and English general PP-OCR model (143.4M) | ch_ppocr_server_v2.0_xx | Server |[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_train.tar) |[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_traingit.tar) |[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_train.tar) |
...@@ -102,11 +102,11 @@ For a new language request, please refer to [Guideline for new language_requests ...@@ -102,11 +102,11 @@ For a new language request, please refer to [Guideline for new language_requests
## Tutorials ## Tutorials
- [Environment Preparation](./doc/doc_en/environment_en.md) - [Environment Preparation](./doc/doc_en/environment_en.md)
- [Quick Start](./doc/doc_en/quickstart_en.md) - [Quick Start](./doc/doc_en/quickstart_en.md)
- [PaddleOCR Overview and Installation](./doc/doc_en/paddleOCR_overview_en.md) - [PaddleOCR Overview and Project Clone](./doc/doc_en/paddleOCR_overview_en.md)
- PP-OCR Industry Landing: from Training to Deployment - PP-OCR Industry Landing: from Training to Deployment
- [PP-OCR Model and Configuration](./doc/doc_en/models_and_config_en.md) - [PP-OCR Model Zoo](./doc/doc_en/models_en.md)
- [PP-OCR Model Download](./doc/doc_en/models_list_en.md) - [PP-OCR Model Download](./doc/doc_en/models_list_en.md)
- [Python Inference for PP-OCR Model Library](./doc/doc_en/inference_ppocr_en.md) - [Python Inference for PP-OCR Model Zoo](./doc/doc_en/inference_ppocr_en.md)
- [PP-OCR Training](./doc/doc_en/training_en.md) - [PP-OCR Training](./doc/doc_en/training_en.md)
- [Text Detection](./doc/doc_en/detection_en.md) - [Text Detection](./doc/doc_en/detection_en.md)
- [Text Recognition](./doc/doc_en/recognition_en.md) - [Text Recognition](./doc/doc_en/recognition_en.md)
...@@ -120,6 +120,8 @@ For a new language request, please refer to [Guideline for new language_requests ...@@ -120,6 +120,8 @@ For a new language request, please refer to [Guideline for new language_requests
- [PP-Structure: Information Extraction](./ppstructure/README.md) - [PP-Structure: Information Extraction](./ppstructure/README.md)
- [Layout Parser](./ppstructure/layout/README.md) - [Layout Parser](./ppstructure/layout/README.md)
- [Table Recognition](./ppstructure/table/README.md) - [Table Recognition](./ppstructure/table/README.md)
- [DocVQA](https://github.com/PaddlePaddle/PaddleOCR/tree/release/2.4/ppstructure/vqa)
- [Key Information Extraction](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.4/ppstructure/docs/kie.md)
- Academic Circles - Academic Circles
- [Two-stage Algorithm](./doc/doc_en/algorithm_overview_en.md) - [Two-stage Algorithm](./doc/doc_en/algorithm_overview_en.md)
- [PGNet Algorithm](./doc/doc_en/pgnet_en.md) - [PGNet Algorithm](./doc/doc_en/pgnet_en.md)
......
...@@ -19,8 +19,8 @@ PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库,助力 ...@@ -19,8 +19,8 @@ PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库,助力
## 近期更新 ## 近期更新
- 2021.12.21 《OCR十讲》课程开讲,12月21日起每晚八点半线上授课!免费报名地址https://aistudio.baidu.com/aistudio/course/introduce/25207 - 2021.12.21《动手学OCR · 十讲》课程开讲,12月21日起每晚八点半线上授课![免费报名地址](https://aistudio.baidu.com/aistudio/course/introduce/25207)
- 2021.12.21 发布PaddleOCR v2.4。OCR算法新增1种文本检测算法(PSENet),3种文本识别算法(NRTR、SEED、SAR);文档结构化算法新增1种关键信息提取算法(SDMGR),3种DocVQA算法(LayoutLM、LayoutLMv2,LayoutXLM)。 - 2021.12.21 发布PaddleOCR v2.4。OCR算法新增1种文本检测算法(PSENet),3种文本识别算法(NRTR、SEED、SAR);文档结构化算法新增1种关键信息提取算法(SDMGR[文档](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.4/ppstructure/docs/kie.md)),3种DocVQA算法(LayoutLM、LayoutLMv2,LayoutXLM,[文档](https://github.com/PaddlePaddle/PaddleOCR/tree/release/2.4/ppstructure/vqa))。
- PaddleOCR研发团队对最新发版内容技术深入解读,9月8日晚上20:15,[课程回放](https://aistudio.baidu.com/aistudio/education/group/info/6758) - PaddleOCR研发团队对最新发版内容技术深入解读,9月8日晚上20:15,[课程回放](https://aistudio.baidu.com/aistudio/education/group/info/6758)
- 2021.9.7 发布PaddleOCR v2.3与[PP-OCRv2](#PP-OCRv2),CPU推理速度相比于PP-OCR server提升220%;效果相比于PP-OCR mobile 提升7%。 - 2021.9.7 发布PaddleOCR v2.3与[PP-OCRv2](#PP-OCRv2),CPU推理速度相比于PP-OCR server提升220%;效果相比于PP-OCR mobile 提升7%。
- 2021.8.3 发布PaddleOCR v2.2,新增文档结构分析[PP-Structure](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.2/ppstructure/README_ch.md)工具包,支持版面分析与表格识别(含Excel导出)。 - 2021.8.3 发布PaddleOCR v2.2,新增文档结构分析[PP-Structure](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.2/ppstructure/README_ch.md)工具包,支持版面分析与表格识别(含Excel导出)。
...@@ -54,8 +54,7 @@ PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库,助力 ...@@ -54,8 +54,7 @@ PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库,助力
- 加入社区:微信扫描下方二维码加入官方交流群,与各行各业开发者充分交流,期待您的加入。 - 加入社区:微信扫描下方二维码加入官方交流群,与各行各业开发者充分交流,期待您的加入。
- 社区贡献:[社区贡献](./doc/doc_ch/thirdparty.md)文档中包含了社区用户**使用PaddleOCR开发的各种工具、应用**以及**为PaddleOCR贡献的功能、优化的文档与代码**等,是官方为社区开发者打造的荣誉墙、也是帮助优质项目宣传的广播站。如果您的OCR项目未被收集在文档中,可根据文档说明与我们联系。最新社区贡献可查看[此处](#社区贡献) - 社区贡献:[社区贡献](./doc/doc_ch/thirdparty.md)文档中包含了社区用户**使用PaddleOCR开发的各种工具、应用**以及**为PaddleOCR贡献的功能、优化的文档与代码**等,是官方为社区开发者打造的荣誉墙、也是帮助优质项目宣传的广播站。如果您的OCR项目未被收集在文档中,可根据文档说明与我们联系。最新社区贡献可查看[此处](#社区贡献)
- 社区常规赛:作为社区贡献的具体承载形式,社区常规赛是面向OCR开发者的积分赛事。首届社区常规赛与[《动手学OCR · 十讲》课程](https://aistudio.baidu.com/aistudio/course/introduce/25207)联合推广。社区常规赛的赛题详情与报名方法可参考[链接](https://github.com/PaddlePaddle/PaddleOCR/issues/4982)
- 社区常规赛:作为社区贡献的具体承载形式,社区常规赛是面向OCR开发者的积分赛事。首届社区常规赛与《动手学OCR · 十讲》课程联合推广,课程详情可参考[链接](https://aistudio.baidu.com/aistudio/course/introduce/25207),课程奖励与作业说明可参考[链接](https://github.com/PaddlePaddle/PaddleOCR/issues/4982)
<div align="center"> <div align="center">
<img src="https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/dygraph/doc/joinus.PNG" width = "200" height = "200" /> <img src="https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/dygraph/doc/joinus.PNG" width = "200" height = "200" />
...@@ -64,22 +63,33 @@ PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库,助力 ...@@ -64,22 +63,33 @@ PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库,助力
## 零代码体验 ## 零代码体验
- 在线网站体验:超轻量PP-OCR mobile模型体验地址:https://www.paddlepaddle.org.cn/hub/scene/ocr - 在线网站体验:超轻量PP-OCR mobile模型体验地址:https://www.paddlepaddle.org.cn/hub/scene/ocr
- 移动端:[安装包DEMO下载地址](https://ai.baidu.com/easyedge/app/openSource?from=paddlelite)(基于EasyEdge和Paddle-Lite, 支持iOS和Android系统) - 移动端:[安装包DEMO下载地址](https://ai.baidu.com/easyedge/app/openSource?from=paddlelite)(基于EasyEdge和Paddle-Lite, 支持iOS和Android系统)
<a name="模型下载"></a>
## PP-OCR系列模型列表(更新中)
| 模型简介 | 模型名称 | 推荐场景 | 检测模型 | 方向分类器 | 识别模型 |
| ------------------------------------- | ----------------------- | --------------- | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ |
| 中英文超轻量PP-OCRv2模型(13.0M) | ch_PP-OCRv2_xx | 移动端&服务器端 | [推理模型](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_distill_train.tar) | [推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) | [推理模型](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_train.tar) |
| 中英文超轻量PP-OCR mobile模型(9.4M) | ch_ppocr_mobile_v2.0_xx | 移动端&服务器端 | [推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_train.tar) | [推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) | [推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_pre.tar) |
| 中英文通用PP-OCR server模型(143.4M) | ch_ppocr_server_v2.0_xx | 服务器端 | [推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_train.tar) | [推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) | [推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_pre.tar) |
更多模型下载(包括多语言),可以参考[PP-OCR 系列模型下载](./doc/doc_ch/models_list.md)
## 文档教程 ## 文档教程
- [运行环境准备](./doc/doc_ch/environment.md) - [运行环境准备](./doc/doc_ch/environment.md)
- [快速开始(中英文/多语言/文档分析)](./doc/doc_ch/quickstart.md) - [快速开始(中英文/多语言/文档分析)](./doc/doc_ch/quickstart.md)
- [PaddleOCR全景图与项目克隆](./doc/doc_ch/paddleOCR_overview.md) - [PaddleOCR全景图与项目克隆](./doc/doc_ch/paddleOCR_overview.md)
- PP-OCR产业落地:从训练到部署 - PP-OCR产业落地:从训练到部署
- [PP-OCR模型与配置文件](./doc/doc_ch/models_and_config.md) - [PP-OCR模型](./doc/doc_ch/models.md)
- [PP-OCR模型下载](./doc/doc_ch/models_list.md) - [PP-OCR模型下载](./doc/doc_ch/models_list.md)
- [PP-OCR模型库快速推理](./doc/doc_ch/inference_ppocr.md) - [PP-OCR模型库快速推理](./doc/doc_ch/inference_ppocr.md)
- [PP-OCR模型训练](./doc/doc_ch/training.md) - [PP-OCR模型训练](./doc/doc_ch/training.md)
- [文本检测](./doc/doc_ch/detection.md) - [文本检测](./doc/doc_ch/detection.md)
- [文本识别](./doc/doc_ch/recognition.md) - [文本识别](./doc/doc_ch/recognition.md)
- [文本方向分类器](./doc/doc_ch/angle_class.md) - [文本方向分类器](./doc/doc_ch/angle_class.md)
- [知识蒸馏](./doc/doc_ch/knowledge_distillation.md)
- [配置文件内容与生成](./doc/doc_ch/config.md) - [配置文件内容与生成](./doc/doc_ch/config.md)
- PP-OCR模型推理部署 - PP-OCR模型推理部署
- [基于C++预测引擎推理](./deploy/cpp_infer/readme.md) - [基于C++预测引擎推理](./deploy/cpp_infer/readme.md)
...@@ -89,6 +99,8 @@ PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库,助力 ...@@ -89,6 +99,8 @@ PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库,助力
- [PP-Structure信息提取](./ppstructure/README_ch.md) - [PP-Structure信息提取](./ppstructure/README_ch.md)
- [版面分析](./ppstructure/layout/README_ch.md) - [版面分析](./ppstructure/layout/README_ch.md)
- [表格识别](./ppstructure/table/README_ch.md) - [表格识别](./ppstructure/table/README_ch.md)
- [DocVQA](https://github.com/PaddlePaddle/PaddleOCR/tree/release/2.4/ppstructure/vqa)
- [关键信息提取](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.4/ppstructure/docs/kie.md)
- OCR学术圈 - OCR学术圈
- [两阶段模型介绍与下载](./doc/doc_ch/algorithm_overview.md) - [两阶段模型介绍与下载](./doc/doc_ch/algorithm_overview.md)
- [端到端PGNet算法](./doc/doc_ch/pgnet.md) - [端到端PGNet算法](./doc/doc_ch/pgnet.md)
...@@ -119,7 +131,7 @@ PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库,助力 ...@@ -119,7 +131,7 @@ PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库,助力
</div> </div>
[1] PP-OCR是一个实用的超轻量OCR系统。主要由DB文本检测、检测框矫正和CRNN文本识别三部分组成。该系统从骨干网络选择和调整、预测头部的设计、数据增强、学习率变换策略、正则化参数选择、预训练模型使用以及模型自动裁剪量化8个方面,采用19个有效策略,对各个模块的模型进行效果调优和瘦身(如绿框所示),最终得到整体大小为3.5M的超轻量中英文OCR和2.8M的英文数字OCR。更多细节请参考PP-OCR技术方案 https://arxiv.org/abs/2009.09941 [1] PP-OCR是一个实用的超轻量OCR系统。主要由DB文本检测、检测框矫正和CRNN文本识别三部分组成。该系统从骨干网络选择和调整、预测头部的设计、数据增强、学习率变换策略、正则化参数选择、预训练模型使用以及模型自动裁剪量化8个方面,采用19个有效策略,对各个模块的模型进行效果调优和瘦身(如绿框所示),最终得到整体大小为3.5M的超轻量中英文OCR和2.8M的英文数字OCR。更多细节请参考PP-OCR技术方案 https://arxiv.org/abs/2009.09941
[2] PP-OCRv2在PP-OCR的基础上,进一步在5个方面重点优化,检测模型采用CML协同互学习知识蒸馏策略和CopyPaste数据增广策略;识别模型采用LCNet轻量级骨干网络、UDML 改进知识蒸馏策略和Enhanced CTC loss损失函数改进(如上图红框所示),进一步在推理速度和预测效果上取得明显提升。更多细节请参考PP-OCRv2[技术报告](https://arxiv.org/abs/2109.03144) [2] PP-OCRv2在PP-OCR的基础上,进一步在5个方面重点优化,检测模型采用CML协同互学习知识蒸馏策略和CopyPaste数据增广策略;识别模型采用LCNet轻量级骨干网络、UDML 改进知识蒸馏策略和[Enhanced CTC loss](./doc/doc_ch/enhanced_ctc_loss.md)损失函数改进(如上图红框所示),进一步在推理速度和预测效果上取得明显提升。更多细节请参考PP-OCRv2[技术报告](https://arxiv.org/abs/2109.03144)
<a name="效果展示"></a> <a name="效果展示"></a>
......
...@@ -21,6 +21,7 @@ Architecture: ...@@ -21,6 +21,7 @@ Architecture:
model_type: det model_type: det
Models: Models:
Teacher: Teacher:
pretrained: ./pretrain_models/ch_ppocr_server_v2.0_det_train/best_accuracy
freeze_params: true freeze_params: true
return_all_feats: false return_all_feats: false
model_type: det model_type: det
...@@ -36,6 +37,7 @@ Architecture: ...@@ -36,6 +37,7 @@ Architecture:
name: DBHead name: DBHead
k: 50 k: 50
Student: Student:
pretrained:
freeze_params: false freeze_params: false
return_all_feats: false return_all_feats: false
model_type: det model_type: det
...@@ -52,6 +54,7 @@ Architecture: ...@@ -52,6 +54,7 @@ Architecture:
name: DBHead name: DBHead
k: 50 k: 50
Student2: Student2:
pretrained:
freeze_params: false freeze_params: false
return_all_feats: false return_all_feats: false
model_type: det model_type: det
......
...@@ -18,6 +18,7 @@ Global: ...@@ -18,6 +18,7 @@ Global:
Architecture: Architecture:
name: DistillationModel name: DistillationModel
algorithm: Distillation algorithm: Distillation
model_type: det
Models: Models:
Student: Student:
pretrained: ./pretrain_models/MobileNetV3_large_x0_5_pretrained pretrained: ./pretrain_models/MobileNetV3_large_x0_5_pretrained
......
...@@ -18,6 +18,7 @@ Global: ...@@ -18,6 +18,7 @@ Global:
Architecture: Architecture:
name: DistillationModel name: DistillationModel
algorithm: Distillation algorithm: Distillation
model_type: det
Models: Models:
Student: Student:
pretrained: ./pretrain_models/MobileNetV3_large_x0_5_pretrained pretrained: ./pretrain_models/MobileNetV3_large_x0_5_pretrained
......
...@@ -18,8 +18,8 @@ python3.7 -m pip install paddle2onnx ...@@ -18,8 +18,8 @@ python3.7 -m pip install paddle2onnx
- 安装 ONNX - 安装 ONNX
``` ```
# 建议安装 1.4.0 版本,可根据环境更换版本号 # 建议安装 1.9.0 版本,可根据环境更换版本号
python3.7 -m pip install onnxruntime==1.4.0 python3.7 -m pip install onnxruntime==1.9.0
``` ```
## 2. 模型转换 ## 2. 模型转换
...@@ -47,13 +47,15 @@ paddle2onnx --model_dir=./inference/ch_ppocr_mobile_v2.0_det_infer/ \ ...@@ -47,13 +47,15 @@ paddle2onnx --model_dir=./inference/ch_ppocr_mobile_v2.0_det_infer/ \
--params_filename=inference.pdiparams \ --params_filename=inference.pdiparams \
--save_file=./inference/det_mobile_onnx/model.onnx \ --save_file=./inference/det_mobile_onnx/model.onnx \
--opset_version=10 \ --opset_version=10 \
--input_shape_dict="{'x': [-1, 3, -1, -1]}" \
--enable_onnx_checker=True --enable_onnx_checker=True
``` ```
执行完毕后,ONNX 模型会被保存在 `./inference/det_mobile_onnx/` 路径下 执行完毕后,ONNX 模型会被保存在 `./inference/det_mobile_onnx/` 路径下
* 注意:以下几个模型暂不支持转换为 ONNX 模型: * 注意:对于OCR模型,转化过程中必须采用动态shape的形式,即加入选项--input_shape_dict="{'x': [-1, 3, -1, -1]}",否则预测结果可能与直接使用Paddle预测有细微不同。
NRTR、SAR、RARE、SRN 另外,以下几个模型暂不支持转换为 ONNX 模型:
NRTR、SAR、RARE、SRN
## 3. onnx 预测 ## 3. onnx 预测
...@@ -72,5 +74,3 @@ root INFO: 1.jpg [[[291, 295], [334, 292], [348, 844], [305, 847]], [[344, 296] ...@@ -72,5 +74,3 @@ root INFO: 1.jpg [[[291, 295], [334, 292], [348, 844], [305, 847]], [[344, 296]
The predict time of ../../doc/imgs/1.jpg: 0.06162881851196289 The predict time of ../../doc/imgs/1.jpg: 0.06162881851196289
The visualized image saved in ./inference_results/det_res_1.jpg The visualized image saved in ./inference_results/det_res_1.jpg
``` ```
* 注意:ONNX暂时不支持变长预测,需要将输入resize到固定输入,预测结果可能与直接使用Paddle预测有细微不同。
...@@ -57,7 +57,7 @@ PaddleOCR基于动态图开源的文本识别算法列表: ...@@ -57,7 +57,7 @@ PaddleOCR基于动态图开源的文本识别算法列表:
- [x] SAR([paper](https://arxiv.org/abs/1811.00751v2)) - [x] SAR([paper](https://arxiv.org/abs/1811.00751v2))
- [x] SEED([paper](https://arxiv.org/pdf/2005.10977.pdf)) - [x] SEED([paper](https://arxiv.org/pdf/2005.10977.pdf))
参考[DTRB][3](https://arxiv.org/abs/1904.01906)文字识别训练和评估流程,使用MJSynth和SynthText两个文字识别数据集训练,在IIIT, SVT, IC03, IC13, IC15, SVTP, CUTE数据集上进行评估,算法效果如下: 参考[DTRB](https://arxiv.org/abs/1904.01906)[3]文字识别训练和评估流程,使用MJSynth和SynthText两个文字识别数据集训练,在IIIT, SVT, IC03, IC13, IC15, SVTP, CUTE数据集上进行评估,算法效果如下:
|模型|骨干网络|Avg Accuracy|模型存储命名|下载链接| |模型|骨干网络|Avg Accuracy|模型存储命名|下载链接|
|---|---|---|---|---| |---|---|---|---|---|
......
...@@ -146,7 +146,7 @@ PaddleOCR欢迎大家向repo中积极贡献代码,下面给出一些贡献代 ...@@ -146,7 +146,7 @@ PaddleOCR欢迎大家向repo中积极贡献代码,下面给出一些贡献代
-`远程仓库` Clone到本地 -`远程仓库` Clone到本地
``` ```
# 拉取develop分支的代码 # 拉取dygraph分支的代码
git clone https://github.com/{your_name}/PaddleOCR.git -b dygraph git clone https://github.com/{your_name}/PaddleOCR.git -b dygraph
cd PaddleOCR cd PaddleOCR
``` ```
...@@ -191,11 +191,11 @@ git checkout -b new_branch ...@@ -191,11 +191,11 @@ git checkout -b new_branch
也可以基于远程或者上游的分支创建新的分支,命令如下。 也可以基于远程或者上游的分支创建新的分支,命令如下。
``` ```
# 基于用户远程仓库(origin)的develop创建new_branch分支 # 基于用户远程仓库(origin)的dygraph创建new_branch分支
git checkout -b new_branch origin/develop git checkout -b new_branch origin/dygraph
# 基于上游远程仓库(upstream)的develop创建new_branch分支 # 基于上游远程仓库(upstream)的dygraph创建new_branch分支
# 如果需要从upstream创建新的分支,需要首先使用git fetch upstream获取上游代码 # 如果需要从upstream创建新的分支,需要首先使用git fetch upstream获取上游代码
git checkout -b new_branch upstream/develop git checkout -b new_branch upstream/dygraph
``` ```
最终会显示切换到新的分支,输出信息如下 最终会显示切换到新的分支,输出信息如下
...@@ -246,8 +246,8 @@ git commit -m "your commit info" ...@@ -246,8 +246,8 @@ git commit -m "your commit info"
``` ```
git fetch upstream git fetch upstream
# 如果是希望提交到其他分支,则需要从upstream的其他分支pull代码,这里是develop # 如果是希望提交到其他分支,则需要从upstream的其他分支pull代码,这里是dygraph
git pull upstream develop git pull upstream dygraph
``` ```
#### 3.2.7 push到远程仓库 #### 3.2.7 push到远程仓库
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment