Commit cf03889b authored by weishengyu
parents 5a5017fe 011104e0
@@ -274,6 +274,7 @@ class MainWindow(QMainWindow, WindowMixin):
         self.preButton.setIconSize(QSize(40, 100))
         self.preButton.clicked.connect(self.openPrevImg)
         self.preButton.setStyleSheet('border: none;')
+        self.preButton.setShortcut('a')
         self.iconlist = QListWidget()
         self.iconlist.setViewMode(QListView.IconMode)
         self.iconlist.setFlow(QListView.TopToBottom)
@@ -289,12 +290,12 @@ class MainWindow(QMainWindow, WindowMixin):
         self.nextButton.setIconSize(QSize(40, 100))
         self.nextButton.setStyleSheet('border: none;')
         self.nextButton.clicked.connect(self.openNextImg)
+        self.nextButton.setShortcut('d')
         hlayout.addWidget(self.preButton)
         hlayout.addWidget(self.iconlist)
         hlayout.addWidget(self.nextButton)
-        # self.setLayout(hlayout)
         iconListContainer = QWidget()
         iconListContainer.setLayout(hlayout)
@@ -359,11 +360,6 @@ class MainWindow(QMainWindow, WindowMixin):
         opendir = action(getStr('openDir'), self.openDirDialog,
                          'Ctrl+u', 'open', getStr('openDir'))
-        openNextImg = action(getStr('nextImg'), self.openNextImg,
-                             'd', 'next', getStr('nextImgDetail'))
-        openPrevImg = action(getStr('prevImg'), self.openPrevImg,
-                             'a', 'prev', getStr('prevImgDetail'))
         save = action(getStr('save'), self.saveFile,
                       'Ctrl+V', 'verify', getStr('saveDetail'), enabled=False)
@@ -371,7 +367,7 @@ class MainWindow(QMainWindow, WindowMixin):
         alcm = action(getStr('choosemodel'), self.autolcm,
                       'Ctrl+M', 'next', getStr('tipchoosemodel'))
-        deleteImg = action(getStr('deleteImg'), self.deleteImg, 'Ctrl+D', 'close', getStr('deleteImgDetail'),
+        deleteImg = action(getStr('deleteImg'), self.deleteImg, 'Ctrl+Shift+D', 'close', getStr('deleteImgDetail'),
                            enabled=True)
         resetAll = action(getStr('resetAll'), self.resetAll, None, 'resetall', getStr('resetAllDetail'))
@@ -388,7 +384,7 @@ class MainWindow(QMainWindow, WindowMixin):
                         'w', 'new', getStr('crtBoxDetail'), enabled=False)
         delete = action(getStr('delBox'), self.deleteSelectedShape,
-                        'Delete', 'delete', getStr('delBoxDetail'), enabled=False)
+                        'backspace', 'delete', getStr('delBoxDetail'), enabled=False)
         copy = action(getStr('dupBox'), self.copySelectedShape,
                       'Ctrl+C', 'copy', getStr('dupBoxDetail'),
                       enabled=False)
@@ -446,8 +442,11 @@ class MainWindow(QMainWindow, WindowMixin):
         reRec = action(getStr('reRecognition'), self.reRecognition,
                        'Ctrl+Shift+R', 'reRec', getStr('reRecognition'), enabled=False)
+        singleRere = action(getStr('singleRe'), self.singleRerecognition,
+                            'Ctrl+R', 'reRec', getStr('singleRe'), enabled=False)
         createpoly = action(getStr('creatPolygon'), self.createPolygon,
-                            'p', 'new', 'Creat Polygon', enabled=True)
+                            'q', 'new', 'Creat Polygon', enabled=True)
         saveRec = action(getStr('saveRec'), self.saveRecResult,
                          '', 'save', getStr('saveRec'), enabled=False)
@@ -491,6 +490,7 @@ class MainWindow(QMainWindow, WindowMixin):
                                  icon='color', tip=getStr('shapeFillColorDetail'),
                                  enabled=False)
         # Label list context menu.
         labelMenu = QMenu()
         addActions(labelMenu, (edit, delete))
@@ -501,7 +501,6 @@ class MainWindow(QMainWindow, WindowMixin):
         # Draw squares/rectangles
         self.drawSquaresOption = QAction(getStr('drawSquares'), self)
-        self.drawSquaresOption.setShortcut('Ctrl+Shift+R')
         self.drawSquaresOption.setCheckable(True)
         self.drawSquaresOption.setChecked(settings.get(SETTING_DRAW_SQUARE, False))
         self.drawSquaresOption.triggered.connect(self.toogleDrawSquare)
@@ -509,7 +508,7 @@ class MainWindow(QMainWindow, WindowMixin):
         # Store actions for further handling.
         self.actions = struct(save=save, open=open, resetAll=resetAll, deleteImg=deleteImg,
                               lineColor=color1, create=create, delete=delete, edit=edit, copy=copy,
-                              saveRec=saveRec,
+                              saveRec=saveRec, singleRere=singleRere,AutoRec=AutoRec,reRec=reRec,
                               createMode=createMode, editMode=editMode,
                               shapeLineColor=shapeLineColor, shapeFillColor=shapeFillColor,
                               zoom=zoom, zoomIn=zoomIn, zoomOut=zoomOut, zoomOrg=zoomOrg,
@@ -518,9 +517,9 @@ class MainWindow(QMainWindow, WindowMixin):
                               fileMenuActions=(
                                   open, opendir, saveLabel, resetAll, quit),
                               beginner=(), advanced=(),
-                              editMenu=(createpoly, edit, copy, delete,
+                              editMenu=(createpoly, edit, copy, delete,singleRere,
                                         None, color1, self.drawSquaresOption),
-                              beginnerContext=(create, edit, copy, delete),
+                              beginnerContext=(create, edit, copy, delete, singleRere),
                               advancedContext=(createMode, editMode, edit, copy,
                                                delete, shapeLineColor, shapeFillColor),
                               onLoadActive=(
@@ -562,7 +561,7 @@ class MainWindow(QMainWindow, WindowMixin):
                                zoomIn, zoomOut, zoomOrg, None,
                                fitWindow, fitWidth))
-        addActions(self.menus.autolabel, (alcm, None, help)) #
+        addActions(self.menus.autolabel, (AutoRec, reRec, alcm, None, help)) #
         self.menus.file.aboutToShow.connect(self.updateFileMenu)
@@ -572,6 +571,7 @@ class MainWindow(QMainWindow, WindowMixin):
             action('&Copy here', self.copyShape),
             action('&Move here', self.moveShape)))
         self.statusBar().showMessage('%s started.' % __appname__)
         self.statusBar().show()
@@ -919,6 +919,7 @@ class MainWindow(QMainWindow, WindowMixin):
         self.actions.edit.setEnabled(selected)
         self.actions.shapeLineColor.setEnabled(selected)
         self.actions.shapeFillColor.setEnabled(selected)
+        self.actions.singleRere.setEnabled(selected)
     def addLabel(self, shape):
         shape.paintLabel = self.displayLabelOption.isChecked()
@@ -988,6 +989,19 @@ class MainWindow(QMainWindow, WindowMixin):
         self.updateComboBox()
         self.canvas.loadShapes(s)
+    def singleLabel(self, shape):
+        if shape is None:
+            # print('rm empty label')
+            return
+        item = self.shapesToItems[shape]
+        item.setText(shape.label)
+        self.updateComboBox()
+        # ADD:
+        item = self.shapesToItemsbox[shape]
+        item.setText(str([(int(p.x()), int(p.y())) for p in shape.points]))
+        self.updateComboBox()
     def updateComboBox(self):
         # Get the unique labels and add them to the Combobox.
         itemsTextList = [str(self.labelList.item(i).text()) for i in range(self.labelList.count())]
@@ -1441,6 +1455,8 @@ class MainWindow(QMainWindow, WindowMixin):
         self.haveAutoReced = False
         self.AutoRecognition.setEnabled(True)
         self.reRecogButton.setEnabled(True)
+        self.actions.AutoRec.setEnabled(True)
+        self.actions.reRec.setEnabled(True)
         self.actions.saveLabel.setEnabled(True)
@@ -1755,6 +1771,7 @@ class MainWindow(QMainWindow, WindowMixin):
         self.loadFile(self.filePath) # ADD
         self.haveAutoReced = True
         self.AutoRecognition.setEnabled(False)
+        self.actions.AutoRec.setEnabled(False)
         self.setDirty()
         self.saveCacheLabel()
@@ -1794,6 +1811,27 @@ class MainWindow(QMainWindow, WindowMixin):
         else:
             QMessageBox.information(self, "Information", "Draw a box!")
+    def singleRerecognition(self):
+        img = cv2.imread(self.filePath)
+        shape = self.canvas.selectedShape
+        box = [[int(p.x()), int(p.y())] for p in shape.points]
+        assert len(box) == 4
+        img_crop = get_rotate_crop_image(img, np.array(box, np.float32))
+        if img_crop is None:
+            msg = 'Can not recognise the detection box in ' + self.filePath + '. Please change manually'
+            QMessageBox.information(self, "Information", msg)
+            return
+        result = self.ocr.ocr(img_crop, cls=True, det=False)
+        if result[0][0] is not '':
+            result.insert(0, box)
+            print('result in reRec is ', result)
+            if result[1][0] == shape.label:
+                print('label no change')
+            else:
+                shape.label = result[1][0]
+            self.singleLabel(shape)
+            self.setDirty()
+        print(box)
     def autolcm(self):
         vbox = QVBoxLayout()
@@ -1825,6 +1863,7 @@ class MainWindow(QMainWindow, WindowMixin):
         self.dialog.exec_()
         if self.filePath:
             self.AutoRecognition.setEnabled(True)
+            self.actions.AutoRec.setEnabled(True)
     def modelChoose(self):
......
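For context on the shortcut rework above, here is a minimal PyQt5 sketch (not part of the commit; the widget and handler names are illustrative) of the two binding styles it relies on: a key bound directly to a button via `QAbstractButton.setShortcut` (as for the prev/next image buttons with `a`/`d`) and a key bound to a window-level `QAction` (as for delete image, now `Ctrl+Shift+D`).

```python
# Minimal PyQt5 sketch of button-level vs. action-level shortcuts.
import sys
from PyQt5.QtWidgets import QApplication, QMainWindow, QPushButton, QAction

app = QApplication(sys.argv)
win = QMainWindow()

# Button-level shortcut: pressing 'd' is equivalent to clicking the button.
next_btn = QPushButton("Next image", win)
next_btn.setShortcut('d')
next_btn.clicked.connect(lambda: print("next image"))

# Action-level shortcut: fires while the window has focus, no visible button needed.
delete_act = QAction("Delete image", win)
delete_act.setShortcut('Ctrl+Shift+D')
delete_act.triggered.connect(lambda: print("delete image"))
win.addAction(delete_act)

win.setCentralWidget(next_btn)
win.show()
sys.exit(app.exec_())
```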
@@ -6,6 +6,10 @@ PPOCRLabel is a semi-automatic graphic annotation tool suitable for OCR field. I
 <img src="./data/gif/steps_en.gif" width="100%"/>
+### Recent Update
+
+- 2020.12.18: Support re-recognition of a single label box (by [ninetailskim](https://github.com/ninetailskim) ), perfect shortcut keys.
+
 ## Installation
 ### 1. Install PaddleOCR
@@ -92,11 +96,30 @@ Therefore, if the recognition result has been manually changed before, it may ch
 ## Explanation
+### Shortcut keys
+
+| Shortcut keys    | Description                                      |
+| ---------------- | ------------------------------------------------ |
+| Ctrl + shift + A | Automatically label all unchecked images         |
+| Ctrl + shift + R | Re-recognize all the labels of the current image |
+| W                | Create a rect box                                |
+| Q                | Create a four-points box                         |
+| Ctrl + E         | Edit label of the selected box                   |
+| Ctrl + R         | Re-recognize the selected box                    |
+| Backspace        | Delete the selected box                          |
+| Ctrl + V         | Check image                                      |
+| Ctrl + Shift + d | Delete image                                     |
+| D                | Next image                                       |
+| A                | Previous image                                   |
+| Ctrl++           | Zoom in                                          |
+| Ctrl--           | Zoom out                                         |
+| ↑→↓←             | Move selected box                                |
+
 ### Built-in Model
 - Default model: PPOCRLabel uses the Chinese and English ultra-lightweight OCR model in PaddleOCR by default, supports Chinese, English and number recognition, and multiple language detection.
 - Model language switching: Changing the built-in model language is supportable by clicking "PaddleOCR"-"Choose OCR Model" in the menu bar. Currently supported languages include French, German, Korean, and Japanese.
   For specific model download links, please refer to [PaddleOCR Model List](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_en/models_list_en.md#multilingual-recognition-modelupdating)
 - Custom model: The model trained by users can be replaced by modifying PPOCRLabel.py in [PaddleOCR class instantiation](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/PPOCRLabel/PPOCRLabel.py#L110) referring [Custom Model Code](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_en/whl_en.md#use-custom-model)
......
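The custom-model note above boils down to swapping the directories passed when the PaddleOCR wrapper is instantiated. A hedged sketch using the paddleocr whl package is shown below; the model directories, dictionary path, and test image are placeholders, not files shipped with the repository.

```python
# Point the PaddleOCR wrapper at user-trained inference models (paths are examples).
from paddleocr import PaddleOCR

ocr = PaddleOCR(det_model_dir='./inference/my_det_infer/',      # custom detection model
                rec_model_dir='./inference/my_rec_infer/',      # custom recognition model
                rec_char_dict_path='./my_rec_dict.txt',         # dictionary used when training the rec model
                use_angle_cls=True,
                lang='ch')

result = ocr.ocr('./my_test_image.jpg', cls=True)
for line in result:
    print(line)   # [box points, (text, confidence)]
```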
@@ -6,6 +6,10 @@ PPOCRLabel是一款适用于OCR领域的半自动化图形标注工具,使用p
 <img src="./data/gif/steps.gif" width="100%"/>
+#### 近期更新
+
+- 2020.12.18: 支持对单个标记框进行重新识别(by [ninetailskim](https://github.com/ninetailskim) ),完善快捷键。
+
 ## 安装
 ### 1. 安装PaddleOCR
@@ -72,6 +76,26 @@ python3 PPOCRLabel.py --lang ch
 | crop_img | 识别数据。按照检测框切割后的图片。与rec_gt.txt同时产生。 |
 ## 说明
+### 快捷键
+
+| 快捷键           | 说明                         |
+| ---------------- | ---------------------------- |
+| Ctrl + shift + A | 自动标注所有未确认过的图片   |
+| Ctrl + shift + R | 对当前图片的所有标记重新识别 |
+| W                | 新建矩形框                   |
+| Q                | 新建四点框                   |
+| Ctrl + E         | 编辑所选框标签               |
+| Ctrl + R         | 重新识别所选标记             |
+| Backspace        | 删除所选框                   |
+| Ctrl + V         | 确认本张图片标记             |
+| Ctrl + Shift + d | 删除本张图片                 |
+| D                | 下一张图片                   |
+| A                | 上一张图片                   |
+| Ctrl++           | 缩小                         |
+| Ctrl--           | 放大                         |
+| ↑→↓←             | 移动标记框                   |
+
 ### 内置模型
 - 默认模型:PPOCRLabel默认使用PaddleOCR中的中英文超轻量OCR模型,支持中英文与数字识别,多种语言检测。
......
@@ -46,8 +46,9 @@ class Worker(QThread):
                 chars = res[1][0]
                 cond = res[1][1]
                 posi = res[0]
-                strs += "Transcription: " + chars + " Probability: " + str(
-                    cond) + " Location: " + json.dumps(posi) + '\n'
+                strs += "Transcription: " + chars + " Probability: " + str(cond) + \
+                        " Location: " + json.dumps(posi) +'\n'
+            # Sending large amounts of data repeatedly through pyqtSignal may affect the program efficiency
             self.listValue.emit(strs)
             self.mainThread.result_dic = self.result_dic
             self.mainThread.filePath = Imgpath
......
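The new comment in the worker warns that repeatedly emitting a growing string through pyqtSignal can hurt efficiency. Below is a minimal sketch (the class and signal names are illustrative, not the project's Worker) of one way to keep the per-signal payload small: emit only the line just produced and let the UI append it.

```python
# Sketch of a QThread that emits one result line per signal instead of the whole buffer.
from PyQt5.QtCore import QThread, pyqtSignal

class RecognitionWorker(QThread):
    lineReady = pyqtSignal(str)          # carries a single result line per emit

    def __init__(self, results, parent=None):
        super().__init__(parent)
        self.results = results           # e.g. [(chars, prob, position), ...]

    def run(self):
        for chars, prob, posi in self.results:
            # Small payload per emit; the receiving slot appends it to the list widget.
            self.lineReady.emit("Transcription: %s Probability: %s Location: %s"
                                % (chars, prob, posi))
```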
This diff is collapsed.
@@ -94,4 +94,5 @@ ok=确认
 autolabeling=自动标注中
 hideBox=隐藏所有标注
 showBox=显示所有标注
-saveLabel=保存标记结果
\ No newline at end of file
+saveLabel=保存标记结果
+singleRe=重识别此区块
\ No newline at end of file
saveAsDetail=將標籤保存到其他文件
changeSaveDir=改變存放目錄
openFile=開啟檔案
shapeLineColorDetail=更改線條顏色
resetAll=重置
crtBox=創建區塊
crtBoxDetail=畫一個區塊
dupBoxDetail=複製區塊
verifyImg=驗證圖像
zoominDetail=放大
verifyImgDetail=驗證圖像
saveDetail=將標籤存到
openFileDetail=打開圖像
fitWidthDetail=調整到窗口寬度
tutorial=YouTube教學
editLabel=編輯標籤
openAnnotationDetail=打開標籤文件
quit=結束
shapeFillColorDetail=更改填充顏色
closeCurDetail=關閉目前檔案
closeCur=關閉
deleteImg=刪除圖像
deleteImgDetail=刪除目前圖像
fitWin=調整到跟窗口一樣大小
delBox=刪除選取區塊
boxLineColorDetail=選擇框線顏色
originalsize=原始大小
resetAllDetail=重設所有設定
zoomoutDetail=畫面放大
save=儲存
saveAs=另存為
fitWinDetail=縮放到窗口一樣
openDir=開啟目錄
copyPrevBounding=複製當前圖像中的上一個邊界框
showHide=顯示/隱藏標籤
changeSaveFormat=更改儲存格式
shapeFillColor=填充顏色
quitApp=離開本程式
dupBox=複製區塊
delBoxDetail=刪除區塊
zoomin=放大畫面
info=資訊
openAnnotation=開啟標籤
prevImgDetail=上一個圖像
fitWidth=縮放到跟畫面一樣寬
zoomout=縮小畫面
changeSavedAnnotationDir=更改預設標籤存的目錄
nextImgDetail=下一個圖像
originalsizeDetail=放大到原始大小
prevImg=上一個圖像
tutorialDetail=顯示示範內容
shapeLineColor=形狀線條顏色
boxLineColor=日期分隔線顏色
editLabelDetail=修改所選區塊的標籤
nextImg=下一張圖片
useDefaultLabel=使用預設標籤
useDifficult=有難度的
boxLabelText=區塊的標籤
labels=標籤
autoSaveMode=自動儲存模式
singleClsMode=單一類別模式
displayLabel=顯示類別
fileList=檔案清單
files=檔案
iconList=XX
icon=XX
advancedMode=進階模式
advancedModeDetail=切到進階模式
showAllBoxDetail=顯示所有區塊
hideAllBoxDetail=隱藏所有區塊
@@ -94,4 +94,5 @@ ok=OK
 autolabeling=Automatic Labeling
 hideBox=Hide All Box
 showBox=Show All Box
-saveLabel=Save Label
\ No newline at end of file
+saveLabel=Save Label
+singleRe=Re-recognition RectBox
\ No newline at end of file
@@ -69,12 +69,14 @@ fusion_generator:
 1. You can run `tools/synth_image` and generate the demo image, which is saved in the current folder.
 ```python
-python3 -m tools.synth_image -c configs/config.yml --style_image examples/style_images/2.jpg --text_corpus PaddleOCR --language en
+python3 tools/synth_image.py -c configs/config.yml --style_image examples/style_images/2.jpg --text_corpus PaddleOCR --language en
 ```
 * Note 1: The language options is correspond to the corpus. Currently, the tool only supports English, Simplified Chinese and Korean.
 * Note 2: Synth-Text is mainly used to generate images for OCR recognition models.
     So the height of style images should be around 32 pixels. Images in other sizes may behave poorly.
+* Note 3: You can modify `use_gpu` in `configs/config.yml` to determine whether to use GPU for prediction.
 For example, enter the following image and corpus `PaddleOCR`.
@@ -139,9 +141,10 @@ We provide a general dataset containing Chinese, English and Korean (50,000 imag
 2. You can run the following command to start synthesis task:
 ``` bash
-python -m tools.synth_dataset.py -c configs/dataset_config.yml
+python3 tools/synth_dataset.py -c configs/dataset_config.yml
 ```
-We also provide example corpus and images in `examples` folder.
+
+We also provide example corpus and images in `examples` folder.
 <div align="center">
  <img src="examples/style_images/1.jpg" width="300">
  <img src="examples/style_images/2.jpg" width="300">
......
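Note 3 above says GPU use is toggled through `use_gpu` in `configs/config.yml`. A hypothetical helper (not part of Style-Text) is sketched below; it assumes the flag sits under the `Global` section, which matches how the predictor reads it (`config["Global"]["use_gpu"]`).

```python
# Flip the Global.use_gpu flag in a Style-Text style config file (PyYAML required).
import yaml

def set_use_gpu(path='configs/config.yml', use_gpu=False):
    with open(path, 'r', encoding='utf-8') as f:
        cfg = yaml.safe_load(f)
    cfg['Global']['use_gpu'] = use_gpu
    with open(path, 'w', encoding='utf-8') as f:
        yaml.safe_dump(cfg, f, default_flow_style=False, allow_unicode=True)

if __name__ == '__main__':
    set_use_gpu(use_gpu=False)   # e.g. force CPU prediction before running synth_image
```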
@@ -61,11 +61,12 @@ fusion_generator:
 输入一张风格图和一段文字语料,运行tools/synth_image,合成单张图片,结果图像保存在当前目录下:
 ```python
-python3 -m tools.synth_image -c configs/config.yml --style_image examples/style_images/2.jpg --text_corpus PaddleOCR --language en
+python3 tools/synth_image.py -c configs/config.yml --style_image examples/style_images/2.jpg --text_corpus PaddleOCR --language en
 ```
 * 注1:语言选项和语料相对应,目前该工具只支持英文、简体中文和韩语。
 * 注2:Style-Text生成的数据主要应用于OCR识别场景。基于当前PaddleOCR识别模型的设计,我们主要支持高度在32左右的风格图像。
   如果输入图像尺寸相差过多,效果可能不佳。
+* 注3:可以通过修改配置文件中的`use_gpu`(true或者false)参数来决定是否使用GPU进行预测。
 例如,输入如下图片和语料"PaddleOCR":
@@ -127,7 +128,7 @@ python3 -m tools.synth_image -c configs/config.yml --style_image examples/style_
 2. 运行`tools/synth_dataset`合成数据:
 ``` bash
-python -m tools.synth_dataset -c configs/dataset_config.yml
+python tools/synth_dataset.py -c configs/dataset_config.yml
 ```
 我们在examples目录下提供了样例图片和语料。
 <div align="center">
......
@@ -28,6 +28,7 @@ class StyleTextRecPredictor(object):
         ], "Generator {} not supported.".format(algorithm)
         use_gpu = config["Global"]['use_gpu']
         check_gpu(use_gpu)
+        paddle.set_device('gpu' if use_gpu else 'cpu')
         self.logger = get_logger()
         self.generator = getattr(style_text_rec, algorithm)(config)
         self.height = config["Global"]["image_height"]
......
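The added line makes the device choice explicit instead of relying on Paddle's default placement. A quick standalone sketch of the same pattern (assuming PaddlePaddle 2.x) is shown here.

```python
# Select the execution device explicitly before building any predictor.
import paddle

use_gpu = paddle.is_compiled_with_cuda()        # or read config["Global"]["use_gpu"]
paddle.set_device('gpu' if use_gpu else 'cpu')
print(paddle.get_device())                      # e.g. 'gpu:0' or 'cpu'
```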
@@ -11,6 +11,14 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import os
+import sys
+
+__dir__ = os.path.dirname(os.path.abspath(__file__))
+sys.path.append(__dir__)
+sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))
+
 from engine.synthesisers import DatasetSynthesiser
......
@@ -16,13 +16,13 @@ import cv2
 import sys
 import glob
-from utils.config import ArgsParser
-from engine.synthesisers import ImageSynthesiser
 __dir__ = os.path.dirname(os.path.abspath(__file__))
 sys.path.append(__dir__)
 sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))
+from utils.config import ArgsParser
+from engine.synthesisers import ImageSynthesiser
 def synth_image():
     args = ArgsParser().parse_args()
......
@@ -107,10 +107,10 @@ make inference_lib_dist
 For more compilation parameter options, please refer to the official website of the Paddle C++ inference library:[https://www.paddlepaddle.org.cn/documentation/docs/en/advanced_guide/inference_deployment/inference/build_and_install_lib_en.html](https://www.paddlepaddle.org.cn/documentation/docs/en/advanced_guide/inference_deployment/inference/build_and_install_lib_en.html).
-* After the compilation process, you can see the following files in the folder of `build/fluid_inference_install_dir/`.
+* After the compilation process, you can see the following files in the folder of `build/paddle_inference_install_dir/`.
 ```
-build/fluid_inference_install_dir/
+build/paddle_inference_install_dir/
 |-- CMakeCache.txt
 |-- paddle
 |-- third_party
......
@@ -81,14 +81,14 @@ void ResizeImgType0::Run(const cv::Mat &img, cv::Mat &resize_img,
   else if (resize_h / 32 < 1 + 1e-5)
     resize_h = 32;
   else
-    resize_h = (resize_h / 32 - 1) * 32;
+    resize_h = (resize_h / 32) * 32;
   if (resize_w % 32 == 0)
     resize_w = resize_w;
   else if (resize_w / 32 < 1 + 1e-5)
     resize_w = 32;
   else
-    resize_w = (resize_w / 32 - 1) * 32;
+    resize_w = (resize_w / 32) * 32;
   cv::resize(img, resize_img, cv::Size(resize_w, resize_h));
......
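A quick illustrative check of the integer arithmetic changed above (the surrounding `% 32` tests imply the values are ints, so `/ 32` truncates): the old formula shrank the target size by an extra 32 pixels, while the new one rounds down to the nearest multiple of 32.

```python
# Compare the old and new rounding formulas for a few non-multiple-of-32 sizes.
def old_round(v):
    return (v // 32 - 1) * 32    # previous: (v / 32 - 1) * 32

def new_round(v):
    return (v // 32) * 32        # patched:  (v / 32) * 32

for v in (100, 150, 333):
    print(v, '->', old_round(v), '(old) vs', new_round(v), '(new)')
# 100 -> 64 vs 96, 150 -> 96 vs 128, 333 -> 288 vs 320
```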
@@ -11,7 +11,7 @@ max_side_len 960
 det_db_thresh 0.3
 det_db_box_thresh 0.5
 det_db_unclip_ratio 2.0
-det_model_dir ./inference/ch__ppocr_mobile_v2.0_det_infer/
+det_model_dir ./inference/ch_ppocr_mobile_v2.0_det_infer/
 # cls config
 use_angle_cls 0
......
@@ -117,7 +117,7 @@ python3 tools/eval.py -c configs/cls/cls_mv3.yml -o Global.checkpoints={path/to/
 ```
 # 预测分类结果
-python3 tools/infer_cls.py -c configs/cls/cls_mv3.yml -o Global.checkpoints={path/to/weights}/best_accuracy Global.infer_img=doc/imgs_words/ch/word_1.jpg
+python3 tools/infer_cls.py -c configs/cls/cls_mv3.yml -o Global.pretrained_model={path/to/weights}/best_accuracy Global.load_static_weights=false Global.infer_img=doc/imgs_words/ch/word_1.jpg
 ```
 预测图片:
......
@@ -120,16 +120,16 @@ python3 tools/eval.py -c configs/det/det_mv3_db.yml -o Global.checkpoints="{pat
 测试单张图像的检测效果
 ```shell
-python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/img_10.jpg" Global.checkpoints="./output/det_db/best_accuracy"
+python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/img_10.jpg" Global.pretrained_model="./output/det_db/best_accuracy" Global.load_static_weights=false
 ```
 测试DB模型时,调整后处理阈值,
 ```shell
-python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/img_10.jpg" Global.checkpoints="./output/det_db/best_accuracy" PostProcess.box_thresh=0.6 PostProcess.unclip_ratio=1.5
+python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/img_10.jpg" Global.pretrained_model="./output/det_db/best_accuracy" Global.load_static_weights=false PostProcess.box_thresh=0.6 PostProcess.unclip_ratio=1.5
 ```
 测试文件夹下所有图像的检测效果
 ```shell
-python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/" Global.checkpoints="./output/det_db/best_accuracy"
+python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/" Global.pretrained_model="./output/det_db/best_accuracy" Global.load_static_weights=false
 ```
@@ -245,7 +245,10 @@ python3 tools/infer/predict_det.py --det_algorithm="SAST" --image_dir="./doc/img
 超轻量中文识别模型推理,可以执行如下命令:
 ```
-python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/ch/word_4.jpg" --rec_model_dir="./inference/rec_crnn/"
+# 下载超轻量中文识别模型:
+wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar
+tar xf ch_ppocr_mobile_v2.0_rec_infer.tar
+python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/ch/word_4.jpg" --rec_model_dir="ch_ppocr_mobile_v2.0_rec_infer"
 ```
 ![](../imgs_words/ch/word_4.jpg)
@@ -266,7 +269,6 @@ Predicts of ./doc/imgs_words/ch/word_4.jpg:('实力活力', 0.98458153)
 ```
 python3 tools/export_model.py -c configs/rec/rec_r34_vd_none_bilstm_ctc.yml -o Global.pretrained_model=./rec_r34_vd_none_bilstm_ctc_v2.0_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/rec_crnn
 ```
 CRNN 文本识别模型推理,可以执行如下命令:
@@ -327,7 +329,10 @@ Predicts of ./doc/imgs_words/korean/1.jpg:('바탕으로', 0.9948904)
 方向分类模型推理,可以执行如下命令:
 ```
-python3 tools/infer/predict_cls.py --image_dir="./doc/imgs_words/ch/word_4.jpg" --cls_model_dir="./inference/cls/"
+# 下载超轻量中文方向分类器模型:
+wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar
+tar xf ch_ppocr_mobile_v2.0_cls_infer.tar
+python3 tools/infer/predict_cls.py --image_dir="./doc/imgs_words/ch/word_4.jpg" --cls_model_dir="ch_ppocr_mobile_v2.0_cls_infer"
 ```
 ![](../imgs_words/ch/word_1.jpg)
......
@@ -324,7 +324,6 @@ Eval:
 评估数据集可以通过 `configs/rec/rec_icdar15_train.yml` 修改Eval中的 `label_file_path` 设置。
-*注意* 评估时必须确保配置文件中 infer_img 字段为空
 ```
 # GPU 评估, Global.checkpoints 为待测权重
 python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/rec_icdar15_train.yml -o Global.checkpoints={path/to/weights}/best_accuracy
@@ -342,7 +341,7 @@ python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/rec
 ```
 # 预测英文结果
-python3 tools/infer_rec.py -c configs/rec/rec_icdar15_train.yml -o Global.checkpoints={path/to/weights}/best_accuracy Global.infer_img=doc/imgs_words/en/word_1.png
+python3 tools/infer_rec.py -c configs/rec/rec_icdar15_train.yml -o Global.pretrained_model={path/to/weights}/best_accuracy Global.load_static_weights=false Global.infer_img=doc/imgs_words/en/word_1.png
 ```
 预测图片:
@@ -361,7 +360,7 @@ infer_img: doc/imgs_words/en/word_1.png
 ```
 # 预测中文结果
-python3 tools/infer_rec.py -c configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml -o Global.checkpoints={path/to/weights}/best_accuracy Global.infer_img=doc/imgs_words/ch/word_1.jpg
+python3 tools/infer_rec.py -c configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml -o Global.pretrained_model={path/to/weights}/best_accuracy Global.load_static_weights=false Global.infer_img=doc/imgs_words/ch/word_1.jpg
 ```
 预测图片:
......