Commit cce6e1bf authored by chenych's avatar chenych
Browse files

First commit.

parents
Pipeline #640 failed with stages
in 0 seconds
*.mdb
*.tar
*.zip
*.eps
*.pdf
### Linux ###
*~
# temporary files which can be created if a process still has a handle open of a deleted file
.fuse_hidden*
# KDE directory preferences
.directory
# Linux trash folder which might appear on any partition or disk
.Trash-*
# .nfs files are created when an open file is removed but is still being accessed
.nfs*
### OSX ###
# General
.DS_Store
.AppleDouble
.LSOverride
# Icon must end with two \r
Icon
# Thumbnails
._*
# Files that might appear in the root of a volume
.DocumentRevisions-V100
.fseventsd
.Spotlight-V100
.TemporaryItems
.Trashes
.VolumeIcon.icns
.com.apple.timemachine.donotpresent
# Directories potentially created on remote AFP share
.AppleDB
.AppleDesktop
Network Trash Folder
Temporary Items
.apdisk
### Python ###
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
### Python Patch ###
.venv/
### Python.VirtualEnv Stack ###
# Virtualenv
# http://iamzed.com/2009/05/07/a-primer-on-virtualenv/
[Bb]in
[Ii]nclude
[Ll]ib64
[Ll]ocal
[Ss]cripts
pyvenv.cfg
pip-selfcheck.json
### Windows ###
# Windows thumbnail cache files
Thumbs.db
ehthumbs.db
ehthumbs_vista.db
# Dump file
*.stackdump
# Folder config file
[Dd]esktop.ini
# Recycle Bin used on file shares
$RECYCLE.BIN/
# Windows Installer files
*.cab
*.msi
*.msix
*.msm
*.msp
# Windows shortcuts
*.lnk
.idea/
.vscode/
output/
exp/
data/
*.pyc
*.mp4
*.zip
\ No newline at end of file
AdelaiDet for non-commercial purposes
(For commercial use, contact chhshen@gmail.com for obtaining a commerical license.)
Copyright (c) 2019 the authors
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# DeepSolo
## 论文
[DeepSolo: Let Transformer Decoder with Explicit Points Solo for Text Spotting](https://arxiv.org/abs/2211.10772)
[DeepSolo++: Let Transformer Decoder with Explicit Points Solo for Text Spotting](https://arxiv.org/abs/2305.19957)
## 模型结构
一个简洁的类似DETR的基线,允许一个具有显式点的解码器同时进行检测和识别(图 (c)、(f))。
<div align=center>
<img src="./doc/image.png"/>
</div>
## 算法原理
DeepSolo中,编码器在接收到图像特征后,生成由四个Bezier控制点表示的Bezier中心曲线候选和相应的分数,然后,选择前K个评分的候选。对于每个选定的曲线候选,在曲线上均匀采样N个点,这些点的坐标被编码为位置query并将其添加到内容query中形成复合query。接下来,将复合query输入deformable cross-attention解码器收集有用的文本特征。在解码器之后,采用了几个简单的并行预测头(线性层或MLP)将query解码为文本的中心线、边界、script和置信度,从而同时解决检测和识别问题。
<div align=center>
<img src="./doc/DeepSolo.jpg"/>
</div>
## 环境配置
训练需要依赖Detectron2库,编译Detectron2库需要满足 Python ≥ 3.7,PyTorch ≥ 1.8 并且 torchvision 与 PyTorch 版本匹配,gcc & g++ ≥ 5.4。如果想要更快的构建,推荐安装Ninja。
Tips: 如果detectron2安装失败,可尝试以下方式进行安装:
```
git clone https://github.com/facebookresearch/detectron2.git
python -m pip install -e detectron2
```
### Docker(方法一)
-v 路径、docker_name和imageID根据实际情况修改
```
docker pull image.sourcefind.cn:5000/dcu/admin/base/pytorch:1.13.1-centos7.6-dtk-23.04-py38-latest
docker run -it -v /path/your_code_data/:/path/your_code_data/ --shm-size=32G --privileged=true --device=/dev/kfd --device=/dev/dri/ --group-add video --name docker_name imageID bash
cd /your_code_path/deepsolo_pytorch
pip install -r requirements.txt
python -m pip install 'git+https://github.com/facebookresearch/detectron2.git'
bash make.sh
```
### Dockerfile(方法二)
-v 路径、docker_name和imageID根据实际情况修改
```
cd ./docker
cp ../requirements.txt requirements.txt
docker build --no-cache -t deepsolo:latest .
docker run -it -v /path/your_code_data/:/path/your_code_data/ --shm-size=32G --privileged=true --device=/dev/kfd --device=/dev/dri/ --group-add video --name docker_name imageID bash
cd /your_code_path/deepsolo_pytorch
pip install -r requirements.txt
python -m pip install 'git+https://github.com/facebookresearch/detectron2.git'
bash make.sh
```
### Anaconda(方法三)
1、关于本项目DCU显卡所需的特殊深度学习库可从光合开发者社区下载安装: https://developer.hpccube.com/tool/
```
DTK软件栈:dtk23.04
python:python3.8
torch:1.13.1
torchvision:0.14.1
```
Tips:以上dtk软件栈、python、torch等DCU相关工具版本需要严格一一对应
2、其他非特殊库直接按照下面步骤进行安装
```
pip install -r requirements.txt
python -m pip install 'git+https://github.com/facebookresearch/detectron2.git'
bash make.sh
```
## 数据集
所有的数据集请保存在 deepsolo_pytorch/datasets 下,因数据集较大,请按训练的需求进行选择下载。训练需求详见configs中yaml的DATASETS字段。
### 训练数据集
`[SynthText150K (CurvedSynText150K)]` [images](https://github.com/aim-uofa/AdelaiDet/tree/master/datasets) | [annotations(Part1)](https://1drv.ms/u/s!ApEsJ9RIZdBQgQTfQC578sYbkPik?e=2Yz06g) | [annotations(Part2)](https://1drv.ms/u/s!ApEsJ9RIZdBQgQJWqH404p34Wb1m?e=KImg6N)
`[MLT]` [images](https://github.com/aim-uofa/AdelaiDet/tree/master/datasets) | [annotations](https://1drv.ms/u/s!ApEsJ9RIZdBQgQBpvuvV2KBBbN64?e=HVTCab)
`[ICDAR2013]` [images](https://1drv.ms/u/s!ApEsJ9RIZdBQgQcK05sWzK3_t26T?e=5jTWAa) | [annotations](https://1drv.ms/u/s!ApEsJ9RIZdBQfbgqFCeiKOrTM0E?e=UMfIQh)
`[ICDAR2015]` [images](https://1drv.ms/u/s!ApEsJ9RIZdBQgQbupfCNqVxtYGna?e=b4TQY2) | [annotations](https://1drv.ms/u/s!ApEsJ9RIZdBQfhGW5JDiNcDxfWQ?e=PZ2JCX)
`[Total-Text]` [images](https://1drv.ms/u/s!ApEsJ9RIZdBQgQjyPyivo_FnjJ1H?e=qgSFYL) | [annotations](https://1drv.ms/u/s!ApEsJ9RIZdBQgQOShwd8O0K5Dd1f?e=GYyPAX)
`[CTW1500]` [images](https://1drv.ms/u/s!ApEsJ9RIZdBQgQlZVAH5AJld3Y9g?e=zgG71Z) | [annotations](https://1drv.ms/u/s!ApEsJ9RIZdBQfPpyzxoFV34zBg4?e=WK20AN)
`[TextOCR]` [images](https://dl.fbaipublicfiles.com/textvqa/images/train_val_images.zip) | [annotations](https://1drv.ms/u/s!ApEsJ9RIZdBQgQHY3mjH13GRLPGI?e=Dx1O99)
`[Inverse-Text]` [images](https://1drv.ms/u/s!AimBgYV7JjTlgccVhlbD4I3z5QfmsQ?e=myu7Ue) | [annotations](https://1drv.ms/u/s!ApEsJ9RIZdBQf3G4vZpf4QD5NKo?e=xR3GtY)
`[SynChinese130K]` [images](https://github.com/aim-uofa/AdelaiDet/tree/master/datasets) | [annotations](https://1drv.ms/u/s!AimBgYV7JjTlgch5W0n1Iv397i0csw?e=Gq8qww)
`[ArT]` [images](https://github.com/aim-uofa/AdelaiDet/tree/master/datasets) | [annotations](https://1drv.ms/u/s!AimBgYV7JjTlgch45d0VHNCoPC1jfQ?e=likK00)
`[LSVT]` [images](https://github.com/aim-uofa/AdelaiDet/tree/master/datasets) | [annotations](https://1drv.ms/u/s!AimBgYV7JjTlgch7yjmrCSN0TgoO4w?e=NKd5OG)
`[ReCTS]` [images](https://github.com/aim-uofa/AdelaiDet/tree/master/datasets) | [annotations](https://1drv.ms/u/s!AimBgYV7JjTlgch_xZ8otxFWfNgZSg?e=pdq28B)
`[Evaluation ground-truth]` [Link](https://1drv.ms/u/s!ApEsJ9RIZdBQem-MG1TjuRWApyA?e=fVPnmT)
### 验证数据集
```
cd datasets
mkdir evaluation
cd evaluation
wget -O gt_ctw1500.zip https://cloudstor.aarnet.edu.au/plus/s/xU3yeM3GnidiSTr/download
wget -O gt_totaltext.zip https://cloudstor.aarnet.edu.au/plus/s/SFHvin8BLUM4cNd/download
wget -O gt_icdar2015.zip "https://drive.google.com/uc?export=download&id=1wrq_-qIyb_8dhYVlDzLZTTajQzbic82Z"
wget -O gt_inversetext.zip https://cloudstor.aarnet.edu.au/plus/s/xU3yeM3GnidiSTr/download
```
### 数据集目录结构
用于正常训练的数据集请按此目录结构进行:
```
├── ./datasets
│ ├── simple
│ ├── test_images
│ ├── train_images
│ ├── test.json
│ └── train.json
│ ├── evaluation
│ ├── gt_totaltext.zip
│ ├── gt_ctw1500.zip
│ ├── gt_icdar2015.zip
│ └── gt_inversetext.zip
│ ├── syntext1
│ ├── train_images
│ └── annotations
│ ├── train_37voc.json
│ └── train_96voc.json
│ ├── syntext2
│ ├── train_images
│ └── annotations
│ ├── train_37voc.json
│ └── train_96voc.json
│ ├── mlt2017
│ ├── train_images
│ ├── train_37voc.json
│ └── train_96voc.json
│ ├── totaltext
│ ├── train_images
│ ├── test_images
│ ├── weak_voc_new.txt
│ ├── weak_voc_pair_list.txt
│ ├── train_37voc.json
│ ├── train_96voc.json
│ └── test.json
│ ├── ic13
│ ├── train_images
│ ├── train_37voc.json
│ └── train_96voc.json
│ ├── ic15
│ ├── train_images
│ ├── test_images
│ ├── new_strong_lexicon
│ ├── strong_lexicon
│ ├── ch4_test_vocabulary.txt
│ ├── ch4_test_vocabulary_new.txt
│ ├── ch4_test_vocabulary_pair_list.txt
│ ├── GenericVocabulary.txt
│ ├── GenericVocabulary_new.txt
│ ├── GenericVocabulary_pair_list.txt
│ ├── train_37voc.json
│ ├── train_96voc.json
│ └── test.json
│ ├── ctw1500
│ ├── train_images
│ ├── test_images
│ ├── weak_voc_new.txt
│ ├── weak_voc_pair_list.txt
│ ├── train_96voc.json
│ └── test.json
│ ├── textocr
│ ├── train_images
│ ├── train_37voc_1.json
│ └── train_37voc_2.json
│ ├── inversetext
│ ├── test_images
│ └── test.json
│ ├── chnsyntext
│ ├── syn_130k_images
│ └── chn_syntext.json
│ ├── ArT
│ ├── rename_artimg_train
│ └── art_train.json
│ ├── LSVT
│ ├── rename_lsvtimg_train
│ └── lsvt_train.json
│ ├── ReCTS
│ ├── ReCTS_train_images # 18,000 images
│ ├── ReCTS_val_images # 2,000 images
│ ├── ReCTS_test_images # 5,000 images
│ ├── rects_train.json
│ ├── rects_val.json
│ └── rects_test.json
```
如果使用自己的数据集,请将数据标注转换成COCO的格式,并在DeepSolo/adet/data/builtin.py代码第18行 _PREDEFINED_SPLITS_TEXT 参数中,参照结构补充自己的数据集。
项目同样提供了迷你数据集simple进行学习。
## 训练
### 单机多卡
Tips: 请根据实际情况修改 train.sh 中的以下参数:
--config-file yaml文件配置地址
--num-gpus 训练卡数量
修改后执行:
```
bash train.sh
```
## 推理
Tips:
如需执行自己的预训练模型,请修改配置:
${CONFIG_FILE} yaml文件配置地址(注意修改预训练模型地址)
${IMAGE_PATH} 待测试数据地址
样例执行步骤:
1. 下载CTW1500的预训练模型:
|Backbone|Training Data|Weights|
|:------:|:------:|:------:|
|Res-50|Synth150K+Total-Text+MLT17+IC13+IC15|[OneDrive](https://1drv.ms/u/s!AimBgYV7JjTlgcdtYzwEBGvOH6CiBw?e=trgKFE)|
将预训练模型放在 pretrained_models/CTW1500/ 文件夹下,如果放置于其他地方,请同步修改配置文件中 MODEL.WEIGHTS 地址
2. 将待测试数据存放于 ${IMAGE_PATH} 下,执行
```
bash test.sh
```
推理结果默认保存在test_results文件夹下,可以使用参数 --output 替换结果保存路径。
## result
CTW1500上的结果展示
<div align=center>
<img src="./doc/results.jpg"/>
</div>
### 精度
基于backbone=R50在ctw1500上的测试结果如下表所示:
|Backbone|External Data|Det-P|Det-R|Det-F1|E2E-None|E2E-Full|
|:------:|:------:|:------:|:------:|:------:|:------:|:------:|
|Res-50(ours)|Synth150K+Total-Text+MLT17+IC13+IC15|0.9325|0.8475|0.8879|0.6408|0.812|
|Res-50|Synth150K+Total-Text+MLT17+IC13+IC15|0.932|0.85|0.889|0.642|0.814|
## 应用场景
### 算法类别
OCR
### 热点应用行业
政府,交通,物流
## 源码仓库及问题反馈
http://developer.hpccube.com/codes/modelzoo/deepsolo_pytorch.git
## 参考资料
https://github.com/ViTAE-Transformer/DeepSolo.git
from adet import modeling
__version__ = "0.1.1"
from .adet_checkpoint import AdetCheckpointer
__all__ = ["AdetCheckpointer"]
import pickle, os
from fvcore.common.file_io import PathManager
from detectron2.checkpoint import DetectionCheckpointer
class AdetCheckpointer(DetectionCheckpointer):
    """
    Checkpointer built on :class:`DetectionCheckpointer` that can also convert
    AdelaiDet models, such as the LPF backbone.
    """

    def _load_file(self, filename):
        """
        Load a checkpoint file and return it as a dict holding a ``"model"`` entry.

        ``.pkl`` files are unpickled here; every other file is delegated to the
        parent class. Checkpoints converted from the Caffe2 layout, and native
        checkpoints whose file name contains "lpf" or "dla", are flagged with
        ``matching_heuristics=True``.
        """
        if filename.endswith(".pkl"):
            # NOTE(review): pickle.load executes arbitrary code from the file;
            # only load checkpoints from trusted sources.
            with PathManager.open(filename, "rb") as handle:
                payload = pickle.load(handle, encoding="latin1")

            if "model" in payload and "__author__" in payload:
                # file is in Detectron2 model zoo format
                self.logger.info("Reading a file from '{}'".format(payload["__author__"]))
                return payload

            # Otherwise assume the file is from the Caffe2 / Detectron1 model zoo.
            # Detection models wrap their weights in "blobs"; ImageNet models don't.
            weights = payload["blobs"] if "blobs" in payload else payload
            weights = {
                name: value
                for name, value in weights.items()
                if not name.endswith("_momentum")
            }
            weights.pop("weight_order", None)
            return {"model": weights, "__author__": "Caffe2", "matching_heuristics": True}

        checkpoint = super()._load_file(filename)  # load native pth checkpoint
        if "model" not in checkpoint:
            checkpoint = {"model": checkpoint}

        lowered_name = os.path.basename(filename).lower()
        if "lpf" in lowered_name or "dla" in lowered_name:
            checkpoint["matching_heuristics"] = True
        return checkpoint
from .config import get_cfg
__all__ = [
"get_cfg",
]
from detectron2.config import CfgNode
def get_cfg() -> CfgNode:
    """
    Build a fresh config object from the project defaults.

    Returns:
        a detectron2 CfgNode instance, cloned from the defaults so the caller
        can modify it without touching the shared default node.
    """
    # Imported inside the function, as in the original, so the defaults module
    # is only loaded when a config is actually requested.
    from .defaults import _C as default_cfg

    return default_cfg.clone()
from detectron2.config.defaults import _C
from detectron2.config import CfgNode as CN
# ---------------------------------------------------------------------------- #
# Additional Configs
# ---------------------------------------------------------------------------- #
# Every assignment below mutates `_C`, the default config node imported from
# detectron2.config.defaults, adding (or overriding) keys on it in place.
_C.MODEL.MOBILENET = False
_C.MODEL.BACKBONE.ANTI_ALIAS = False
_C.MODEL.RESNETS.DEFORM_INTERVAL = 1
_C.INPUT.HFLIP_TRAIN = False
_C.INPUT.CROP.CROP_INSTANCE = True
_C.INPUT.ROTATE = True
# New sub-node MODEL.BASIS_MODULE and its options.
_C.MODEL.BASIS_MODULE = CN()
_C.MODEL.BASIS_MODULE.NAME = "ProtoNet"
_C.MODEL.BASIS_MODULE.NUM_BASES = 4
_C.MODEL.BASIS_MODULE.LOSS_ON = False
_C.MODEL.BASIS_MODULE.ANN_SET = "coco"
_C.MODEL.BASIS_MODULE.CONVS_DIM = 128
_C.MODEL.BASIS_MODULE.IN_FEATURES = ["p3", "p4", "p5"]
_C.MODEL.BASIS_MODULE.NORM = "SyncBN"
_C.MODEL.BASIS_MODULE.NUM_CONVS = 3
_C.MODEL.BASIS_MODULE.COMMON_STRIDE = 8
_C.MODEL.BASIS_MODULE.NUM_CLASSES = 80
_C.MODEL.BASIS_MODULE.LOSS_WEIGHT = 0.3
# New sub-node MODEL.TOP_MODULE and its options.
_C.MODEL.TOP_MODULE = CN()
_C.MODEL.TOP_MODULE.NAME = "conv"
_C.MODEL.TOP_MODULE.DIM = 16
# ---------------------------------------------------------------------------- #
# BAText Options
# ---------------------------------------------------------------------------- #
_C.MODEL.BATEXT = CN()
_C.MODEL.BATEXT.VOC_SIZE = 96
_C.MODEL.BATEXT.NUM_CHARS = 25
_C.MODEL.BATEXT.POOLER_RESOLUTION = (8, 32)
_C.MODEL.BATEXT.IN_FEATURES = ["p2", "p3", "p4"]
_C.MODEL.BATEXT.POOLER_SCALES = (0.25, 0.125, 0.0625)
_C.MODEL.BATEXT.SAMPLING_RATIO = 1
_C.MODEL.BATEXT.CONV_DIM = 256
_C.MODEL.BATEXT.NUM_CONV = 2
_C.MODEL.BATEXT.RECOGNITION_LOSS = "ctc"
_C.MODEL.BATEXT.RECOGNIZER = "attn"
_C.MODEL.BATEXT.CANONICAL_SIZE = 96 # largest min_size for level 3 (stride=8)
_C.MODEL.BATEXT.USE_COORDCONV = False
_C.MODEL.BATEXT.USE_AET = False
_C.MODEL.BATEXT.CUSTOM_DICT = "" # Path to the class file.
# ---------------------------------------------------------------------------- #
# SwinTransformer Options
# ---------------------------------------------------------------------------- #
_C.MODEL.SWIN = CN()
_C.MODEL.SWIN.TYPE = 'tiny'
_C.MODEL.SWIN.DROP_PATH_RATE = 0.2
# ---------------------------------------------------------------------------- #
# ViTAE-v2 Options
# ---------------------------------------------------------------------------- #
_C.MODEL.ViTAEv2 = CN()
_C.MODEL.ViTAEv2.TYPE = 'vitaev2_s'
_C.MODEL.ViTAEv2.DROP_PATH_RATE = 0.2
# ---------------------------------------------------------------------------- #
# (Deformable) Transformer Options
# ---------------------------------------------------------------------------- #
_C.MODEL.TRANSFORMER = CN()
_C.MODEL.TRANSFORMER.ENABLED = False
_C.MODEL.TRANSFORMER.INFERENCE_TH_TEST = 0.4
_C.MODEL.TRANSFORMER.AUX_LOSS = True
_C.MODEL.TRANSFORMER.ENC_LAYERS = 6
_C.MODEL.TRANSFORMER.DEC_LAYERS = 6
_C.MODEL.TRANSFORMER.DIM_FEEDFORWARD = 1024
_C.MODEL.TRANSFORMER.HIDDEN_DIM = 256
_C.MODEL.TRANSFORMER.DROPOUT = 0.0
_C.MODEL.TRANSFORMER.NHEADS = 8
_C.MODEL.TRANSFORMER.NUM_QUERIES = 100
_C.MODEL.TRANSFORMER.ENC_N_POINTS = 4
_C.MODEL.TRANSFORMER.DEC_N_POINTS = 4
_C.MODEL.TRANSFORMER.POSITION_EMBEDDING_SCALE = 6.283185307179586 # 2 PI
_C.MODEL.TRANSFORMER.NUM_FEATURE_LEVELS = 4
_C.MODEL.TRANSFORMER.VOC_SIZE = 37 # a-z + 0-9 + unknown
_C.MODEL.TRANSFORMER.CUSTOM_DICT = "" # Path to the character class file.
_C.MODEL.TRANSFORMER.NUM_POINTS = 25 # the number of point queries for each instance
_C.MODEL.TRANSFORMER.TEMPERATURE = 10000
_C.MODEL.TRANSFORMER.BOUNDARY_HEAD = True # True: with boundary predictions, False: only with center lines
# Loss options live in their own sub-node, MODEL.TRANSFORMER.LOSS.
# NOTE(review): AUX_LOSS is defined both here and on MODEL.TRANSFORMER above;
# which of the two the model reads is not visible in this file - confirm.
_C.MODEL.TRANSFORMER.LOSS = CN()
_C.MODEL.TRANSFORMER.LOSS.AUX_LOSS = True
_C.MODEL.TRANSFORMER.LOSS.FOCAL_ALPHA = 0.25
_C.MODEL.TRANSFORMER.LOSS.FOCAL_GAMMA = 2.0
# bezier proposal loss
_C.MODEL.TRANSFORMER.LOSS.BEZIER_CLASS_WEIGHT = 1.0
_C.MODEL.TRANSFORMER.LOSS.BEZIER_COORD_WEIGHT = 1.0
_C.MODEL.TRANSFORMER.LOSS.BEZIER_SAMPLE_POINTS = 25
# supervise the sampled on-curve points but not 4 Bezier control points
# target loss
_C.MODEL.TRANSFORMER.LOSS.POINT_CLASS_WEIGHT = 1.0
_C.MODEL.TRANSFORMER.LOSS.POINT_COORD_WEIGHT = 1.0
_C.MODEL.TRANSFORMER.LOSS.POINT_TEXT_WEIGHT = 0.5
_C.MODEL.TRANSFORMER.LOSS.BOUNDARY_WEIGHT = 0.5
# Solver additions: optimizer name plus learning-rate settings keyed by
# parameter-name lists.
# NOTE(review): how the *_NAMES lists are matched against parameters is decided
# by the optimizer builder, which is not visible here - confirm there.
_C.SOLVER.OPTIMIZER = "ADAMW"
_C.SOLVER.LR_BACKBONE = 1e-5
_C.SOLVER.LR_BACKBONE_NAMES = []
_C.SOLVER.LR_LINEAR_PROJ_NAMES = []
_C.SOLVER.LR_LINEAR_PROJ_MULT = 0.1
# Lexicon selection used at test time:
# 1 - Generic, 2 - Weak, 3 - Strong (for icdar2015)
# 1 - Full lexicon (for totaltext)
_C.TEST.LEXICON_TYPE = 1
\ No newline at end of file
from .text_evaluation_all import TextEvaluator
#!/usr/bin/env python2
#encoding: UTF-8
import json
import sys;sys.path.append('./')
import zipfile
import re
import sys
import os
import codecs
import importlib
from io import StringIO
from shapely.geometry import *
def print_help():
    """Write the command-line usage string to stdout, then exit with status 2."""
    usage = 'Usage: python %s.py -g=<gtFile> -s=<submFile> [-o=<outputFolder> -p=<jsonParams>]' % (sys.argv[0],)
    sys.stdout.write(usage)
    sys.exit(2)
def load_zip_file_keys(file, fileNameRegExp=''):
    """
    Return the list of entry keys of a ZIP file.

    Args:
        file: path or file-like object accepted by ``zipfile.ZipFile``.
        fileNameRegExp: optional regular expression applied with ``re.match``
            to every entry name. Entries that do not match are skipped. When
            the expression has a capturing group, the key is the first group;
            otherwise the key is the full entry name. An empty string keeps
            every entry.

    Returns:
        list of keys, in archive order.

    Raises:
        Exception: if the archive cannot be opened.
    """
    try:
        archive = zipfile.ZipFile(file, mode='r', allowZip64=True)
    except Exception:
        raise Exception('Error loading the ZIP archive.')

    keys = []
    # The context manager closes the archive; the original left it open.
    with archive:
        for name in archive.namelist():
            if fileNameRegExp == "":
                keys.append(name)
                continue
            m = re.match(fileNameRegExp, name)
            if m is None:
                continue
            keys.append(m.group(1) if len(m.groups()) > 0 else name)
    return keys
def load_zip_file(file, fileNameRegExp='', allEntries=False):
    """
    Return the contents of a ZIP file as a dict mapping key -> raw bytes.

    Args:
        file: path or file-like object accepted by ``zipfile.ZipFile``.
        fileNameRegExp: optional regular expression applied with ``re.match``
            to every entry name. When it has a capturing group, the key is the
            first group; otherwise the key is the full entry name. An empty
            string keeps every entry.
        allEntries: when True, an entry that does not match ``fileNameRegExp``
            raises instead of being skipped.

    Returns:
        dict of key -> entry bytes. If two entries map to the same key, the
        later entry wins.

    Raises:
        Exception: if the archive cannot be opened, or if ``allEntries`` is
            True and an entry name does not match.
    """
    try:
        archive = zipfile.ZipFile(file, mode='r', allowZip64=True)
    except Exception:
        raise Exception('Error loading the ZIP archive')

    contents = {}
    # The context manager closes the archive on every exit path, including the
    # allEntries failure below; the original left it open.
    with archive:
        for name in archive.namelist():
            keyName = name
            if fileNameRegExp != "":
                m = re.match(fileNameRegExp, name)
                if m is None:
                    if allEntries:
                        raise Exception('ZIP entry not valid: %s' % name)
                    continue
                if len(m.groups()) > 0:
                    keyName = m.group(1)
            contents[keyName] = archive.read(name)
    return contents
def decode_utf8(raw):
    """
    Decode raw bytes as UTF-8 and drop a leading byte-order mark.

    Undecodable byte sequences are substituted with U+FFFD (the 'replace'
    error handler), so byte input always decodes.

    Args:
        raw: bytes-like object holding the file contents.

    Returns:
        The decoded str, or None if ``raw`` cannot be decoded at all (for
        example when it is not a bytes-like object).
    """
    try:
        text = codecs.decode(raw, 'utf-8', 'replace')
    except Exception:
        return None
    # A UTF-8 BOM decodes to U+FEFF. The original stripped it with
    # bytes.replace(BOM, '', 1), which raises TypeError on Python 3 and made
    # every BOM-prefixed file come back as None.
    if text.startswith('\ufeff'):
        text = text[1:]
    return text
def validate_lines_in_file_gt(fileName, file_contents, CRLF=True, LTRB=True, withTranscription=False, withConfidence=False, imWidth=0, imHeight=0):
    """
    Validate every non-empty line of a ground-truth file with validate_tl_line_gt.

    Args:
        fileName: name of the sample, used only in error messages.
        file_contents: raw file bytes; decoded with decode_utf8.
        CRLF: split lines on "\\r\\n" when True, on "\\n" otherwise.
        LTRB, withTranscription, withConfidence, imWidth, imHeight: forwarded
            unchanged to validate_tl_line_gt.

    Raises:
        Exception: if the contents cannot be decoded, or if any line fails
            validation (the message names the sample, the line and the cause).
    """
    utf8File = decode_utf8(file_contents)
    if utf8File is None:
        raise Exception("The file %s is not UTF-8" % fileName)

    for line in utf8File.split("\r\n" if CRLF else "\n"):
        # Drop any stray line-ending characters left over from the split.
        line = line.replace("\r", "").replace("\n", "")
        if line == "":
            continue
        try:
            validate_tl_line_gt(line, LTRB, withTranscription, withConfidence, imWidth, imHeight)
        except Exception as e:
            # Keep the message a str: the original encoded it to bytes, which
            # on Python 3 makes it render as b'...'.
            raise Exception("Line in sample not valid. Sample: %s Line: %s Error: %s" % (fileName, line, str(e))) from e
def validate_lines_in_file(fileName, file_contents, CRLF=True, LTRB=True, withTranscription=False, withConfidence=False, imWidth=0, imHeight=0):
    """
    Validate every non-empty line of a file with validate_tl_line.

    Args:
        fileName: name of the sample, used only in error messages.
        file_contents: raw file bytes; decoded with decode_utf8.
        CRLF: split lines on "\\r\\n" when True, on "\\n" otherwise.
        LTRB, withTranscription, withConfidence, imWidth, imHeight: forwarded
            unchanged to validate_tl_line.

    Raises:
        Exception: if the contents cannot be decoded, or if any line fails
            validation (the message names the sample, the line and the cause).
    """
    utf8File = decode_utf8(file_contents)
    if utf8File is None:
        raise Exception("The file %s is not UTF-8" % fileName)

    for line in utf8File.split("\r\n" if CRLF else "\n"):
        # Drop any stray line-ending characters left over from the split.
        line = line.replace("\r", "").replace("\n", "")
        if line == "":
            continue
        try:
            validate_tl_line(line, LTRB, withTranscription, withConfidence, imWidth, imHeight)
        except Exception as e:
            # Keep the message a str: the original encoded it to bytes, which
            # on Python 3 makes it render as b'...'.
            raise Exception("Line in sample not valid. Sample: %s Line: %s Error: %s" % (fileName, line, str(e))) from e
def validate_tl_line_gt(line, LTRB=True, withTranscription=True, withConfidence=True, imWidth=0, imHeight=0):
    """
    Check that one ground-truth text line is well formed.

    The line is parsed by get_tl_line_values_gt and the parsed values are
    discarded; any exception raised during parsing propagates to the caller.
    The accepted line format, and which flag combinations are implemented,
    are defined by get_tl_line_values_gt. When imWidth and imHeight are both
    positive, every point must also lie inside the image bounds.
    """
    get_tl_line_values_gt(
        line,
        LTRB=LTRB,
        withTranscription=withTranscription,
        withConfidence=withConfidence,
        imWidth=imWidth,
        imHeight=imHeight,
    )
def validate_tl_line(line, LTRB=True, withTranscription=True, withConfidence=True, imWidth=0, imHeight=0):
    """
    Check that one text line is well formed.

    The line is parsed by get_tl_line_values and the parsed values are
    discarded; any exception raised during parsing propagates to the caller.
    The accepted line format, and which flag combinations are implemented,
    are defined by get_tl_line_values. When imWidth and imHeight are both
    positive, every point must also lie inside the image bounds.
    """
    get_tl_line_values(
        line,
        LTRB=LTRB,
        withTranscription=withTranscription,
        withConfidence=withConfidence,
        imWidth=imWidth,
        imHeight=imHeight,
    )
def get_tl_line_values_gt(line, LTRB=True, withTranscription=False, withConfidence=False, imWidth=0, imHeight=0):
    """
    Parse one ground-truth text line into (points, confidence, transcription).

    Only one mode is implemented: ``LTRB=False``, ``withTranscription=True``,
    ``withConfidence=False``. In that mode the line must look like
    ``x1,y1,x2,y2,...,xn,yn,####transcription``.

    Args:
        line: the text line to parse.
        LTRB: must be False; the rectangle format is not implemented.
        withTranscription: must be True.
        withConfidence: must be False.
        imWidth, imHeight: when both are positive, every point is checked
            against the image bounds.

    Returns:
        tuple ``(points, confidence, transcription)``: ``points`` is a flat
        list of floats, ``confidence`` is always 0.0, and ``transcription`` is
        the text after ``,####`` (surrounding double quotes removed and
        escaped characters restored when the text is quoted).

    Raises:
        Exception: if ``LTRB`` is True.
        NotImplementedError: for any other unsupported flag combination.
        ValueError: if the ``,####`` separator is missing or a coordinate is
            not a number.
        AssertionError: if the number of coordinates is odd.
        Any exception raised by validate_clockwise_points or
        validate_point_inside_bounds.
    """
    if LTRB:
        # do not use
        raise Exception('Not implemented.')
    if withConfidence or not withTranscription:
        # The original raised a plain string here, which Python 3 reports as
        # an unrelated TypeError.
        raise NotImplementedError('not implemented')

    ptr = line.strip().split(',####')
    if len(ptr) < 2:
        raise ValueError("Missing ',####' separator between coordinates and transcription.")
    cors = ptr[0].split(',')
    assert(len(cors)%2 == 0), 'num cors should be even.'
    points = [float(ic) for ic in cors]

    validate_clockwise_points(points)
    if imWidth > 0 and imHeight > 0:
        for ip in range(0, len(points), 2):
            validate_point_inside_bounds(points[ip], points[ip+1], imWidth, imHeight)

    confidence = 0.0
    transcription = ptr[1].strip()
    m2 = re.match(r'^\s*\"(.*)\"\s*$', transcription)
    if m2 is not None:
        # Transcription with double quotes: extract the value and replace escaped characters.
        transcription = m2.group(1).replace("\\\\", "\\").replace("\\\"", "\"")
    return points, confidence, transcription
def get_tl_line_values(line, LTRB=True, withTranscription=False, withConfidence=False, imWidth=0, imHeight=0):
    """
    Parse one text line into (points, confidence, transcription).

    Only one mode is implemented: ``LTRB=False``, ``withTranscription=True``,
    ``withConfidence=False``. In that mode the line must look like
    ``x1,y1,x2,y2,...,xn,yn,####transcription``.

    Args:
        line: the text line to parse.
        LTRB: must be False; the rectangle format is not implemented.
        withTranscription: must be True.
        withConfidence: must be False.
        imWidth, imHeight: when both are positive, every point is checked
            against the image bounds.

    Returns:
        tuple ``(points, confidence, transcription)``: ``points`` is a flat
        list of floats, ``confidence`` is always 0.0, and ``transcription`` is
        the text after ``,####`` (surrounding double quotes removed and
        escaped characters restored when the text is quoted).

    Raises:
        Exception: if ``LTRB`` is True.
        NotImplementedError: for any other unsupported flag combination.
        ValueError: if the ``,####`` separator is missing or a coordinate is
            not a number.
        AssertionError: if the number of coordinates is odd.
        Any exception raised by validate_clockwise_points or
        validate_point_inside_bounds.
    """
    if LTRB:
        # do not use
        raise Exception('Not implemented.')
    if withConfidence or not withTranscription:
        # The original raised a plain string here, which Python 3 reports as
        # an unrelated TypeError.
        raise NotImplementedError('not implemented')

    ptr = line.strip().split(',####')
    if len(ptr) < 2:
        raise ValueError("Missing ',####' separator between coordinates and transcription.")
    cors = ptr[0].split(',')
    assert(len(cors)%2 == 0), 'num cors should be even.'
    points = [float(ic) for ic in cors]

    validate_clockwise_points(points)
    if imWidth > 0 and imHeight > 0:
        for ip in range(0, len(points), 2):
            validate_point_inside_bounds(points[ip], points[ip+1], imWidth, imHeight)

    confidence = 0.0
    transcription = ptr[1].strip()
    m2 = re.match(r'^\s*\"(.*)\"\s*$', transcription)
    if m2 is not None:
        # Transcription with double quotes: extract the value and replace escaped characters.
        transcription = m2.group(1).replace("\\\\", "\\").replace("\\\"", "\"")
    return points, confidence, transcription
def validate_point_inside_bounds(x, y, imWidth, imHeight):
    """
    Check that the point (x, y) lies inside an image of the given size.

    Both bounds are inclusive: 0 <= x <= imWidth and 0 <= y <= imHeight.

    Raises:
        Exception: if x or y is out of range; the message reports the
            offending value and the image dimensions.
    """
    # The original messages referenced undefined names (xmin / ymin), and the
    # Y message had more placeholders than arguments, so an out-of-bounds
    # point raised NameError instead of the intended message.
    if x < 0 or x > imWidth:
        raise Exception("X value (%s) not valid. Image dimensions: (%s,%s)" % (x, imWidth, imHeight))
    if y < 0 or y > imHeight:
        raise Exception("Y value (%s) not valid. Image dimensions: (%s,%s)" % (y, imWidth, imHeight))
def validate_clockwise_points(points):
    """
    Validate that a flat coordinate list describes a valid, clockwise polygon.

    ``points`` is read as consecutive (x, y) pairs. The check fails with an
    AssertionError when the pairs cannot form a shapely Polygon, when that
    polygon is not valid, or when the ring through the points is
    counter-clockwise. The polygon may have any number of vertices; an older
    check restricted to exactly 4 points was dropped in favour of this one.
    """
    vertices = []
    for idx in range(0, len(points), 2):
        vertices.append((points[idx], points[idx + 1]))

    try:
        polygon = Polygon(vertices)
    except:
        assert False, ('not a valid polygon', vertices)

    # The polygon should be valid.
    if not polygon.is_valid:
        assert False, ('polygon has intersection sides', vertices)

    ring = LinearRing(vertices)
    if ring.is_ccw:
        assert False, ("Points are not clockwise. The coordinates of bounding quadrilaterals have to be given in clockwise order. Regarding the correct interpretation of 'clockwise' remember that the image coordinate system used is the standard one, with the image origin at the upper left, the X axis extending to the right and Y axis extending downwards.")
def get_tl_line_values_from_file_contents(content, CRLF=True, LTRB=True, withTranscription=False, withConfidence=False, imWidth=0, imHeight=0, sort_by_confidences=True):
    """
    Parse every non-empty line of ``content`` with get_tl_line_values_gt.

    Lines are split on "\\r\\n" when CRLF is True and on "\\n" otherwise. The
    remaining flags are forwarded unchanged to get_tl_line_values_gt, which
    defines the accepted line format.

    Returns:
        tuple ``(pointsList, confidencesList, transcriptionsList)`` of
        parallel lists, one entry per parsed line. When withConfidence and
        sort_by_confidences are both set and at least one line was parsed, the
        three lists are reordered by descending confidence.
    """
    parsed = []
    for raw_line in content.split("\r\n" if CRLF else "\n"):
        text = raw_line.replace("\r", "").replace("\n", "")
        if text == "":
            continue
        pts, conf, trans = get_tl_line_values_gt(text, LTRB, withTranscription, withConfidence, imWidth, imHeight)
        parsed.append((pts, conf, trans))

    pointsList = [entry[0] for entry in parsed]
    confidencesList = [entry[1] for entry in parsed]
    transcriptionsList = [entry[2] for entry in parsed]

    if withConfidence and len(confidencesList) > 0 and sort_by_confidences:
        import numpy as np
        order = np.argsort(-np.array(confidencesList))
        confidencesList = [confidencesList[k] for k in order]
        pointsList = [pointsList[k] for k in order]
        transcriptionsList = [transcriptionsList[k] for k in order]

    return pointsList, confidencesList, transcriptionsList
def get_tl_line_values_from_file_contents_det(content, CRLF=True, LTRB=True, withTranscription=False, withConfidence=False, imWidth=0, imHeight=0, sort_by_confidences=True):
    """
    Parse every non-empty line of ``content`` with get_tl_line_values.

    Lines are split on "\\r\\n" when CRLF is True and on "\\n" otherwise. The
    remaining flags are forwarded unchanged to get_tl_line_values, which
    defines the accepted line format.

    Returns:
        tuple ``(pointsList, confidencesList, transcriptionsList)`` of
        parallel lists, one entry per parsed line. When withConfidence and
        sort_by_confidences are both set and at least one line was parsed, the
        three lists are reordered by descending confidence.
    """
    parsed = []
    for raw_line in content.split("\r\n" if CRLF else "\n"):
        text = raw_line.replace("\r", "").replace("\n", "")
        if text == "":
            continue
        pts, conf, trans = get_tl_line_values(text, LTRB, withTranscription, withConfidence, imWidth, imHeight)
        parsed.append((pts, conf, trans))

    pointsList = [entry[0] for entry in parsed]
    confidencesList = [entry[1] for entry in parsed]
    transcriptionsList = [entry[2] for entry in parsed]

    if withConfidence and len(confidencesList) > 0 and sort_by_confidences:
        import numpy as np
        order = np.argsort(-np.array(confidencesList))
        confidencesList = [confidencesList[k] for k in order]
        pointsList = [pointsList[k] for k in order]
        transcriptionsList = [transcriptionsList[k] for k in order]

    return pointsList, confidencesList, transcriptionsList
def main_evaluation(p,det_file, gt_file, default_evaluation_params_fn,validate_data_fn,evaluate_method_fn,show_result=True,per_sample=True):
    """
    Validate a submission against the ground truth and evaluate it.

    Params:
    p: accepted for interface compatibility only. Its value is discarded and replaced by a
       dictionary that holds just 'g' (gt_file) and 's' (det_file).
    det_file: submission location, passed to the callbacks as the second argument
    gt_file: ground-truth location, passed to the callbacks as the first argument
    default_evaluation_params_fn: points to a function that returns a dictionary with the default parameters used for the evaluation
    validate_data_fn: points to a method that validates the correct format of the submission
    evaluate_method_fn: points to a function that evaluates the submission and returns a dictionary with the results

    Returns the result dictionary ('calculated', 'Message', 'method', 'per_sample')
    updated with whatever evaluate_method_fn returned. Exceptions raised by the
    callbacks are not caught here; they propagate to the caller.
    """
    # The caller-supplied p is not consulted; only the two file locations are used.
    p = {'g': gt_file, 's': det_file}

    evalParams = default_evaluation_params_fn()
    if 'p' in p:
        overrides = p['p']
        if not isinstance(overrides, dict):
            # String form is expected to be wrapped in one extra character on each side.
            overrides = json.loads(overrides[1:-1])
        evalParams.update(overrides)

    resDict={'calculated':True,'Message':'','method':'{}','per_sample':'{}'}

    validate_data_fn(p['g'], p['s'], evalParams)
    evalData = evaluate_method_fn(p['g'], p['s'], evalParams)
    resDict.update(evalData)

    writeOutput = 'o' in p
    if writeOutput:
        if not os.path.exists(p['o']):
            os.makedirs(p['o'])

        resultsOutputname = p['o'] + '/results.zip'
        outZip = zipfile.ZipFile(resultsOutputname, mode='w', allowZip64=True)

        # method.json carries only the global results, not the per-sample data.
        del resDict['per_sample']
        if 'output_items' in resDict.keys():
            del resDict['output_items']

        outZip.writestr('method.json',json.dumps(resDict))

    if not resDict['calculated']:
        if show_result:
            sys.stderr.write('Error!\n'+ resDict['Message']+'\n\n')
        if writeOutput:
            outZip.close()
        return resDict

    if writeOutput:
        if per_sample == True:
            for sampleKey, sampleValue in evalData['per_sample'].items():
                outZip.writestr( sampleKey + '.json',json.dumps(sampleValue))

            if 'output_items' in evalData.keys():
                for itemName, itemData in evalData['output_items'].items():
                    outZip.writestr( itemName,itemData)

        outZip.close()

    return resDict
def main_validation(default_evaluation_params_fn,validate_data_fn):
    """
    This process validates a method using the command-line arguments.

    Every argument in sys.argv[1:] is expected in the form -<key>=<value>; the
    leading character is dropped and the rest is split on the first '=' only, so
    values that themselves contain '=' (paths, JSON parameters) are kept intact.

    Params:
    default_evaluation_params_fn: points to a function that returns a dictionary with the default parameters used for the evaluation
    validate_data_fn: points to a method that validates the correct format of the submission

    Always terminates the process: prints 'SUCCESS' and exits with status 0 when
    validation passes, otherwise prints the error text and exits with status 101.
    """
    try:
        p = dict([s[1:].split('=', 1) for s in sys.argv[1:]])
        evalParams = default_evaluation_params_fn()
        if 'p' in p.keys():
            evalParams.update( p['p'] if isinstance(p['p'], dict) else json.loads(p['p'][1:-1]) )

        validate_data_fn(p['g'], p['s'], evalParams)
        print('SUCCESS')
        # SystemExit is not an Exception subclass, so it is not caught below.
        sys.exit(0)
    except Exception as e:
        print(str(e))
        sys.exit(101)
\ No newline at end of file
#!/usr/bin/env python2
#encoding: UTF-8
import json
import sys;sys.path.append('./')
import zipfile
import re
import sys
import os
import codecs
import importlib
try:
from StringIO import StringIO
except ImportError:
from io import StringIO
def print_help():
    """Write the command-line usage string to stdout and exit with status 2."""
    usage = 'Usage: python %s.py -g=<gtFile> -s=<submFile> [-o=<outputFolder> -p=<jsonParams>]' %sys.argv[0]
    sys.stdout.write(usage)
    sys.exit(2)
def load_zip_file_keys(file,fileNameRegExp=''):
    """
    Returns a list with the entries of the ZIP file that match the regular expression.
    Each key is the entry name, or the first capturing group of fileNameRegExp
    when the expression defines one. With an empty fileNameRegExp every entry
    is returned under its own name.

    Raises Exception('Error loading the ZIP archive.') when the archive cannot be opened.
    """
    try:
        archive=zipfile.ZipFile(file, mode='r', allowZip64=True)
    except Exception:
        raise Exception('Error loading the ZIP archive.')

    # Only the name list is needed, so release the archive handle right away.
    try:
        names = archive.namelist()
    finally:
        archive.close()

    pairs = []
    for name in names:
        keyName = name
        if fileNameRegExp!="":
            m = re.match(fileNameRegExp,name)
            if m is None:
                # Entry does not match the filter: leave it out.
                continue
            if len(m.groups())>0:
                keyName = m.group(1)
        pairs.append( keyName )

    return pairs
def load_zip_file(file,fileNameRegExp='',allEntries=False):
    """
    Returns a dictionary with the contents (filtered by fileNameRegExp) of a ZIP file.
    Each key is the entry name, or the first capturing group of fileNameRegExp
    when the expression defines one; each value is the raw entry content.
    allEntries validates that all entries in the ZIP file pass the fileNameRegExp:
    the first entry that does not match raises Exception('ZIP entry not valid: <name>').

    Raises Exception('Error loading the ZIP archive') when the archive cannot be opened.
    """
    try:
        archive=zipfile.ZipFile(file, mode='r', allowZip64=True)
    except Exception:
        raise Exception('Error loading the ZIP archive')

    pairs = []
    # Close the archive on every exit path, including the allEntries failure.
    try:
        for name in archive.namelist():
            addFile = True
            keyName = name
            if fileNameRegExp!="":
                m = re.match(fileNameRegExp,name)
                if m is None:
                    addFile = False
                elif len(m.groups())>0:
                    keyName = m.group(1)

            if addFile:
                pairs.append( [ keyName , archive.read(name)] )
            elif allEntries:
                raise Exception('ZIP entry not valid: %s' %name)
    finally:
        archive.close()

    return dict(pairs)
def decode_utf8(raw):
    """
    Returns a Unicode object on success, or None on failure.

    Undecodable byte sequences are substituted (errors='replace') rather than
    rejected, and a leading UTF-8 byte order mark is removed from the result.
    """
    try:
        text = codecs.decode(raw,'utf-8', 'replace')
        # Strip the BOM on the encoded form. Slicing keeps this bytes-only, so it
        # works on Python 2 and Python 3 (bytes.replace with a str argument raises
        # TypeError on Python 3, which used to make every BOM file return None).
        encoded = text.encode('utf8')
        if encoded.startswith(codecs.BOM_UTF8):
            encoded = encoded[len(codecs.BOM_UTF8):]
        return encoded.decode('utf-8')
    except Exception:
        return None
def validate_lines_in_file(fileName,file_contents,CRLF=True,LTRB=True,withTranscription=False,withConfidence=False,imWidth=0,imHeight=0):
    """
    Validates every non-empty line of a file by calling validate_tl_line on it.

    fileName is only used in error messages; file_contents is the raw content,
    decoded here with decode_utf8.

    Raises Exception when the content cannot be decoded, or when a line is
    rejected (the message names the sample, the line and the underlying error).
    """
    utf8File = decode_utf8(file_contents)
    if utf8File is None:
        raise Exception("The file %s is not UTF-8" %fileName)

    for line in utf8File.split( "\r\n" if CRLF else "\n" ):
        line = line.replace("\r","").replace("\n","")
        if line == "":
            continue
        try:
            validate_tl_line(line,LTRB,withTranscription,withConfidence,imWidth,imHeight)
        except Exception as e:
            message = "Line in sample not valid. Sample: %s Line: %s Error: %s" %(fileName,line,str(e))
            if sys.version_info[0] < 3:
                # Python 2 needs a byte string here to carry non-ASCII text safely.
                # On Python 3 a bytes message would be rendered as b'...'.
                message = message.encode('utf-8', 'replace')
            raise Exception(message)
def validate_tl_line(line,LTRB=True,withTranscription=True,withConfidence=True,imWidth=0,imHeight=0):
    """
    Check the format of a single line; an exception is raised when it is not valid.
    When imWidth and imHeight are both positive, all points must also lie inside the image bounds.
    Accepted layouts:
    LTRB=True: xmin,ymin,xmax,ymax[,confidence][,transcription]
    LTRB=False: x1,y1,x2,y2,x3,y3,x4,y4[,confidence][,transcription]
    """
    # Parsing performs every check; the parsed values are deliberately discarded.
    get_tl_line_values(
        line,
        LTRB=LTRB,
        withTranscription=withTranscription,
        withConfidence=withConfidence,
        imWidth=imWidth,
        imHeight=imHeight,
    )
def get_tl_line_values(line,LTRB=True,withTranscription=False,withConfidence=False,imWidth=0,imHeight=0):
    """
    Parse and validate one text line. An exception is raised when the line is not valid.
    When imWidth and imHeight are both positive, all points must lie inside the image bounds.
    Accepted layouts:
    LTRB=True: xmin,ymin,xmax,ymax[,confidence][,transcription]
    LTRB=False: x1,y1,x2,y2,x3,y3,x4,y4[,confidence][,transcription]
    Returns a tuple (points, confidence, transcription). points is a list of
    floats; confidence stays 0.0 and transcription stays "" when not requested.
    """
    confidence = 0.0
    transcription = ""

    # Step 1: choose the expression and the error text for the requested layout.
    if LTRB:
        numPoints = 4
        if withTranscription and withConfidence:
            pattern = r'^\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*([0-9]+)\s*,\s*([0-9]+)\s*,\s*([0-1].?[0-9]*)\s*,(.*)$'
            formatError = "Format incorrect. Should be: xmin,ymin,xmax,ymax,confidence,transcription"
        elif withConfidence:
            pattern = r'^\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*([0-9]+)\s*,\s*([0-9]+)\s*,\s*([0-1].?[0-9]*)\s*$'
            formatError = "Format incorrect. Should be: xmin,ymin,xmax,ymax,confidence"
        elif withTranscription:
            pattern = r'^\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*([0-9]+)\s*,\s*([0-9]+)\s*,(.*)$'
            formatError = "Format incorrect. Should be: xmin,ymin,xmax,ymax,transcription"
        else:
            pattern = r'^\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*([0-9]+)\s*,\s*([0-9]+)\s*,?\s*$'
            formatError = "Format incorrect. Should be: xmin,ymin,xmax,ymax"
    else:
        numPoints = 8
        if withTranscription and withConfidence:
            pattern = r'^\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*([0-1].?[0-9]*)\s*,(.*)$'
            formatError = "Format incorrect. Should be: x1,y1,x2,y2,x3,y3,x4,y4,confidence,transcription"
        elif withConfidence:
            pattern = r'^\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*([0-1].?[0-9]*)\s*$'
            formatError = "Format incorrect. Should be: x1,y1,x2,y2,x3,y3,x4,y4,confidence"
        elif withTranscription:
            pattern = r'^\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,(.*)$'
            formatError = "Format incorrect. Should be: x1,y1,x2,y2,x3,y3,x4,y4,transcription"
        else:
            pattern = r'^\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*$'
            formatError = "Format incorrect. Should be: x1,y1,x2,y2,x3,y3,x4,y4"

    # Step 2: match once and reject lines that do not fit the layout.
    m = re.match(pattern, line)
    if m is None:
        raise Exception(formatError)

    # The coordinate groups contain only digits (and an optional sign).
    points = [ float(m.group(i)) for i in range(1, numPoints + 1) ]

    # Step 3: geometric checks for the chosen layout.
    if LTRB:
        xmin = int(m.group(1))
        ymin = int(m.group(2))
        xmax = int(m.group(3))
        ymax = int(m.group(4))
        if xmax < xmin:
            raise Exception("Xmax value (%s) not valid (Xmax < Xmin)." %(xmax))
        if ymax < ymin:
            raise Exception("Ymax value (%s) not valid (Ymax < Ymin)." %(ymax))

        if imWidth>0 and imHeight>0:
            validate_point_inside_bounds(xmin,ymin,imWidth,imHeight)
            validate_point_inside_bounds(xmax,ymax,imWidth,imHeight)
    else:
        validate_clockwise_points(points)

        if imWidth>0 and imHeight>0:
            for k in range(0, 8, 2):
                validate_point_inside_bounds(points[k],points[k+1],imWidth,imHeight)

    # Step 4: optional trailing fields.
    if withConfidence:
        # The expression is permissive (e.g. it lets "1x" through), so float() can still fail.
        try:
            confidence = float(m.group(numPoints+1))
        except ValueError:
            raise Exception("Confidence value must be a float")

    if withTranscription:
        posTranscription = numPoints + (2 if withConfidence else 1)
        transcription = m.group(posTranscription)
        quoted = re.match(r'^\s*\"(.*)\"\s*$',transcription)
        if quoted is not None:
            # Double-quoted transcription: take the inner text and undo the escaping.
            transcription = quoted.group(1).replace("\\\\", "\\").replace("\\\"", "\"")

    return points,confidence,transcription
def validate_point_inside_bounds(x,y,imWidth,imHeight):
    """
    Check that the point (x, y) lies within 0..imWidth horizontally and
    0..imHeight vertically (both limits inclusive).

    Raises Exception naming the offending coordinate and the image dimensions.
    """
    if(x<0 or x>imWidth):
        raise Exception("X value (%s) not valid. Image dimensions: (%s,%s)" %(x,imWidth,imHeight))
    if(y<0 or y>imHeight):
        raise Exception("Y value (%s) not valid. Image dimensions: (%s,%s)" %(y,imWidth,imHeight))
def validate_clockwise_points(points):
    """
    Checks that the 4 points delimiting a polygon are given in clockwise order.

    points is a flat sequence of 8 values (x1,y1,...,x4,y4); each value is
    converted with int() before the test. Raises Exception when the sequence
    does not hold exactly 8 values or when the order is not clockwise.
    """
    if len(points) != 8:
        raise Exception("Points list not valid." + str(len(points)))

    vertices = [(int(points[k]), int(points[k + 1])) for k in range(0, 8, 2)]

    # Sum (x_next - x_cur) * (y_next + y_cur) over the closed polygon;
    # a positive total is treated as a non-clockwise order.
    total = 0
    for idx in range(4):
        cur = vertices[idx]
        nxt = vertices[(idx + 1) % 4]
        total += (nxt[0] - cur[0]) * (nxt[1] + cur[1])

    if total > 0:
        raise Exception("Points are not clockwise. The coordinates of bounding quadrilaterals have to be given in clockwise order. Regarding the correct interpretation of 'clockwise' remember that the image coordinate system used is the standard one, with the image origin at the upper left, the X axis extending to the right and Y axis extending downwards.")
def get_tl_line_values_from_file_contents(content,CRLF=True,LTRB=True,withTranscription=False,withConfidence=False,imWidth=0,imHeight=0,sort_by_confidences=True):
    """
    Collects the points, confidences and transcriptions of a whole file. Valid line formats:
    xmin,ymin,xmax,ymax,[confidence],[transcription]
    x1,y1,x2,y2,x3,y3,x4,y4,[confidence],[transcription]

    Empty lines are skipped; every other line is parsed with get_tl_line_values.
    Returns the lists (points, confidences, transcriptions), index-aligned.
    With withConfidence and sort_by_confidences both set, the entries are
    ordered from the highest to the lowest confidence.
    """
    pointsList = []
    confidencesList = []
    transcriptionsList = []

    lineBreak = "\r\n" if CRLF else "\n"
    for rawLine in content.split(lineBreak):
        cleaned = rawLine.replace("\r","").replace("\n","")
        if cleaned != "":
            points, confidence, transcription = get_tl_line_values(cleaned,LTRB,withTranscription,withConfidence,imWidth,imHeight)
            pointsList.append(points)
            confidencesList.append(confidence)
            transcriptionsList.append(transcription)

    mustSort = withConfidence and len(confidencesList)>0 and sort_by_confidences
    if mustSort:
        import numpy as np
        # argsort on the negated values gives a descending permutation.
        ranking = np.argsort(-np.array(confidencesList))
        confidencesList = [confidencesList[pos] for pos in ranking]
        pointsList = [pointsList[pos] for pos in ranking]
        transcriptionsList = [transcriptionsList[pos] for pos in ranking]

    return pointsList,confidencesList,transcriptionsList
def main_evaluation(p,default_evaluation_params_fn,validate_data_fn,evaluate_method_fn,show_result=True,per_sample=True):
    """
    This process validates a method, evaluates it and, when an output folder is given,
    generates a ZIP file with a JSON entry for each sample.

    Params:
    p: Dictionary of parameters with the GT/submission locations ('g', 's') and the optional
       'o' (output folder) and 'p' (evaluation parameter overrides). If None is passed, the
       parameters are read from sys.argv, each in the form -<key>=<value>.
    default_evaluation_params_fn: points to a function that returns a dictionary with the default parameters used for the evaluation
    validate_data_fn: points to a method that validates the correct format of the submission
    evaluate_method_fn: points to a function that evaluates the submission and returns a dictionary with the results

    Returns the result dictionary. When validation or evaluation raises, the error text is
    stored under 'Message' and 'calculated' is set to False instead of propagating.
    """
    if p is None:
        # Split on the first '=' only so values containing '=' (paths, JSON) stay intact.
        p = dict([s[1:].split('=', 1) for s in sys.argv[1:]])
        if(len(sys.argv)<3):
            print_help()

    evalParams = default_evaluation_params_fn()
    if 'p' in p.keys():
        evalParams.update( p['p'] if isinstance(p['p'], dict) else json.loads(p['p'][1:-1]) )

    resDict={'calculated':True,'Message':'','method':'{}','per_sample':'{}'}
    try:
        validate_data_fn(p['g'], p['s'], evalParams)
        evalData = evaluate_method_fn(p['g'], p['s'], evalParams)
        resDict.update(evalData)

    except Exception as e:
        resDict['Message']= str(e)
        resDict['calculated']=False

    if 'o' in p:
        if not os.path.exists(p['o']):
            os.makedirs(p['o'])

        resultsOutputname = p['o'] + '/results.zip'
        outZip = zipfile.ZipFile(resultsOutputname, mode='w', allowZip64=True)

        # method.json carries only the global results, not the per-sample data.
        del resDict['per_sample']
        if 'output_items' in resDict.keys():
            del resDict['output_items']

        outZip.writestr('method.json',json.dumps(resDict))

    if not resDict['calculated']:
        if show_result:
            sys.stderr.write('Error!\n'+ resDict['Message']+'\n\n')
        if 'o' in p:
            outZip.close()
        return resDict

    if 'o' in p:
        if per_sample == True:
            for k,v in evalData['per_sample'].items():
                outZip.writestr( k + '.json',json.dumps(v))

            if 'output_items' in evalData.keys():
                for k, v in evalData['output_items'].items():
                    outZip.writestr( k,v)

        outZip.close()

    return resDict
def main_validation(default_evaluation_params_fn,validate_data_fn):
    """
    This process validates a method using the command-line arguments.

    Each argument in sys.argv[1:] is expected in the form -<key>=<value>. The first
    character is dropped and the remainder is split on the first '=' only, so a
    value may itself contain '='.

    Params:
    default_evaluation_params_fn: points to a function that returns a dictionary with the default parameters used for the evaluation
    validate_data_fn: points to a method that validates the correct format of the submission

    The process is always terminated: 'SUCCESS' and exit status 0 when validation
    passes, otherwise the error text is printed and the exit status is 101.
    """
    try:
        p = dict([s[1:].split('=', 1) for s in sys.argv[1:]])
        evalParams = default_evaluation_params_fn()
        if 'p' in p.keys():
            evalParams.update( p['p'] if isinstance(p['p'], dict) else json.loads(p['p'][1:-1]) )

        validate_data_fn(p['g'], p['s'], evalParams)
        print('SUCCESS')
        # SystemExit does not derive from Exception, so the handler below lets it through.
        sys.exit(0)
    except Exception as e:
        print(str(e))
        sys.exit(101)
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
# Re-export MSDeformAttn from the ms_deform_attn submodule at package level.
from .ms_deform_attn import MSDeformAttn
# Public names: every entry of this module's globals that does not start with an underscore.
__all__ = [k for k in globals().keys() if not k.startswith("_")]
\ No newline at end of file
/*!
**************************************************************************************************
* Deformable DETR
* Copyright (c) 2020 SenseTime. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 [see LICENSE for details]
**************************************************************************************************
* Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
**************************************************************************************************
*/
#pragma once
#include "ms_deform_attn_cpu.h"
#ifdef WITH_CUDA
#include "ms_deform_attn_cuda.h"
#endif
/*!
 * \brief Device dispatcher for the multi-scale deformable attention forward pass.
 *
 * The device is chosen from `value` alone. A CUDA tensor is forwarded to
 * ms_deform_attn_cuda_forward when the extension was built with WITH_CUDA;
 * without WITH_CUDA, or for a non-CUDA tensor, an error is raised via AT_ERROR.
 *
 * \return The tensor produced by ms_deform_attn_cuda_forward.
 */
at::Tensor
ms_deform_attn_forward(
    const at::Tensor &value,
    const at::Tensor &spatial_shapes,
    const at::Tensor &level_start_index,
    const at::Tensor &sampling_loc,
    const at::Tensor &attn_weight,
    const int im2col_step)
{
    // Tensor::is_cuda() replaces the deprecated Tensor::type().is_cuda().
    if (value.is_cuda())
    {
#ifdef WITH_CUDA
        return ms_deform_attn_cuda_forward(
            value, spatial_shapes, level_start_index, sampling_loc, attn_weight, im2col_step);
#else
        AT_ERROR("Not compiled with GPU support");
#endif
    }
    AT_ERROR("Not implemented on the CPU");
}
/*!
 * \brief Device dispatcher for the multi-scale deformable attention backward pass.
 *
 * The device is chosen from `value` alone. A CUDA tensor is forwarded to
 * ms_deform_attn_cuda_backward when the extension was built with WITH_CUDA;
 * without WITH_CUDA, or for a non-CUDA tensor, an error is raised via AT_ERROR.
 *
 * \return The tensors produced by ms_deform_attn_cuda_backward.
 */
std::vector<at::Tensor>
ms_deform_attn_backward(
    const at::Tensor &value,
    const at::Tensor &spatial_shapes,
    const at::Tensor &level_start_index,
    const at::Tensor &sampling_loc,
    const at::Tensor &attn_weight,
    const at::Tensor &grad_output,
    const int im2col_step)
{
    // Tensor::is_cuda() replaces the deprecated Tensor::type().is_cuda().
    if (value.is_cuda())
    {
#ifdef WITH_CUDA
        return ms_deform_attn_cuda_backward(
            value, spatial_shapes, level_start_index, sampling_loc, attn_weight, grad_output, im2col_step);
#else
        AT_ERROR("Not compiled with GPU support");
#endif
    }
    AT_ERROR("Not implemented on the CPU");
}
/*!
**************************************************************************************************
* Deformable DETR
* Copyright (c) 2020 SenseTime. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 [see LICENSE for details]
**************************************************************************************************
* Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
**************************************************************************************************
*/
#include <vector>
#include <ATen/ATen.h>
#include <ATen/cuda/CUDAContext.h>
/*!
 * \brief CPU entry point for the forward pass; no CPU implementation exists.
 *
 * All arguments are ignored: the function unconditionally raises an error
 * through AT_ERROR and never returns a tensor.
 */
at::Tensor
ms_deform_attn_cpu_forward(
const at::Tensor &value,
const at::Tensor &spatial_shapes,
const at::Tensor &level_start_index,
const at::Tensor &sampling_loc,
const at::Tensor &attn_weight,
const int im2col_step)
{
AT_ERROR("Not implement on cpu");
}
/*!
 * \brief CPU entry point for the backward pass; no CPU implementation exists.
 *
 * All arguments are ignored: the function unconditionally raises an error
 * through AT_ERROR and never returns any gradients.
 */
std::vector<at::Tensor>
ms_deform_attn_cpu_backward(
const at::Tensor &value,
const at::Tensor &spatial_shapes,
const at::Tensor &level_start_index,
const at::Tensor &sampling_loc,
const at::Tensor &attn_weight,
const at::Tensor &grad_output,
const int im2col_step)
{
AT_ERROR("Not implement on cpu");
}
/*!
**************************************************************************************************
* Deformable DETR
* Copyright (c) 2020 SenseTime. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 [see LICENSE for details]
**************************************************************************************************
* Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
**************************************************************************************************
*/
#pragma once
#include <torch/extension.h>
/*!
 * \brief Declaration of the CPU forward entry point.
 *
 * The parameter list matches ms_deform_attn_cuda_forward.
 * NOTE(review): tensor layouts are presumably the same as for the CUDA
 * variant - confirm before relying on them.
 */
at::Tensor
ms_deform_attn_cpu_forward(
const at::Tensor &value,
const at::Tensor &spatial_shapes,
const at::Tensor &level_start_index,
const at::Tensor &sampling_loc,
const at::Tensor &attn_weight,
const int im2col_step);
/*!
 * \brief Declaration of the CPU backward entry point.
 *
 * The parameter list matches ms_deform_attn_cuda_backward.
 * NOTE(review): tensor layouts and the order of the returned gradients are
 * presumably the same as for the CUDA variant - confirm before relying on them.
 */
std::vector<at::Tensor>
ms_deform_attn_cpu_backward(
const at::Tensor &value,
const at::Tensor &spatial_shapes,
const at::Tensor &level_start_index,
const at::Tensor &sampling_loc,
const at::Tensor &attn_weight,
const at::Tensor &grad_output,
const int im2col_step);
/*!
**************************************************************************************************
* Deformable DETR
* Copyright (c) 2020 SenseTime. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 [see LICENSE for details]
**************************************************************************************************
* Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
**************************************************************************************************
*/
#include <vector>
#include "ms_deform_im2col_cuda.cuh"
#include <ATen/ATen.h>
#include <ATen/cuda/CUDAContext.h>
#include <cuda.h>
#include <cuda_runtime.h>
/*!
 * \brief CUDA forward pass of multi-scale deformable attention.
 *
 * All input tensors must be contiguous CUDA tensors. Sizes are read as:
 *   value:          (batch, spatial_size, num_heads, channels)
 *   spatial_shapes: num_levels rows, read as int64
 *   sampling_loc:   dim 1 is num_query, dim 4 is num_point
 * NOTE(review): the pointer strides used below imply sampling_loc is
 * (batch, num_query, num_heads, num_levels, num_point, 2) and attn_weight is
 * (batch, num_query, num_heads, num_levels, num_point) - confirm with callers.
 *
 * The batch is processed in chunks of min(batch, im2col_step) samples; that
 * chunk size must be positive and must divide batch evenly.
 *
 * \return Tensor viewed as (batch, num_query, num_heads * channels).
 */
at::Tensor ms_deform_attn_cuda_forward(
    const at::Tensor &value,
    const at::Tensor &spatial_shapes,
    const at::Tensor &level_start_index,
    const at::Tensor &sampling_loc,
    const at::Tensor &attn_weight,
    const int im2col_step)
{
    AT_ASSERTM(value.is_contiguous(), "value tensor has to be contiguous");
    AT_ASSERTM(spatial_shapes.is_contiguous(), "spatial_shapes tensor has to be contiguous");
    AT_ASSERTM(level_start_index.is_contiguous(), "level_start_index tensor has to be contiguous");
    AT_ASSERTM(sampling_loc.is_contiguous(), "sampling_loc tensor has to be contiguous");
    AT_ASSERTM(attn_weight.is_contiguous(), "attn_weight tensor has to be contiguous");

    AT_ASSERTM(value.is_cuda(), "value must be a CUDA tensor");
    AT_ASSERTM(spatial_shapes.is_cuda(), "spatial_shapes must be a CUDA tensor");
    AT_ASSERTM(level_start_index.is_cuda(), "level_start_index must be a CUDA tensor");
    AT_ASSERTM(sampling_loc.is_cuda(), "sampling_loc must be a CUDA tensor");
    AT_ASSERTM(attn_weight.is_cuda(), "attn_weight must be a CUDA tensor");

    const int batch = value.size(0);
    const int spatial_size = value.size(1);
    const int num_heads = value.size(2);
    const int channels = value.size(3);

    const int num_levels = spatial_shapes.size(0);

    const int num_query = sampling_loc.size(1);
    const int num_point = sampling_loc.size(4);

    const int im2col_step_ = std::min(batch, im2col_step);

    // Guard the modulo and divisions below against a zero or negative chunk size.
    AT_ASSERTM(im2col_step_ > 0, "batch(", batch, ") and im2col_step(", im2col_step, ") must both be positive");
    // The message arguments are concatenated by the macro, not printf-formatted.
    AT_ASSERTM(batch % im2col_step_ == 0, "batch(", batch, ") must be divisible by im2col_step(", im2col_step_, ")");

    auto output = at::zeros({batch, num_query, num_heads, channels}, value.options());

    const int batch_n = im2col_step_;
    auto output_n = output.view({batch / im2col_step_, batch_n, num_query, num_heads, channels});

    // Element counts of one sample in each tensor, used as per-chunk pointer strides.
    auto per_value_size = spatial_size * num_heads * channels;
    auto per_sample_loc_size = num_query * num_heads * num_levels * num_point * 2;
    auto per_attn_weight_size = num_query * num_heads * num_levels * num_point;

    for (int n = 0; n < batch / im2col_step_; ++n)
    {
        // Slice of the output that receives the results for chunk n.
        auto columns = output_n.select(0, n);
        AT_DISPATCH_FLOATING_TYPES(value.scalar_type(), "ms_deform_attn_forward_cuda", ([&] {
            ms_deformable_im2col_cuda(at::cuda::getCurrentCUDAStream(),
                value.data_ptr<scalar_t>() + n * im2col_step_ * per_value_size,
                spatial_shapes.data_ptr<int64_t>(),
                level_start_index.data_ptr<int64_t>(),
                sampling_loc.data_ptr<scalar_t>() + n * im2col_step_ * per_sample_loc_size,
                attn_weight.data_ptr<scalar_t>() + n * im2col_step_ * per_attn_weight_size,
                batch_n, spatial_size, num_heads, channels, num_levels, num_query, num_point,
                columns.data_ptr<scalar_t>());
        }));
    }

    output = output.view({batch, num_query, num_heads * channels});

    return output;
}
/*!
 * \brief CUDA backward pass of multi-scale deformable attention.
 *
 * All input tensors must be contiguous CUDA tensors. Sizes are read as:
 *   value:          (batch, spatial_size, num_heads, channels)
 *   spatial_shapes: num_levels rows, read as int64
 *   sampling_loc:   dim 1 is num_query, dim 4 is num_point
 *   grad_output:    viewed as (batch / step, step, num_query, num_heads, channels)
 * NOTE(review): the pointer strides used below imply sampling_loc is
 * (batch, num_query, num_heads, num_levels, num_point, 2) and attn_weight is
 * (batch, num_query, num_heads, num_levels, num_point) - confirm with callers.
 *
 * The batch is processed in chunks of min(batch, im2col_step) samples; that
 * chunk size must be positive and must divide batch evenly.
 *
 * \return {grad_value, grad_sampling_loc, grad_attn_weight}, each allocated
 *         with the shape of the corresponding input.
 */
std::vector<at::Tensor> ms_deform_attn_cuda_backward(
    const at::Tensor &value,
    const at::Tensor &spatial_shapes,
    const at::Tensor &level_start_index,
    const at::Tensor &sampling_loc,
    const at::Tensor &attn_weight,
    const at::Tensor &grad_output,
    const int im2col_step)
{
    AT_ASSERTM(value.is_contiguous(), "value tensor has to be contiguous");
    AT_ASSERTM(spatial_shapes.is_contiguous(), "spatial_shapes tensor has to be contiguous");
    AT_ASSERTM(level_start_index.is_contiguous(), "level_start_index tensor has to be contiguous");
    AT_ASSERTM(sampling_loc.is_contiguous(), "sampling_loc tensor has to be contiguous");
    AT_ASSERTM(attn_weight.is_contiguous(), "attn_weight tensor has to be contiguous");
    AT_ASSERTM(grad_output.is_contiguous(), "grad_output tensor has to be contiguous");

    AT_ASSERTM(value.is_cuda(), "value must be a CUDA tensor");
    AT_ASSERTM(spatial_shapes.is_cuda(), "spatial_shapes must be a CUDA tensor");
    AT_ASSERTM(level_start_index.is_cuda(), "level_start_index must be a CUDA tensor");
    AT_ASSERTM(sampling_loc.is_cuda(), "sampling_loc must be a CUDA tensor");
    AT_ASSERTM(attn_weight.is_cuda(), "attn_weight must be a CUDA tensor");
    AT_ASSERTM(grad_output.is_cuda(), "grad_output must be a CUDA tensor");

    const int batch = value.size(0);
    const int spatial_size = value.size(1);
    const int num_heads = value.size(2);
    const int channels = value.size(3);

    const int num_levels = spatial_shapes.size(0);

    const int num_query = sampling_loc.size(1);
    const int num_point = sampling_loc.size(4);

    const int im2col_step_ = std::min(batch, im2col_step);

    // Guard the modulo and divisions below against a zero or negative chunk size.
    AT_ASSERTM(im2col_step_ > 0, "batch(", batch, ") and im2col_step(", im2col_step, ") must both be positive");
    // The message arguments are concatenated by the macro, not printf-formatted.
    AT_ASSERTM(batch % im2col_step_ == 0, "batch(", batch, ") must be divisible by im2col_step(", im2col_step_, ")");

    auto grad_value = at::zeros_like(value);
    auto grad_sampling_loc = at::zeros_like(sampling_loc);
    auto grad_attn_weight = at::zeros_like(attn_weight);

    const int batch_n = im2col_step_;

    // Element counts of one sample in each tensor, used as per-chunk pointer strides.
    auto per_value_size = spatial_size * num_heads * channels;
    auto per_sample_loc_size = num_query * num_heads * num_levels * num_point * 2;
    auto per_attn_weight_size = num_query * num_heads * num_levels * num_point;

    auto grad_output_n = grad_output.view({batch / im2col_step_, batch_n, num_query, num_heads, channels});

    for (int n = 0; n < batch / im2col_step_; ++n)
    {
        // Upstream gradient slice for chunk n.
        auto grad_output_g = grad_output_n.select(0, n);
        AT_DISPATCH_FLOATING_TYPES(value.scalar_type(), "ms_deform_attn_backward_cuda", ([&] {
            ms_deformable_col2im_cuda(at::cuda::getCurrentCUDAStream(),
                grad_output_g.data_ptr<scalar_t>(),
                value.data_ptr<scalar_t>() + n * im2col_step_ * per_value_size,
                spatial_shapes.data_ptr<int64_t>(),
                level_start_index.data_ptr<int64_t>(),
                sampling_loc.data_ptr<scalar_t>() + n * im2col_step_ * per_sample_loc_size,
                attn_weight.data_ptr<scalar_t>() + n * im2col_step_ * per_attn_weight_size,
                batch_n, spatial_size, num_heads, channels, num_levels, num_query, num_point,
                grad_value.data_ptr<scalar_t>() + n * im2col_step_ * per_value_size,
                grad_sampling_loc.data_ptr<scalar_t>() + n * im2col_step_ * per_sample_loc_size,
                grad_attn_weight.data_ptr<scalar_t>() + n * im2col_step_ * per_attn_weight_size);
        }));
    }

    return {
        grad_value, grad_sampling_loc, grad_attn_weight
    };
}
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment