First commit.

cce6e1bf · chenych · cce6e1bf · cce6e1bf · cce6e1bf · cce6e1bf
Commit cce6e1bf authored Nov 21, 2023 by chenych
20 changed files
--- a/.gitignore
+++ b/.gitignore
+*.mdb
+*.tar
+*.zip
+*.eps
+*.pdf
+### Linux ###
+*~
+# temporary files which can be created if a process still has a handle open of a deleted file
+.fuse_hidden*
+# KDE directory preferences
+.directory
+# Linux trash folder which might appear on any partition or disk
+.Trash-*
+# .nfs files are created when an open file is removed but is still being accessed
+.nfs*
+### OSX ###
+# General
+.DS_Store
+.AppleDouble
+.LSOverride
+# Icon must end with two \r
+Icon
+# Thumbnails
+._*
+# Files that might appear in the root of a volume
+.DocumentRevisions-V100
+.fseventsd
+.Spotlight-V100
+.TemporaryItems
+.Trashes
+.VolumeIcon.icns
+.com.apple.timemachine.donotpresent
+# Directories potentially created on remote AFP share
+.AppleDB
+.AppleDesktop
+Network Trash Folder
+Temporary Items
+.apdisk
+### Python ###
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+.pytest_cache/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+.python-version
+# celery beat schedule file
+celerybeat-schedule
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+### Python Patch ###
+.venv/
+### Python.VirtualEnv Stack ###
+# Virtualenv
+# http://iamzed.com/2009/05/07/a-primer-on-virtualenv/
+[Bb]in
+[Ii]nclude
+[Ll]ib64
+[Ll]ocal
+[Ss]cripts
+pyvenv.cfg
+pip-selfcheck.json
+### Windows ###
+# Windows thumbnail cache files
+Thumbs.db
+ehthumbs.db
+ehthumbs_vista.db
+# Dump file
+*.stackdump
+# Folder config file
+[Dd]esktop.ini
+# Recycle Bin used on file shares
+$RECYCLE.BIN/
+# Windows Installer files
+*.cab
+*.msi
+*.msix
+*.msm
+*.msp
+# Windows shortcuts
+*.lnk
+.idea/
+.vscode/
+output/
+exp/
+data/
+*.pyc
+*.mp4
+*.zip
\ No newline at end of file
--- a/LICENSE
+++ b/LICENSE
+AdelaiDet for non-commercial purposes
+(For commercial use, contact chhshen@gmail.com for obtaining a commerical license.)
+Copyright (c) 2019 the authors
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--- a/README.md
+++ b/README.md
+# DeepSolo
+## 论文
+[DeepSolo: Let Transformer Decoder with Explicit Points Solo for Text Spotting](https://arxiv.org/abs/2211.10772)
+[DeepSolo++: Let Transformer Decoder with Explicit Points Solo for Text Spotting](https://arxiv.org/abs/2305.19957)
+## 模型结构
+一个简洁的类似DETR的基线，允许一个具有显式点的解码器同时进行检测和识别（图 (c)、(f)）。
+<div align=center>
+    <img src="./doc/image.png"/>
+</div>
+## 算法原理
+DeepSolo中，编码器在接收到图像特征后，生成由四个Bezier控制点表示的Bezier中心曲线候选和相应的分数，然后，选择前K个评分的候选。对于每个选定的曲线候选，在曲线上均匀采样N个点，这些点的坐标被编码为位置query并将其添加到内容query中形成复合query。接下来，将复合query输入deformable cross-attention解码器收集有用的文本特征。在解码器之后，采用了几个简单的并行预测头（线性层或MLP）将query解码为文本的中心线、边界、script和置信度，从而同时解决检测和识别问题。
+<div align=center>
+    <img src="./doc/DeepSolo.jpg"/>
+</div>
+## 环境配置
+训练需要依赖Detectron2库，编译Detectron2库需要满足 Python ≥ 3.7，PyTorch ≥ 1.8 并且 torchvision 与 PyTorch 版本匹配，gcc & g++ ≥ 5.4。如果想要更快的构建，推荐安装Ninja。
+Tips: 如果detectron2安装失败，可尝试以下方式进行安装：
+```
+git clone https://github.com/facebookresearch/detectron2.git
+python -m pip install -e detectron2
+```
+### Docker（方法一）
+-v 路径、docker_name和imageID根据实际情况修改
+```
+docker pull image.sourcefind.cn:5000/dcu/admin/base/pytorch:1.13.1-centos7.6-dtk-23.04-py38-latest
+docker run -it -v /path/your_code_data/:/path/your_code_data/ --shm-size=32G --privileged=true --device=/dev/kfd --device=/dev/dri/ --group-add video --name docker_name imageID bash
+cd /your_code_path/deepsolo_pytorch
+pip install -r requirements.txt
+python -m pip install 'git+https://github.com/facebookresearch/detectron2.git'
+bash make.sh
+```
+### Dockerfile（方法二）
+-v 路径、docker_name和imageID根据实际情况修改
+```
+cd ./docker
+cp ../requirements.txt requirements.txt
+docker build --no-cache -t deepsolo:latest .
+docker run -it -v /path/your_code_data/:/path/your_code_data/ --shm-size=32G --privileged=true --device=/dev/kfd --device=/dev/dri/ --group-add video --name docker_name imageID bash
+cd /your_code_path/deepsolo_pytorch
+pip install -r requirements.txt
+python -m pip install 'git+https://github.com/facebookresearch/detectron2.git'
+bash make.sh
+```
+### Anaconda（方法三）
+1、关于本项目DCU显卡所需的特殊深度学习库可从光合开发者社区下载安装: https://developer.hpccube.com/tool/
+```
+DTK软件栈：dtk23.04
+python：python3.8
+torch：1.13.1
+torchvision：0.14.1
+```
+Tips：以上dtk软件栈、python、torch等DCU相关工具版本需要严格一一对应
+2、其他非特殊库直接按照下面步骤进行安装
+```
+pip install -r requirements.txt
+python -m pip install 'git+https://github.com/facebookresearch/detectron2.git'
+bash make.sh
+```
+## 数据集
+所有的数据集请保存在 deepsolo_pytorch/datasets 下，因数据集较大，请按训练的需求进行选择下载。训练需求详见configs中yaml的DATASETS字段。
+### 训练数据集
+`[SynthText150K (CurvedSynText150K)]` [images](https://github.com/aim-uofa/AdelaiDet/tree/master/datasets) | [annotations(Part1)](https://1drv.ms/u/s!ApEsJ9RIZdBQgQTfQC578sYbkPik?e=2Yz06g) | [annotations(Part2)](https://1drv.ms/u/s!ApEsJ9RIZdBQgQJWqH404p34Wb1m?e=KImg6N)
+`[MLT]` [images](https://github.com/aim-uofa/AdelaiDet/tree/master/datasets) | [annotations](https://1drv.ms/u/s!ApEsJ9RIZdBQgQBpvuvV2KBBbN64?e=HVTCab)
+`[ICDAR2013]` [images](https://1drv.ms/u/s!ApEsJ9RIZdBQgQcK05sWzK3_t26T?e=5jTWAa) | [annotations](https://1drv.ms/u/s!ApEsJ9RIZdBQfbgqFCeiKOrTM0E?e=UMfIQh)
+`[ICDAR2015]` [images](https://1drv.ms/u/s!ApEsJ9RIZdBQgQbupfCNqVxtYGna?e=b4TQY2) | [annotations](https://1drv.ms/u/s!ApEsJ9RIZdBQfhGW5JDiNcDxfWQ?e=PZ2JCX)
+`[Total-Text]` [images](https://1drv.ms/u/s!ApEsJ9RIZdBQgQjyPyivo_FnjJ1H?e=qgSFYL) | [annotations](https://1drv.ms/u/s!ApEsJ9RIZdBQgQOShwd8O0K5Dd1f?e=GYyPAX)
+`[CTW1500]` [images](https://1drv.ms/u/s!ApEsJ9RIZdBQgQlZVAH5AJld3Y9g?e=zgG71Z) | [annotations](https://1drv.ms/u/s!ApEsJ9RIZdBQfPpyzxoFV34zBg4?e=WK20AN)
+`[TextOCR]` [images](https://dl.fbaipublicfiles.com/textvqa/images/train_val_images.zip) | [annotations](https://1drv.ms/u/s!ApEsJ9RIZdBQgQHY3mjH13GRLPGI?e=Dx1O99)
+`[Inverse-Text]` [images](https://1drv.ms/u/s!AimBgYV7JjTlgccVhlbD4I3z5QfmsQ?e=myu7Ue) | [annotations](https://1drv.ms/u/s!ApEsJ9RIZdBQf3G4vZpf4QD5NKo?e=xR3GtY)
+`[SynChinese130K]` [images](https://github.com/aim-uofa/AdelaiDet/tree/master/datasets) | [annotations](https://1drv.ms/u/s!AimBgYV7JjTlgch5W0n1Iv397i0csw?e=Gq8qww)
+`[ArT]` [images](https://github.com/aim-uofa/AdelaiDet/tree/master/datasets) | [annotations](https://1drv.ms/u/s!AimBgYV7JjTlgch45d0VHNCoPC1jfQ?e=likK00)
+`[LSVT]` [images](https://github.com/aim-uofa/AdelaiDet/tree/master/datasets) | [annotations](https://1drv.ms/u/s!AimBgYV7JjTlgch7yjmrCSN0TgoO4w?e=NKd5OG)
+`[ReCTS]` [images](https://github.com/aim-uofa/AdelaiDet/tree/master/datasets) | [annotations](https://1drv.ms/u/s!AimBgYV7JjTlgch_xZ8otxFWfNgZSg?e=pdq28B)
+`[Evaluation ground-truth]` [Link](https://1drv.ms/u/s!ApEsJ9RIZdBQem-MG1TjuRWApyA?e=fVPnmT)
+### 验证数据集
+```
+cd datasets
+mkdir evaluation
+cd evaluation
+wget -O gt_ctw1500.zip https://cloudstor.aarnet.edu.au/plus/s/xU3yeM3GnidiSTr/download
+wget -O gt_totaltext.zip https://cloudstor.aarnet.edu.au/plus/s/SFHvin8BLUM4cNd/download
+wget -O gt_icdar2015.zip "https://drive.google.com/uc?export=download&id=1wrq_-qIyb_8dhYVlDzLZTTajQzbic82Z"
+# 注意：下面的链接与 gt_ctw1500.zip 的下载链接相同，请确认 Inverse-Text 标注文件的正确下载地址
+wget -O gt_inversetext.zip https://cloudstor.aarnet.edu.au/plus/s/xU3yeM3GnidiSTr/download
+```
+### 数据集目录结构
+用于正常训练的数据集请按此目录结构进行：
+```
+├── ./datasets
+│   ├── simple
+│       ├── test_images
+│       ├── train_images
+│       ├── test.json
+│       └── train.json
+│   ├── evaluation
+│       ├── gt_totaltext.zip
+│       ├── gt_ctw1500.zip
+│       ├── gt_icdar2015.zip
+│       └── gt_inversetext.zip
+│   ├── syntext1
+│       ├── train_images
+│       └── annotations
+│           ├── train_37voc.json
+│           └── train_96voc.json
+│   ├── syntext2
+│       ├── train_images
+│       └── annotations
+│           ├── train_37voc.json
+│           └── train_96voc.json
+│   ├── mlt2017
+│       ├── train_images
+│       ├── train_37voc.json
+│       └── train_96voc.json
+│   ├── totaltext
+│       ├── train_images
+│       ├── test_images
+│       ├── weak_voc_new.txt
+│       ├── weak_voc_pair_list.txt
+│       ├── train_37voc.json
+│       ├── train_96voc.json
+│       └── test.json
+│   ├── ic13
+│       ├── train_images
+│       ├── train_37voc.json
+│       └── train_96voc.json
+│   ├── ic15
+│       ├── train_images
+│       ├── test_images
+│       ├── new_strong_lexicon
+│       ├── strong_lexicon
+│       ├── ch4_test_vocabulary.txt
+│       ├── ch4_test_vocabulary_new.txt
+│       ├── ch4_test_vocabulary_pair_list.txt
+│       ├── GenericVocabulary.txt
+│       ├── GenericVocabulary_new.txt
+│       ├── GenericVocabulary_pair_list.txt
+│       ├── train_37voc.json
+│       ├── train_96voc.json
+│       └── test.json
+│   ├── ctw1500
+│       ├── train_images
+│       ├── test_images
+│       ├── weak_voc_new.txt
+│       ├── weak_voc_pair_list.txt
+│       ├── train_96voc.json
+│       └── test.json
+│   ├── textocr
+│       ├── train_images
+│       ├── train_37voc_1.json
+│       └── train_37voc_2.json
+│   ├── inversetext
+│       ├── test_images
+│       └── test.json
+│   ├── chnsyntext
+│       ├── syn_130k_images
+│       └── chn_syntext.json
+│   ├── ArT
+│       ├── rename_artimg_train
+│       └── art_train.json
+│   ├── LSVT
+│       ├── rename_lsvtimg_train
+│       └── lsvt_train.json
+│   ├── ReCTS
+│       ├── ReCTS_train_images  # 18,000 images
+│       ├── ReCTS_val_images  # 2,000 images
+│       ├── ReCTS_test_images  # 5,000 images
+│       ├── rects_train.json
+│       ├── rects_val.json
+│       └── rects_test.json
+```
+如果使用自己的数据集，请将数据标注转换成COCO的格式，并在DeepSolo/adet/data/builtin.py代码第18行 _PREDEFINED_SPLITS_TEXT 参数中，参照结构补充自己的数据集。
+项目同样提供了迷你数据集simple进行学习。
+## 训练
+### 单机多卡
+Tips: 请根据实际情况自行修改 train.sh 中的以下参数：
+--config-file yaml文件配置地址
+--num-gpus 训练卡数量
+修改后执行：
+```
+bash train.sh
+```
+## 推理
+Tips:
+如需执行自己的预训练模型，请修改配置:
+${CONFIG_FILE} yaml文件配置地址（注意修改预训练模型地址）
+${IMAGE_PATH} 待测试数据地址
+样例执行步骤：
+1. 下载CTW1500的预训练模型：
+|Backbone|Training Data|Weights|
+|:------:|:------:|:------:|
+|Res-50|Synth150K+Total-Text+MLT17+IC13+IC15|[OneDrive](https://1drv.ms/u/s!AimBgYV7JjTlgcdtYzwEBGvOH6CiBw?e=trgKFE)|
+将预训练模型放在 pretrained_models/CTW1500/ 文件夹下，如果放置于其他地方，请同步修改配置文件中 MODEL.WEIGHTS 地址
+2. 将待测试数据存放于 ${IMAGE_PATH} 下，执行
+```
+bash test.sh
+```
+推理结果默认保存在test_results文件夹下，可以使用参数 --output 替换结果保存路径。
+## result
+CTW1500上的结果展示
+<div align=center>
+    <img src="./doc/results.jpg"/>
+</div>
+### 精度
+基于backbone=R50在ctw1500上的测试结果如下表所示：
+|Backbone|External Data|Det-P|Det-R|Det-F1|E2E-None|E2E-Full|
+|:------:|:------:|:------:|:------:|:------:|:------:|:------:|
+|Res-50(ours)|Synth150K+Total-Text+MLT17+IC13+IC15|0.9325|0.8475|0.8879|0.6408|0.812|
+|Res-50|Synth150K+Total-Text+MLT17+IC13+IC15|0.932|0.85|0.889|0.642|0.814|
+## 应用场景
+### 算法类别
+OCR
+### 热点应用行业
+政府,交通,物流
+## 源码仓库及问题反馈
+http://developer.hpccube.com/codes/modelzoo/deepsolo_pytorch.git
+## 参考资料
+https://github.com/ViTAE-Transformer/DeepSolo.git
--- a/adet/__init__.py
+++ b/adet/__init__.py
+# Import the modeling subpackage when `adet` is imported, so any import-time
+# side effects it has (presumably model/backbone registration - TODO confirm)
+# take place as soon as the package is loaded.
+from adet import modeling
+# Version string of the adet package.
+__version__ = "0.1.1"
--- a/adet/checkpoint/__init__.py
+++ b/adet/checkpoint/__init__.py
+from .adet_checkpoint import AdetCheckpointer
+# Names exported by `from adet.checkpoint import *`.
+__all__ = ["AdetCheckpointer"]
--- a/adet/checkpoint/adet_checkpoint.py
+++ b/adet/checkpoint/adet_checkpoint.py
+import pickle, os
+from fvcore.common.file_io import PathManager
+from detectron2.checkpoint import DetectionCheckpointer
+class AdetCheckpointer(DetectionCheckpointer):
+    """
+    A :class:`DetectionCheckpointer` that additionally understands pickled
+    Caffe2 / Detectron1 weight files and enables heuristic name matching for
+    checkpoints whose file name contains "lpf" or "dla".
+    """
+    def _load_file(self, filename):
+        """
+        Read a checkpoint file and return it as a dict that has a "model" key.
+        Args:
+            filename: path of the checkpoint. Files ending in ``.pkl`` are
+                unpickled here; anything else is handed to the parent class.
+        Returns:
+            dict: the checkpoint content. "matching_heuristics" is set to True
+            for Caffe2-style pickles and for LPF / DLA file names.
+        """
+        if not filename.endswith(".pkl"):
+            # load native pth checkpoint through the parent implementation
+            checkpoint = super()._load_file(filename)
+            if "model" not in checkpoint:
+                # A bare state dict was stored; wrap it in the expected layout.
+                checkpoint = {"model": checkpoint}
+            lowered_name = os.path.basename(filename).lower()
+            if any(tag in lowered_name for tag in ("lpf", "dla")):
+                checkpoint["matching_heuristics"] = True
+            return checkpoint
+        # NOTE(review): pickle.load can execute arbitrary code contained in the
+        # file - only load .pkl checkpoints from trusted sources.
+        with PathManager.open(filename, "rb") as handle:
+            payload = pickle.load(handle, encoding="latin1")
+        if "model" in payload and "__author__" in payload:
+            # file is in Detectron2 model zoo format
+            self.logger.info("Reading a file from '{}'".format(payload["__author__"]))
+            return payload
+        # assume file is from Caffe2 / Detectron1 model zoo
+        # Detection models have "blobs", but ImageNet models don't
+        weights = payload["blobs"] if "blobs" in payload else payload
+        weights = {
+            name: value
+            for name, value in weights.items()
+            if not name.endswith("_momentum")
+        }
+        weights.pop("weight_order", None)
+        return {"model": weights, "__author__": "Caffe2", "matching_heuristics": True}
--- a/adet/config/__init__.py
+++ b/adet/config/__init__.py
+from .config import get_cfg
+# Names exported by `from adet.config import *`.
+__all__ = [
+    "get_cfg",
+]
--- a/adet/config/config.py
+++ b/adet/config/config.py
+from detectron2.config import CfgNode
+def get_cfg() -> CfgNode:
+    """
+    Build a configuration object pre-populated with the project defaults.
+    Returns:
+        CfgNode: a clone of the default config node defined in ``.defaults``.
+    """
+    # The defaults module is imported inside the function, not at module level.
+    from .defaults import _C as default_cfg
+    return default_cfg.clone()
--- a/adet/config/defaults.py
+++ b/adet/config/defaults.py
+from detectron2.config.defaults import _C
+from detectron2.config import CfgNode as CN
+# ---------------------------------------------------------------------------- #
+# Additional Configs
+# ---------------------------------------------------------------------------- #
+# NOTE: `_C` is the default config object imported from detectron2 at the top of
+# this file, so every assignment below adds keys to that same object.
+_C.MODEL.MOBILENET = False
+_C.MODEL.BACKBONE.ANTI_ALIAS = False
+_C.MODEL.RESNETS.DEFORM_INTERVAL = 1
+# Input / augmentation switches; their exact effect is defined by the code that
+# reads these keys, which is not visible in this file.
+_C.INPUT.HFLIP_TRAIN = False
+_C.INPUT.CROP.CROP_INSTANCE = True
+_C.INPUT.ROTATE = True
+# New config node holding the BASIS_MODULE options.
+_C.MODEL.BASIS_MODULE = CN()
+_C.MODEL.BASIS_MODULE.NAME = "ProtoNet"
+_C.MODEL.BASIS_MODULE.NUM_BASES = 4
+_C.MODEL.BASIS_MODULE.LOSS_ON = False
+_C.MODEL.BASIS_MODULE.ANN_SET = "coco"
+_C.MODEL.BASIS_MODULE.CONVS_DIM = 128
+_C.MODEL.BASIS_MODULE.IN_FEATURES = ["p3", "p4", "p5"]
+_C.MODEL.BASIS_MODULE.NORM = "SyncBN"
+_C.MODEL.BASIS_MODULE.NUM_CONVS = 3
+_C.MODEL.BASIS_MODULE.COMMON_STRIDE = 8
+_C.MODEL.BASIS_MODULE.NUM_CLASSES = 80
+_C.MODEL.BASIS_MODULE.LOSS_WEIGHT = 0.3
+# New config node holding the TOP_MODULE options.
+_C.MODEL.TOP_MODULE = CN()
+_C.MODEL.TOP_MODULE.NAME = "conv"
+_C.MODEL.TOP_MODULE.DIM = 16
+# ---------------------------------------------------------------------------- #
+# BAText Options
+# ---------------------------------------------------------------------------- #
+# New config node holding the BAText options.
+_C.MODEL.BATEXT = CN()
+_C.MODEL.BATEXT.VOC_SIZE = 96
+_C.MODEL.BATEXT.NUM_CHARS = 25
+_C.MODEL.BATEXT.POOLER_RESOLUTION = (8, 32)
+# NOTE(review): IN_FEATURES and POOLER_SCALES both have three entries;
+# presumably they must stay aligned one-to-one - confirm against the consumer.
+_C.MODEL.BATEXT.IN_FEATURES = ["p2", "p3", "p4"]
+_C.MODEL.BATEXT.POOLER_SCALES = (0.25, 0.125, 0.0625)
+_C.MODEL.BATEXT.SAMPLING_RATIO = 1
+_C.MODEL.BATEXT.CONV_DIM = 256
+_C.MODEL.BATEXT.NUM_CONV = 2
+_C.MODEL.BATEXT.RECOGNITION_LOSS = "ctc"
+_C.MODEL.BATEXT.RECOGNIZER = "attn"
+_C.MODEL.BATEXT.CANONICAL_SIZE = 96  # largest min_size for level 3 (stride=8)
+_C.MODEL.BATEXT.USE_COORDCONV = False
+_C.MODEL.BATEXT.USE_AET = False
+# An empty string is the default here; presumably it means "no custom
+# dictionary" - TODO confirm.
+_C.MODEL.BATEXT.CUSTOM_DICT = "" # Path to the class file.
+# ---------------------------------------------------------------------------- #
+# SwinTransformer Options
+# ---------------------------------------------------------------------------- #
+# New config node holding the Swin backbone options.
+_C.MODEL.SWIN = CN()
+_C.MODEL.SWIN.TYPE = 'tiny'
+_C.MODEL.SWIN.DROP_PATH_RATE = 0.2
+# ---------------------------------------------------------------------------- #
+# ViTAE-v2 Options
+# ---------------------------------------------------------------------------- #
+# New config node holding the ViTAE-v2 backbone options. Note the mixed-case key
+# name `ViTAEv2`, unlike the upper-case keys used elsewhere in this file.
+_C.MODEL.ViTAEv2 = CN()
+_C.MODEL.ViTAEv2.TYPE = 'vitaev2_s'
+_C.MODEL.ViTAEv2.DROP_PATH_RATE = 0.2
+# ---------------------------------------------------------------------------- #
+# (Deformable) Transformer Options
+# ---------------------------------------------------------------------------- #
+# New config node holding the transformer options.
+_C.MODEL.TRANSFORMER = CN()
+_C.MODEL.TRANSFORMER.ENABLED = False
+_C.MODEL.TRANSFORMER.INFERENCE_TH_TEST = 0.4
+# NOTE(review): AUX_LOSS is defined both here and under TRANSFORMER.LOSS below;
+# which of the two is actually read is not visible in this file.
+_C.MODEL.TRANSFORMER.AUX_LOSS = True
+_C.MODEL.TRANSFORMER.ENC_LAYERS = 6
+_C.MODEL.TRANSFORMER.DEC_LAYERS = 6
+_C.MODEL.TRANSFORMER.DIM_FEEDFORWARD = 1024
+_C.MODEL.TRANSFORMER.HIDDEN_DIM = 256
+_C.MODEL.TRANSFORMER.DROPOUT = 0.0
+_C.MODEL.TRANSFORMER.NHEADS = 8
+_C.MODEL.TRANSFORMER.NUM_QUERIES = 100
+_C.MODEL.TRANSFORMER.ENC_N_POINTS = 4
+_C.MODEL.TRANSFORMER.DEC_N_POINTS = 4
+_C.MODEL.TRANSFORMER.POSITION_EMBEDDING_SCALE = 6.283185307179586  # 2 PI
+_C.MODEL.TRANSFORMER.NUM_FEATURE_LEVELS = 4
+_C.MODEL.TRANSFORMER.VOC_SIZE = 37  # a-z + 0-9 + unknown
+_C.MODEL.TRANSFORMER.CUSTOM_DICT = "" # Path to the character class file.
+_C.MODEL.TRANSFORMER.NUM_POINTS = 25  # the number of point queries for each instance
+_C.MODEL.TRANSFORMER.TEMPERATURE = 10000
+_C.MODEL.TRANSFORMER.BOUNDARY_HEAD = True # True: with boundary predictions, False: only with center lines
+# New config node holding the loss options of the transformer.
+_C.MODEL.TRANSFORMER.LOSS = CN()
+_C.MODEL.TRANSFORMER.LOSS.AUX_LOSS = True
+_C.MODEL.TRANSFORMER.LOSS.FOCAL_ALPHA = 0.25
+_C.MODEL.TRANSFORMER.LOSS.FOCAL_GAMMA = 2.0
+# bezier proposal loss
+_C.MODEL.TRANSFORMER.LOSS.BEZIER_CLASS_WEIGHT = 1.0
+_C.MODEL.TRANSFORMER.LOSS.BEZIER_COORD_WEIGHT = 1.0
+_C.MODEL.TRANSFORMER.LOSS.BEZIER_SAMPLE_POINTS = 25
+# supervise the sampled on-curve points but not 4 Bezier control points
+# target loss
+_C.MODEL.TRANSFORMER.LOSS.POINT_CLASS_WEIGHT = 1.0
+_C.MODEL.TRANSFORMER.LOSS.POINT_COORD_WEIGHT = 1.0
+_C.MODEL.TRANSFORMER.LOSS.POINT_TEXT_WEIGHT = 0.5
+_C.MODEL.TRANSFORMER.LOSS.BOUNDARY_WEIGHT = 0.5
+# Solver additions. The two *_NAMES lists default to empty; presumably they hold
+# parameter-name patterns that select which learning rate applies - TODO confirm.
+_C.SOLVER.OPTIMIZER = "ADAMW"
+_C.SOLVER.LR_BACKBONE = 1e-5
+_C.SOLVER.LR_BACKBONE_NAMES = []
+_C.SOLVER.LR_LINEAR_PROJ_NAMES = []
+_C.SOLVER.LR_LINEAR_PROJ_MULT = 0.1
+# Lexicon used at test time; the meaning of the value depends on the dataset:
+# 1 - Generic, 2 - Weak, 3 - Strong (for icdar2015)
+# 1 - Full lexicon (for totaltext)
+_C.TEST.LEXICON_TYPE = 1
\ No newline at end of file
--- a/adet/evaluation/__init__.py
+++ b/adet/evaluation/__init__.py
+from .text_evaluation_all import TextEvaluator
--- a/adet/evaluation/rrc_evaluation_funcs.py
+++ b/adet/evaluation/rrc_evaluation_funcs.py
+#!/usr/bin/env python2
+#encoding: UTF-8
+import json
+import sys;sys.path.append('./')
+import zipfile
+import re
+import sys
+import os
+import codecs
+import importlib
+from io import StringIO
+from shapely.geometry import *
+def print_help():
+    """
+    Write the command-line usage string to stdout and exit with status 2.
+    """
+    usage = 'Usage: python %s.py -g=<gtFile> -s=<submFile> [-o=<outputFolder> -p=<jsonParams>]' %sys.argv[0]
+    sys.stdout.write(usage)
+    sys.exit(2)
+def load_zip_file_keys(file,fileNameRegExp=''):
+    """
+    List the entries of a ZIP archive, optionally filtered by a regular expression.
+    Args:
+        file: path or file-like object accepted by ``zipfile.ZipFile``.
+        fileNameRegExp: pattern applied with ``re.match`` to every entry name.
+            An empty string keeps every entry.
+    Returns:
+        list: one key per kept entry, in archive order. The key is the first
+        capturing group of the pattern when it defines one, otherwise the
+        entry name itself.
+    Raises:
+        Exception: if the archive cannot be opened.
+    """
+    try:
+        archive = zipfile.ZipFile(file, mode='r', allowZip64=True)
+    except Exception as e:
+        raise Exception('Error loading the ZIP archive.') from e
+    keys = []
+    # Close the archive when done; the handle used to be left open.
+    with archive:
+        for name in archive.namelist():
+            if fileNameRegExp == "":
+                keys.append(name)
+                continue
+            m = re.match(fileNameRegExp, name)
+            if m is None:
+                # Entry does not match the pattern: skip it.
+                continue
+            keys.append(m.group(1) if len(m.groups()) > 0 else name)
+    return keys
+def load_zip_file(file,fileNameRegExp='',allEntries=False):
+    """
+    Read the contents of a ZIP archive into a dict, optionally filtered by a
+    regular expression.
+    Args:
+        file: path or file-like object accepted by ``zipfile.ZipFile``.
+        fileNameRegExp: pattern applied with ``re.match`` to every entry name.
+            An empty string keeps every entry.
+        allEntries: when True, an entry that does not match the pattern is an
+            error instead of being skipped.
+    Returns:
+        dict: maps each key to the raw bytes of the entry. The key is the first
+        capturing group of the pattern when it defines one, otherwise the
+        entry name. If two entries produce the same key the later one wins.
+    Raises:
+        Exception: if the archive cannot be opened, or if ``allEntries`` is True
+            and an entry does not match ``fileNameRegExp``.
+    """
+    try:
+        archive = zipfile.ZipFile(file, mode='r', allowZip64=True)
+    except Exception as e:
+        raise Exception('Error loading the ZIP archive') from e
+    contents = {}
+    # Close the archive when done (also on the allEntries error path); the
+    # handle used to be left open.
+    with archive:
+        for name in archive.namelist():
+            keyName = name
+            if fileNameRegExp != "":
+                m = re.match(fileNameRegExp, name)
+                if m is None:
+                    if allEntries:
+                        raise Exception('ZIP entry not valid: %s' %name)
+                    continue
+                if len(m.groups()) > 0:
+                    keyName = m.group(1)
+            contents[keyName] = archive.read(name)
+    return contents
+def decode_utf8(raw):
+    """
+    Decode UTF-8 bytes to text, dropping a leading byte-order mark if present.
+    Invalid byte sequences are replaced with U+FFFD instead of failing.
+    Args:
+        raw: the bytes to decode.
+    Returns:
+        str on success, or None if ``raw`` cannot be decoded at all (for
+        example because it is not a bytes-like object).
+    """
+    try:
+        text = codecs.decode(raw,'utf-8', 'replace')
+    except Exception:
+        return None
+    # A decoded UTF-8 BOM is the single character U+FEFF. The previous
+    # implementation called bytes.replace() with a str argument, which raises
+    # TypeError in Python 3, so every BOM-prefixed file was reported as invalid.
+    if text.startswith('\ufeff'):
+        text = text[1:]
+    return text
+def validate_lines_in_file_gt(fileName,file_contents,CRLF=True,LTRB=True,withTranscription=False,withConfidence=False,imWidth=0,imHeight=0):
+    """
+    Validate every non-empty line of a ground-truth file with validate_tl_line_gt.
+    Args:
+        fileName: name of the sample, used only in error messages.
+        file_contents: raw bytes of the file; must decode as UTF-8.
+        CRLF: split lines on "\\r\\n" when True, on "\\n" otherwise.
+        LTRB, withTranscription, withConfidence, imWidth, imHeight: forwarded
+            unchanged to validate_tl_line_gt.
+    Raises:
+        Exception: if the contents cannot be decoded, or if any line is invalid
+            (the message names the sample, the line and the underlying error).
+    """
+    utf8File = decode_utf8(file_contents)
+    if utf8File is None:
+        raise Exception("The file %s is not UTF-8" %fileName)
+    for line in utf8File.split( "\r\n" if CRLF else "\n" ):
+        # Drop stray line-ending characters left over after the split.
+        line = line.replace("\r","").replace("\n","")
+        if line == "":
+            continue
+        try:
+            validate_tl_line_gt(line,LTRB,withTranscription,withConfidence,imWidth,imHeight)
+        except Exception as e:
+            # Keep the message as text: encoding it to bytes (a Python 2 habit)
+            # makes str(exception) render as b'...' in Python 3.
+            raise Exception("Line in sample not valid. Sample: %s Line: %s Error: %s" %(fileName,line,str(e))) from e
+def validate_lines_in_file(fileName,file_contents,CRLF=True,LTRB=True,withTranscription=False,withConfidence=False,imWidth=0,imHeight=0):
+    """
+    Validate every non-empty line of a file with validate_tl_line.
+    Args:
+        fileName: name of the sample, used only in error messages.
+        file_contents: raw bytes of the file; must decode as UTF-8.
+        CRLF: split lines on "\\r\\n" when True, on "\\n" otherwise.
+        LTRB, withTranscription, withConfidence, imWidth, imHeight: forwarded
+            unchanged to validate_tl_line.
+    Raises:
+        Exception: if the contents cannot be decoded, or if any line is invalid
+            (the message names the sample, the line and the underlying error).
+    """
+    utf8File = decode_utf8(file_contents)
+    if utf8File is None:
+        raise Exception("The file %s is not UTF-8" %fileName)
+    for line in utf8File.split( "\r\n" if CRLF else "\n" ):
+        # Drop stray line-ending characters left over after the split.
+        line = line.replace("\r","").replace("\n","")
+        if line == "":
+            continue
+        try:
+            validate_tl_line(line,LTRB,withTranscription,withConfidence,imWidth,imHeight)
+        except Exception as e:
+            # Keep the message as text: encoding it to bytes (a Python 2 habit)
+            # makes str(exception) render as b'...' in Python 3.
+            raise Exception("Line in sample not valid. Sample: %s Line: %s Error: %s" %(fileName,line,str(e))) from e
+def validate_tl_line_gt(line,LTRB=True,withTranscription=True,withConfidence=True,imWidth=0,imHeight=0):
+    """
+    Check that one ground-truth text line is well-formed.
+    The line is parsed with get_tl_line_values_gt; the parsed values are
+    discarded and any exception raised while parsing propagates to the caller.
+    When imWidth and imHeight are both given, the parser also checks that all
+    points lie inside the image bounds.
+    Line layouts named by the original documentation:
+    LTRB=True: xmin,ymin,xmax,ymax[,confidence][,transcription]
+    LTRB=False: x1,y1,x2,y2,x3,y3,x4,y4[,confidence][,transcription]
+    """
+    get_tl_line_values_gt(
+        line,
+        LTRB=LTRB,
+        withTranscription=withTranscription,
+        withConfidence=withConfidence,
+        imWidth=imWidth,
+        imHeight=imHeight,
+    )
+def validate_tl_line(line,LTRB=True,withTranscription=True,withConfidence=True,imWidth=0,imHeight=0):
+    """
+    Check that one text line is well-formed.
+    The line is parsed with get_tl_line_values; the parsed values are discarded
+    and any exception raised while parsing propagates to the caller.
+    When imWidth and imHeight are both given, the parser also checks that all
+    points lie inside the image bounds.
+    Line layouts named by the original documentation:
+    LTRB=True: xmin,ymin,xmax,ymax[,confidence][,transcription]
+    LTRB=False: x1,y1,x2,y2,x3,y3,x4,y4[,confidence][,transcription]
+    """
+    get_tl_line_values(
+        line,
+        LTRB=LTRB,
+        withTranscription=withTranscription,
+        withConfidence=withConfidence,
+        imWidth=imWidth,
+        imHeight=imHeight,
+    )
+def get_tl_line_values_gt(line,LTRB=True,withTranscription=False,withConfidence=False,imWidth=0,imHeight=0):
+    """
+    Parse and validate one ground-truth line of the form
+    ``x1,y1,...,xn,yn,####transcription``.
+    Only the combination LTRB=False, withTranscription=True,
+    withConfidence=False is implemented; every other combination raises.
+    Args:
+        line: the text line to parse.
+        LTRB: must be False (the rectangle format is not implemented).
+        withTranscription: must be True.
+        withConfidence: must be False.
+        imWidth, imHeight: when both are > 0, every point is checked against
+            the image bounds with validate_point_inside_bounds.
+    Returns:
+        tuple: (points, confidence, transcription). ``points`` is the flat list
+        of coordinates as floats, ``confidence`` is always 0.0 and
+        ``transcription`` is the text after the ``,####`` delimiter, with
+        surrounding double quotes removed and escapes resolved if it was quoted.
+    Raises:
+        Exception: if LTRB is True or the ``,####`` delimiter is missing.
+        NotImplementedError: for unsupported flag combinations.
+        AssertionError: if the number of coordinates is odd (polygon checks in
+            validate_clockwise_points may raise it as well).
+        ValueError: if a coordinate is not a number.
+    """
+    if LTRB:
+        # do not use
+        raise Exception('Not implemented.')
+    if withConfidence or not withTranscription:
+        # `raise('not implemented')` is itself a TypeError in Python 3 because a
+        # str is not an exception; raise a real exception instead.
+        raise NotImplementedError('not implemented')
+    ptr = line.strip().split(',####')
+    if len(ptr) < 2:
+        # Previously surfaced as a bare IndexError on ptr[1].
+        raise Exception("Missing ',####' delimiter between coordinates and transcription")
+    cors = ptr[0].split(',')
+    recs = ptr[1].strip()
+    assert(len(cors)%2 == 0), 'num cors should be even.'
+    points = [ float(ic) for ic in cors ]
+    validate_clockwise_points(points)
+    if (imWidth>0 and imHeight>0):
+        for ip in range(0, len(points), 2):
+            validate_point_inside_bounds(points[ip],points[ip+1],imWidth,imHeight)
+    # withConfidence is always False when execution reaches this point, so the
+    # confidence keeps its default.
+    confidence = 0.0
+    transcription = recs
+    m2 = re.match(r'^\s*\"(.*)\"\s*$',transcription)
+    if m2 is not None : #Transcription with double quotes, we extract the value and replace escaped characters
+        transcription = m2.group(1).replace("\\\\", "\\").replace("\\\"", "\"")
+    return points,confidence,transcription
+def get_tl_line_values(line,LTRB=True,withTranscription=False,withConfidence=False,imWidth=0,imHeight=0):
+    """
+    Parse and validate one text line of the form
+    ``x1,y1,...,xn,yn,####transcription``.
+    Only the combination LTRB=False, withTranscription=True,
+    withConfidence=False is implemented; every other combination raises.
+    Args:
+        line: the text line to parse.
+        LTRB: must be False (the rectangle format is not implemented).
+        withTranscription: must be True.
+        withConfidence: must be False.
+        imWidth, imHeight: when both are > 0, every point is checked against
+            the image bounds with validate_point_inside_bounds.
+    Returns:
+        tuple: (points, confidence, transcription). ``points`` is the flat list
+        of coordinates as floats, ``confidence`` is always 0.0 and
+        ``transcription`` is the text after the ``,####`` delimiter, with
+        surrounding double quotes removed and escapes resolved if it was quoted.
+    Raises:
+        Exception: if LTRB is True or the ``,####`` delimiter is missing.
+        NotImplementedError: for unsupported flag combinations.
+        AssertionError: if the number of coordinates is odd (polygon checks in
+            validate_clockwise_points may raise it as well).
+        ValueError: if a coordinate is not a number.
+    """
+    if LTRB:
+        # do not use
+        raise Exception('Not implemented.')
+    if withConfidence or not withTranscription:
+        # `raise('not implemented')` is itself a TypeError in Python 3 because a
+        # str is not an exception; raise a real exception instead.
+        raise NotImplementedError('not implemented')
+    ptr = line.strip().split(',####')
+    if len(ptr) < 2:
+        # Previously surfaced as a bare IndexError on ptr[1].
+        raise Exception("Missing ',####' delimiter between coordinates and transcription")
+    cors = ptr[0].split(',')
+    recs = ptr[1].strip()
+    assert(len(cors)%2 == 0), 'num cors should be even.'
+    points = [ float(ic) for ic in cors ]
+    validate_clockwise_points(points)
+    if (imWidth>0 and imHeight>0):
+        for ip in range(0, len(points), 2):
+            validate_point_inside_bounds(points[ip],points[ip+1],imWidth,imHeight)
+    # withConfidence is always False when execution reaches this point, so the
+    # confidence keeps its default.
+    confidence = 0.0
+    transcription = recs
+    m2 = re.match(r'^\s*\"(.*)\"\s*$',transcription)
+    if m2 is not None : #Transcription with double quotes, we extract the value and replace escaped characters
+        transcription = m2.group(1).replace("\\\\", "\\").replace("\\\"", "\"")
+    return points,confidence,transcription
+def validate_point_inside_bounds(x,y,imWidth,imHeight):
+    """
+    Ensure the point (x, y) lies inside an image of the given size.
+    Both bounds are inclusive: 0 <= x <= imWidth and 0 <= y <= imHeight.
+    Raises:
+        Exception: if either coordinate is out of range; the message names the
+            offending value and the image dimensions.
+    """
+    # The messages previously referenced undefined names (xmin / ymin) and the
+    # Y message had more placeholders than arguments, so an out-of-bounds point
+    # produced a NameError instead of the intended error.
+    if(x<0 or x>imWidth):
+        raise Exception("X value (%s) not valid. Image dimensions: (%s,%s)" %(x,imWidth,imHeight))
+    if(y<0 or y>imHeight):
+        raise Exception("Y value (%s) not valid. Image dimensions: (%s,%s)" %(y,imWidth,imHeight))
+def validate_clockwise_points(points):
+    """
+    Validate that a flat coordinate list describes a valid polygon that is not
+    in counter-clockwise order.
+    Args:
+        points: flat sequence ``[x1, y1, x2, y2, ...]``; consecutive pairs are
+            taken as vertices, so any number of vertices is accepted (the
+            earlier quadrilateral-only check is no longer used).
+    Raises:
+        AssertionError: if Shapely cannot build a polygon from the vertices, if
+            the polygon is not valid (e.g. its sides intersect), or if the ring
+            is reported as counter-clockwise by ``LinearRing.is_ccw``.
+    """
+    pts = [(points[j], points[j+1]) for j in range(0,len(points),2)]
+    # AssertionError is raised explicitly instead of via `assert(0), ...` so the
+    # checks still run under `python -O`, where assert statements are removed
+    # (which previously could leave `pdet` undefined). The exception type and
+    # payloads are unchanged.
+    try:
+        pdet = Polygon(pts)
+    except Exception:
+        raise AssertionError(('not a valid polygon', pts))
+    # The polygon should be valid.
+    if not pdet.is_valid:
+        raise AssertionError(('polygon has intersection sides', pts))
+    pRing = LinearRing(pts)
+    if pRing.is_ccw:
+        raise AssertionError("Points are not clockwise. The coordinates of bounding quadrilaterals have to be given in clockwise order. Regarding the correct interpretation of 'clockwise' remember that the image coordinate system used is the standard one, with the image origin at the upper left, the X axis extending to the right and Y axis extending downwards.")
+def get_tl_line_values_from_file_contents(content,CRLF=True,LTRB=True,withTranscription=False,withConfidence=False,imWidth=0,imHeight=0,sort_by_confidences=True):
+    """
+    Returns all points, confindences and transcriptions of a file in lists. Valid line formats:
+    xmin,ymin,xmax,ymax,[confidence],[transcription]
+    x1,y1,x2,y2,x3,y3,x4,y4,[confidence],[transcription]
+    """
+    pointsList = []
+    transcriptionsList = []
+    confidencesList = []
+    lines = content.split( "\r\n" if CRLF else "\n" )
+    for line in lines:
+        line = line.replace("\r","").replace("\n","")
+        if(line != "") :
+            points, confidence, transcription = get_tl_line_values_gt(line,LTRB,withTranscription,withConfidence,imWidth,imHeight);
+            pointsList.append(points)
+            transcriptionsList.append(transcription)
+            confidencesList.append(confidence)
+    if withConfidence and len(confidencesList)>0 and sort_by_confidences:
+        import numpy as np
+        sorted_ind = np.argsort(-np.array(confidencesList))
+        confidencesList = [confidencesList[i] for i in sorted_ind]
+        pointsList = [pointsList[i] for i in sorted_ind]
+        transcriptionsList = [transcriptionsList[i] for i in sorted_ind]        
+    return pointsList,confidencesList,transcriptionsList
+def get_tl_line_values_from_file_contents_det(content,CRLF=True,LTRB=True,withTranscription=False,withConfidence=False,imWidth=0,imHeight=0,sort_by_confidences=True):
+    """
+    Returns all points, confindences and transcriptions of a file in lists. Valid line formats:
+    xmin,ymin,xmax,ymax,[confidence],[transcription]
+    x1,y1,x2,y2,x3,y3,x4,y4,[confidence],[transcription]
+    """
+    pointsList = []
+    transcriptionsList = []
+    confidencesList = []
+    lines = content.split( "\r\n" if CRLF else "\n" )
+    for line in lines:
+        line = line.replace("\r","").replace("\n","")
+        if(line != "") :
+            points, confidence, transcription = get_tl_line_values(line,LTRB,withTranscription,withConfidence,imWidth,imHeight);
+            pointsList.append(points)
+            transcriptionsList.append(transcription)
+            confidencesList.append(confidence)
+    if withConfidence and len(confidencesList)>0 and sort_by_confidences:
+        import numpy as np
+        sorted_ind = np.argsort(-np.array(confidencesList))
+        confidencesList = [confidencesList[i] for i in sorted_ind]
+        pointsList = [pointsList[i] for i in sorted_ind]
+        transcriptionsList = [transcriptionsList[i] for i in sorted_ind]        
+    return pointsList,confidencesList,transcriptionsList
+def main_evaluation(p,det_file, gt_file, default_evaluation_params_fn,validate_data_fn,evaluate_method_fn,show_result=True,per_sample=True):
+    """
+    Validates a method and evaluates it, returning the results as a dictionary.
+    Params:
+    p: ignored; it is replaced below by a dictionary built from gt_file and det_file.
+    det_file: submission location, stored under key 's'.
+    gt_file: ground-truth location, stored under key 'g'.
+    default_evaluation_params_fn: points to a function that returns a dictionary with the default parameters used for the evaluation
+    validate_data_fn: points to a method that validates the correct format of the submission
+    evaluate_method_fn: points to a function that evaluates the submission and returns a dictionary with the results
+    show_result: when true, an error message is written to stderr if 'calculated' is false.
+    per_sample: only consulted while writing the output ZIP.
+    Returns resDict: the initial status entries updated with the output of evaluate_method_fn.
+    Exceptions raised by validate_data_fn or evaluate_method_fn propagate to the caller,
+    because the surrounding try/except is commented out.
+    """
+    # if (p == None):
+    #     p = dict([s[1:].split('=') for s in sys.argv[1:]])
+    #     if(len(sys.argv)<3):
+    #         print_help()
+    # The incoming p is discarded: only 'g' and 's' are ever present in the dictionary.
+    p = {}
+    p['g'] =gt_file  #'tttgt.zip'
+    p['s'] =det_file #'det.zip'
+    evalParams = default_evaluation_params_fn()
+    # NOTE(review): p was just rebuilt with only 'g' and 's', so this branch cannot run.
+    if 'p' in p.keys():
+        evalParams.update( p['p'] if isinstance(p['p'], dict) else json.loads(p['p'][1:-1]) )
+    resDict={'calculated':True,'Message':'','method':'{}','per_sample':'{}'}    
+    # try:
+    validate_data_fn(p['g'], p['s'], evalParams)  
+    evalData = evaluate_method_fn(p['g'], p['s'], evalParams)
+    resDict.update(evalData)
+    # except Exception as e:
+        # resDict['Message']= str(e)
+        # resDict['calculated']=False
+    # NOTE(review): 'o' is never set in p above, so none of the ZIP-writing branches below run.
+    if 'o' in p:
+        if not os.path.exists(p['o']):
+            os.makedirs(p['o'])
+        resultsOutputname = p['o'] + '/results.zip'
+        outZip = zipfile.ZipFile(resultsOutputname, mode='w', allowZip64=True)
+        # Per-sample data and output items are written as separate entries, not in method.json.
+        del resDict['per_sample']
+        if 'output_items' in resDict.keys():
+            del resDict['output_items']
+        outZip.writestr('method.json',json.dumps(resDict))
+    # 'calculated' starts as True and is not reset in this function; it is false only
+    # if evaluate_method_fn returned a false 'calculated' entry.
+    if not resDict['calculated']:
+        if show_result:
+            sys.stderr.write('Error!\n'+ resDict['Message']+'\n\n')
+        if 'o' in p:
+            outZip.close()
+        return resDict
+    if 'o' in p:
+        if per_sample == True:
+            for k,v in evalData['per_sample'].items():
+                outZip.writestr( k + '.json',json.dumps(v)) 
+            if 'output_items' in evalData.keys():
+                for k, v in evalData['output_items'].items():
+                    outZip.writestr( k,v) 
+        outZip.close()
+    # if show_result:
+    #     sys.stdout.write("Calculated!")
+    #     sys.stdout.write('\n')
+    #     sys.stdout.write(json.dumps(resDict['e2e_method']))
+    #     sys.stdout.write('\n')
+    #     sys.stdout.write(json.dumps(resDict['det_only_method']))
+    #     sys.stdout.write('\n')
+    return resDict
+def main_validation(default_evaluation_params_fn,validate_data_fn):
+    """
+    This process validates a method
+    Params:
+    default_evaluation_params_fn: points to a function that returns a dictionary with the default parameters used for the evaluation
+    validate_data_fn: points to a method that validates the corrct format of the submission
+    """    
+    try:
+        p = dict([s[1:].split('=') for s in sys.argv[1:]])
+        evalParams = default_evaluation_params_fn()
+        if 'p' in p.keys():
+            evalParams.update( p['p'] if isinstance(p['p'], dict) else json.loads(p['p'][1:-1]) )
+        validate_data_fn(p['g'], p['s'], evalParams)              
+        print('SUCCESS')
+        sys.exit(0)
+    except Exception as e:
+        print(str(e))
+        sys.exit(101)
\ No newline at end of file
--- a/adet/evaluation/rrc_evaluation_funcs_ic15.py
+++ b/adet/evaluation/rrc_evaluation_funcs_ic15.py
+#!/usr/bin/env python2
+#encoding: UTF-8
+import json
+import sys;sys.path.append('./')
+import zipfile
+import re
+import sys
+import os
+import codecs
+import importlib
+try:
+    from StringIO import StringIO
+except ImportError:
+    from io import StringIO
+def print_help():
+    sys.stdout.write('Usage: python %s.py -g=<gtFile> -s=<submFile> [-o=<outputFolder> -p=<jsonParams>]' %sys.argv[0])
+    sys.exit(2)
+def load_zip_file_keys(file,fileNameRegExp=''):
+    """
+    Returns an array with the entries of the ZIP file that match with the regular expression.
+    The key's are the names or the file or the capturing group definied in the fileNameRegExp
+    """
+    try:
+        archive=zipfile.ZipFile(file, mode='r', allowZip64=True)
+    except :
+        raise Exception('Error loading the ZIP archive.')
+    pairs = []
+    for name in archive.namelist():
+        addFile = True
+        keyName = name
+        if fileNameRegExp!="":
+            m = re.match(fileNameRegExp,name)
+            if m == None:
+                addFile = False
+            else:
+                if len(m.groups())>0:
+                    keyName = m.group(1)
+        if addFile:
+            pairs.append( keyName )
+    return pairs
+def load_zip_file(file,fileNameRegExp='',allEntries=False):
+    """
+    Returns an array with the contents (filtered by fileNameRegExp) of a ZIP file.
+    The key's are the names or the file or the capturing group definied in the fileNameRegExp
+    allEntries validates that all entries in the ZIP file pass the fileNameRegExp
+    """
+    try:
+        archive=zipfile.ZipFile(file, mode='r', allowZip64=True)
+    except :
+        raise Exception('Error loading the ZIP archive')    
+    pairs = []
+    for name in archive.namelist():
+        addFile = True
+        keyName = name
+        if fileNameRegExp!="":
+            m = re.match(fileNameRegExp,name)
+            if m == None:
+                addFile = False
+            else:
+                if len(m.groups())>0:
+                    keyName = m.group(1)
+        if addFile:
+            pairs.append( [ keyName , archive.read(name)] )
+        else:
+            if allEntries:
+                raise Exception('ZIP entry not valid: %s' %name)             
+    return dict(pairs)
+def decode_utf8(raw):
+    """
+    Returns a Unicode object on success, or None on failure
+    """
+    try:
+        raw = codecs.decode(raw,'utf-8', 'replace')
+        #extracts BOM if exists
+        raw = raw.encode('utf8')
+        if raw.startswith(codecs.BOM_UTF8):
+            raw = raw.replace(codecs.BOM_UTF8, '', 1)
+        return raw.decode('utf-8')
+    except:
+       return None
+def validate_lines_in_file(fileName,file_contents,CRLF=True,LTRB=True,withTranscription=False,withConfidence=False,imWidth=0,imHeight=0):
+    """
+    This function validates that all lines of the file calling the Line validation function for each line
+    """
+    utf8File = decode_utf8(file_contents)
+    if (utf8File is None) :
+        raise Exception("The file %s is not UTF-8" %fileName)
+    lines = utf8File.split( "\r\n" if CRLF else "\n" )
+    for line in lines:
+        line = line.replace("\r","").replace("\n","")
+        if(line != ""):
+            try:
+                validate_tl_line(line,LTRB,withTranscription,withConfidence,imWidth,imHeight)
+            except Exception as e:
+                raise Exception(("Line in sample not valid. Sample: %s Line: %s Error: %s" %(fileName,line,str(e))).encode('utf-8', 'replace'))
+def validate_tl_line(line,LTRB=True,withTranscription=True,withConfidence=True,imWidth=0,imHeight=0):
+    """
+    Validate the format of the line. If the line is not valid an exception will be raised.
+    If imWidth and imHeight are both greater than 0, all points must be inside the image bounds.
+    Possible values are:
+    LTRB=True: xmin,ymin,xmax,ymax[,confidence][,transcription]
+    LTRB=False: x1,y1,x2,y2,x3,y3,x4,y4[,confidence][,transcription]
+    The parsing is delegated to get_tl_line_values; its return value is discarded.
+    """
+    get_tl_line_values(line,LTRB,withTranscription,withConfidence,imWidth,imHeight)
+def get_tl_line_values(line,LTRB=True,withTranscription=False,withConfidence=False,imWidth=0,imHeight=0):
+    """
+    Validate the format of the line and return its values. If the line is not valid an exception will be raised.
+    If imWidth and imHeight are both greater than 0, all points must be inside the image bounds.
+    Possible values are:
+    LTRB=True: xmin,ymin,xmax,ymax[,confidence][,transcription]
+    LTRB=False: x1,y1,x2,y2,x3,y3,x4,y4[,confidence][,transcription]
+    Returns a tuple (points, confidence, transcription): points is a list of floats
+    (4 values for LTRB, 8 otherwise), confidence defaults to 0.0 and transcription to ""
+    when the corresponding flag is not set.
+    """
+    confidence = 0.0
+    transcription = "";
+    points = []
+    numPoints = 4;
+    if LTRB:
+        numPoints = 4;
+        # One pattern per combination of optional fields. In the LTRB patterns only
+        # xmin and ymin accept a leading minus sign; xmax and ymax do not.
+        # NOTE(review): in the confidence group "[0-1].?[0-9]*" the dot is unescaped and
+        # matches any character; non-numeric text is rejected later by float().
+        if withTranscription and withConfidence:
+            m = re.match(r'^\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*([0-9]+)\s*,\s*([0-9]+)\s*,\s*([0-1].?[0-9]*)\s*,(.*)$',line)
+            if m == None :
+                # NOTE(review): this repeats the match above and its result is unused
+                # because the next statement raises.
+                m = re.match(r'^\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*([0-9]+)\s*,\s*([0-9]+)\s*,\s*([0-1].?[0-9]*)\s*,(.*)$',line)
+                raise Exception("Format incorrect. Should be: xmin,ymin,xmax,ymax,confidence,transcription")
+        elif withConfidence:
+            m = re.match(r'^\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*([0-9]+)\s*,\s*([0-9]+)\s*,\s*([0-1].?[0-9]*)\s*$',line)
+            if m == None :
+                raise Exception("Format incorrect. Should be: xmin,ymin,xmax,ymax,confidence")
+        elif withTranscription:
+            m = re.match(r'^\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*([0-9]+)\s*,\s*([0-9]+)\s*,(.*)$',line)
+            if m == None :
+                raise Exception("Format incorrect. Should be: xmin,ymin,xmax,ymax,transcription")
+        else:
+            # A trailing comma after ymax is tolerated in this form.
+            m = re.match(r'^\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*([0-9]+)\s*,\s*([0-9]+)\s*,?\s*$',line)
+            if m == None :
+                raise Exception("Format incorrect. Should be: xmin,ymin,xmax,ymax")
+        xmin = int(m.group(1))
+        ymin = int(m.group(2))
+        xmax = int(m.group(3))
+        ymax = int(m.group(4))
+        if(xmax<xmin):
+                raise Exception("Xmax value (%s) not valid (Xmax < Xmin)." %(xmax))
+        if(ymax<ymin):
+                raise Exception("Ymax value (%s)  not valid (Ymax < Ymin)." %(ymax))  
+        points = [ float(m.group(i)) for i in range(1, (numPoints+1) ) ]
+        # Bounds are only checked when both image dimensions are given.
+        if (imWidth>0 and imHeight>0):
+            validate_point_inside_bounds(xmin,ymin,imWidth,imHeight);
+            validate_point_inside_bounds(xmax,ymax,imWidth,imHeight);
+    else:
+        numPoints = 8;
+        # Quadrilateral form: all eight coordinates accept a leading minus sign.
+        if withTranscription and withConfidence:
+            m = re.match(r'^\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*([0-1].?[0-9]*)\s*,(.*)$',line)
+            if m == None :
+                raise Exception("Format incorrect. Should be: x1,y1,x2,y2,x3,y3,x4,y4,confidence,transcription")
+        elif withConfidence:
+            m = re.match(r'^\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*([0-1].?[0-9]*)\s*$',line)
+            if m == None :
+                raise Exception("Format incorrect. Should be: x1,y1,x2,y2,x3,y3,x4,y4,confidence")
+        elif withTranscription:
+            m = re.match(r'^\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,(.*)$',line)
+            if m == None :
+                raise Exception("Format incorrect. Should be: x1,y1,x2,y2,x3,y3,x4,y4,transcription")
+        else:
+            m = re.match(r'^\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*$',line)
+            if m == None :
+                raise Exception("Format incorrect. Should be: x1,y1,x2,y2,x3,y3,x4,y4")
+        points = [ float(m.group(i)) for i in range(1, (numPoints+1) ) ]
+        validate_clockwise_points(points)
+        if (imWidth>0 and imHeight>0):
+            validate_point_inside_bounds(points[0],points[1],imWidth,imHeight);
+            validate_point_inside_bounds(points[2],points[3],imWidth,imHeight);
+            validate_point_inside_bounds(points[4],points[5],imWidth,imHeight);
+            validate_point_inside_bounds(points[6],points[7],imWidth,imHeight);
+    if withConfidence:
+        # The confidence is the capture group that follows the coordinates.
+        try:
+            confidence = float(m.group(numPoints+1))
+        except ValueError:
+            raise Exception("Confidence value must be a float")       
+    if withTranscription:
+        # The transcription is the last group; its index shifts by one when a confidence is present.
+        posTranscription = numPoints + (2 if withConfidence else 1)
+        transcription = m.group(posTranscription)
+        m2 = re.match(r'^\s*\"(.*)\"\s*$',transcription)
+        if m2 != None : #Transcription with double quotes, we extract the value and replace escaped characters
+            transcription = m2.group(1).replace("\\\\", "\\").replace("\\\"", "\"")
+    return points,confidence,transcription
+def validate_point_inside_bounds(x,y,imWidth,imHeight):
+    if(x<0 or x>imWidth):
+            raise Exception("X value (%s) not valid. Image dimensions: (%s,%s)" %(xmin,imWidth,imHeight))
+    if(y<0 or y>imHeight):
+            raise Exception("Y value (%s)  not valid. Image dimensions: (%s,%s) Sample: %s Line:%s" %(ymin,imWidth,imHeight))
+def validate_clockwise_points(points):
+    """
+    Validates that the points that the 4 points that dlimite a polygon are in clockwise order.
+    """
+    if len(points) != 8:
+        raise Exception("Points list not valid." + str(len(points)))
+    point = [
+                [int(points[0]) , int(points[1])],
+                [int(points[2]) , int(points[3])],
+                [int(points[4]) , int(points[5])],
+                [int(points[6]) , int(points[7])]
+            ]
+    edge = [
+                ( point[1][0] - point[0][0])*( point[1][1] + point[0][1]),
+                ( point[2][0] - point[1][0])*( point[2][1] + point[1][1]),
+                ( point[3][0] - point[2][0])*( point[3][1] + point[2][1]),
+                ( point[0][0] - point[3][0])*( point[0][1] + point[3][1])
+    ]
+    summatory = edge[0] + edge[1] + edge[2] + edge[3];
+    if summatory>0:
+        raise Exception("Points are not clockwise. The coordinates of bounding quadrilaterals have to be given in clockwise order. Regarding the correct interpretation of 'clockwise' remember that the image coordinate system used is the standard one, with the image origin at the upper left, the X axis extending to the right and Y axis extending downwards.")
+def get_tl_line_values_from_file_contents(content,CRLF=True,LTRB=True,withTranscription=False,withConfidence=False,imWidth=0,imHeight=0,sort_by_confidences=True):
+    """
+    Returns all points, confindences and transcriptions of a file in lists. Valid line formats:
+    xmin,ymin,xmax,ymax,[confidence],[transcription]
+    x1,y1,x2,y2,x3,y3,x4,y4,[confidence],[transcription]
+    """
+    pointsList = []
+    transcriptionsList = []
+    confidencesList = []
+    lines = content.split( "\r\n" if CRLF else "\n" )
+    for line in lines:
+        line = line.replace("\r","").replace("\n","")
+        if(line != "") :
+            points, confidence, transcription = get_tl_line_values(line,LTRB,withTranscription,withConfidence,imWidth,imHeight);
+            pointsList.append(points)
+            transcriptionsList.append(transcription)
+            confidencesList.append(confidence)
+    if withConfidence and len(confidencesList)>0 and sort_by_confidences:
+        import numpy as np
+        sorted_ind = np.argsort(-np.array(confidencesList))
+        confidencesList = [confidencesList[i] for i in sorted_ind]
+        pointsList = [pointsList[i] for i in sorted_ind]
+        transcriptionsList = [transcriptionsList[i] for i in sorted_ind]        
+    return pointsList,confidencesList,transcriptionsList
+def main_evaluation(p,default_evaluation_params_fn,validate_data_fn,evaluate_method_fn,show_result=True,per_sample=True):
+    """
+    This process validates a method, evaluates it and if it succed generates a ZIP file with a JSON entry for each sample.
+    Params:
+    p: Dictionary of parmeters with the GT/submission locations. If None is passed, the parameters send by the system are used.
+    default_evaluation_params_fn: points to a function that returns a dictionary with the default parameters used for the evaluation
+    validate_data_fn: points to a method that validates the corrct format of the submission
+    evaluate_method_fn: points to a function that evaluated the submission and return a Dictionary with the results
+    """
+    if (p == None):
+        p = dict([s[1:].split('=') for s in sys.argv[1:]])
+        if(len(sys.argv)<3):
+            print_help()
+    evalParams = default_evaluation_params_fn()
+    if 'p' in p.keys():
+        evalParams.update( p['p'] if isinstance(p['p'], dict) else json.loads(p['p'][1:-1]) )
+    resDict={'calculated':True,'Message':'','method':'{}','per_sample':'{}'}    
+    try:
+        validate_data_fn(p['g'], p['s'], evalParams)  
+        evalData = evaluate_method_fn(p['g'], p['s'], evalParams)
+        resDict.update(evalData)
+    except Exception as e:
+        resDict['Message']= str(e)
+        resDict['calculated']=False
+    if 'o' in p:
+        if not os.path.exists(p['o']):
+            os.makedirs(p['o'])
+        resultsOutputname = p['o'] + '/results.zip'
+        outZip = zipfile.ZipFile(resultsOutputname, mode='w', allowZip64=True)
+        del resDict['per_sample']
+        if 'output_items' in resDict.keys():
+            del resDict['output_items']
+        outZip.writestr('method.json',json.dumps(resDict))
+    if not resDict['calculated']:
+        if show_result:
+            sys.stderr.write('Error!\n'+ resDict['Message']+'\n\n')
+        if 'o' in p:
+            outZip.close()
+        return resDict
+    if 'o' in p:
+        if per_sample == True:
+            for k,v in evalData['per_sample'].items():
+                outZip.writestr( k + '.json',json.dumps(v)) 
+            if 'output_items' in evalData.keys():
+                for k, v in evalData['output_items'].items():
+                    outZip.writestr( k,v) 
+        outZip.close()
+    # if show_result:
+    #     sys.stdout.write("Calculated!")
+    #     sys.stdout.write("\n")
+    #     sys.stdout.write(json.dumps(resDict['e2e_method']))
+    #     sys.stdout.write("\n")
+    return resDict
+def main_validation(default_evaluation_params_fn,validate_data_fn):
+    """
+    This process validates a method
+    Params:
+    default_evaluation_params_fn: points to a function that returns a dictionary with the default parameters used for the evaluation
+    validate_data_fn: points to a method that validates the corrct format of the submission
+    """    
+    try:
+        p = dict([s[1:].split('=') for s in sys.argv[1:]])
+        evalParams = default_evaluation_params_fn()
+        if 'p' in p.keys():
+            evalParams.update( p['p'] if isinstance(p['p'], dict) else json.loads(p['p'][1:-1]) )
+        validate_data_fn(p['g'], p['s'], evalParams)              
+        print('SUCCESS')
+        sys.exit(0)
+    except Exception as e:
+        print(str(e))
+        sys.exit(101)
--- a/adet/evaluation/text_eval_script.py
+++ b/adet/evaluation/text_eval_script.py
--- a/adet/evaluation/text_eval_script_ic15.py
+++ b/adet/evaluation/text_eval_script_ic15.py
--- a/adet/evaluation/text_evaluation_all.py
+++ b/adet/evaluation/text_evaluation_all.py
--- a/adet/layers/__init__.py
+++ b/adet/layers/__init__.py
+from .ms_deform_attn import MSDeformAttn
+# Export every name currently defined in this module that does not start with an underscore.
+__all__ = [k for k in globals().keys() if not k.startswith("_")]
\ No newline at end of file
--- a/adet/layers/csrc/DeformAttn/ms_deform_attn.h
+++ b/adet/layers/csrc/DeformAttn/ms_deform_attn.h
+/*!
+**************************************************************************************************
+* Deformable DETR
+* Copyright (c) 2020 SenseTime. All Rights Reserved.
+* Licensed under the Apache License, Version 2.0 [see LICENSE for details]
+**************************************************************************************************
+* Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
+**************************************************************************************************
+*/
+#pragma once
+#include "ms_deform_attn_cpu.h"
+#ifdef WITH_CUDA
+#include "ms_deform_attn_cuda.h"
+#endif
+/// Device dispatcher for the ms_deform_attn forward op.
+///
+/// A CUDA `value` tensor is forwarded, together with all other arguments, to
+/// ms_deform_attn_cuda_forward when the extension is built with WITH_CUDA.
+/// Every other case raises an error: there is no CPU path.
+///
+/// Declared `inline` because the definition lives in a header; without it every
+/// translation unit that includes this file would emit its own external definition.
+inline at::Tensor
+ms_deform_attn_forward(
+    const at::Tensor &value, 
+    const at::Tensor &spatial_shapes,
+    const at::Tensor &level_start_index,
+    const at::Tensor &sampling_loc,
+    const at::Tensor &attn_weight,
+    const int im2col_step)
+{
+    // Tensor::is_cuda() replaces the deprecated Tensor::type().is_cuda() query.
+    if (value.is_cuda())
+    {
+#ifdef WITH_CUDA
+        return ms_deform_attn_cuda_forward(
+            value, spatial_shapes, level_start_index, sampling_loc, attn_weight, im2col_step);
+#else
+        AT_ERROR("Not compiled with GPU support");
+#endif
+    }
+    AT_ERROR("Not implemented on the CPU");
+}
+/// Device dispatcher for the ms_deform_attn backward op.
+///
+/// A CUDA `value` tensor is forwarded, together with all other arguments, to
+/// ms_deform_attn_cuda_backward when the extension is built with WITH_CUDA.
+/// Every other case raises an error: there is no CPU path.
+///
+/// Declared `inline` because the definition lives in a header; without it every
+/// translation unit that includes this file would emit its own external definition.
+inline std::vector<at::Tensor>
+ms_deform_attn_backward(
+    const at::Tensor &value, 
+    const at::Tensor &spatial_shapes,
+    const at::Tensor &level_start_index,
+    const at::Tensor &sampling_loc,
+    const at::Tensor &attn_weight,
+    const at::Tensor &grad_output,
+    const int im2col_step)
+{
+    // Tensor::is_cuda() replaces the deprecated Tensor::type().is_cuda() query.
+    if (value.is_cuda())
+    {
+#ifdef WITH_CUDA
+        return ms_deform_attn_cuda_backward(
+            value, spatial_shapes, level_start_index, sampling_loc, attn_weight, grad_output, im2col_step);
+#else
+        AT_ERROR("Not compiled with GPU support");
+#endif
+    }
+    AT_ERROR("Not implemented on the CPU");
+}
--- a/adet/layers/csrc/DeformAttn/ms_deform_attn_cpu.cpp
+++ b/adet/layers/csrc/DeformAttn/ms_deform_attn_cpu.cpp
+/*!
+**************************************************************************************************
+* Deformable DETR
+* Copyright (c) 2020 SenseTime. All Rights Reserved.
+* Licensed under the Apache License, Version 2.0 [see LICENSE for details]
+**************************************************************************************************
+* Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
+**************************************************************************************************
+*/
+#include <vector>
+#include <ATen/ATen.h>
+#include <ATen/cuda/CUDAContext.h>
+/// CPU entry point for the ms_deform_attn forward op.
+/// Not implemented: every call raises an error through AT_ERROR and none of the
+/// arguments are read.
+at::Tensor
+ms_deform_attn_cpu_forward(
+    const at::Tensor &value,
+    const at::Tensor &spatial_shapes,
+    const at::Tensor &level_start_index,
+    const at::Tensor &sampling_loc,
+    const at::Tensor &attn_weight,
+    const int im2col_step)
+{
+    AT_ERROR("Not implement on cpu");
+}
+/// CPU entry point for the ms_deform_attn backward op.
+/// Not implemented: every call raises an error through AT_ERROR and none of the
+/// arguments are read.
+std::vector<at::Tensor>
+ms_deform_attn_cpu_backward(
+    const at::Tensor &value,
+    const at::Tensor &spatial_shapes,
+    const at::Tensor &level_start_index,
+    const at::Tensor &sampling_loc,
+    const at::Tensor &attn_weight,
+    const at::Tensor &grad_output,
+    const int im2col_step)
+{
+    AT_ERROR("Not implement on cpu");
+}
--- a/adet/layers/csrc/DeformAttn/ms_deform_attn_cpu.h
+++ b/adet/layers/csrc/DeformAttn/ms_deform_attn_cpu.h
+/*!
+**************************************************************************************************
+* Deformable DETR
+* Copyright (c) 2020 SenseTime. All Rights Reserved.
+* Licensed under the Apache License, Version 2.0 [see LICENSE for details]
+**************************************************************************************************
+* Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
+**************************************************************************************************
+*/
+#pragma once
+#include <torch/extension.h>
+/// Declaration of the CPU forward entry point of the ms_deform_attn op.
+/// It takes the same argument list as the CUDA forward function.
+/// NOTE(review): the definition in ms_deform_attn_cpu.cpp only raises an error;
+/// confirm before relying on a CPU result.
+at::Tensor
+ms_deform_attn_cpu_forward(
+    const at::Tensor &value, 
+    const at::Tensor &spatial_shapes,
+    const at::Tensor &level_start_index,
+    const at::Tensor &sampling_loc,
+    const at::Tensor &attn_weight,
+    const int im2col_step);
+/// Declaration of the CPU backward entry point of the ms_deform_attn op.
+/// It takes the same argument list as the CUDA backward function, including grad_output.
+/// NOTE(review): the definition in ms_deform_attn_cpu.cpp only raises an error;
+/// confirm before relying on a CPU result.
+std::vector<at::Tensor>
+ms_deform_attn_cpu_backward(
+    const at::Tensor &value, 
+    const at::Tensor &spatial_shapes,
+    const at::Tensor &level_start_index,
+    const at::Tensor &sampling_loc,
+    const at::Tensor &attn_weight,
+    const at::Tensor &grad_output,
+    const int im2col_step);
--- a/adet/layers/csrc/DeformAttn/ms_deform_attn_cuda.cu
+++ b/adet/layers/csrc/DeformAttn/ms_deform_attn_cuda.cu
+/*!
+**************************************************************************************************
+* Deformable DETR
+* Copyright (c) 2020 SenseTime. All Rights Reserved.
+* Licensed under the Apache License, Version 2.0 [see LICENSE for details]
+**************************************************************************************************
+* Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
+**************************************************************************************************
+*/
+#include <vector>
+#include "ms_deform_im2col_cuda.cuh"
+#include <ATen/ATen.h>
+#include <ATen/cuda/CUDAContext.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+/// Host-side launcher for the ms_deform_attn forward CUDA kernel.
+///
+/// Sizes are read from the inputs: `value` is indexed as
+/// (batch, spatial_size, num_heads, channels), spatial_shapes.size(0) is the number
+/// of levels, and sampling_loc.size(1) / sampling_loc.size(4) are the query and point
+/// counts. spatial_shapes and level_start_index are accessed as int64 data.
+/// The batch is processed in chunks of min(batch, im2col_step) samples.
+///
+/// @return a tensor of shape (batch, num_query, num_heads * channels) created with
+///         the options of `value`.
+/// @throws if an input is not contiguous or not a CUDA tensor, or if batch is not a
+///         positive multiple of the effective im2col step.
+at::Tensor ms_deform_attn_cuda_forward(
+    const at::Tensor &value, 
+    const at::Tensor &spatial_shapes,
+    const at::Tensor &level_start_index,
+    const at::Tensor &sampling_loc,
+    const at::Tensor &attn_weight,
+    const int im2col_step)
+{
+    AT_ASSERTM(value.is_contiguous(), "value tensor has to be contiguous");
+    AT_ASSERTM(spatial_shapes.is_contiguous(), "spatial_shapes tensor has to be contiguous");
+    AT_ASSERTM(level_start_index.is_contiguous(), "level_start_index tensor has to be contiguous");
+    AT_ASSERTM(sampling_loc.is_contiguous(), "sampling_loc tensor has to be contiguous");
+    AT_ASSERTM(attn_weight.is_contiguous(), "attn_weight tensor has to be contiguous");
+    // Tensor::is_cuda() replaces the deprecated Tensor::type().is_cuda() query.
+    AT_ASSERTM(value.is_cuda(), "value must be a CUDA tensor");
+    AT_ASSERTM(spatial_shapes.is_cuda(), "spatial_shapes must be a CUDA tensor");
+    AT_ASSERTM(level_start_index.is_cuda(), "level_start_index must be a CUDA tensor");
+    AT_ASSERTM(sampling_loc.is_cuda(), "sampling_loc must be a CUDA tensor");
+    AT_ASSERTM(attn_weight.is_cuda(), "attn_weight must be a CUDA tensor");
+    const int batch = value.size(0);
+    const int spatial_size = value.size(1);
+    const int num_heads = value.size(2);
+    const int channels = value.size(3);
+    const int num_levels = spatial_shapes.size(0);
+    const int num_query = sampling_loc.size(1);
+    const int num_point = sampling_loc.size(4);
+    const int im2col_step_ = std::min(batch, im2col_step);
+    // Reject a non-positive step first: an empty batch would otherwise make the
+    // modulo below a division by zero.
+    AT_ASSERTM(im2col_step_ > 0,
+               "batch and im2col_step must both be positive, got batch=", batch,
+               ", im2col_step=", im2col_step);
+    // The message arguments are concatenated, not printf-formatted, so the values
+    // are passed as separate pieces instead of "%d" placeholders.
+    AT_ASSERTM(batch % im2col_step_ == 0,
+               "batch (", batch, ") must be divisible by im2col_step (", im2col_step_, ")");
+    auto output = at::zeros({batch, num_query, num_heads, channels}, value.options());
+    const int batch_n = im2col_step_;
+    auto output_n = output.view({batch/im2col_step_, batch_n, num_query, num_heads, channels});
+    // Element counts of one sample in each input, used to advance the raw pointers per chunk.
+    auto per_value_size = spatial_size * num_heads * channels;
+    auto per_sample_loc_size = num_query * num_heads * num_levels * num_point * 2;
+    auto per_attn_weight_size = num_query * num_heads * num_levels * num_point;
+    for (int n = 0; n < batch/im2col_step_; ++n)
+    {
+        // View of the output rows that belong to this chunk; the kernel writes into it.
+        auto columns = output_n.select(0, n);
+        // scalar_type() / data_ptr<T>() replace the deprecated type() / data<T>() accessors.
+        AT_DISPATCH_FLOATING_TYPES(value.scalar_type(), "ms_deform_attn_forward_cuda", ([&] {
+            ms_deformable_im2col_cuda(at::cuda::getCurrentCUDAStream(),
+                value.data_ptr<scalar_t>() + n * im2col_step_ * per_value_size,
+                spatial_shapes.data_ptr<int64_t>(),
+                level_start_index.data_ptr<int64_t>(),
+                sampling_loc.data_ptr<scalar_t>() + n * im2col_step_ * per_sample_loc_size,
+                attn_weight.data_ptr<scalar_t>() + n * im2col_step_ * per_attn_weight_size,
+                batch_n, spatial_size, num_heads, channels, num_levels, num_query, num_point,
+                columns.data_ptr<scalar_t>());
+        }));
+    }
+    output = output.view({batch, num_query, num_heads*channels});
+    return output;
+}
+/// Host-side backward entry point for multi-scale deformable attention (CUDA).
+///
+/// Checks that every input is contiguous and lives on a CUDA device, then
+/// walks the batch in chunks of `min(batch, im2col_step)` samples and calls
+/// `ms_deformable_col2im_cuda` once per chunk on the current CUDA stream.
+///
+/// @param value             Read as a 4-D tensor whose sizes are taken as
+///                          (batch, spatial_size, num_heads, channels).
+/// @param spatial_shapes    Accessed as int64 data; size(0) is num_levels.
+/// @param level_start_index Accessed as int64 data.
+/// @param sampling_loc      size(1) is num_query and size(4) is num_point.
+/// @param attn_weight       Same floating dtype as `value`.
+/// @param grad_output       Viewed as (batch, num_query, num_heads, channels)
+///                          split into chunks along the batch dimension.
+/// @param im2col_step       Requested chunk size; must be positive and, after
+///                          clamping to `batch`, must divide `batch`.
+/// @return {grad_value, grad_sampling_loc, grad_attn_weight}, each allocated
+///         zero-filled with the shape of the corresponding input.
+std::vector<at::Tensor> ms_deform_attn_cuda_backward(
+    const at::Tensor &value,
+    const at::Tensor &spatial_shapes,
+    const at::Tensor &level_start_index,
+    const at::Tensor &sampling_loc,
+    const at::Tensor &attn_weight,
+    const at::Tensor &grad_output,
+    const int im2col_step)
+{
+    AT_ASSERTM(value.is_contiguous(), "value tensor has to be contiguous");
+    AT_ASSERTM(spatial_shapes.is_contiguous(), "spatial_shapes tensor has to be contiguous");
+    AT_ASSERTM(level_start_index.is_contiguous(), "level_start_index tensor has to be contiguous");
+    AT_ASSERTM(sampling_loc.is_contiguous(), "sampling_loc tensor has to be contiguous");
+    AT_ASSERTM(attn_weight.is_contiguous(), "attn_weight tensor has to be contiguous");
+    AT_ASSERTM(grad_output.is_contiguous(), "grad_output tensor has to be contiguous");
+    AT_ASSERTM(value.is_cuda(), "value must be a CUDA tensor");
+    AT_ASSERTM(spatial_shapes.is_cuda(), "spatial_shapes must be a CUDA tensor");
+    AT_ASSERTM(level_start_index.is_cuda(), "level_start_index must be a CUDA tensor");
+    AT_ASSERTM(sampling_loc.is_cuda(), "sampling_loc must be a CUDA tensor");
+    AT_ASSERTM(attn_weight.is_cuda(), "attn_weight must be a CUDA tensor");
+    AT_ASSERTM(grad_output.is_cuda(), "grad_output must be a CUDA tensor");
+    // A non-positive step would make the modulo/division below divide by zero
+    // (or produce a negative chunk count), so reject it up front.
+    AT_ASSERTM(im2col_step > 0, "im2col_step must be positive, got ", im2col_step);
+    const int batch = value.size(0);
+    const int spatial_size = value.size(1);
+    const int num_heads = value.size(2);
+    const int channels = value.size(3);
+    const int num_levels = spatial_shapes.size(0);
+    const int num_query = sampling_loc.size(1);
+    const int num_point = sampling_loc.size(4);
+    auto grad_value = at::zeros_like(value);
+    auto grad_sampling_loc = at::zeros_like(sampling_loc);
+    auto grad_attn_weight = at::zeros_like(attn_weight);
+    // Empty batch: nothing to launch, and min(batch, im2col_step) would be 0,
+    // which must never reach the modulo below.
+    if (batch == 0)
+    {
+        return {grad_value, grad_sampling_loc, grad_attn_weight};
+    }
+    const int im2col_step_ = std::min(batch, im2col_step);
+    // AT_ASSERTM concatenates its message arguments; it does not interpret
+    // printf-style placeholders.
+    AT_ASSERTM(batch % im2col_step_ == 0,
+               "im2col_step(", im2col_step_, ") must divide batch(", batch, ")");
+    const int batch_n = im2col_step_;
+    const int num_chunks = batch / im2col_step_;
+    // Element counts per sample, kept in 64 bits so the pointer offsets below
+    // cannot wrap for large tensors.
+    // NOTE(review): the kernel launcher still receives the dimensions as int;
+    // confirm its internal indexing if very large inputs are expected.
+    const int64_t per_value_size = static_cast<int64_t>(spatial_size) * num_heads * channels;
+    const int64_t per_attn_weight_size = static_cast<int64_t>(num_query) * num_heads * num_levels * num_point;
+    const int64_t per_sample_loc_size = per_attn_weight_size * 2;
+    auto grad_output_n = grad_output.view({num_chunks, batch_n, num_query, num_heads, channels});
+    for (int n = 0; n < num_chunks; ++n)
+    {
+        auto grad_output_g = grad_output_n.select(0, n);
+        // Offsets (in elements) of the first sample of this chunk.
+        const int64_t first_sample = static_cast<int64_t>(n) * im2col_step_;
+        const int64_t value_offset = first_sample * per_value_size;
+        const int64_t sample_loc_offset = first_sample * per_sample_loc_size;
+        const int64_t attn_weight_offset = first_sample * per_attn_weight_size;
+        AT_DISPATCH_FLOATING_TYPES(value.scalar_type(), "ms_deform_attn_backward_cuda", ([&] {
+            ms_deformable_col2im_cuda(at::cuda::getCurrentCUDAStream(),
+                                    grad_output_g.data_ptr<scalar_t>(),
+                                    value.data_ptr<scalar_t>() + value_offset,
+                                    spatial_shapes.data_ptr<int64_t>(),
+                                    level_start_index.data_ptr<int64_t>(),
+                                    sampling_loc.data_ptr<scalar_t>() + sample_loc_offset,
+                                    attn_weight.data_ptr<scalar_t>() + attn_weight_offset,
+                                    batch_n, spatial_size, num_heads, channels, num_levels, num_query, num_point,
+                                    grad_value.data_ptr<scalar_t>() + value_offset,
+                                    grad_sampling_loc.data_ptr<scalar_t>() + sample_loc_offset,
+                                    grad_attn_weight.data_ptr<scalar_t>() + attn_weight_offset);
+        }));
+    }
+    return {
+        grad_value, grad_sampling_loc, grad_attn_weight
+    };
+}
\ No newline at end of file