first commit

2ac5586e · Rayyyyy · 2ac5586e · 2ac5586e · 2ac5586e · 2ac5586e
Commit 2ac5586e authored Feb 02, 2024 by Rayyyyy
20 changed files
--- a/.gitignore
+++ b/.gitignore
+datasets/*
+experiments/*
+results/*
+tb_logger/*
+wandb/*
+tmp/*
+modify_model.py
+hat/version.py
+
+*.DS_Store
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
--- a/LICENSE
+++ b/LICENSE
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright 2023 Xiangyu Chen
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
--- a/README.md
+++ b/README.md
+# HAT
+## 论文
+[HAT: Hybrid Attention Transformer for Image Restoration](https://arxiv.org/abs/2309.05239)
+
+## 模型结构
+HAT包括三个部分，包括浅层特征提取、深层特征提取和图像重建。
+
+<div align=center>
+    <img src="./doc/model.png"/>
+</div>
+
+## 算法原理
+HAT方法结合了通道注意力和基于窗口的自注意力机制，充分利用两者的互补优势。此外，引入了重叠交叉注意力模块来增强相邻窗口特征之间的交互，更好地聚合跨窗口信息。在训练阶段，HAT还采用了同任务预训练（same-task pre-training）策略，以进一步挖掘模型的潜力。得益于这些设计，HAT可以激活更多的像素用于重建，从而显著提高性能。
+
+<div align=center>
+    <img src="./doc/method.png"/>
+</div>
+
+## 环境配置
+-v 路径、docker_name和imageID根据实际情况修改
+
+### Docker（方法一）
+
+```bash
+docker pull image.sourcefind.cn:5000/dcu/admin/base/pytorch:1.13.1-centos7.6-dtk23.10-py38
+
+docker run -it -v /path/your_code_data/:/path/your_code_data/ -v /opt/hyhal/:/opt/hyhal/ --shm-size=32G --privileged=true --device=/dev/kfd --device=/dev/dri/ --group-add video --name docker_name imageID bash
+
+cd /your_code_path/hat_pytorch
+pip install -r requirements.txt
+python setup.py develop
+```
+
+### Dockerfile（方法二）
+
+```bash
+cd ./docker
+cp ../requirements.txt requirements.txt
+
+docker build --no-cache -t hat:latest .
+docker run -it -v /path/your_code_data/:/path/your_code_data/ -v /opt/hyhal/:/opt/hyhal/ --shm-size=32G --privileged=true --device=/dev/kfd --device=/dev/dri/ --group-add video --name docker_name imageID bash
+
+cd /your_code_path/hat_pytorch
+pip install -r requirements.txt
+python setup.py develop
+```
+
+### Anaconda（方法三）
+
+1、关于本项目DCU显卡所需的特殊深度学习库可从光合开发者社区下载安装: https://developer.hpccube.com/tool/
+
+```bash
+DTK软件栈：dtk23.10
+python：python3.8
+torch：1.13.1
+torchvision：0.14.1
+```
+
+Tips：以上dtk软件栈、python、torch等DCU相关工具版本需要严格一一对应
+
+2、其他非特殊库直接按照requirements.txt安装
+
+```
+pip install -r requirements.txt
+python setup.py develop
+```
+
+## 数据集
+训练：
+[ImageNet dataset](https://image-net.org/challenges/LSVRC/2012/2012-downloads.php)
+[DIV2K](https://data.vision.ee.ethz.ch/cvl/DIV2K/)
+[Flickr2K](https://cv.snu.ac.kr/research/EDSR/Flickr2K.tar)
+
+Tips: DF2K 是 DIV2K 和 Flickr2K 两个数据集的整合。
+训练数据处理请参考[BasicSR](https://github.com/XPixelGroup/BasicSR/blob/master/docs/DatasetPreparation.md)
+
+测试：
+[Classical SR Testing](https://drive.google.com/drive/folders/1gt5eT293esqY0yr1Anbm36EdnxWW_5oH?usp=sharing)
+
+
+数据准备具体步骤如下：
+1. 将数据存放在datasets目录下, 数据集的目录结构如下:
+
+```
+├── DF2K
+│   ├── DF2K_HR # HR 数据
+│   ├── DF2K_HR_sub # 生成的
+│   ├── DF2K_bicx4 # train_LR_bicubic_X4 数据
+│   ├── DF2K_bicx4_sub # 生成的
+├── Set5
+│   ├── GTmod12
+│   ├── LRbicx2
+│   ├── LRbicx3
+│   ├── LRbicx4
+│   ├── original
+├── Set14
+│   ├── GTmod12
+│   ├── LRbicx2
+│   ├── LRbicx3
+│   ├── LRbicx4
+│   ├── original
+```
+
+Tips: 项目提供了tiny_datasets用于快速上手学习, 如果使用tiny_datasets, 需要对下面代码内的地址进行替换, 当前默认为完整数据集的处理地址。
+
+2. 因为 DF2K 数据集是 2K 分辨率的 (比如: 2048x1080), 而我们在训练的时候往往并不要那么大 (常见的是 128x128 或者 192x192 的训练patch). 因此我们可以先把2K的图片裁剪成有overlap的 480x480 的子图像块. 然后再由 dataloader 从这个 480x480 的子图像块中随机crop出 128x128 或者 192x192 的训练patch.
+
+```bash
+python extract_subimages.py # 将图片进行sub
+```
+
+3. 生成 meta_info_file
+```bash
+python generate_meta_info.py
+```
+
+## 训练
+训练日志及weights保存在 ./experiments 文件夹中
+
+### 单机多卡
+
+```bash
+bash train.sh
+```
+
+### 多机多卡
+
+1. 修改run.sh中18行所需虚拟环境变量地址;
+2. 修改single_process.sh中22行所需训练的yaml文件地址，如与默认一致，可不修改。
+
+执行命令如下, 训练日志保存在logs文件夹下
+```bash
+bash run.sh
+```
+
+## 推理
+预训练模型下载地址：[Google Drive](https://drive.google.com/drive/folders/1HpmReFfoUqUbnAOQ7rvOeNU3uf_m69w0?usp=sharing) or [百度网盘](https://pan.baidu.com/s/1u2r4Lc2_EEeQqra2-w85Xg) (access code: qyrl)。
+测试结果将保存到 ./results 路径下。
+
+options/test/HAT_SRx4_ImageNet-LR.yml 适用于不使用 ground truth image 的推理过程。
+```bash
+bash val.sh
+```
+
+## result
+基于 Real_HAT_GAN_SRx4_sharper.pth 的测试结果展示
+
+<div align=center>
+    <img src="./doc/Visual_Results.png"/>
+</div>
+
+### 精度
+
+未经x2预训练的SRx4上的基准PSNR测试结果, Multi-Adds 针对 64x64 的输入计算。
+
+| Model | Params(M) | Multi-Adds(G) | Set5 | Set14 | BSD100 | Urban100 | Manga109 |
+| :------: | :------: | :------: | :------: |:------: | :------: | :------: |:------:|
+| HAT-S	 | 9.6 | 54.9 | 32.92 | 29.15 | 27.97 | 27.87 | 32.35 |
+| HAT | 20.8 | 102.4 | 33.04 | 29.23 | 28.00 | 27.97 | 32.48 |
+| HAT（our） | 20.8 | 102.4 | 33.1486 | xxx | xxx | xxx | xxx |
+
+## 应用场景
+### 算法类别
+图像重建
+
+### 热点应用行业
+交通,公安,制造
+
+## 源码仓库及问题反馈
+- https://developer.hpccube.com/codes/modelzoo/hat_pytorch
+
+## 参考资料
+- https://github.com/XPixelGroup/HAT?tab=readme-ov-file
--- a/VERSION
+++ b/VERSION
+0.1.0
\ No newline at end of file
--- a/cog.yaml
+++ b/cog.yaml
+build:
+  cuda: "10.2"
+  gpu: true
+  python_version: "3.8"
+  system_packages:
+    - "libgl1-mesa-glx"
+    - "libglib2.0-0"
+  python_packages:
+    - "numpy==1.21.5"
+    - "ipython==7.21.0"
+    - "opencv-python==4.5.4.58"
+    - "torch==1.9.1"
+    - "torchvision==0.10.1"
+    - "einops==0.4.1"
+
+  run:
+    - pip install basicsr==1.3.4.9
+
+predict: "predict.py:Predictor"
--- a/doc/Visual_Results.png
+++ b/doc/Visual_Results.png
--- a/doc/method.png
+++ b/doc/method.png
--- a/doc/model.png
+++ b/doc/model.png
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
+FROM image.sourcefind.cn:5000/dcu/admin/base/pytorch:1.13.1-centos7.6-dtk23.10-py38
+COPY requirements.txt requirements.txt
+# Variables exported by `source` live only for the duration of a single RUN
+# instruction, so the DTK environment is loaded in the same layer that
+# installs the Python requirements.
+RUN source /opt/dtk/env.sh && pip3 install -r requirements.txt
\ No newline at end of file
--- a/extract_subimages.py
+++ b/extract_subimages.py
+import cv2
+import numpy as np
+import os
+import sys
+from multiprocessing import Pool
+from os import path as osp
+from tqdm import tqdm
+
+from basicsr.utils import scandir
+
+
+def main():
+    """A multi-thread tool to crop large images to sub-images for faster IO.
+
+    It is used for DIV2K dataset.
+
+    Args:
+        opt (dict): Configuration dict. It contains:
+        n_thread (int): Thread number.
+        compression_level (int):  CV_IMWRITE_PNG_COMPRESSION from 0 to 9. A higher value means a smaller size and
+            longer compression time. Use 0 for faster CPU decompression. Default: 3, same in cv2.
+        input_folder (str): Path to the input folder.
+        save_folder (str): Path to save folder.
+        crop_size (int): Crop size.
+        step (int): Step for overlapped sliding window.
+        thresh_size (int): Threshold size. Patches whose size is lower than thresh_size will be dropped.
+
+    Usage:
+        For each folder, run this script.
+        Typically, there are four folders to be processed for DIV2K dataset.
+
+            * DIV2K_train_HR
+            * DIV2K_train_LR_bicubic/X2
+            * DIV2K_train_LR_bicubic/X3
+            * DIV2K_train_LR_bicubic/X4
+
+        After process, each sub_folder should have the same number of subimages.
+
+        Remember to modify opt configurations according to your settings.
+    """
+
+    opt = {}
+    opt['n_thread'] = 20
+    opt['compression_level'] = 3
+
+    # HR images
+    opt['input_folder'] = 'datasets/DF2K/DF2K_HR'
+    opt['save_folder'] = 'datasets/DF2K/DF2K_HR_sub'
+    opt['crop_size'] = 480
+    opt['step'] = 240
+    opt['thresh_size'] = 0
+    extract_subimages(opt)
+
+    # LRx4 images
+    opt['input_folder'] = 'datasets/DF2K/DF2K_bicx4'
+    opt['save_folder'] = 'datasets/DF2K/DF2K_bicx4_sub'
+    opt['crop_size'] = 120
+    opt['step'] = 60
+    opt['thresh_size'] = 0
+    extract_subimages(opt)
+
+
+def extract_subimages(opt):
+    """Crop images to subimages.
+
+    Args:
+        opt (dict): Configuration dict. It contains:
+        input_folder (str): Path to the input folder.
+        save_folder (str): Path to save folder.
+        n_thread (int): Thread number.
+    """
+    input_folder = opt['input_folder']
+    save_folder = opt['save_folder']
+    if not osp.exists(save_folder):
+        os.makedirs(save_folder)
+        print(f'mkdir {save_folder} ...')
+    else:
+        print(f'Folder {save_folder} already exists. Exit.')
+        sys.exit(1)
+
+    img_list = list(scandir(input_folder, full_path=True))
+
+    pbar = tqdm(total=len(img_list), unit='image', desc='Extract')
+    pool = Pool(opt['n_thread'])
+    for path in img_list:
+        pool.apply_async(worker, args=(path, opt), callback=lambda arg: pbar.update(1))
+    pool.close()
+    pool.join()
+    pbar.close()
+    print('All processes done.')
+
+
+def worker(path, opt):
+    """Worker for each process.
+
+    Args:
+        path (str): Image path.
+        opt (dict): Configuration dict. It contains:
+        crop_size (int): Crop size.
+        step (int): Step for overlapped sliding window.
+        thresh_size (int): Threshold size. Patches whose size is lower than thresh_size will be dropped.
+        save_folder (str): Path to save folder.
+        compression_level (int): for cv2.IMWRITE_PNG_COMPRESSION.
+
+    Returns:
+        process_info (str): Process information displayed in progress bar.
+    """
+    crop_size = opt['crop_size']
+    step = opt['step']
+    thresh_size = opt['thresh_size']
+    img_name, extension = osp.splitext(osp.basename(path))
+
+    # remove the x2, x3, x4 and x8 in the filename for DIV2K
+    img_name = img_name.replace('x2', '').replace('x3', '').replace('x4', '').replace('x8', '')
+
+    img = cv2.imread(path, cv2.IMREAD_UNCHANGED)
+
+    h, w = img.shape[0:2]
+    h_space = np.arange(0, h - crop_size + 1, step)
+    if h - (h_space[-1] + crop_size) > thresh_size:
+        h_space = np.append(h_space, h - crop_size)
+    w_space = np.arange(0, w - crop_size + 1, step)
+    if w - (w_space[-1] + crop_size) > thresh_size:
+        w_space = np.append(w_space, w - crop_size)
+
+    index = 0
+    for x in h_space:
+        for y in w_space:
+            index += 1
+            cropped_img = img[x:x + crop_size, y:y + crop_size, ...]
+            cropped_img = np.ascontiguousarray(cropped_img)
+            cv2.imwrite(
+                osp.join(opt['save_folder'], f'{img_name}_s{index:03d}{extension}'), cropped_img,
+                [cv2.IMWRITE_PNG_COMPRESSION, opt['compression_level']])
+    process_info = f'Processing {img_name} ...'
+    return process_info
+
+
+if __name__ == '__main__':
+    main()
--- a/figures/Comparison.png
+++ b/figures/Comparison.png
--- a/figures/Performance_comparison.png
+++ b/figures/Performance_comparison.png
--- a/figures/Visual_Results.png
+++ b/figures/Visual_Results.png
--- a/generate_meta_info.py
+++ b/generate_meta_info.py
+from os import path as osp
+from PIL import Image
+
+from basicsr.utils import scandir
+
+
+def generate_meta_info_df2k():
+    """Generate meta info for DIV2K dataset.
+    """
+
+    gt_folder = 'datasets/DF2K/DF2K_HR_sub/'
+    meta_info_txt = 'hat/data/meta_info/meta_info_DF2Ksub_GT.txt'
+
+    img_list = sorted(list(scandir(gt_folder)))
+
+    with open(meta_info_txt, 'w') as f:
+        for idx, img_path in enumerate(img_list):
+            img = Image.open(osp.join(gt_folder, img_path))  # lazy load
+            width, height = img.size
+            mode = img.mode
+            if mode == 'RGB':
+                n_channel = 3
+            elif mode == 'L':
+                n_channel = 1
+            else:
+                raise ValueError(f'Unsupported mode {mode}.')
+
+            info = f'{img_path} ({height},{width},{n_channel})'
+            print(idx + 1, info)
+            f.write(f'{info}\n')
+
+
+if __name__ == '__main__':
+    generate_meta_info_df2k()
--- a/hat/__init__.py
+++ b/hat/__init__.py
+# flake8: noqa
+from .archs import *
+from .data import *
+from .models import *
+
+# from .version import __gitsha__, __version__
--- a/hat/archs/__init__.py
+++ b/hat/archs/__init__.py
+import importlib
+from os import path as osp
+
+from basicsr.utils import scandir
+
+# automatically scan and import arch modules for registry
+# scan all the files that end with '_arch.py' under the archs folder
+arch_folder = osp.dirname(osp.abspath(__file__))
+arch_filenames = [osp.splitext(osp.basename(v))[0] for v in scandir(arch_folder) if v.endswith('_arch.py')]
+# import all the arch modules
+_arch_modules = [importlib.import_module(f'hat.archs.{file_name}') for file_name in arch_filenames]
--- a/hat/archs/discriminator_arch.py
+++ b/hat/archs/discriminator_arch.py
+from basicsr.utils.registry import ARCH_REGISTRY
+from torch import nn as nn
+from torch.nn import functional as F
+from torch.nn.utils import spectral_norm
+
+
+@ARCH_REGISTRY.register()
+class UNetDiscriminatorSN(nn.Module):
+    """Defines a U-Net discriminator with spectral normalization (SN)
+
+    It is used in Real-ESRGAN: Training Real-World Blind Super-Resolution with Pure Synthetic Data.
+
+    Arg:
+        num_in_ch (int): Channel number of inputs. Default: 3.
+        num_feat (int): Channel number of base intermediate features. Default: 64.
+        skip_connection (bool): Whether to use skip connections between U-Net. Default: True.
+    """
+
+    def __init__(self, num_in_ch, num_feat=64, skip_connection=True):
+        super(UNetDiscriminatorSN, self).__init__()
+        self.skip_connection = skip_connection
+        norm = spectral_norm
+        # the first convolution
+        self.conv0 = nn.Conv2d(num_in_ch, num_feat, kernel_size=3, stride=1, padding=1)
+        # downsample
+        self.conv1 = norm(nn.Conv2d(num_feat, num_feat * 2, 4, 2, 1, bias=False))
+        self.conv2 = norm(nn.Conv2d(num_feat * 2, num_feat * 4, 4, 2, 1, bias=False))
+        self.conv3 = norm(nn.Conv2d(num_feat * 4, num_feat * 8, 4, 2, 1, bias=False))
+        # upsample
+        self.conv4 = norm(nn.Conv2d(num_feat * 8, num_feat * 4, 3, 1, 1, bias=False))
+        self.conv5 = norm(nn.Conv2d(num_feat * 4, num_feat * 2, 3, 1, 1, bias=False))
+        self.conv6 = norm(nn.Conv2d(num_feat * 2, num_feat, 3, 1, 1, bias=False))
+        # extra convolutions
+        self.conv7 = norm(nn.Conv2d(num_feat, num_feat, 3, 1, 1, bias=False))
+        self.conv8 = norm(nn.Conv2d(num_feat, num_feat, 3, 1, 1, bias=False))
+        self.conv9 = nn.Conv2d(num_feat, 1, 3, 1, 1)
+
+    def forward(self, x):
+        # downsample
+        x0 = F.leaky_relu(self.conv0(x), negative_slope=0.2, inplace=True)
+        x1 = F.leaky_relu(self.conv1(x0), negative_slope=0.2, inplace=True)
+        x2 = F.leaky_relu(self.conv2(x1), negative_slope=0.2, inplace=True)
+        x3 = F.leaky_relu(self.conv3(x2), negative_slope=0.2, inplace=True)
+
+        # upsample
+        x3 = F.interpolate(x3, scale_factor=2, mode='bilinear', align_corners=False)
+        x4 = F.leaky_relu(self.conv4(x3), negative_slope=0.2, inplace=True)
+
+        if self.skip_connection:
+            x4 = x4 + x2
+        x4 = F.interpolate(x4, scale_factor=2, mode='bilinear', align_corners=False)
+        x5 = F.leaky_relu(self.conv5(x4), negative_slope=0.2, inplace=True)
+
+        if self.skip_connection:
+            x5 = x5 + x1
+        x5 = F.interpolate(x5, scale_factor=2, mode='bilinear', align_corners=False)
+        x6 = F.leaky_relu(self.conv6(x5), negative_slope=0.2, inplace=True)
+
+        if self.skip_connection:
+            x6 = x6 + x0
+
+        # extra convolutions
+        out = F.leaky_relu(self.conv7(x6), negative_slope=0.2, inplace=True)
+        out = F.leaky_relu(self.conv8(out), negative_slope=0.2, inplace=True)
+        out = self.conv9(out)
+
+        return out
\ No newline at end of file
--- a/hat/archs/hat_arch.py
+++ b/hat/archs/hat_arch.py
--- a/hat/archs/srvgg_arch.py
+++ b/hat/archs/srvgg_arch.py
+from basicsr.utils.registry import ARCH_REGISTRY
+from torch import nn as nn
+from torch.nn import functional as F
+
+
+@ARCH_REGISTRY.register()
+class SRVGGNetCompact(nn.Module):
+    """A compact VGG-style network structure for super-resolution.
+
+    It is a compact network structure, which performs upsampling in the last layer and no convolution is
+    conducted on the HR feature space.
+
+    Args:
+        num_in_ch (int): Channel number of inputs. Default: 3.
+        num_out_ch (int): Channel number of outputs. Default: 3.
+        num_feat (int): Channel number of intermediate features. Default: 64.
+        num_conv (int): Number of convolution layers in the body network. Default: 16.
+        upscale (int): Upsampling factor. Default: 4.
+        act_type (str): Activation type, options: 'relu', 'prelu', 'leakyrelu'. Default: prelu.
+    """
+
+    def __init__(self, num_in_ch=3, num_out_ch=3, num_feat=64, num_conv=16, upscale=4, act_type='prelu'):
+        super(SRVGGNetCompact, self).__init__()
+        self.num_in_ch = num_in_ch
+        self.num_out_ch = num_out_ch
+        self.num_feat = num_feat
+        self.num_conv = num_conv
+        self.upscale = upscale
+        self.act_type = act_type
+
+        self.body = nn.ModuleList()
+        # the first conv
+        self.body.append(nn.Conv2d(num_in_ch, num_feat, 3, 1, 1))
+        # the first activation
+        if act_type == 'relu':
+            activation = nn.ReLU(inplace=True)
+        elif act_type == 'prelu':
+            activation = nn.PReLU(num_parameters=num_feat)
+        elif act_type == 'leakyrelu':
+            activation = nn.LeakyReLU(negative_slope=0.1, inplace=True)
+        self.body.append(activation)
+
+        # the body structure
+        for _ in range(num_conv):
+            self.body.append(nn.Conv2d(num_feat, num_feat, 3, 1, 1))
+            # activation
+            if act_type == 'relu':
+                activation = nn.ReLU(inplace=True)
+            elif act_type == 'prelu':
+                activation = nn.PReLU(num_parameters=num_feat)
+            elif act_type == 'leakyrelu':
+                activation = nn.LeakyReLU(negative_slope=0.1, inplace=True)
+            self.body.append(activation)
+
+        # the last conv
+        self.body.append(nn.Conv2d(num_feat, num_out_ch * upscale * upscale, 3, 1, 1))
+        # upsample
+        self.upsampler = nn.PixelShuffle(upscale)
+
+    def forward(self, x):
+        out = x
+        for i in range(0, len(self.body)):
+            out = self.body[i](out)
+
+        out = self.upsampler(out)
+        # add the nearest upsampled image, so that the network learns the residual
+        base = F.interpolate(x, scale_factor=self.upscale, mode='nearest')
+        out += base
+        return out
\ No newline at end of file
--- a/hat/data/__init__.py
+++ b/hat/data/__init__.py
+import importlib
+from os import path as osp
+
+from basicsr.utils import scandir
+
+# automatically scan and import dataset modules for registry
+# scan all the files that end with '_dataset.py' under the data folder
+data_folder = osp.dirname(osp.abspath(__file__))
+dataset_filenames = [osp.splitext(osp.basename(v))[0] for v in scandir(data_folder) if v.endswith('_dataset.py')]
+# import all the dataset modules
+_dataset_modules = [importlib.import_module(f'hat.data.{file_name}') for file_name in dataset_filenames]