import os
import numpy as np
import torch
from util.FeatureExtractor import FeatureExtractor
from torchvision import transforms
from IPython import embed
import models
from scipy.spatial.distance import cosine, euclidean
from util.utils import *
from sklearn.preprocessing import normalize
def pool2d(tensor, type='max'):
    """Pool a [1, C, H, W] feature map into 8 horizontal stripes and return an (8, C) array."""
    sz = tensor.size()
    if type == 'max':
        x = torch.nn.functional.max_pool2d(tensor, kernel_size=(sz[2] // 8, sz[3]))
    if type == 'mean':
        # torch has no mean_pool2d; average pooling is avg_pool2d
        x = torch.nn.functional.avg_pool2d(tensor, kernel_size=(sz[2] // 8, sz[3]))
    x = x[0].cpu().data.numpy()
    x = np.transpose(x, (2, 1, 0))[0]
    return x
if __name__ == '__main__':
    os.environ['CUDA_VISIBLE_DEVICES'] = "0"
    use_gpu = torch.cuda.is_available()
    model = models.init_model(name='resnet50', num_classes=751, loss={'softmax', 'metric'}, use_gpu=use_gpu, aligned=True)
    checkpoint = torch.load("./log/market1501/alignedreid/checkpoint_ep300.pth.tar")
    model.load_state_dict(checkpoint['state_dict'])
    img_transform = transforms.Compose([
        transforms.Resize((256, 128)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    exact_list = ['7']
    myexactor = FeatureExtractor(model, exact_list)
    img_path1 = './data/market1501/query/0001_c1s1_001051_00.jpg'
    img_path2 = './data/market1501/query/0001_c2s1_000301_00.jpg'
    img1 = read_image(img_path1)
    img2 = read_image(img_path2)
    img1 = img_to_tensor(img1, img_transform)
    img2 = img_to_tensor(img2, img_transform)
    if use_gpu:
        model = model.cuda()
        img1 = img1.cuda()
        img2 = img2.cuda()
    model.eval()
    f1 = myexactor(img1)
    f2 = myexactor(img2)
    # pool each feature map into 8 horizontal stripes and L2-normalize per stripe
    a1 = normalize(pool2d(f1[0], type='max'))
    a2 = normalize(pool2d(f2[0], type='max'))
    # 8x8 matrix of stripe-to-stripe Euclidean distances for the alignment visualization
    dist = np.zeros((8, 8))
    for i in range(8):
        temp_feat1 = a1[i]
        for j in range(8):
            temp_feat2 = a2[j]
            dist[i][j] = euclidean(temp_feat1, temp_feat2)
    show_alignedreid(img_path1, img_path2, dist)
MIT License
Copyright (c) 2018 Hao Luo
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
# AlignedReID
## Paper
[AlignedReID](https://www.sciencedirect.com/science/article/pii/S0031320319302031?via%3Dihub#!)
## Model Overview
AlignedReID is a person re-identification (ReID) algorithm proposed by Megvii, and the first reported to surpass human-level performance on this task. Its core idea is local feature alignment, which markedly improves the capture of fine-grained details.
#### Core Methods
**Local alignment:** dynamic programming finds a local shortest path, which keeps the relative order of body parts while minimizing the alignment distance (see the sketch below).
**Loss function:** the shortest-path length enters the local loss and assists the learning of the global feature.
**Dataset support:** mainly the Market1501 dataset, consisting of a training set (751 identities, 12,936 images), a test set (750 identities, 19,732 images) and a query set (750 identities, 3,368 images).
**Code implementation**
**PyTorch framework:** supports dataset loading, feature extraction and loss computation.
**Key functions:** `shortest_dist` (shortest path via dynamic programming) and `local_dist` (local feature matching).
**Performance:** by optimizing local details it handles occlusion and pose variation well, and has become one of the standard baselines in person re-identification.
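The local alignment step can be sketched in a few lines (a minimal illustration with random features, not the repository's implementation; the real logic lives in `shortest_dist`, `local_dist` and `batch_local_dist` further down in this repo):
```python
import numpy as np

def aligned_distance(a, b):
    """DMLI-style shortest-path distance between two sets of stripe features.
    a, b: arrays of shape [parts, channels], e.g. 8 horizontal stripes each."""
    m, n = a.shape[0], b.shape[0]
    # pairwise Euclidean distances between stripes, squashed into (0, 1)
    d = np.linalg.norm(a[:, None, :] - b[None, :, :], axis=-1)
    d = (np.exp(d) - 1.0) / (np.exp(d) + 1.0)
    # dynamic programming: only right/down moves, so the stripe order is preserved
    acc = np.zeros((m, n))
    for i in range(m):
        for j in range(n):
            if i == 0 and j == 0:
                acc[i, j] = d[i, j]
            elif i == 0:
                acc[i, j] = acc[i, j - 1] + d[i, j]
            elif j == 0:
                acc[i, j] = acc[i - 1, j] + d[i, j]
            else:
                acc[i, j] = min(acc[i - 1, j], acc[i, j - 1]) + d[i, j]
    return acc[-1, -1]

# example: two persons, each represented by 8 stripes of 128-d local features
print(aligned_distance(np.random.rand(8, 128), np.random.rand(8, 128)))
```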
## Environment Dependencies
| Software | Version |
| :------: | :------: |
| DTK | 25.04.2 |
| python | 3.10.12 |
| transformers | 4.57.1 |
| vllm | 0.11.0+das.opt1.alpha.8e22ded.dtk25042 |
| torch | 2.5.1+das.opt1.dtk25042 |
| triton | 3.1+das.opt1.3c5d12d.dtk25041 |
| flash_attn | 2.6.1+das.opt1.dtk2504 |
| flash_mla | 1.0.0+das.opt1.dtk25042 |
Currently only the following image is supported:
- adjust the mount path passed to `-v` to match where your code and models actually live
```bash
docker run -it --shm-size 60g --network=host --name minimax_m2 --privileged --device=/dev/kfd --device=/dev/dri --device=/dev/mkfd --group-add video --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -u root -v /opt/hyhal/:/opt/hyhal/:ro -v /path/your_code_path/:/path/your_code_path/ image.sourcefind.cn:5000/dcu/admin/base/pytorch:2.5.1-ubuntu22.04-dtk25.04.2-py3.10 bash
```
More images are available for download from [光源](https://sourcefind.cn/#/service-list).
The special deep-learning libraries required for this project's DCU cards can be downloaded from the [光合](https://developer.sourcefind.cn/tool/) developer community.
## Datasets
Create a directory under this repo to store the reid datasets:
```bash
cd AlignedReID/
mkdir data/
```
### Market1501
1. Download the dataset to `data/` from http://www.liangzheng.org/Project/project_reid.html.
2. Extract the data and rename the folder to `market1501`. The data structure should look like:
```
market1501/
bounding_box_test/
bounding_box_train/
...
```
3. Use the argument `-d market1501` to select the dataset when launching the training code.
### CUHK03
1. Create a folder named cuhk03/ under data/.
2. Download the dataset from http://www.ee.cuhk.edu.hk/~xgwang/CUHK_identification.html to data/cuhk03/ and extract cuhk03_release.zip, so you will have data/cuhk03/cuhk03_release.
3. Download the new split [14] from person-re-ranking. What you need are cuhk03_new_protocol_config_detected.mat and cuhk03_new_protocol_config_labeled.mat. Put these two mat files under data/cuhk03. The final data structure looks like:
```
cuhk03/
cuhk03_release/
cuhk03_new_protocol_config_detected.mat
cuhk03_new_protocol_config_labeled.mat
...
```
4. Use -d cuhk03 when running the training code. In the default mode we use the new split (767/700). If you want to use the original split (1367/100) created by [13], specify --cuhk03-classic-split. Since [13] computes CMC differently from Market1501, you may need to specify --use-metric-cuhk03 for a fair comparison with their method. In addition, both labeled and detected modes are supported; the default mode loads detected images. Specify --cuhk03-labeled if you want to train and test on labeled images.
### DukeMTMC-reID
1. Create a directory named dukemtmc-reid under data/.
2. Download the dataset DukeMTMC-reID.zip from https://github.com/layumi/DukeMTMC-reID_evaluation#download-dataset and put it under data/dukemtmc-reid. Extract the zip file, which gives:
```bash
dukemtmc-reid/
DukeMTMC-reid.zip # (you can delete this zip file, it is ok)
DukeMTMC-reid/ # this folder contains 8 files.
```
3. Use -d dukemtmcreid when running the training code.
### MSMT17
1. Create a directory named msmt17/ under data/.
2. Download the dataset MSMT17_V1.tar.gz from http://www.pkuvmc.com/publications/msmt17.html to data/msmt17/. Extract the file in the same folder, so you will have:
```bash
msmt17/
MSMT17_V1.tar.gz # (do whatever you want with this .tar file)
MSMT17_V1/
train/
test/
list_train.txt
... (six .txt files in total)
```
3. Use -d msmt17 when running the training code.
## Training
We recommend training on the CUHK03 and MSMT17 datasets for future research; the command is:
```bash
python train_alignedreid.py -d cuhk03 -a resnet50 --test_distance global_local --reranking (--labelsmooth)
```
## Inference
Global + Local (DMLI)
```bash
python train_alignedreid.py -d cuhk03 -a resnet50 --evaluate --resume YOUR_MODEL_PATH --save-dir log/resnet50-cuhk03-alignedreid --test_distance global_local (--reranking)
```
Local (DMLI)
```bash
python train_alignedreid.py -d cuhk03 -a resnet50 --evaluate --resume YOUR_MODEL_PATH --save-dir log/resnet50-cuhk03-alignedreid --test_distance local (--reranking)
```
## Demo Results
<div align=center>
<img src="imgs/Figure_1.png"/>
</div>
### Accuracy
DCU accuracy is consistent with GPU; inference framework: PyTorch.
## Pretrained Weights
| Model | Weight size | DCU model | Minimum cards required | Download |
|:-----:|:----------:|:----------:|:---------------------:|:----------:|
| ResNet50 | 96.8MB | K100AI | 1 | [Download](https://pan.baidu.com/s/1Zbx_K2Cm1cgUUTNYJRImMA) |
## Source Repository and Feedback
- https://developer.sourcefind.cn/codes/modelzoo/alignedreid_pytorch
## References
- https://github.com/michuanhaohao/AlignedReID
# AlignedReID++ (Pattern Recognition)
Alignedreid++: Dynamically Matching Local Information for Person Re-Identification.
[[PDF]](https://www.sciencedirect.com/science/article/pii/S0031320319302031?via%3Dihub#!)
```
@article{luo2019alignedreid++,
title={AlignedReID++: Dynamically matching local information for person re-identification},
author={Luo, Hao and Jiang, Wei and Zhang, Xuan and Fan, Xing and Qian, Jingjing and Zhang, Chi},
journal={Pattern Recognition},
volume={94},
pages={53--61},
year={2019},
publisher={Elsevier}
}
@article{zhang2017alignedreid,
title={Alignedreid: Surpassing human-level performance in person re-identification},
author={Zhang, Xuan and Luo, Hao and Fan, Xing and Xiang, Weilai and Sun, Yixiao and Xiao, Qiqi and Jiang, Wei and Zhang, Chi and Sun, Jian},
journal={arXiv preprint arXiv:1711.08184},
year={2017}
}
```
# Version
- Python 2 / Python 3
- torch 0.4.0
- torchvision 0.2.1
Now we support ResNet, ShuffleNet, DenseNet and InceptionV4.
## Demo
<img src='imgs/Figure_1.png' align="right" width=415>
<img src='imgs/Figure_0.png' align="left" width=415>
## Have a try
You can test the demo with your own model and datasets. You need to change the paths to the model and images manually. The default model is ResNet50 trained on Market1501.
```bash
python Alignedreid_demo.py
```
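For reference, these are the values you would edit (copied from the demo script included in this repository; the `checkpoint_path` name is only for illustration, the demo calls `torch.load` inline):
```python
# Paths used by Alignedreid_demo.py; adapt them to your own checkpoint and images.
checkpoint_path = "./log/market1501/alignedreid/checkpoint_ep300.pth.tar"  # trained weights
img_path1 = './data/market1501/query/0001_c1s1_001051_00.jpg'              # first query image
img_path2 = './data/market1501/query/0001_c2s1_000301_00.jpg'              # second query image
```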
## Results (rank1/mAP) and models
#### Market1501
| Model | Loss | Global | Local | DMLI | Global+DMLI | Global+DMLI(RK) |Download|
| --- | :---: | :---: | :---: | :---: | :---: | :---: | :---: |
| Resnet50 | Alignedreid | 89.2/75.9 | 90.7/75.5 | 91.1/77.4 | 91.0/77.6 | 92.0/88.5 | [model](https://pan.baidu.com/s/1Zbx_K2Cm1cgUUTNYJRImMA) |
| Resnet50 | Alignedreid(LS) | 90.6/77.7 | 91.4/76.7 | 91.9/78.8 | 91.8/79.1 | 92.8/89.4 | [model](https://pan.baidu.com/s/12JHXjGMzdEv6BsNhpeMYbQ) |
#### DukeMTMCReID
| Model | Loss | Global | Local | DMLI | Global+DMLI | Global+DMLI(RK) |Download|
| --- | :---: | :---: | :---: | :---: | :---: | :---: | :---: |
| Resnet50 | Alignedreid | 79.3/65.6 | 80.9/66.9 | 81.0/67.7 | 80.7/68.0 | 85.2/81.2 |[model](https://pan.baidu.com/s/1RJg7cU1QKdGKJsndJU3dlA)|
| Resnet50 | Alignedreid(LS) | 81.2/67.4 | 81.5/68.4 | 81.8/69.4 | 82.1/69.7 | 86.2/82.8 |[model](https://pan.baidu.com/s/1CW-ii3lpYnlX7n-JppliVw) |
#### CUHK03
| Model | Loss | Global | Local | DMLI | Global+DMLI | Global+DMLI(RK) |Download|
| --- | :---: | :---: | :---: | :---: | :---: | :---: | :---: |
| Resnet50 | Alignedreid | 60.7/58.4 | 60.2/58.2 | 60.9/59.6 | 60.9/59.7 | 67.6/70.7 |[model](https://pan.baidu.com/s/1YI8gs_SnoKfWnQyU-HrK8g)|
| Resnet50 | Alignedreid(LS) | 59.7/58.1 | 59.9/57.2 | 61.1/59.4 | 61.5/59.6 | 67.9/70.7 |[model](https://pan.baidu.com/s/1AVxGRBhucNUfUYTeIXyP9A) |
#### MSMT17
| Model | Loss | Global | Local | DMLI | Global+DMLI |Download|
| --- | :---: | :---: | :---: | :---: | :---: | :---: |
| Resnet50 | Alignedreid | 63.4/38.4 | 63.8 | 66.3/40.2 | 66.3/40.6 |[model](https://pan.baidu.com/s/1E7rV4PCDoDmAIWjwBnclwg)|
| Resnet50 | Alignedreid(LS) | 67.6/41.8 | 67.3/38.4| 69.6/43.3 | 69.8/43.7 |[model](https://pan.baidu.com/s/1D46g8D_OvnUfu43cWKK83Q) |
#### Market1501-Partial
| Model | Loss | Global | Local | DMLI |
| --- | :---: | :---: | :---: | :---: |
| Resnet50 | Softmax | 59.0/46.4 | 56.5/43.7 | 63.3/50.0 |
| Resnet50 | Softmax+TriHard | 62.4/49.7 | 51.8/37.6 | 68.0/52.7 |
| Resnet50 | Alignedreid | 65.9/53.5 | 52.8/38.1 | 70.1/55.3 |
#### DukeMTMCReID-Partial
| Model | Loss | Global | Local | DMLI |
| --- | :---: | :---: | :---: | :---: |
| Resnet50 | Softmax | 45.9/34.7 | 48.6/36.1 | 53.6/40.6 |
| Resnet50 | Softmax+TriHard | 47.8/36.4 | 43.3/31.5 | 53.7/40.5 |
| Resnet50 | Alignedreid | 49.8/38.2 | 44.8/33.3 | 55.3/42.8 |
You can download the models on [Google Drive](https://drive.google.com/open?id=1-QApSAY51NvRcQgyUxCn8lP1sSWCeMVg).
# Prepare data
Create a directory to store reid datasets under this repo via
```bash
cd AlignedReID/
mkdir data/
```
If you wanna store datasets in another directory, you need to specify `--root path_to_your/data` when running the training code. Please follow the instructions below to prepare each dataset. After that, you can simply do `-d the_dataset` when running the training code.
**Market1501** :
1. Download dataset to `data/` from http://www.liangzheng.org/Project/project_reid.html.
2. Extract dataset and rename to `market1501`. The data structure would look like:
```
market1501/
bounding_box_test/
bounding_box_train/
...
```
3. Use `-d market1501` when running the training code.
**CUHK03** [13]:
1. Create a folder named `cuhk03/` under `data/`.
2. Download dataset to `data/cuhk03/` from http://www.ee.cuhk.edu.hk/~xgwang/CUHK_identification.html and extract `cuhk03_release.zip`, so you will have `data/cuhk03/cuhk03_release`.
3. Download new split [14] from [person-re-ranking](https://github.com/zhunzhong07/person-re-ranking/tree/master/evaluation/data/CUHK03). What you need are `cuhk03_new_protocol_config_detected.mat` and `cuhk03_new_protocol_config_labeled.mat`. Put these two mat files under `data/cuhk03`. Finally, the data structure would look like
```
cuhk03/
cuhk03_release/
cuhk03_new_protocol_config_detected.mat
cuhk03_new_protocol_config_labeled.mat
...
```
4. Use `-d cuhk03` when running the training code. In default mode, we use new split (767/700). If you wanna use the original splits (1367/100) created by [13], specify `--cuhk03-classic-split`. As [13] computes CMC differently from Market1501, you might need to specify `--use-metric-cuhk03` for fair comparison with their method. In addition, we support both `labeled` and `detected` modes. The default mode loads `detected` images. Specify `--cuhk03-labeled` if you wanna train and test on `labeled` images.
**DukeMTMC-reID** [16, 17]:
1. Create a directory under `data/` called `dukemtmc-reid`.
2. Download dataset `DukeMTMC-reID.zip` from https://github.com/layumi/DukeMTMC-reID_evaluation#download-dataset and put it to `data/dukemtmc-reid`. Extract the zip file, which leads to
```
dukemtmc-reid/
DukeMTMC-reid.zip # (you can delete this zip file, it is ok)
DukeMTMC-reid/ # this folder contains 8 files.
```
3. Use `-d dukemtmcreid` when running the training code.
**MSMT17** [22]:
1. Create a directory named `msmt17/` under `data/`.
2. Download dataset `MSMT17_V1.tar.gz` to `data/msmt17/` from http://www.pkuvmc.com/publications/msmt17.html. Extract the file under the same folder, so you will have
```
msmt17/
MSMT17_V1.tar.gz # (do whatever you want with this .tar file)
MSMT17_V1/
train/
test/
list_train.txt
... (totally six .txt files)
```
3. Use `-d msmt17` when running the training code.
# Train
Since performance on Market1501 and DukeMTMCReID is already very high, we suggest using CUHK03 and MSMT17 for future research.
```bash
python train_alignedreid.py -d cuhk03 -a resnet50 --test_distance global_local --reranking (--labelsmooth)
```
**Note:** You can add your own experimental settings through the `args` options.
# Test
#### Global+Local(DMLI)
```bash
python train_alignedreid.py -d cuhk03 -a resnet50 --evaluate --resume YOUR_MODEL_PATH --save-dir log/resnet50-cuhk03-alignedreid --test_distance global_local (--reranking)
```
#### Local(DMLI)
```bash
python train_alignedreid.py -d cuhk03 -a resnet50 --evaluate --resume YOUR_MODEL_PATH --save-dir log/resnet50-cuhk03-alignedreid --test_distance local (--reranking)
```
#### Local(Without DMLI)
```bash
python train_alignedreid.py -d cuhk03 -a resnet50 --evaluate --resume YOUR_MODEL_PATH --save-dir log/resnet50-cuhk03-alignedreid --test_distance local --unaligned (--reranking)
```
#### Global
```bash
python train_alignedreid.py -d cuhk03 -a resnet50 --evaluate --resume YOUR_MODEL_PATH --save-dir log/resnet50-cuhk03-alignedreid --test_distance global (--reranking)
```
**Note:** `(--reranking)` is an optional flag; add it to apply 'Re-ranking with k-reciprocal Encoding (CVPR 2017)' and boost the performance.
## Test on Partial ReID
```bash
scp -r data/market1501 data/market1501-partial
python gen_partial_dataset.py
python train_alignedreid.py -d market1501-partial -a resnet50 --evaluate --resume YOUR_MODEL_PATH --save-dir log/resnet50-market1501-partial-alignedreid --test_distance local (--unaligned)
```
import torch.nn as nn

class HorizontalMaxPool2d(nn.Module):
    def __init__(self):
        super(HorizontalMaxPool2d, self).__init__()

    def forward(self, x):
        inp_size = x.size()
        # max-pool across the full width, keeping one vector per horizontal stripe
        return nn.functional.max_pool2d(input=x, kernel_size=(1, inp_size[3]))
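# Minimal shape check (illustrative only, not part of the original file): for a
# ResNet50 feature map of shape [N, 2048, 8, 4], horizontal max pooling keeps one
# 2048-d vector per horizontal stripe, i.e. the output has shape [N, 2048, 8, 1].
if __name__ == '__main__':
    import torch
    feat = torch.randn(2, 2048, 8, 4)
    print(HorizontalMaxPool2d()(feat).shape)  # torch.Size([2, 2048, 8, 1])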
import torch
def batch_euclidean_dist(x, y):
"""
Args:
x: pytorch Variable, with shape [Batch size, Local part, Feature channel]
y: pytorch Variable, with shape [Batch size, Local part, Feature channel]
Returns:
dist: pytorch Variable, with shape [Batch size, Local part, Local part]
"""
assert len(x.size()) == 3
assert len(y.size()) == 3
assert x.size(0) == y.size(0)
assert x.size(-1) == y.size(-1)
N, m, d = x.size()
N, n, d = y.size()
# shape [N, m, n]
xx = torch.pow(x, 2).sum(-1, keepdim=True).expand(N, m, n)
yy = torch.pow(y, 2).sum(-1, keepdim=True).expand(N, n, m).permute(0, 2, 1)
dist = xx + yy
dist.baddbmm_(x, y.permute(0, 2, 1), beta=1, alpha=-2)  # dist = dist - 2 * x @ y^T (keyword form for newer PyTorch)
dist = dist.clamp(min=1e-12).sqrt() # for numerical stability
return dist
def shortest_dist(dist_mat):
"""Parallel version.
Args:
dist_mat: pytorch Variable, available shape:
1) [m, n]
2) [m, n, N], N is batch size
3) [m, n, *], * can be arbitrary additional dimensions
Returns:
dist: three cases corresponding to `dist_mat`:
1) scalar
2) pytorch Variable, with shape [N]
3) pytorch Variable, with shape [*]
"""
m, n = dist_mat.size()[:2]
# Just offering some reference for accessing intermediate distance.
dist = [[0 for _ in range(n)] for _ in range(m)]
for i in range(m):
for j in range(n):
if (i == 0) and (j == 0):
dist[i][j] = dist_mat[i, j]
elif (i == 0) and (j > 0):
dist[i][j] = dist[i][j - 1] + dist_mat[i, j]
elif (i > 0) and (j == 0):
dist[i][j] = dist[i - 1][j] + dist_mat[i, j]
else:
dist[i][j] = torch.min(dist[i - 1][j], dist[i][j - 1]) + dist_mat[i, j]
dist = dist[-1][-1]
return dist
def hard_example_mining(dist_mat, labels, return_inds=False):
"""For each anchor, find the hardest positive and negative sample.
Args:
dist_mat: pytorch Variable, pair wise distance between samples, shape [N, N]
labels: pytorch LongTensor, with shape [N]
return_inds: whether to return the indices. Save time if `False`(?)
Returns:
dist_ap: pytorch Variable, distance(anchor, positive); shape [N]
dist_an: pytorch Variable, distance(anchor, negative); shape [N]
p_inds: pytorch LongTensor, with shape [N];
indices of selected hard positive samples; 0 <= p_inds[i] <= N - 1
n_inds: pytorch LongTensor, with shape [N];
indices of selected hard negative samples; 0 <= n_inds[i] <= N - 1
NOTE: Only consider the case in which all labels have same num of samples,
thus we can cope with all anchors in parallel.
"""
assert len(dist_mat.size()) == 2
assert dist_mat.size(0) == dist_mat.size(1)
N = dist_mat.size(0)
# shape [N, N]
is_pos = labels.expand(N, N).eq(labels.expand(N, N).t())
is_neg = labels.expand(N, N).ne(labels.expand(N, N).t())
# `dist_ap` means distance(anchor, positive)
# both `dist_ap` and `relative_p_inds` with shape [N, 1]
dist_ap, relative_p_inds = torch.max(
dist_mat[is_pos].contiguous().view(N, -1), 1, keepdim=True)
# `dist_an` means distance(anchor, negative)
# both `dist_an` and `relative_n_inds` with shape [N, 1]
dist_an, relative_n_inds = torch.min(
dist_mat[is_neg].contiguous().view(N, -1), 1, keepdim=True)
# shape [N]
dist_ap = dist_ap.squeeze(1)
dist_an = dist_an.squeeze(1)
if return_inds:
# shape [N, N]
ind = (labels.new().resize_as_(labels)
.copy_(torch.arange(0, N).long())
.unsqueeze( 0).expand(N, N))
# shape [N, 1]
p_inds = torch.gather(
ind[is_pos].contiguous().view(N, -1), 1, relative_p_inds.data)
n_inds = torch.gather(
ind[is_neg].contiguous().view(N, -1), 1, relative_n_inds.data)
# shape [N]
p_inds = p_inds.squeeze(1)
n_inds = n_inds.squeeze(1)
return dist_ap, dist_an, p_inds, n_inds
return dist_ap, dist_an
def euclidean_dist(x, y):
"""
Args:
x: pytorch Variable, with shape [m, d]
y: pytorch Variable, with shape [n, d]
Returns:
dist: pytorch Variable, with shape [m, n]
"""
m, n = x.size(0), y.size(0)
xx = torch.pow(x, 2).sum(1, keepdim=True).expand(m, n)
yy = torch.pow(y, 2).sum(1, keepdim=True).expand(n, m).t()
dist = xx + yy
dist.addmm_(x, y.t(), beta=1, alpha=-2)  # dist = dist - 2 * x @ y^T (keyword form for newer PyTorch)
dist = dist.clamp(min=1e-12).sqrt() # for numerical stability
return dist
def batch_local_dist(x, y):
"""
Args:
x: pytorch Variable, with shape [N, m, d]
y: pytorch Variable, with shape [N, n, d]
Returns:
dist: pytorch Variable, with shape [N]
"""
assert len(x.size()) == 3
assert len(y.size()) == 3
assert x.size(0) == y.size(0)
assert x.size(-1) == y.size(-1)
# shape [N, m, n]
dist_mat = batch_euclidean_dist(x, y)
dist_mat = (torch.exp(dist_mat) - 1.) / (torch.exp(dist_mat) + 1.)
# shape [N]
dist = shortest_dist(dist_mat.permute(1, 2, 0))
return dist
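# Illustrative usage (shapes are an assumption, not part of the original file):
# after permuting local features from [N, c, H] to [N, H, c] (H stripes, c channels),
# batch_local_dist returns one aligned distance per pair in the batch, e.g.
#   lf1, lf2 = torch.randn(32, 8, 128), torch.randn(32, 8, 128)
#   batch_local_dist(lf1, lf2).shape  # torch.Size([32])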
if __name__ == '__main__':
    x = torch.randn(32, 2048)
    y = torch.randn(32, 2048)
    # hard_example_mining also needs the identity labels of the batch,
    # e.g. 8 identities with 4 instances each (as produced by the identity sampler)
    labels = torch.arange(0, 8).view(8, 1).repeat(1, 4).view(-1)
    dist_mat = euclidean_dist(x, y)
    dist_ap, dist_an, p_inds, n_inds = hard_example_mining(dist_mat, labels, return_inds=True)
    from IPython import embed
    embed()
import cv2
import os
import numpy as np
data_root = "/home/csc302/workspace/luohao/code/AlignedReID/data/market1501/query"
gen_root = "/home/csc302/workspace/luohao/code/AlignedReID/data/market1501_partial/query"
def random_crop(img, sample_rate=0.6):
    """Randomly keep a horizontal slice of the person (60%-90% of the height), then resize back."""
    h, w = img.shape[:2]
    sh = np.random.randint(int(sample_rate * h), int(h * 0.9), 1)[0]
    bh = np.random.randint(0, h - sh, 1)[0]
    img = img[bh:sh + bh, :, :]
    img = cv2.resize(img, (w, h))
    return img

os.makedirs(gen_root, exist_ok=True)  # make sure the output directory exists before writing
for image_name in os.listdir(data_root):
    if image_name[-3:] != 'jpg':
        continue
    img_path = os.path.join(data_root, image_name)
    img = cv2.imread(img_path)
    img = random_crop(img)
    save_path = os.path.join(gen_root, image_name)
    cv2.imwrite(save_path, img)
# Unique model identifier
modelCode=1824
# Model name
modelName=AlignedReID
# Model description
modelDescription=AlignedReID is a person re-identification (ReID) algorithm proposed by Megvii, and the first reported to surpass human-level performance on this task. Its core idea is local feature alignment, which markedly improves the capture of fine-grained details.
# Application scenario
processType=Inference
# Algorithm category
appScenario=Object detection
# Framework type
frameType=pytorch
# Accelerator type
accelerateType=K100AI
from __future__ import absolute_import
import torch
from torch import nn
from torch.nn import functional as F
import torchvision
from aligned.HorizontalMaxPool2D import HorizontalMaxPool2d
__all__ = ['DenseNet121']
class DenseNet121(nn.Module):
def __init__(self, num_classes, loss={'softmax'}, aligned=False,**kwargs):
super(DenseNet121, self).__init__()
self.loss = loss
densenet121 = torchvision.models.densenet121(pretrained=True)
self.base = densenet121.features
self.classifier = nn.Linear(1024, num_classes)
self.feat_dim = 1024 # feature dimension
self.aligned = aligned
self.horizon_pool = HorizontalMaxPool2d()
if self.aligned:
self.bn = nn.BatchNorm2d(1024)
self.relu = nn.ReLU(inplace=True)
self.conv1 = nn.Conv2d(1024, 128, kernel_size=1, stride=1, padding=0, bias=True)
def forward(self, x):
x = self.base(x)
if not self.training:
lf = self.horizon_pool(x)
if self.aligned:
lf = self.bn(x)
lf = self.relu(lf)
lf = self.horizon_pool(lf)
lf = self.conv1(lf)
if self.aligned or not self.training:
lf = lf.view(lf.size()[0:3])
lf = lf / torch.pow(lf, 2).sum(dim=1, keepdim=True).clamp(min=1e-12).sqrt()
x = F.avg_pool2d(x, x.size()[2:])
f = x.view(x.size(0), -1)
# f = 1. * f / (torch.norm(f, 2, dim=-1, keepdim=True).expand_as(f) + 1e-12)
if not self.training:
return f, lf
y = self.classifier(f)
if self.loss == {'softmax'}:
return y
elif self.loss == {'metric'}:
if self.aligned: return f, lf
return f
elif self.loss == {'softmax', 'metric'}:
if self.aligned: return y, f, lf
return y, f
else:
raise KeyError("Unsupported loss: {}".format(self.loss))
from __future__ import absolute_import
import torch
import torch.nn as nn
from torch.nn import functional as F
import torch.utils.model_zoo as model_zoo
import os
import sys
from aligned.HorizontalMaxPool2D import HorizontalMaxPool2d
__all__ = ['InceptionV4ReID']
"""
Code imported from https://github.com/Cadene/pretrained-models.pytorch
"""
pretrained_settings = {
'inceptionv4': {
'imagenet': {
'url': 'http://data.lip6.fr/cadene/pretrainedmodels/inceptionv4-8e4777a0.pth',
'input_space': 'RGB',
'input_size': [3, 299, 299],
'input_range': [0, 1],
'mean': [0.5, 0.5, 0.5],
'std': [0.5, 0.5, 0.5],
'num_classes': 1000
},
'imagenet+background': {
'url': 'http://data.lip6.fr/cadene/pretrainedmodels/inceptionv4-8e4777a0.pth',
'input_space': 'RGB',
'input_size': [3, 299, 299],
'input_range': [0, 1],
'mean': [0.5, 0.5, 0.5],
'std': [0.5, 0.5, 0.5],
'num_classes': 1001
}
}
}
class BasicConv2d(nn.Module):
def __init__(self, in_planes, out_planes, kernel_size, stride, padding=0):
super(BasicConv2d, self).__init__()
self.conv = nn.Conv2d(in_planes, out_planes,
kernel_size=kernel_size, stride=stride,
padding=padding, bias=False) # verify bias false
self.bn = nn.BatchNorm2d(out_planes,
eps=0.001, # value found in tensorflow
momentum=0.1, # default pytorch value
affine=True)
self.relu = nn.ReLU(inplace=True)
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
x = self.relu(x)
return x
class Mixed_3a(nn.Module):
def __init__(self):
super(Mixed_3a, self).__init__()
self.maxpool = nn.MaxPool2d(3, stride=2)
self.conv = BasicConv2d(64, 96, kernel_size=3, stride=2)
def forward(self, x):
x0 = self.maxpool(x)
x1 = self.conv(x)
out = torch.cat((x0, x1), 1)
return out
class Mixed_4a(nn.Module):
def __init__(self):
super(Mixed_4a, self).__init__()
self.branch0 = nn.Sequential(
BasicConv2d(160, 64, kernel_size=1, stride=1),
BasicConv2d(64, 96, kernel_size=3, stride=1)
)
self.branch1 = nn.Sequential(
BasicConv2d(160, 64, kernel_size=1, stride=1),
BasicConv2d(64, 64, kernel_size=(1,7), stride=1, padding=(0,3)),
BasicConv2d(64, 64, kernel_size=(7,1), stride=1, padding=(3,0)),
BasicConv2d(64, 96, kernel_size=(3,3), stride=1)
)
def forward(self, x):
x0 = self.branch0(x)
x1 = self.branch1(x)
out = torch.cat((x0, x1), 1)
return out
class Mixed_5a(nn.Module):
def __init__(self):
super(Mixed_5a, self).__init__()
self.conv = BasicConv2d(192, 192, kernel_size=3, stride=2)
self.maxpool = nn.MaxPool2d(3, stride=2)
def forward(self, x):
x0 = self.conv(x)
x1 = self.maxpool(x)
out = torch.cat((x0, x1), 1)
return out
class Inception_A(nn.Module):
def __init__(self):
super(Inception_A, self).__init__()
self.branch0 = BasicConv2d(384, 96, kernel_size=1, stride=1)
self.branch1 = nn.Sequential(
BasicConv2d(384, 64, kernel_size=1, stride=1),
BasicConv2d(64, 96, kernel_size=3, stride=1, padding=1)
)
self.branch2 = nn.Sequential(
BasicConv2d(384, 64, kernel_size=1, stride=1),
BasicConv2d(64, 96, kernel_size=3, stride=1, padding=1),
BasicConv2d(96, 96, kernel_size=3, stride=1, padding=1)
)
self.branch3 = nn.Sequential(
nn.AvgPool2d(3, stride=1, padding=1, count_include_pad=False),
BasicConv2d(384, 96, kernel_size=1, stride=1)
)
def forward(self, x):
x0 = self.branch0(x)
x1 = self.branch1(x)
x2 = self.branch2(x)
x3 = self.branch3(x)
out = torch.cat((x0, x1, x2, x3), 1)
return out
class Reduction_A(nn.Module):
def __init__(self):
super(Reduction_A, self).__init__()
self.branch0 = BasicConv2d(384, 384, kernel_size=3, stride=2)
self.branch1 = nn.Sequential(
BasicConv2d(384, 192, kernel_size=1, stride=1),
BasicConv2d(192, 224, kernel_size=3, stride=1, padding=1),
BasicConv2d(224, 256, kernel_size=3, stride=2)
)
self.branch2 = nn.MaxPool2d(3, stride=2)
def forward(self, x):
x0 = self.branch0(x)
x1 = self.branch1(x)
x2 = self.branch2(x)
out = torch.cat((x0, x1, x2), 1)
return out
class Inception_B(nn.Module):
def __init__(self):
super(Inception_B, self).__init__()
self.branch0 = BasicConv2d(1024, 384, kernel_size=1, stride=1)
self.branch1 = nn.Sequential(
BasicConv2d(1024, 192, kernel_size=1, stride=1),
BasicConv2d(192, 224, kernel_size=(1,7), stride=1, padding=(0,3)),
BasicConv2d(224, 256, kernel_size=(7,1), stride=1, padding=(3,0))
)
self.branch2 = nn.Sequential(
BasicConv2d(1024, 192, kernel_size=1, stride=1),
BasicConv2d(192, 192, kernel_size=(7,1), stride=1, padding=(3,0)),
BasicConv2d(192, 224, kernel_size=(1,7), stride=1, padding=(0,3)),
BasicConv2d(224, 224, kernel_size=(7,1), stride=1, padding=(3,0)),
BasicConv2d(224, 256, kernel_size=(1,7), stride=1, padding=(0,3))
)
self.branch3 = nn.Sequential(
nn.AvgPool2d(3, stride=1, padding=1, count_include_pad=False),
BasicConv2d(1024, 128, kernel_size=1, stride=1)
)
def forward(self, x):
x0 = self.branch0(x)
x1 = self.branch1(x)
x2 = self.branch2(x)
x3 = self.branch3(x)
out = torch.cat((x0, x1, x2, x3), 1)
return out
class Reduction_B(nn.Module):
def __init__(self):
super(Reduction_B, self).__init__()
self.branch0 = nn.Sequential(
BasicConv2d(1024, 192, kernel_size=1, stride=1),
BasicConv2d(192, 192, kernel_size=3, stride=2)
)
self.branch1 = nn.Sequential(
BasicConv2d(1024, 256, kernel_size=1, stride=1),
BasicConv2d(256, 256, kernel_size=(1,7), stride=1, padding=(0,3)),
BasicConv2d(256, 320, kernel_size=(7,1), stride=1, padding=(3,0)),
BasicConv2d(320, 320, kernel_size=3, stride=2)
)
self.branch2 = nn.MaxPool2d(3, stride=2)
def forward(self, x):
x0 = self.branch0(x)
x1 = self.branch1(x)
x2 = self.branch2(x)
out = torch.cat((x0, x1, x2), 1)
return out
class Inception_C(nn.Module):
def __init__(self):
super(Inception_C, self).__init__()
self.branch0 = BasicConv2d(1536, 256, kernel_size=1, stride=1)
self.branch1_0 = BasicConv2d(1536, 384, kernel_size=1, stride=1)
self.branch1_1a = BasicConv2d(384, 256, kernel_size=(1,3), stride=1, padding=(0,1))
self.branch1_1b = BasicConv2d(384, 256, kernel_size=(3,1), stride=1, padding=(1,0))
self.branch2_0 = BasicConv2d(1536, 384, kernel_size=1, stride=1)
self.branch2_1 = BasicConv2d(384, 448, kernel_size=(3,1), stride=1, padding=(1,0))
self.branch2_2 = BasicConv2d(448, 512, kernel_size=(1,3), stride=1, padding=(0,1))
self.branch2_3a = BasicConv2d(512, 256, kernel_size=(1,3), stride=1, padding=(0,1))
self.branch2_3b = BasicConv2d(512, 256, kernel_size=(3,1), stride=1, padding=(1,0))
self.branch3 = nn.Sequential(
nn.AvgPool2d(3, stride=1, padding=1, count_include_pad=False),
BasicConv2d(1536, 256, kernel_size=1, stride=1)
)
def forward(self, x):
x0 = self.branch0(x)
x1_0 = self.branch1_0(x)
x1_1a = self.branch1_1a(x1_0)
x1_1b = self.branch1_1b(x1_0)
x1 = torch.cat((x1_1a, x1_1b), 1)
x2_0 = self.branch2_0(x)
x2_1 = self.branch2_1(x2_0)
x2_2 = self.branch2_2(x2_1)
x2_3a = self.branch2_3a(x2_2)
x2_3b = self.branch2_3b(x2_2)
x2 = torch.cat((x2_3a, x2_3b), 1)
x3 = self.branch3(x)
out = torch.cat((x0, x1, x2, x3), 1)
return out
class InceptionV4(nn.Module):
def __init__(self, num_classes=1001):
super(InceptionV4, self).__init__()
# Special attributs
self.input_space = None
self.input_size = (299, 299, 3)
self.mean = None
self.std = None
# Modules
self.features = nn.Sequential(
BasicConv2d(3, 32, kernel_size=3, stride=2),
BasicConv2d(32, 32, kernel_size=3, stride=1),
BasicConv2d(32, 64, kernel_size=3, stride=1, padding=1),
Mixed_3a(),
Mixed_4a(),
Mixed_5a(),
Inception_A(),
Inception_A(),
Inception_A(),
Inception_A(),
Reduction_A(), # Mixed_6a
Inception_B(),
Inception_B(),
Inception_B(),
Inception_B(),
Inception_B(),
Inception_B(),
Inception_B(),
Reduction_B(), # Mixed_7a
Inception_C(),
Inception_C(),
Inception_C()
)
self.avg_pool = nn.AvgPool2d(8, count_include_pad=False)
self.last_linear = nn.Linear(1536, num_classes)
def logits(self, features):
x = self.avg_pool(features)
x = x.view(x.size(0), -1)
x = self.last_linear(x)
return x
def forward(self, input):
x = self.features(input)
x = self.logits(x)
return x
def inceptionv4(num_classes=1000, pretrained='imagenet'):
if pretrained:
settings = pretrained_settings['inceptionv4'][pretrained]
assert num_classes == settings['num_classes'], \
"num_classes should be {}, but is {}".format(settings['num_classes'], num_classes)
# both 'imagenet'&'imagenet+background' are loaded from same parameters
model = InceptionV4(num_classes=1001)
model.load_state_dict(model_zoo.load_url(settings['url']))
if pretrained == 'imagenet':
new_last_linear = nn.Linear(1536, 1000)
new_last_linear.weight.data = model.last_linear.weight.data[1:]
new_last_linear.bias.data = model.last_linear.bias.data[1:]
model.last_linear = new_last_linear
model.input_space = settings['input_space']
model.input_size = settings['input_size']
model.input_range = settings['input_range']
model.mean = settings['mean']
model.std = settings['std']
else:
model = InceptionV4(num_classes=num_classes)
return model
class InceptionV4ReID(nn.Module):
def __init__(self, num_classes, loss={'softmax'}, aligned=False, **kwargs):
super(InceptionV4ReID, self).__init__()
self.loss = loss
base = inceptionv4()
self.features = base.features
self.classifier = nn.Linear(1536, num_classes)
self.feat_dim = 1536 # feature dimension
self.aligned = aligned
self.horizon_pool = HorizontalMaxPool2d()
if self.aligned:
self.bn = nn.BatchNorm2d(1536)
self.relu = nn.ReLU(inplace=True)
self.conv1 = nn.Conv2d(1536, 128, kernel_size=1, stride=1, padding=0, bias=True)
def forward(self, x):
x = self.features(x)
if not self.training:
lf = self.horizon_pool(x)
if self.aligned:
lf = self.bn(x)
lf = self.relu(lf)
lf = self.horizon_pool(lf)
lf = self.conv1(lf)
if self.aligned or not self.training:
lf = lf.view(lf.size()[0:3])
lf = lf / torch.pow(lf, 2).sum(dim=1, keepdim=True).clamp(min=1e-12).sqrt()
x = F.avg_pool2d(x, x.size()[2:])
f = x.view(x.size(0), -1)
if not self.training:
return f, lf
y = self.classifier(f)
if self.loss == {'softmax'}:
return y
elif self.loss == {'metric'}:
if self.aligned: return f, lf
return f
elif self.loss == {'softmax', 'metric'}:
if self.aligned: return y, f, lf
return y, f
else:
raise KeyError("Unsupported loss: {}".format(self.loss))
from __future__ import absolute_import
import torch
from torch import nn
from torch.nn import functional as F
import torchvision
from aligned.HorizontalMaxPool2D import HorizontalMaxPool2d
__all__ = ['ResNet50', 'ResNet101']
class ResNet50(nn.Module):
def __init__(self, num_classes, loss={'softmax'}, aligned=False, **kwargs):
super(ResNet50, self).__init__()
self.loss = loss
resnet50 = torchvision.models.resnet50(pretrained=True)
self.base = nn.Sequential(*list(resnet50.children())[:-2])
self.classifier = nn.Linear(2048, num_classes)
self.feat_dim = 2048 # feature dimension
self.aligned = aligned
self.horizon_pool = HorizontalMaxPool2d()
if self.aligned:
self.bn = nn.BatchNorm2d(2048)
self.relu = nn.ReLU(inplace=True)
self.conv1 = nn.Conv2d(2048, 128, kernel_size=1, stride=1, padding=0, bias=True)
def forward(self, x):
x = self.base(x)
if not self.training:
lf = self.horizon_pool(x)
if self.aligned and self.training:
lf = self.bn(x)
lf = self.relu(lf)
lf = self.horizon_pool(lf)
lf = self.conv1(lf)
if self.aligned or not self.training:
lf = lf.view(lf.size()[0:3])
lf = lf / torch.pow(lf,2).sum(dim=1, keepdim=True).clamp(min=1e-12).sqrt()
x = F.avg_pool2d(x, x.size()[2:])
f = x.view(x.size(0), -1)
#f = 1. * f / (torch.norm(f, 2, dim=-1, keepdim=True).expand_as(f) + 1e-12)
if not self.training:
return f,lf
y = self.classifier(f)
if self.loss == {'softmax'}:
return y
elif self.loss == {'metric'}:
if self.aligned: return f, lf
return f
elif self.loss == {'softmax', 'metric'}:
if self.aligned: return y, f, lf
return y, f
else:
raise KeyError("Unsupported loss: {}".format(self.loss))
class ResNet101(nn.Module):
def __init__(self, num_classes, loss={'softmax'}, aligned=False, **kwargs):
super(ResNet101, self).__init__()
self.loss = loss
resnet101 = torchvision.models.resnet101(pretrained=False)
self.base = nn.Sequential(*list(resnet101.children())[:-2])
self.classifier = nn.Linear(2048, num_classes)
self.feat_dim = 2048 # feature dimension
self.aligned = aligned
self.horizon_pool = HorizontalMaxPool2d()
if self.aligned:
self.bn = nn.BatchNorm2d(2048)
self.relu = nn.ReLU(inplace=True)
self.conv1 = nn.Conv2d(2048, 128, kernel_size=1, stride=1, padding=0, bias=True)
def forward(self, x):
x = self.base(x)
if not self.training:
lf = self.horizon_pool(x)
if self.aligned:
lf = self.bn(x)
lf = self.relu(lf)
lf = self.horizon_pool(lf)
lf = self.conv1(lf)
if self.aligned or not self.training:
lf = lf.view(lf.size()[0:3])
lf = lf / torch.pow(lf, 2).sum(dim=1, keepdim=True).clamp(min=1e-12).sqrt()
x = F.avg_pool2d(x, x.size()[2:])
f = x.view(x.size(0), -1)
# f = 1. * f / (torch.norm(f, 2, dim=-1, keepdim=True).expand_as(f) + 1e-12)
if not self.training:
return f, lf
y = self.classifier(f)
if self.loss == {'softmax'}:
return y
elif self.loss == {'metric'}:
if self.aligned: return f, lf
return f
elif self.loss == {'softmax', 'metric'}:
if self.aligned: return y, f, lf
return y, f
else:
raise KeyError("Unsupported loss: {}".format(self.loss))
from __future__ import absolute_import
import torch
from torch import nn
from torch.nn import functional as F
import torchvision
from aligned.HorizontalMaxPool2D import HorizontalMaxPool2d
__all__ = ['ShuffleNet']
class ChannelShuffle(nn.Module):
def __init__(self, num_groups):
super(ChannelShuffle, self).__init__()
self.g = num_groups
def forward(self, x):
b, c, h, w = x.size()
n = c // self.g  # channels per group (integer division so view() gets an int)
# reshape
x = x.view(b, self.g, n, h, w)
# transpose
x = x.permute(0, 2, 1, 3, 4).contiguous()
# flatten
x = x.view(b, c, h, w)
return x
class Bottleneck(nn.Module):
def __init__(self, in_channels, out_channels, stride, num_groups):
super(Bottleneck, self).__init__()
assert stride in [1, 2], "Warning: stride must be either 1 or 2"
self.stride = stride
mid_channels = out_channels // 4  # integer division so Conv2d gets an int channel count
if stride == 2: out_channels -= in_channels
self.conv1 = nn.Conv2d(in_channels, mid_channels, 1, groups=num_groups, bias=False)
self.bn1 = nn.BatchNorm2d(mid_channels)
self.shuffle1 = ChannelShuffle(num_groups)
self.conv2 = nn.Conv2d(mid_channels, mid_channels, 3, stride=stride, padding=1, groups=mid_channels, bias=False)
self.bn2 = nn.BatchNorm2d(mid_channels)
self.conv3 = nn.Conv2d(mid_channels, out_channels, 1, groups=num_groups, bias=False)
self.bn3 = nn.BatchNorm2d(out_channels)
if stride == 2: self.shortcut = nn.AvgPool2d(3, stride=2, padding=1)
def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = self.shuffle1(out)
out = self.bn2(self.conv2(out))
out = self.bn3(self.conv3(out))
if self.stride == 2:
res = self.shortcut(x)
out = F.relu(torch.cat([res, out], 1))
else:
out = F.relu(x + out)
return out
# configuration of (num_groups: #out_channels) based on Table 1 in the paper
cfg = {
1: [144, 288, 576],
2: [200, 400, 800],
3: [240, 480, 960],
4: [272, 544, 1088],
8: [384, 768, 1536],
}
class ShuffleNet(nn.Module):
"""ShuffleNet
Reference:
Zhang et al. ShuffleNet: An Extremely Efficient Convolutional Neural
Network for Mobile Devices. CVPR 2018.
"""
def __init__(self, num_classes, loss={'softmax'}, num_groups=3, aligned=False, **kwargs):
super(ShuffleNet, self).__init__()
self.loss = loss
self.conv1 = nn.Sequential(
nn.Conv2d(3, 24, 3, stride=2, padding=1, bias=False),
nn.BatchNorm2d(24),
nn.ReLU(),
nn.MaxPool2d(3, stride=2, padding=1),
)
self.stage2 = nn.Sequential(
Bottleneck(24, cfg[num_groups][0], 2, num_groups),
Bottleneck(cfg[num_groups][0], cfg[num_groups][0], 1, num_groups),
Bottleneck(cfg[num_groups][0], cfg[num_groups][0], 1, num_groups),
Bottleneck(cfg[num_groups][0], cfg[num_groups][0], 1, num_groups),
)
self.stage3 = nn.Sequential(
Bottleneck(cfg[num_groups][0], cfg[num_groups][1], 2, num_groups),
Bottleneck(cfg[num_groups][1], cfg[num_groups][1], 1, num_groups),
Bottleneck(cfg[num_groups][1], cfg[num_groups][1], 1, num_groups),
Bottleneck(cfg[num_groups][1], cfg[num_groups][1], 1, num_groups),
Bottleneck(cfg[num_groups][1], cfg[num_groups][1], 1, num_groups),
Bottleneck(cfg[num_groups][1], cfg[num_groups][1], 1, num_groups),
Bottleneck(cfg[num_groups][1], cfg[num_groups][1], 1, num_groups),
Bottleneck(cfg[num_groups][1], cfg[num_groups][1], 1, num_groups),
)
self.stage4 = nn.Sequential(
Bottleneck(cfg[num_groups][1], cfg[num_groups][2], 2, num_groups),
Bottleneck(cfg[num_groups][2], cfg[num_groups][2], 1, num_groups),
Bottleneck(cfg[num_groups][2], cfg[num_groups][2], 1, num_groups),
Bottleneck(cfg[num_groups][2], cfg[num_groups][2], 1, num_groups),
)
self.classifier = nn.Linear(cfg[num_groups][2], num_classes)
self.feat_dim = cfg[num_groups][2]
self.aligned = aligned
self.horizon_pool = HorizontalMaxPool2d()
def forward(self, x):
x = self.conv1(x)
x = self.stage2(x)
x = self.stage3(x)
x = self.stage4(x)
if self.aligned or not self.training:
lf = self.horizon_pool(x)
lf = lf.view(lf.size()[0:3])
lf = lf / torch.pow(lf, 2).sum(dim=1, keepdim=True).clamp(min=1e-12).sqrt()
f = F.avg_pool2d(x, x.size()[2:]).view(x.size(0), -1)
if not self.training:
return f, lf
y = self.classifier(f)
if self.loss == {'softmax'}:
return y
elif self.loss == {'metric'}:
if self.aligned: return f, lf
return f
elif self.loss == {'softmax', 'metric'}:
if self.aligned: return y, f, lf
return y, f
else:
raise KeyError("Unsupported loss: {}".format(self.loss))
from __future__ import absolute_import
from .ResNet import *
from .DenseNet import *
from .ShuffleNet import *
from .InceptionV4 import *
__factory = {
'resnet50': ResNet50,
'resnet101': ResNet101,
'densenet121': DenseNet121,
'shufflenet': ShuffleNet,
'inceptionv4': InceptionV4ReID,
}
def get_names():
return __factory.keys()
def init_model(name, *args, **kwargs):
if name not in __factory.keys():
raise KeyError("Unknown model: {}".format(name))
return __factory[name](*args, **kwargs)
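# Example (illustrative, not part of the original file): the aligned ResNet50 used
# elsewhere in this repo can be created with
#   model = init_model('resnet50', num_classes=751, loss={'softmax', 'metric'}, aligned=True)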
from __future__ import absolute_import
import sys
import time
import datetime
import argparse
import os
import os.path as osp
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import torch.backends.cudnn as cudnn
from torch.optim import lr_scheduler
import models
from util.losses import CrossEntropyLoss, DeepSupervision, CrossEntropyLabelSmooth, TripletLossAlignedReID
from util import data_manager
from util import transforms as T
from util.dataset_loader import ImageDataset
from util.utils import AverageMeter, Logger, save_checkpoint
from util.eval_metrics import evaluate
from util.optimizers import init_optim
from util.samplers import RandomIdentitySampler
from IPython import embed
parser = argparse.ArgumentParser(description='Train AlignedReID with cross entropy loss and triplet hard loss')
# Datasets
parser.add_argument('--root', type=str, default='data', help="root path to data directory")
parser.add_argument('-d', '--dataset', type=str, default='market1501',
choices=data_manager.get_names())
parser.add_argument('-j', '--workers', default=4, type=int,
help="number of data loading workers (default: 4)")
parser.add_argument('--height', type=int, default=256,
help="height of an image (default: 256)")
parser.add_argument('--width', type=int, default=128,
help="width of an image (default: 128)")
parser.add_argument('--split-id', type=int, default=0, help="split index")
# CUHK03-specific setting
parser.add_argument('--cuhk03-labeled', action='store_true',
help="whether to use labeled images, if false, detected images are used (default: False)")
parser.add_argument('--cuhk03-classic-split', action='store_true',
help="whether to use classic split by Li et al. CVPR'14 (default: False)")
parser.add_argument('--use-metric-cuhk03', action='store_true',
help="whether to use cuhk03-metric (default: False)")
# Optimization options
parser.add_argument('--labelsmooth', action='store_true', help="label smooth")
parser.add_argument('--optim', type=str, default='adam', help="optimization algorithm (see optimizers.py)")
parser.add_argument('--max-epoch', default=300, type=int,
help="maximum epochs to run")
parser.add_argument('--start-epoch', default=0, type=int,
help="manual epoch number (useful on restarts)")
parser.add_argument('--train-batch', default=32, type=int,
help="train batch size")
parser.add_argument('--test-batch', default=32, type=int, help="test batch size")
parser.add_argument('--lr', '--learning-rate', default=0.0002, type=float,
help="initial learning rate")
parser.add_argument('--stepsize', default=150, type=int,
help="stepsize to decay learning rate (>0 means this is enabled)")
parser.add_argument('--gamma', default=0.1, type=float,
help="learning rate decay")
parser.add_argument('--weight-decay', default=5e-04, type=float,
help="weight decay (default: 5e-04)")
# triplet hard loss
parser.add_argument('--margin', type=float, default=0.3, help="margin for triplet loss")
parser.add_argument('--num-instances', type=int, default=4,
help="number of instances per identity")
parser.add_argument('--htri-only', action='store_true', default=False,
help="if this is True, only htri loss is used in training")
# Architecture
parser.add_argument('-a', '--arch', type=str, default='resnet50', choices=models.get_names())
# Miscs
parser.add_argument('--print-freq', type=int, default=10, help="print frequency")
parser.add_argument('--seed', type=int, default=1, help="manual seed")
parser.add_argument('--resume', type=str, default='', metavar='PATH')
parser.add_argument('--evaluate', action='store_true', help="evaluation only")
parser.add_argument('--eval-step', type=int, default=-1,
help="run evaluation for every N epochs (set to -1 to test after training)")
parser.add_argument('--start-eval', type=int, default=0, help="start to evaluate after specific epoch")
parser.add_argument('--save-dir', type=str, default='log')
parser.add_argument('--use_cpu', action='store_true', help="use cpu")
parser.add_argument('--gpu-devices', default='0', type=str, help='gpu device ids for CUDA_VISIBLE_DEVICES')
parser.add_argument('--reranking',action= 'store_true', help= 'result re_ranking')
parser.add_argument('--test_distance',type = str, default='global', help= 'test distance type')
parser.add_argument('--unaligned',action= 'store_true', help= 'test local feature with unalignment')
args = parser.parse_args()
def main():
use_gpu = torch.cuda.is_available()
if args.use_cpu: use_gpu = False
pin_memory = True if use_gpu else False
if not args.evaluate:
sys.stdout = Logger(osp.join(args.save_dir, 'log_train.txt'))
else:
sys.stdout = Logger(osp.join(args.save_dir, 'log_test.txt'))
print("==========\nArgs:{}\n==========".format(args))
if use_gpu:
print("Currently using GPU {}".format(args.gpu_devices))
os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
cudnn.benchmark = True
torch.cuda.manual_seed_all(args.seed)
else:
print("Currently using CPU (GPU is highly recommended)")
print("Initializing dataset {}".format(args.dataset))
dataset = data_manager.init_img_dataset(
root=args.root, name=args.dataset, split_id=args.split_id,
cuhk03_labeled=args.cuhk03_labeled, cuhk03_classic_split=args.cuhk03_classic_split,
)
# data augmentation
transform_train = T.Compose([
T.Random2DTranslation(args.height, args.width),
T.RandomHorizontalFlip(),
T.ToTensor(),
T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
transform_test = T.Compose([
T.Resize((args.height, args.width)),
T.ToTensor(),
T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
trainloader = DataLoader(
ImageDataset(dataset.train, transform=transform_train),
sampler=RandomIdentitySampler(dataset.train, num_instances=args.num_instances),
batch_size=args.train_batch, num_workers=args.workers,
pin_memory=pin_memory, drop_last=True,
)
queryloader = DataLoader(
ImageDataset(dataset.query, transform=transform_test),
batch_size=args.test_batch, shuffle=False, num_workers=args.workers,
pin_memory=pin_memory, drop_last=False,
)
galleryloader = DataLoader(
ImageDataset(dataset.gallery, transform=transform_test),
batch_size=args.test_batch, shuffle=False, num_workers=args.workers,
pin_memory=pin_memory, drop_last=False,
)
print("Initializing model: {}".format(args.arch))
model = models.init_model(name=args.arch, num_classes=dataset.num_train_pids, loss={'softmax','metric'}, aligned =True, use_gpu=use_gpu)
print("Model size: {:.5f}M".format(sum(p.numel() for p in model.parameters())/1000000.0))
if args.labelsmooth:
criterion_class = CrossEntropyLabelSmooth(num_classes=dataset.num_train_pids, use_gpu=use_gpu)
else:
criterion_class = CrossEntropyLoss(use_gpu=use_gpu)
criterion_metric = TripletLossAlignedReID(margin=args.margin)
optimizer = init_optim(args.optim, model.parameters(), args.lr, args.weight_decay)
if args.stepsize > 0:
scheduler = lr_scheduler.StepLR(optimizer, step_size=args.stepsize, gamma=args.gamma)
start_epoch = args.start_epoch
if args.resume:
print("Loading checkpoint from '{}'".format(args.resume))
checkpoint = torch.load(args.resume)
model.load_state_dict(checkpoint['state_dict'])
start_epoch = checkpoint['epoch']
if use_gpu:
model = nn.DataParallel(model).cuda()
if args.evaluate:
print("Evaluate only")
test(model, queryloader, galleryloader, use_gpu)
return 0
start_time = time.time()
train_time = 0
best_rank1 = -np.inf
best_epoch = 0
print("==> Start training")
for epoch in range(start_epoch, args.max_epoch):
start_train_time = time.time()
train(epoch, model, criterion_class, criterion_metric, optimizer, trainloader, use_gpu)
train_time += round(time.time() - start_train_time)
if args.stepsize > 0: scheduler.step()
if (epoch + 1) > args.start_eval and args.eval_step > 0 and (epoch + 1) % args.eval_step == 0 or (
epoch + 1) == args.max_epoch:
print("==> Test")
rank1 = test(model, queryloader, galleryloader, use_gpu)
is_best = rank1 > best_rank1
if is_best:
best_rank1 = rank1
best_epoch = epoch + 1
if use_gpu:
state_dict = model.module.state_dict()
else:
state_dict = model.state_dict()
save_checkpoint({
'state_dict': state_dict,
'rank1': rank1,
'epoch': epoch,
}, is_best, osp.join(args.save_dir, 'checkpoint_ep' + str(epoch + 1) + '.pth.tar'))
print("==> Best Rank-1 {:.1%}, achieved at epoch {}".format(best_rank1, best_epoch))
elapsed = round(time.time() - start_time)
elapsed = str(datetime.timedelta(seconds=elapsed))
train_time = str(datetime.timedelta(seconds=train_time))
print("Finished. Total elapsed time (h:m:s): {}. Training time (h:m:s): {}.".format(elapsed, train_time))
def train(epoch, model, criterion_class, criterion_metric, optimizer, trainloader, use_gpu):
model.train()
losses = AverageMeter()
batch_time = AverageMeter()
data_time = AverageMeter()
xent_losses = AverageMeter()
global_losses = AverageMeter()
local_losses = AverageMeter()
end = time.time()
for batch_idx, (imgs, pids, _) in enumerate(trainloader):
if use_gpu:
imgs, pids = imgs.cuda(), pids.cuda()
# measure data loading time
data_time.update(time.time() - end)
outputs, features, local_features = model(imgs)
if args.htri_only:
if isinstance(features, tuple):
global_loss, local_loss = DeepSupervision(criterion_metric, features, pids, local_features)
else:
global_loss, local_loss = criterion_metric(features, pids, local_features)
else:
if isinstance(outputs, tuple):
xent_loss = DeepSupervision(criterion_class, outputs, pids)
else:
xent_loss = criterion_class(outputs, pids)
if isinstance(features, tuple):
global_loss, local_loss = DeepSupervision(criterion_metric, features, pids, local_features)
else:
global_loss, local_loss = criterion_metric(features, pids, local_features)
loss = xent_loss + global_loss + local_loss
optimizer.zero_grad()
loss.backward()
optimizer.step()
batch_time.update(time.time() - end)
end = time.time()
losses.update(loss.item(), pids.size(0))
xent_losses.update(xent_loss.item(), pids.size(0))
global_losses.update(global_loss.item(), pids.size(0))
local_losses.update(local_loss.item(), pids.size(0))
if (batch_idx+1) % args.print_freq == 0:
print('Epoch: [{0}][{1}/{2}]\t'
'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
'CLoss {xent_loss.val:.4f} ({xent_loss.avg:.4f})\t'
'GLoss {global_loss.val:.4f} ({global_loss.avg:.4f})\t'
'LLoss {local_loss.val:.4f} ({local_loss.avg:.4f})\t'.format(
epoch+1, batch_idx+1, len(trainloader), batch_time=batch_time,data_time=data_time,
loss=losses,xent_loss=xent_losses, global_loss=global_losses, local_loss = local_losses))
def test(model, queryloader, galleryloader, use_gpu, ranks=[1, 5, 10, 20]):
batch_time = AverageMeter()
model.eval()
with torch.no_grad():
qf, q_pids, q_camids, lqf = [], [], [], []
for batch_idx, (imgs, pids, camids) in enumerate(queryloader):
if use_gpu: imgs = imgs.cuda()
end = time.time()
features, local_features = model(imgs)
batch_time.update(time.time() - end)
features = features.data.cpu()
local_features = local_features.data.cpu()
qf.append(features)
lqf.append(local_features)
q_pids.extend(pids)
q_camids.extend(camids)
qf = torch.cat(qf, 0)
lqf = torch.cat(lqf,0)
q_pids = np.asarray(q_pids)
q_camids = np.asarray(q_camids)
print("Extracted features for query set, obtained {}-by-{} matrix".format(qf.size(0), qf.size(1)))
gf, g_pids, g_camids, lgf = [], [], [], []
end = time.time()
for batch_idx, (imgs, pids, camids) in enumerate(galleryloader):
if use_gpu: imgs = imgs.cuda()
end = time.time()
features, local_features = model(imgs)
batch_time.update(time.time() - end)
features = features.data.cpu()
local_features = local_features.data.cpu()
gf.append(features)
lgf.append(local_features)
g_pids.extend(pids)
g_camids.extend(camids)
gf = torch.cat(gf, 0)
lgf = torch.cat(lgf,0)
g_pids = np.asarray(g_pids)
g_camids = np.asarray(g_camids)
print("Extracted features for gallery set, obtained {}-by-{} matrix".format(gf.size(0), gf.size(1)))
print("==> BatchTime(s)/BatchSize(img): {:.3f}/{}".format(batch_time.avg, args.test_batch))
# feature normalization
qf = 1. * qf / (torch.norm(qf, 2, dim = -1, keepdim=True).expand_as(qf) + 1e-12)
gf = 1. * gf / (torch.norm(gf, 2, dim = -1, keepdim=True).expand_as(gf) + 1e-12)
m, n = qf.size(0), gf.size(0)
distmat = torch.pow(qf, 2).sum(dim=1, keepdim=True).expand(m, n) + \
torch.pow(gf, 2).sum(dim=1, keepdim=True).expand(n, m).t()
distmat.addmm_(qf, gf.t(), beta=1, alpha=-2)  # distmat = distmat - 2 * qf @ gf^T (keyword form for newer PyTorch)
distmat = distmat.numpy()
if not args.test_distance== 'global':
print("Computing local distances")
from util.distance import low_memory_local_dist
lqf = lqf.permute(0,2,1)
lgf = lgf.permute(0,2,1)
local_distmat = low_memory_local_dist(lqf.numpy(),lgf.numpy(),aligned= not args.unaligned)
if args.test_distance== 'local':
print("Only using local branch")
distmat = local_distmat
if args.test_distance == 'global_local':
print("Using global and local branches")
distmat = local_distmat+distmat
print("Computing CMC and mAP")
cmc, mAP = evaluate(distmat, q_pids, g_pids, q_camids, g_camids, use_metric_cuhk03=args.use_metric_cuhk03)
print("Results ----------")
print("mAP: {:.1%}".format(mAP))
print("CMC curve")
for r in ranks:
print("Rank-{:<3}: {:.1%}".format(r, cmc[r - 1]))
print("------------------")
if args.reranking:
from util.re_ranking import re_ranking
if args.test_distance == 'global':
print("Only using global branch for reranking")
distmat = re_ranking(qf,gf,k1=20, k2=6, lambda_value=0.3)
else:
local_qq_distmat = low_memory_local_dist(lqf.numpy(), lqf.numpy(),aligned= not args.unaligned)
local_gg_distmat = low_memory_local_dist(lgf.numpy(), lgf.numpy(),aligned= not args.unaligned)
local_dist = np.concatenate(
[np.concatenate([local_qq_distmat, local_distmat], axis=1),
np.concatenate([local_distmat.T, local_gg_distmat], axis=1)],
axis=0)
if args.test_distance == 'local':
print("Only using local branch for reranking")
distmat = re_ranking(qf,gf,k1=20,k2=6,lambda_value=0.3,local_distmat=local_dist,only_local=True)
elif args.test_distance == 'global_local':
print("Using global and local branches for reranking")
distmat = re_ranking(qf,gf,k1=20,k2=6,lambda_value=0.3,local_distmat=local_dist,only_local=False)
print("Computing CMC and mAP for re_ranking")
cmc, mAP = evaluate(distmat, q_pids, g_pids, q_camids, g_camids, use_metric_cuhk03=args.use_metric_cuhk03)
print("Results ----------")
print("mAP(RK): {:.1%}".format(mAP))
print("CMC curve(RK)")
for r in ranks:
print("Rank-{:<3}: {:.1%}".format(r, cmc[r - 1]))
print("------------------")
return cmc[0]
if __name__ == '__main__':
main()