import os
import numpy as np
import torch
from util.FeatureExtractor import FeatureExtractor
from torchvision import transforms
from IPython import embed
import models
from scipy.spatial.distance import cosine, euclidean
from util.utils import *
from sklearn.preprocessing import normalize
def pool2d(tensor, type='max'):
    """Pool a [1, C, H, W] feature map into 8 horizontal stripes and return an (8, C) array."""
    sz = tensor.size()
    if type == 'max':
        x = torch.nn.functional.max_pool2d(tensor, kernel_size=(sz[2] // 8, sz[3]))
    if type == 'mean':
        # torch has no mean_pool2d; average pooling is avg_pool2d
        x = torch.nn.functional.avg_pool2d(tensor, kernel_size=(sz[2] // 8, sz[3]))
    x = x[0].cpu().data.numpy()
    x = np.transpose(x, (2, 1, 0))[0]
    return x
if __name__ == '__main__':
    os.environ['CUDA_VISIBLE_DEVICES'] = "0"
    use_gpu = torch.cuda.is_available()
    model = models.init_model(name='resnet50', num_classes=751, loss={'softmax', 'metric'}, use_gpu=use_gpu, aligned=True)
    checkpoint = torch.load("./log/market1501/alignedreid/checkpoint_ep300.pth.tar")
    model.load_state_dict(checkpoint['state_dict'])
    img_transform = transforms.Compose([
        transforms.Resize((256, 128)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    exact_list = ['7']
    myexactor = FeatureExtractor(model, exact_list)
    img_path1 = './data/market1501/query/0001_c1s1_001051_00.jpg'
    img_path2 = './data/market1501/query/0001_c2s1_000301_00.jpg'
    img1 = read_image(img_path1)
    img2 = read_image(img_path2)
    img1 = img_to_tensor(img1, img_transform)
    img2 = img_to_tensor(img2, img_transform)
    if use_gpu:
        model = model.cuda()
        img1 = img1.cuda()
        img2 = img2.cuda()
    model.eval()
    f1 = myexactor(img1)
    f2 = myexactor(img2)
    # pool each feature map into 8 horizontal stripes and L2-normalize per stripe
    a1 = normalize(pool2d(f1[0], type='max'))
    a2 = normalize(pool2d(f2[0], type='max'))
    # 8x8 matrix of stripe-to-stripe Euclidean distances for the alignment visualization
    dist = np.zeros((8, 8))
    for i in range(8):
        temp_feat1 = a1[i]
        for j in range(8):
            temp_feat2 = a2[j]
            dist[i][j] = euclidean(temp_feat1, temp_feat2)
    show_alignedreid(img_path1, img_path2, dist)
MIT License
Copyright (c) 2018 Hao Luo
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
# AlignedReID
## Paper
[AlignedReID](https://www.sciencedirect.com/science/article/pii/S0031320319302031?via%3Dihub#!)
## Model Overview
AlignedReID is a person re-identification (ReID) algorithm proposed by Megvii, and the first reported to surpass human-level performance on this task. Its core idea is local feature alignment, which markedly improves the capture of fine-grained details.
#### Core Methods
**Local alignment:** dynamic programming finds a local shortest path, which keeps the relative order of body parts while minimizing the alignment distance (see the sketch below).
**Loss function:** the shortest-path length enters the local loss and assists the learning of the global feature.
**Dataset support:** mainly the Market1501 dataset, consisting of a training set (751 identities, 12,936 images), a test set (750 identities, 19,732 images) and a query set (750 identities, 3,368 images).
**Code implementation**
**PyTorch framework:** supports dataset loading, feature extraction and loss computation.
**Key functions:** `shortest_dist` (shortest path via dynamic programming) and `local_dist` (local feature matching).
**Performance:** by optimizing local details it handles occlusion and pose variation well, and has become one of the standard baselines in person re-identification.
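The local alignment step can be sketched in a few lines (a minimal illustration with random features, not the repository's implementation; the real logic lives in `shortest_dist`, `local_dist` and `batch_local_dist` further down in this repo):
```python
import numpy as np

def aligned_distance(a, b):
    """DMLI-style shortest-path distance between two sets of stripe features.
    a, b: arrays of shape [parts, channels], e.g. 8 horizontal stripes each."""
    m, n = a.shape[0], b.shape[0]
    # pairwise Euclidean distances between stripes, squashed into (0, 1)
    d = np.linalg.norm(a[:, None, :] - b[None, :, :], axis=-1)
    d = (np.exp(d) - 1.0) / (np.exp(d) + 1.0)
    # dynamic programming: only right/down moves, so the stripe order is preserved
    acc = np.zeros((m, n))
    for i in range(m):
        for j in range(n):
            if i == 0 and j == 0:
                acc[i, j] = d[i, j]
            elif i == 0:
                acc[i, j] = acc[i, j - 1] + d[i, j]
            elif j == 0:
                acc[i, j] = acc[i - 1, j] + d[i, j]
            else:
                acc[i, j] = min(acc[i - 1, j], acc[i, j - 1]) + d[i, j]
    return acc[-1, -1]

# example: two persons, each represented by 8 stripes of 128-d local features
print(aligned_distance(np.random.rand(8, 128), np.random.rand(8, 128)))
```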
## Environment Dependencies
| Software | Version |
| :------: | :------: |
| DTK | 25.04.2 |
| python | 3.10.12 |
| transformers | 4.57.1 |
| vllm | 0.11.0+das.opt1.alpha.8e22ded.dtk25042 |
| torch | 2.5.1+das.opt1.dtk25042 |
| triton | 3.1+das.opt1.3c5d12d.dtk25041 |
| flash_attn | 2.6.1+das.opt1.dtk2504 |
| flash_mla | 1.0.0+das.opt1.dtk25042 |
Currently only the following image is supported:
- adjust the mount path passed to `-v` to match where your code and models actually live
```bash
docker run -it --shm-size 60g --network=host --name minimax_m2 --privileged --device=/dev/kfd --device=/dev/dri --device=/dev/mkfd --group-add video --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -u root -v /opt/hyhal/:/opt/hyhal/:ro -v /path/your_code_path/:/path/your_code_path/ image.sourcefind.cn:5000/dcu/admin/base/pytorch:2.5.1-ubuntu22.04-dtk25.04.2-py3.10 bash
```
More images are available for download from [光源](https://sourcefind.cn/#/service-list).
The special deep-learning libraries required for this project's DCU cards can be downloaded from the [光合](https://developer.sourcefind.cn/tool/) developer community.
## Datasets
Create a directory under this repo to store the reid datasets:
```bash
cd AlignedReID/
mkdir data/
```
### Market1501
1. Download the dataset to `data/` from http://www.liangzheng.org/Project/project_reid.html.
2. Extract the data and rename the folder to `market1501`. The data structure should look like:
```
market1501/
bounding_box_test/
bounding_box_train/
...
```
3. Use the argument `-d market1501` to select the dataset when launching the training code.
### CUHK03
1. Create a folder named cuhk03/ under data/.
2. Download the dataset from http://www.ee.cuhk.edu.hk/~xgwang/CUHK_identification.html to data/cuhk03/ and extract cuhk03_release.zip, so you will have data/cuhk03/cuhk03_release.
3. Download the new split [14] from person-re-ranking. What you need are cuhk03_new_protocol_config_detected.mat and cuhk03_new_protocol_config_labeled.mat. Put these two mat files under data/cuhk03. The final data structure looks like:
```
cuhk03/
cuhk03_release/
cuhk03_new_protocol_config_detected.mat
cuhk03_new_protocol_config_labeled.mat
...
```
4. Use -d cuhk03 when running the training code. In the default mode we use the new split (767/700). If you want to use the original split (1367/100) created by [13], specify --cuhk03-classic-split. Since [13] computes CMC differently from Market1501, you may need to specify --use-metric-cuhk03 for a fair comparison with their method. In addition, both labeled and detected modes are supported; the default mode loads detected images. Specify --cuhk03-labeled if you want to train and test on labeled images.
### DukeMTMC-reID
1. Create a directory named dukemtmc-reid under data/.
2. Download the dataset DukeMTMC-reID.zip from https://github.com/layumi/DukeMTMC-reID_evaluation#download-dataset and put it under data/dukemtmc-reid. Extract the zip file, which gives:
```bash
dukemtmc-reid/
DukeMTMC-reid.zip # (you can delete this zip file, it is ok)
DukeMTMC-reid/ # this folder contains 8 files.
```
3. Use -d dukemtmcreid when running the training code.
### MSMT17
1. Create a directory named msmt17/ under data/.
2. Download the dataset MSMT17_V1.tar.gz from http://www.pkuvmc.com/publications/msmt17.html to data/msmt17/. Extract the file in the same folder, so you will have:
```bash
msmt17/
MSMT17_V1.tar.gz # (do whatever you want with this .tar file)
MSMT17_V1/
train/
test/
list_train.txt
... (six .txt files in total)
```
3. Use -d msmt17 when running the training code.
## Training
We recommend training on the CUHK03 and MSMT17 datasets for future research; the command is:
```bash
python train_alignedreid.py -d cuhk03 -a resnet50 --test_distance global_local --reranking (--labelsmooth)
```
## Inference
Global + Local (DMLI)
```bash
python train_alignedreid.py -d cuhk03 -a resnet50 --evaluate --resume YOUR_MODEL_PATH --save-dir log/resnet50-cuhk03-alignedreid --test_distance global_local (--reranking)
```
Local (DMLI)
```bash
python train_alignedreid.py -d cuhk03 -a resnet50 --evaluate --resume YOUR_MODEL_PATH --save-dir log/resnet50-cuhk03-alignedreid --test_distance local (--reranking)
```
## Demo Results
<div align=center>
<img src="imgs/Figure_1.png"/>
</div>
### Accuracy
DCU accuracy is consistent with GPU; inference framework: PyTorch.
## Pretrained Weights
| Model | Weight size | DCU model | Minimum cards required | Download |
|:-----:|:----------:|:----------:|:---------------------:|:----------:|
| ResNet50 | 96.8MB | K100AI | 1 | [Download](https://pan.baidu.com/s/1Zbx_K2Cm1cgUUTNYJRImMA) |
## Source Repository and Feedback
- https://developer.sourcefind.cn/codes/modelzoo/alignedreid_pytorch
## References
- https://github.com/michuanhaohao/AlignedReID
# AlignedReID++ (Pattern Recognition)
Alignedreid++: Dynamically Matching Local Information for Person Re-Identification.
[[PDF]](https://www.sciencedirect.com/science/article/pii/S0031320319302031?via%3Dihub#!)
```
@article{luo2019alignedreid++,
title={AlignedReID++: Dynamically matching local information for person re-identification},
author={Luo, Hao and Jiang, Wei and Zhang, Xuan and Fan, Xing and Qian, Jingjing and Zhang, Chi},
journal={Pattern Recognition},
volume={94},
pages={53--61},
year={2019},
publisher={Elsevier}
}
@article{zhang2017alignedreid,
title={Alignedreid: Surpassing human-level performance in person re-identification},
author={Zhang, Xuan and Luo, Hao and Fan, Xing and Xiang, Weilai and Sun, Yixiao and Xiao, Qiqi and Jiang, Wei and Zhang, Chi and Sun, Jian},
journal={arXiv preprint arXiv:1711.08184},
year={2017}
}
```
# Version
- Python 2 / Python 3
- torch 0.4.0
- torchvision 0.2.1
Now we support ResNet, ShuffleNet, DenseNet and InceptionV4.
## Demo
<img src='imgs/Figure_1.png' align="right" width=415>
<img src='imgs/Figure_0.png' align="left" width=415>
## Have a try
You can test the demo with your own model and datasets. You need to change the paths to the model and images manually. The default model is ResNet50 trained on Market1501.
```bash
python Alignedreid_demo.py
```
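For reference, these are the values you would edit (copied from the demo script included in this repository; the `checkpoint_path` name is only for illustration, the demo calls `torch.load` inline):
```python
# Paths used by Alignedreid_demo.py; adapt them to your own checkpoint and images.
checkpoint_path = "./log/market1501/alignedreid/checkpoint_ep300.pth.tar"  # trained weights
img_path1 = './data/market1501/query/0001_c1s1_001051_00.jpg'              # first query image
img_path2 = './data/market1501/query/0001_c2s1_000301_00.jpg'              # second query image
```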
## Results (rank1/mAP) and models
#### Market1501
| Model | Loss | Global | Local | DMLI | Global+DMLI | Global+DMLI(RK) |Download|
| --- | :---: | :---: | :---: | :---: | :---: | :---: | :---: |
| Resnet50 | Alignedreid | 89.2/75.9 | 90.7/75.5 | 91.1/77.4 | 91.0/77.6 | 92.0/88.5 | [model](https://pan.baidu.com/s/1Zbx_K2Cm1cgUUTNYJRImMA) |
| Resnet50 | Alignedreid(LS) | 90.6/77.7 | 91.4/76.7 | 91.9/78.8 | 91.8/79.1 | 92.8/89.4 | [model](https://pan.baidu.com/s/12JHXjGMzdEv6BsNhpeMYbQ) |
#### DukeMTMCReID
| Model | Loss | Global | Local | DMLI | Global+DMLI | Global+DMLI(RK) |Download|
| --- | :---: | :---: | :---: | :---: | :---: | :---: | :---: |
| Resnet50 | Alignedreid | 79.3/65.6 | 80.9/66.9 | 81.0/67.7 | 80.7/68.0 | 85.2/81.2 |[model](https://pan.baidu.com/s/1RJg7cU1QKdGKJsndJU3dlA)|
| Resnet50 | Alignedreid(LS) | 81.2/67.4 | 81.5/68.4 | 81.8/69.4 | 82.1/69.7 | 86.2/82.8 |[model](https://pan.baidu.com/s/1CW-ii3lpYnlX7n-JppliVw) |
#### CUHK03
| Model | Loss | Global | Local | DMLI | Global+DMLI | Global+DMLI(RK) |Download|
| --- | :---: | :---: | :---: | :---: | :---: | :---: | :---: |
| Resnet50 | Alignedreid | 60.7/58.4 | 60.2/58.2 | 60.9/59.6 | 60.9/59.7 | 67.6/70.7 |[model](https://pan.baidu.com/s/1YI8gs_SnoKfWnQyU-HrK8g)|
| Resnet50 | Alignedreid(LS) | 59.7/58.1 | 59.9/57.2 | 61.1/59.4 | 61.5/59.6 | 67.9/70.7 |[model](https://pan.baidu.com/s/1AVxGRBhucNUfUYTeIXyP9A) |
#### MSMT17
| Model | Loss | Global | Local | DMLI | Global+DMLI |Download|
| --- | :---: | :---: | :---: | :---: | :---: | :---: |
| Resnet50 | Alignedreid | 63.4/38.4 | 63.8 | 66.3/40.2 | 66.3/40.6 |[model](https://pan.baidu.com/s/1E7rV4PCDoDmAIWjwBnclwg)|
| Resnet50 | Alignedreid(LS) | 67.6/41.8 | 67.3/38.4| 69.6/43.3 | 69.8/43.7 |[model](https://pan.baidu.com/s/1D46g8D_OvnUfu43cWKK83Q) |
#### Market1501-Partial
| Model | Loss | Global | Local | DMLI |
| --- | :---: | :---: | :---: | :---: |
| Resnet50 | Softmax | 59.0/46.4 | 56.5/43.7 | 63.3/50.0 |
| Resnet50 | Softmax+TriHard | 62.4/49.7 | 51.8/37.6 | 68.0/52.7 |
| Resnet50 | Alignedreid | 65.9/53.5 | 52.8/38.1 | 70.1/55.3 |
#### DukeMTMCReID-Partial
| Model | Loss | Global | Local | DMLI |
| --- | :---: | :---: | :---: | :---: |
| Resnet50 | Softmax | 45.9/34.7 | 48.6/36.1 | 53.6/40.6 |
| Resnet50 | Softmax+TriHard | 47.8/36.4 | 43.3/31.5 | 53.7/40.5 |
| Resnet50 | Alignedreid | 49.8/38.2 | 44.8/33.3 | 55.3/42.8 |
You can download the models on [Google Drive](https://drive.google.com/open?id=1-QApSAY51NvRcQgyUxCn8lP1sSWCeMVg).
# Prepare data
Create a directory to store reid datasets under this repo via
```bash
cd AlignedReID/
mkdir data/
```
If you wanna store datasets in another directory, you need to specify `--root path_to_your/data` when running the training code. Please follow the instructions below to prepare each dataset. After that, you can simply do `-d the_dataset` when running the training code.
**Market1501** :
1. Download dataset to `data/` from http://www.liangzheng.org/Project/project_reid.html.
2. Extract dataset and rename to `market1501`. The data structure would look like:
```
market1501/
bounding_box_test/
bounding_box_train/
...
```
3. Use `-d market1501` when running the training code.
**CUHK03** [13]:
1. Create a folder named `cuhk03/` under `data/`.
2. Download dataset to `data/cuhk03/` from http://www.ee.cuhk.edu.hk/~xgwang/CUHK_identification.html and extract `cuhk03_release.zip`, so you will have `data/cuhk03/cuhk03_release`.
3. Download new split [14] from [person-re-ranking](https://github.com/zhunzhong07/person-re-ranking/tree/master/evaluation/data/CUHK03). What you need are `cuhk03_new_protocol_config_detected.mat` and `cuhk03_new_protocol_config_labeled.mat`. Put these two mat files under `data/cuhk03`. Finally, the data structure would look like
```
cuhk03/
cuhk03_release/
cuhk03_new_protocol_config_detected.mat
cuhk03_new_protocol_config_labeled.mat
...
```
4. Use `-d cuhk03` when running the training code. In default mode, we use new split (767/700). If you wanna use the original splits (1367/100) created by [13], specify `--cuhk03-classic-split`. As [13] computes CMC differently from Market1501, you might need to specify `--use-metric-cuhk03` for fair comparison with their method. In addition, we support both `labeled` and `detected` modes. The default mode loads `detected` images. Specify `--cuhk03-labeled` if you wanna train and test on `labeled` images.
**DukeMTMC-reID** [16, 17]:
1. Create a directory under `data/` called `dukemtmc-reid`.
2. Download dataset `DukeMTMC-reID.zip` from https://github.com/layumi/DukeMTMC-reID_evaluation#download-dataset and put it to `data/dukemtmc-reid`. Extract the zip file, which leads to
```
dukemtmc-reid/
DukeMTMC-reid.zip # (you can delete this zip file, it is ok)
DukeMTMC-reid/ # this folder contains 8 files.
```
3. Use `-d dukemtmcreid` when running the training code.
**MSMT17** [22]:
1. Create a directory named `msmt17/` under `data/`.
2. Download dataset `MSMT17_V1.tar.gz` to `data/msmt17/` from http://www.pkuvmc.com/publications/msmt17.html. Extract the file under the same folder, so you will have
```
msmt17/
MSMT17_V1.tar.gz # (do whatever you want with this .tar file)
MSMT17_V1/
train/
test/
list_train.txt
... (totally six .txt files)
```
3. Use `-d msmt17` when running the training code.
# Train
Since performance on Market1501 and DukeMTMCReID is already very high, we suggest using CUHK03 and MSMT17 for future research.
```bash
python train_alignedreid.py -d cuhk03 -a resnet50 --test_distance global_local --reranking (--labelsmooth)
```
**Note:** You can add your own experimental settings through the `args` options.
# Test
#### Global+Local(DMLI)
```bash
python train_alignedreid.py -d cuhk03 -a resnet50 --evaluate --resume YOUR_MODEL_PATH --save-dir log/resnet50-cuhk03-alignedreid --test_distance global_local (--reranking)
```
#### Local(DMLI)
```bash
python train_alignedreid.py -d cuhk03 -a resnet50 --evaluate --resume YOUR_MODEL_PATH --save-dir log/resnet50-cuhk03-alignedreid --test_distance local (--reranking)
```
#### Local(Without DMLI)
```bash
python train_alignedreid.py -d cuhk03 -a resnet50 --evaluate --resume YOUR_MODEL_PATH --save-dir log/resnet50-cuhk03-alignedreid --test_distance local --unaligned (--reranking)
```
#### Global
```bash
python train_alignedreid.py -d cuhk03 -a resnet50 --evaluate --resume YOUR_MODEL_PATH --save-dir log/resnet50-cuhk03-alignedreid --test_distance global (--reranking)
```
**Note:** `(--reranking)` is an optional flag; add it to apply 'Re-ranking with k-reciprocal Encoding (CVPR 2017)' and boost the performance.
## Test on Partial ReID
```bash
scp -r data/market1501 data/market1501-partial
python gen_partial_dataset.py
python train_alignedreid.py -d market1501-partial -a resnet50 --evaluate --resume YOUR_MODEL_PATH --save-dir log/resnet50-market1501-partial-alignedreid --test_distance local (--unaligned)
```
import torch.nn as nn

class HorizontalMaxPool2d(nn.Module):
    def __init__(self):
        super(HorizontalMaxPool2d, self).__init__()

    def forward(self, x):
        inp_size = x.size()
        # max-pool across the full width, keeping one vector per horizontal stripe
        return nn.functional.max_pool2d(input=x, kernel_size=(1, inp_size[3]))
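# Minimal shape check (illustrative only, not part of the original file): for a
# ResNet50 feature map of shape [N, 2048, 8, 4], horizontal max pooling keeps one
# 2048-d vector per horizontal stripe, i.e. the output has shape [N, 2048, 8, 1].
if __name__ == '__main__':
    import torch
    feat = torch.randn(2, 2048, 8, 4)
    print(HorizontalMaxPool2d()(feat).shape)  # torch.Size([2, 2048, 8, 1])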
import torch
def batch_euclidean_dist(x, y):
"""
Args:
x: pytorch Variable, with shape [Batch size, Local part, Feature channel]
y: pytorch Variable, with shape [Batch size, Local part, Feature channel]
Returns:
dist: pytorch Variable, with shape [Batch size, Local part, Local part]
"""
assert len(x.size()) == 3
assert len(y.size()) == 3
assert x.size(0) == y.size(0)
assert x.size(-1) == y.size(-1)
N, m, d = x.size()
N, n, d = y.size()
# shape [N, m, n]
xx = torch.pow(x, 2).sum(-1, keepdim=True).expand(N, m, n)
yy = torch.pow(y, 2).sum(-1, keepdim=True).expand(N, n, m).permute(0, 2, 1)
dist = xx + yy
dist.baddbmm_(x, y.permute(0, 2, 1), beta=1, alpha=-2)  # dist = dist - 2 * x @ y^T (keyword form for newer PyTorch)
dist = dist.clamp(min=1e-12).sqrt() # for numerical stability
return dist
def shortest_dist(dist_mat):
"""Parallel version.
Args:
dist_mat: pytorch Variable, available shape:
1) [m, n]
2) [m, n, N], N is batch size
3) [m, n, *], * can be arbitrary additional dimensions
Returns:
dist: three cases corresponding to `dist_mat`:
1) scalar
2) pytorch Variable, with shape [N]
3) pytorch Variable, with shape [*]
"""
m, n = dist_mat.size()[:2]
# Just offering some reference for accessing intermediate distance.
dist = [[0 for _ in range(n)] for _ in range(m)]
for i in range(m):
for j in range(n):
if (i == 0) and (j == 0):
dist[i][j] = dist_mat[i, j]
elif (i == 0) and (j > 0):
dist[i][j] = dist[i][j - 1] + dist_mat[i, j]
elif (i > 0) and (j == 0):
dist[i][j] = dist[i - 1][j] + dist_mat[i, j]
else:
dist[i][j] = torch.min(dist[i - 1][j], dist[i][j - 1]) + dist_mat[i, j]
dist = dist[-1][-1]
return dist
def hard_example_mining(dist_mat, labels, return_inds=False):
"""For each anchor, find the hardest positive and negative sample.
Args:
dist_mat: pytorch Variable, pair wise distance between samples, shape [N, N]
labels: pytorch LongTensor, with shape [N]
return_inds: whether to return the indices. Save time if `False`(?)
Returns:
dist_ap: pytorch Variable, distance(anchor, positive); shape [N]
dist_an: pytorch Variable, distance(anchor, negative); shape [N]
p_inds: pytorch LongTensor, with shape [N];
indices of selected hard positive samples; 0 <= p_inds[i] <= N - 1
n_inds: pytorch LongTensor, with shape [N];
indices of selected hard negative samples; 0 <= n_inds[i] <= N - 1
NOTE: Only consider the case in which all labels have same num of samples,
thus we can cope with all anchors in parallel.
"""
assert len(dist_mat.size()) == 2
assert dist_mat.size(0) == dist_mat.size(1)
N = dist_mat.size(0)
# shape [N, N]
is_pos = labels.expand(N, N).eq(labels.expand(N, N).t())
is_neg = labels.expand(N, N).ne(labels.expand(N, N).t())
# `dist_ap` means distance(anchor, positive)
# both `dist_ap` and `relative_p_inds` with shape [N, 1]
dist_ap, relative_p_inds = torch.max(
dist_mat[is_pos].contiguous().view(N, -1), 1, keepdim=True)
# `dist_an` means distance(anchor, negative)
# both `dist_an` and `relative_n_inds` with shape [N, 1]
dist_an, relative_n_inds = torch.min(
dist_mat[is_neg].contiguous().view(N, -1), 1, keepdim=True)
# shape [N]
dist_ap = dist_ap.squeeze(1)
dist_an = dist_an.squeeze(1)
if return_inds:
# shape [N, N]
ind = (labels.new().resize_as_(labels)
.copy_(torch.arange(0, N).long())
.unsqueeze( 0).expand(N, N))
# shape [N, 1]
p_inds = torch.gather(
ind[is_pos].contiguous().view(N, -1), 1, relative_p_inds.data)
n_inds = torch.gather(
ind[is_neg].contiguous().view(N, -1), 1, relative_n_inds.data)
# shape [N]
p_inds = p_inds.squeeze(1)
n_inds = n_inds.squeeze(1)
return dist_ap, dist_an, p_inds, n_inds
return dist_ap, dist_an
def euclidean_dist(x, y):
"""
Args:
x: pytorch Variable, with shape [m, d]
y: pytorch Variable, with shape [n, d]
Returns:
dist: pytorch Variable, with shape [m, n]
"""
m, n = x.size(0), y.size(0)
xx = torch.pow(x, 2).sum(1, keepdim=True).expand(m, n)
yy = torch.pow(y, 2).sum(1, keepdim=True).expand(n, m).t()
dist = xx + yy
dist.addmm_(x, y.t(), beta=1, alpha=-2)  # dist = dist - 2 * x @ y^T (keyword form for newer PyTorch)
dist = dist.clamp(min=1e-12).sqrt() # for numerical stability
return dist
def batch_local_dist(x, y):
"""
Args:
x: pytorch Variable, with shape [N, m, d]
y: pytorch Variable, with shape [N, n, d]
Returns:
dist: pytorch Variable, with shape [N]
"""
assert len(x.size()) == 3
assert len(y.size()) == 3
assert x.size(0) == y.size(0)
assert x.size(-1) == y.size(-1)
# shape [N, m, n]
dist_mat = batch_euclidean_dist(x, y)
dist_mat = (torch.exp(dist_mat) - 1.) / (torch.exp(dist_mat) + 1.)
# shape [N]
dist = shortest_dist(dist_mat.permute(1, 2, 0))
return dist
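# Illustrative usage (shapes are an assumption, not part of the original file):
# after permuting local features from [N, c, H] to [N, H, c] (H stripes, c channels),
# batch_local_dist returns one aligned distance per pair in the batch, e.g.
#   lf1, lf2 = torch.randn(32, 8, 128), torch.randn(32, 8, 128)
#   batch_local_dist(lf1, lf2).shape  # torch.Size([32])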
if __name__ == '__main__':
    x = torch.randn(32, 2048)
    y = torch.randn(32, 2048)
    # hard_example_mining also needs the identity labels of the batch,
    # e.g. 8 identities with 4 instances each (as produced by the identity sampler)
    labels = torch.arange(0, 8).view(8, 1).repeat(1, 4).view(-1)
    dist_mat = euclidean_dist(x, y)
    dist_ap, dist_an, p_inds, n_inds = hard_example_mining(dist_mat, labels, return_inds=True)
    from IPython import embed
    embed()
import cv2
import os
import numpy as np
data_root = "/home/csc302/workspace/luohao/code/AlignedReID/data/market1501/query"
gen_root = "/home/csc302/workspace/luohao/code/AlignedReID/data/market1501_partial/query"
def random_crop(img, sample_rate=0.6):
    """Randomly keep a horizontal slice of the person (60%-90% of the height), then resize back."""
    h, w = img.shape[:2]
    sh = np.random.randint(int(sample_rate * h), int(h * 0.9), 1)[0]
    bh = np.random.randint(0, h - sh, 1)[0]
    img = img[bh:sh + bh, :, :]
    img = cv2.resize(img, (w, h))
    return img

os.makedirs(gen_root, exist_ok=True)  # make sure the output directory exists before writing
for image_name in os.listdir(data_root):
    if image_name[-3:] != 'jpg':
        continue
    img_path = os.path.join(data_root, image_name)
    img = cv2.imread(img_path)
    img = random_crop(img)
    save_path = os.path.join(gen_root, image_name)
    cv2.imwrite(save_path, img)
# Unique model identifier
modelCode=1824
# Model name
modelName=AlignedReID
# Model description
modelDescription=AlignedReID is a person re-identification (ReID) algorithm proposed by Megvii, and the first reported to surpass human-level performance on this task. Its core idea is local feature alignment, which markedly improves the capture of fine-grained details.
# Application scenario
processType=Inference
# Algorithm category
appScenario=Object detection
# Framework type
frameType=pytorch
# Accelerator type
accelerateType=K100AI
from __future__ import absolute_import
import torch
from torch import nn
from torch.nn import functional as F
import torchvision
from aligned.HorizontalMaxPool2D import HorizontalMaxPool2d
__all__ = ['DenseNet121']
class DenseNet121(nn.Module):
def __init__(self, num_classes, loss={'softmax'}, aligned=False,**kwargs):
super(DenseNet121, self).__init__()
self.loss = loss
densenet121 = torchvision.models.densenet121(pretrained=True)
self.base = densenet121.features
self.classifier = nn.Linear(1024, num_classes)
self.feat_dim = 1024 # feature dimension
self.aligned = aligned
self.horizon_pool = HorizontalMaxPool2d()
if self.aligned:
self.bn = nn.BatchNorm2d(1024)
self.relu = nn.ReLU(inplace=True)
self.conv1 = nn.Conv2d(1024, 128, kernel_size=1, stride=1, padding=0, bias=True)
def forward(self, x):
x = self.base(x)
if not self.training:
lf = self.horizon_pool(x)
if self.aligned:
lf = self.bn(x)
lf = self.relu(lf)
lf = self.horizon_pool(lf)
lf = self.conv1(lf)
if self.aligned or not self.training:
lf = lf.view(lf.size()[0:3])
lf = lf / torch.pow(lf, 2).sum(dim=1, keepdim=True).clamp(min=1e-12).sqrt()
x = F.avg_pool2d(x, x.size()[2:])
f = x.view(x.size(0), -1)
# f = 1. * f / (torch.norm(f, 2, dim=-1, keepdim=True).expand_as(f) + 1e-12)
if not self.training:
return f, lf
y = self.classifier(f)
if self.loss == {'softmax'}:
return y
elif self.loss == {'metric'}:
if self.aligned: return f, lf
return f
elif self.loss == {'softmax', 'metric'}:
if self.aligned: return y, f, lf
return y, f
else:
raise KeyError("Unsupported loss: {}".format(self.loss))
from __future__ import absolute_import
import torch
import torch.nn as nn
from torch.nn import functional as F
import torch.utils.model_zoo as model_zoo
import os
import sys
from aligned.HorizontalMaxPool2D import HorizontalMaxPool2d
__all__ = ['InceptionV4ReID']
"""
Code imported from https://github.com/Cadene/pretrained-models.pytorch
"""
pretrained_settings = {
'inceptionv4': {
'imagenet': {
'url': 'http://data.lip6.fr/cadene/pretrainedmodels/inceptionv4-8e4777a0.pth',
'input_space': 'RGB',
'input_size': [3, 299, 299],
'input_range': [0, 1],
'mean': [0.5, 0.5, 0.5],
'std': [0.5, 0.5, 0.5],
'num_classes': 1000
},
'imagenet+background': {
'url': 'http://data.lip6.fr/cadene/pretrainedmodels/inceptionv4-8e4777a0.pth',
'input_space': 'RGB',
'input_size': [3, 299, 299],
'input_range': [0, 1],
'mean': [0.5, 0.5, 0.5],
'std': [0.5, 0.5, 0.5],
'num_classes': 1001
}
}
}
class BasicConv2d(nn.Module):
def __init__(self, in_planes, out_planes, kernel_size, stride, padding=0):
super(BasicConv2d, self).__init__()
self.conv = nn.Conv2d(in_planes, out_planes,
kernel_size=kernel_size, stride=stride,
padding=padding, bias=False) # verify bias false
self.bn = nn.BatchNorm2d(out_planes,
eps=0.001, # value found in tensorflow
momentum=0.1, # default pytorch value
affine=True)
self.relu = nn.ReLU(inplace=True)
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
x = self.relu(x)
return x
class Mixed_3a(nn.Module):
def __init__(self):
super(Mixed_3a, self).__init__()
self.maxpool = nn.MaxPool2d(3, stride=2)
self.conv = BasicConv2d(64, 96, kernel_size=3, stride=2)
def forward(self, x):
x0 = self.maxpool(x)
x1 = self.conv(x)
out = torch.cat((x0, x1), 1)
return out
class Mixed_4a(nn.Module):
def __init__(self):
super(Mixed_4a, self).__init__()
self.branch0 = nn.Sequential(
BasicConv2d(160, 64, kernel_size=1, stride=1),
BasicConv2d(64, 96, kernel_size=3, stride=1)
)
self.branch1 = nn.Sequential(
BasicConv2d(160, 64, kernel_size=1, stride=1),
BasicConv2d(64, 64, kernel_size=(1,7), stride=1, padding=(0,3)),
BasicConv2d(64, 64, kernel_size=(7,1), stride=1, padding=(3,0)),
BasicConv2d(64, 96, kernel_size=(3,3), stride=1)
)
def forward(self, x):
x0 = self.branch0(x)
x1 = self.branch1(x)
out = torch.cat((x0, x1), 1)
return out
class Mixed_5a(nn.Module):
def __init__(self):
super(Mixed_5a, self).__init__()
self.conv = BasicConv2d(192, 192, kernel_size=3, stride=2)
self.maxpool = nn.MaxPool2d(3, stride=2)
def forward(self, x):
x0 = self.conv(x)
x1 = self.maxpool(x)
out = torch.cat((x0, x1), 1)
return out
class Inception_A(nn.Module):
def __init__(self):
super(Inception_A, self).__init__()
self.branch0 = BasicConv2d(384, 96, kernel_size=1, stride=1)
self.branch1 = nn.Sequential(
BasicConv2d(384, 64, kernel_size=1, stride=1),
BasicConv2d(64, 96, kernel_size=3, stride=1, padding=1)
)
self.branch2 = nn.Sequential(
BasicConv2d(384, 64, kernel_size=1, stride=1),
BasicConv2d(64, 96, kernel_size=3, stride=1, padding=1),
BasicConv2d(96, 96, kernel_size=3, stride=1, padding=1)
)
self.branch3 = nn.Sequential(
nn.AvgPool2d(3, stride=1, padding=1, count_include_pad=False),
BasicConv2d(384, 96, kernel_size=1, stride=1)
)
def forward(self, x):
x0 = self.branch0(x)
x1 = self.branch1(x)
x2 = self.branch2(x)
x3 = self.branch3(x)
out = torch.cat((x0, x1, x2, x3), 1)
return out
class Reduction_A(nn.Module):
def __init__(self):
super(Reduction_A, self).__init__()
self.branch0 = BasicConv2d(384, 384, kernel_size=3, stride=2)
self.branch1 = nn.Sequential(
BasicConv2d(384, 192, kernel_size=1, stride=1),
BasicConv2d(192, 224, kernel_size=3, stride=1, padding=1),
BasicConv2d(224, 256, kernel_size=3, stride=2)
)
self.branch2 = nn.MaxPool2d(3, stride=2)
def forward(self, x):
x0 = self.branch0(x)
x1 = self.branch1(x)
x2 = self.branch2(x)
out = torch.cat((x0, x1, x2), 1)
return out
class Inception_B(nn.Module):
def __init__(self):
super(Inception_B, self).__init__()
self.branch0 = BasicConv2d(1024, 384, kernel_size=1, stride=1)
self.branch1 = nn.Sequential(
BasicConv2d(1024, 192, kernel_size=1, stride=1),
BasicConv2d(192, 224, kernel_size=(1,7), stride=1, padding=(0,3)),
BasicConv2d(224, 256, kernel_size=(7,1), stride=1, padding=(3,0))
)
self.branch2 = nn.Sequential(
BasicConv2d(1024, 192, kernel_size=1, stride=1),
BasicConv2d(192, 192, kernel_size=(7,1), stride=1, padding=(3,0)),
BasicConv2d(192, 224, kernel_size=(1,7), stride=1, padding=(0,3)),
BasicConv2d(224, 224, kernel_size=(7,1), stride=1, padding=(3,0)),
BasicConv2d(224, 256, kernel_size=(1,7), stride=1, padding=(0,3))
)
self.branch3 = nn.Sequential(
nn.AvgPool2d(3, stride=1, padding=1, count_include_pad=False),
BasicConv2d(1024, 128, kernel_size=1, stride=1)
)
def forward(self, x):
x0 = self.branch0(x)
x1 = self.branch1(x)
x2 = self.branch2(x)
x3 = self.branch3(x)
out = torch.cat((x0, x1, x2, x3), 1)
return out
class Reduction_B(nn.Module):
def __init__(self):
super(Reduction_B, self).__init__()
self.branch0 = nn.Sequential(
BasicConv2d(1024, 192, kernel_size=1, stride=1),
BasicConv2d(192, 192, kernel_size=3, stride=2)
)
self.branch1 = nn.Sequential(
BasicConv2d(1024, 256, kernel_size=1, stride=1),
BasicConv2d(256, 256, kernel_size=(1,7), stride=1, padding=(0,3)),
BasicConv2d(256, 320, kernel_size=(7,1), stride=1, padding=(3,0)),
BasicConv2d(320, 320, kernel_size=3, stride=2)
)
self.branch2 = nn.MaxPool2d(3, stride=2)
def forward(self, x):
x0 = self.branch0(x)
x1 = self.branch1(x)
x2 = self.branch2(x)
out = torch.cat((x0, x1, x2), 1)
return out
class Inception_C(nn.Module):
def __init__(self):
super(Inception_C, self).__init__()
self.branch0 = BasicConv2d(1536, 256, kernel_size=1, stride=1)
self.branch1_0 = BasicConv2d(1536, 384, kernel_size=1, stride=1)
self.branch1_1a = BasicConv2d(384, 256, kernel_size=(1,3), stride=1, padding=(0,1))
self.branch1_1b = BasicConv2d(384, 256, kernel_size=(3,1), stride=1, padding=(1,0))
self.branch2_0 = BasicConv2d(1536, 384, kernel_size=1, stride=1)
self.branch2_1 = BasicConv2d(384, 448, kernel_size=(3,1), stride=1, padding=(1,0))
self.branch2_2 = BasicConv2d(448, 512, kernel_size=(1,3), stride=1, padding=(0,1))
self.branch2_3a = BasicConv2d(512, 256, kernel_size=(1,3), stride=1, padding=(0,1))
self.branch2_3b = BasicConv2d(512, 256, kernel_size=(3,1), stride=1, padding=(1,0))
self.branch3 = nn.Sequential(
nn.AvgPool2d(3, stride=1, padding=1, count_include_pad=False),
BasicConv2d(1536, 256, kernel_size=1, stride=1)
)
def forward(self, x):
x0 = self.branch0(x)
x1_0 = self.branch1_0(x)
x1_1a = self.branch1_1a(x1_0)
x1_1b = self.branch1_1b(x1_0)
x1 = torch.cat((x1_1a, x1_1b), 1)
x2_0 = self.branch2_0(x)
x2_1 = self.branch2_1(x2_0)
x2_2 = self.branch2_2(x2_1)
x2_3a = self.branch2_3a(x2_2)
x2_3b = self.branch2_3b(x2_2)
x2 = torch.cat((x2_3a, x2_3b), 1)
x3 = self.branch3(x)
out = torch.cat((x0, x1, x2, x3), 1)
return out
class InceptionV4(nn.Module):
def __init__(self, num_classes=1001):
super(InceptionV4, self).__init__()
# Special attributs
self.input_space = None
self.input_size = (299, 299, 3)
self.mean = None
self.std = None
# Modules
self.features = nn.Sequential(
BasicConv2d(3, 32, kernel_size=3, stride=2),
BasicConv2d(32, 32, kernel_size=3, stride=1),
BasicConv2d(32, 64, kernel_size=3, stride=1, padding=1),
Mixed_3a(),
Mixed_4a(),
Mixed_5a(),
Inception_A(),
Inception_A(),
Inception_A(),
Inception_A(),
Reduction_A(), # Mixed_6a
Inception_B(),
Inception_B(),
Inception_B(),
Inception_B(),
Inception_B(),
Inception_B(),
Inception_B(),
Reduction_B(), # Mixed_7a
Inception_C(),
Inception_C(),
Inception_C()
)
self.avg_pool = nn.AvgPool2d(8, count_include_pad=False)
self.last_linear = nn.Linear(1536, num_classes)
def logits(self, features):
x = self.avg_pool(features)
x = x.view(x.size(0), -1)
x = self.last_linear(x)
return x
def forward(self, input):
x = self.features(input)
x = self.logits(x)
return x
def inceptionv4(num_classes=1000, pretrained='imagenet'):
if pretrained:
settings = pretrained_settings['inceptionv4'][pretrained]
assert num_classes == settings['num_classes'], \
"num_classes should be {}, but is {}".format(settings['num_classes'], num_classes)
# both 'imagenet'&'imagenet+background' are loaded from same parameters
model = InceptionV4(num_classes=1001)
model.load_state_dict(model_zoo.load_url(settings['url']))
if pretrained == 'imagenet':
new_last_linear = nn.Linear(1536, 1000)
new_last_linear.weight.data = model.last_linear.weight.data[1:]
new_last_linear.bias.data = model.last_linear.bias.data[1:]
model.last_linear = new_last_linear
model.input_space = settings['input_space']
model.input_size = settings['input_size']
model.input_range = settings['input_range']
model.mean = settings['mean']
model.std = settings['std']
else:
model = InceptionV4(num_classes=num_classes)
return model
class InceptionV4ReID(nn.Module):
def __init__(self, num_classes, loss={'softmax'}, aligned=False, **kwargs):
super(InceptionV4ReID, self).__init__()
self.loss = loss
base = inceptionv4()
self.features = base.features
self.classifier = nn.Linear(1536, num_classes)
self.feat_dim = 1536 # feature dimension
self.aligned = aligned
self.horizon_pool = HorizontalMaxPool2d()
if self.aligned:
self.bn = nn.BatchNorm2d(1536)
self.relu = nn.ReLU(inplace=True)
self.conv1 = nn.Conv2d(1536, 128, kernel_size=1, stride=1, padding=0, bias=True)
def forward(self, x):
x = self.features(x)
if not self.training:
lf = self.horizon_pool(x)
if self.aligned:
lf = self.bn(x)
lf = self.relu(lf)
lf = self.horizon_pool(lf)
lf = self.conv1(lf)
if self.aligned or not self.training:
lf = lf.view(lf.size()[0:3])
lf = lf / torch.pow(lf, 2).sum(dim=1, keepdim=True).clamp(min=1e-12).sqrt()
x = F.avg_pool2d(x, x.size()[2:])
f = x.view(x.size(0), -1)
if not self.training:
return f, lf
y = self.classifier(f)
if self.loss == {'softmax'}:
return y
elif self.loss == {'metric'}:
if self.aligned: return f, lf
return f
elif self.loss == {'softmax', 'metric'}:
if self.aligned: return y, f, lf
return y, f
else:
raise KeyError("Unsupported loss: {}".format(self.loss))
from __future__ import absolute_import
import torch
from torch import nn
from torch.nn import functional as F
import torchvision
from aligned.HorizontalMaxPool2D import HorizontalMaxPool2d
__all__ = ['ResNet50', 'ResNet101']
class ResNet50(nn.Module):
def __init__(self, num_classes, loss={'softmax'}, aligned=False, **kwargs):
super(ResNet50, self).__init__()
self.loss = loss
resnet50 = torchvision.models.resnet50(pretrained=True)
self.base = nn.Sequential(*list(resnet50.children())[:-2])
self.classifier = nn.Linear(2048, num_classes)
self.feat_dim = 2048 # feature dimension
self.aligned = aligned
self.horizon_pool = HorizontalMaxPool2d()
if self.aligned:
self.bn = nn.BatchNorm2d(2048)
self.relu = nn.ReLU(inplace=True)
self.conv1 = nn.Conv2d(2048, 128, kernel_size=1, stride=1, padding=0, bias=True)
def forward(self, x):
x = self.base(x)
if not self.training:
lf = self.horizon_pool(x)
if self.aligned and self.training:
lf = self.bn(x)
lf = self.relu(lf)
lf = self.horizon_pool(lf)
lf = self.conv1(lf)
if self.aligned or not self.training:
lf = lf.view(lf.size()[0:3])
lf = lf / torch.pow(lf,2).sum(dim=1, keepdim=True).clamp(min=1e-12).sqrt()
x = F.avg_pool2d(x, x.size()[2:])
f = x.view(x.size(0), -1)
#f = 1. * f / (torch.norm(f, 2, dim=-1, keepdim=True).expand_as(f) + 1e-12)
if not self.training:
return f,lf
y = self.classifier(f)
if self.loss == {'softmax'}:
return y
elif self.loss == {'metric'}:
if self.aligned: return f, lf
return f
elif self.loss == {'softmax', 'metric'}:
if self.aligned: return y, f, lf
return y, f
else:
raise KeyError("Unsupported loss: {}".format(self.loss))
class ResNet101(nn.Module):
def __init__(self, num_classes, loss={'softmax'}, aligned=False, **kwargs):
super(ResNet101, self).__init__()
self.loss = loss
resnet101 = torchvision.models.resnet101(pretrained=False)
self.base = nn.Sequential(*list(resnet101.children())[:-2])
self.classifier = nn.Linear(2048, num_classes)
self.feat_dim = 2048 # feature dimension
self.aligned = aligned
self.horizon_pool = HorizontalMaxPool2d()
if self.aligned:
self.bn = nn.BatchNorm2d(2048)
self.relu = nn.ReLU(inplace=True)
self.conv1 = nn.Conv2d(2048, 128, kernel_size=1, stride=1, padding=0, bias=True)
def forward(self, x):
x = self.base(x)
if not self.training:
lf = self.horizon_pool(x)
if self.aligned:
lf = self.bn(x)
lf = self.relu(lf)
lf = self.horizon_pool(lf)
lf = self.conv1(lf)
if self.aligned or not self.training:
lf = lf.view(lf.size()[0:3])
lf = lf / torch.pow(lf, 2).sum(dim=1, keepdim=True).clamp(min=1e-12).sqrt()
x = F.avg_pool2d(x, x.size()[2:])
f = x.view(x.size(0), -1)
# f = 1. * f / (torch.norm(f, 2, dim=-1, keepdim=True).expand_as(f) + 1e-12)
if not self.training:
return f, lf
y = self.classifier(f)
if self.loss == {'softmax'}:
return y
elif self.loss == {'metric'}:
if self.aligned: return f, lf
return f
elif self.loss == {'softmax', 'metric'}:
if self.aligned: return y, f, lf
return y, f
else:
raise KeyError("Unsupported loss: {}".format(self.loss))
from __future__ import absolute_import
import torch
from torch import nn
from torch.nn import functional as F
import torchvision
from aligned.HorizontalMaxPool2D import HorizontalMaxPool2d
__all__ = ['ShuffleNet']
class ChannelShuffle(nn.Module):
def __init__(self, num_groups):
super(ChannelShuffle, self).__init__()
self.g = num_groups
def forward(self, x):
b, c, h, w = x.size()
n = c // self.g  # channels per group (integer division so view() gets an int)
# reshape
x = x.view(b, self.g, n, h, w)
# transpose
x = x.permute(0, 2, 1, 3, 4).contiguous()
# flatten
x = x.view(b, c, h, w)
return x
class Bottleneck(nn.Module):
def __init__(self, in_channels, out_channels, stride, num_groups):
super(Bottleneck, self).__init__()
assert stride in [1, 2], "Warning: stride must be either 1 or 2"
self.stride = stride
mid_channels = out_channels // 4  # integer division so Conv2d gets an int channel count
if stride == 2: out_channels -= in_channels
self.conv1 = nn.Conv2d(in_channels, mid_channels, 1, groups=num_groups, bias=False)
self.bn1 = nn.BatchNorm2d(mid_channels)
self.shuffle1 = ChannelShuffle(num_groups)
self.conv2 = nn.Conv2d(mid_channels, mid_channels, 3, stride=stride, padding=1, groups=mid_channels, bias=False)
self.bn2 = nn.BatchNorm2d(mid_channels)
self.conv3 = nn.Conv2d(mid_channels, out_channels, 1, groups=num_groups, bias=False)
self.bn3 = nn.BatchNorm2d(out_channels)
if stride == 2: self.shortcut = nn.AvgPool2d(3, stride=2, padding=1)
def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = self.shuffle1(out)
out = self.bn2(self.conv2(out))
out = self.bn3(self.conv3(out))
if self.stride == 2:
res = self.shortcut(x)
out = F.relu(torch.cat([res, out], 1))
else:
out = F.relu(x + out)
return out
# configuration of (num_groups: #out_channels) based on Table 1 in the paper
cfg = {
1: [144, 288, 576],
2: [200, 400, 800],
3: [240, 480, 960],
4: [272, 544, 1088],
8: [384, 768, 1536],
}
class ShuffleNet(nn.Module):
"""ShuffleNet
Reference:
Zhang et al. ShuffleNet: An Extremely Efficient Convolutional Neural
Network for Mobile Devices. CVPR 2018.
"""
def __init__(self, num_classes, loss={'softmax'}, num_groups=3, aligned=False, **kwargs):
super(ShuffleNet, self).__init__()
self.loss = loss
self.conv1 = nn.Sequential(
nn.Conv2d(3, 24, 3, stride=2, padding=1, bias=False),
nn.BatchNorm2d(24),
nn.ReLU(),
nn.MaxPool2d(3, stride=2, padding=1),
)
self.stage2 = nn.Sequential(
Bottleneck(24, cfg[num_groups][0], 2, num_groups),
Bottleneck(cfg[num_groups][0], cfg[num_groups][0], 1, num_groups),
Bottleneck(cfg[num_groups][0], cfg[num_groups][0], 1, num_groups),
Bottleneck(cfg[num_groups][0], cfg[num_groups][0], 1, num_groups),
)
self.stage3 = nn.Sequential(
Bottleneck(cfg[num_groups][0], cfg[num_groups][1], 2, num_groups),
Bottleneck(cfg[num_groups][1], cfg[num_groups][1], 1, num_groups),
Bottleneck(cfg[num_groups][1], cfg[num_groups][1], 1, num_groups),
Bottleneck(cfg[num_groups][1], cfg[num_groups][1], 1, num_groups),
Bottleneck(cfg[num_groups][1], cfg[num_groups][1], 1, num_groups),
Bottleneck(cfg[num_groups][1], cfg[num_groups][1], 1, num_groups),
Bottleneck(cfg[num_groups][1], cfg[num_groups][1], 1, num_groups),
Bottleneck(cfg[num_groups][1], cfg[num_groups][1], 1, num_groups),
)
self.stage4 = nn.Sequential(
Bottleneck(cfg[num_groups][1], cfg[num_groups][2], 2, num_groups),
Bottleneck(cfg[num_groups][2], cfg[num_groups][2], 1, num_groups),
Bottleneck(cfg[num_groups][2], cfg[num_groups][2], 1, num_groups),
Bottleneck(cfg[num_groups][2], cfg[num_groups][2], 1, num_groups),
)
self.classifier = nn.Linear(cfg[num_groups][2], num_classes)
self.feat_dim = cfg[num_groups][2]
self.aligned = aligned
self.horizon_pool = HorizontalMaxPool2d()
def forward(self, x):
x = self.conv1(x)
x = self.stage2(x)
x = self.stage3(x)
x = self.stage4(x)
if self.aligned or not self.training:
lf = self.horizon_pool(x)
lf = lf.view(lf.size()[0:3])
lf = lf / torch.pow(lf, 2).sum(dim=1, keepdim=True).clamp(min=1e-12).sqrt()
f = F.avg_pool2d(x, x.size()[2:]).view(x.size(0), -1)
if not self.training:
return f, lf
y = self.classifier(f)
if self.loss == {'softmax'}:
return y
elif self.loss == {'metric'}:
if self.aligned: return f, lf
return f
elif self.loss == {'softmax', 'metric'}:
if self.aligned: return y, f, lf
return y, f
else:
raise KeyError("Unsupported loss: {}".format(self.loss))
from __future__ import absolute_import
from .ResNet import *
from .DenseNet import *
from .ShuffleNet import *
from .InceptionV4 import *
__factory = {
'resnet50': ResNet50,
'resnet101': ResNet101,
'densenet121': DenseNet121,
'shufflenet': ShuffleNet,
'inceptionv4': InceptionV4ReID,
}
def get_names():
return __factory.keys()
def init_model(name, *args, **kwargs):
if name not in __factory.keys():
raise KeyError("Unknown model: {}".format(name))
return __factory[name](*args, **kwargs)
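# Example (illustrative, not part of the original file): the aligned ResNet50 used
# elsewhere in this repo can be created with
#   model = init_model('resnet50', num_classes=751, loss={'softmax', 'metric'}, aligned=True)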
from __future__ import absolute_import
import sys
import time
import datetime
import argparse
import os
import os.path as osp
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import torch.backends.cudnn as cudnn
from torch.optim import lr_scheduler
import models
from util.losses import CrossEntropyLoss, DeepSupervision, CrossEntropyLabelSmooth, TripletLossAlignedReID
from util import data_manager
from util import transforms as T
from util.dataset_loader import ImageDataset
from util.utils import AverageMeter, Logger, save_checkpoint
from util.eval_metrics import evaluate
from util.optimizers import init_optim
from util.samplers import RandomIdentitySampler
from IPython import embed
parser = argparse.ArgumentParser(description='Train AlignedReID with cross entropy loss and triplet hard loss')
# Datasets
parser.add_argument('--root', type=str, default='data', help="root path to data directory")
parser.add_argument('-d', '--dataset', type=str, default='market1501',
choices=data_manager.get_names())
parser.add_argument('-j', '--workers', default=4, type=int,
help="number of data loading workers (default: 4)")
parser.add_argument('--height', type=int, default=256,
help="height of an image (default: 256)")
parser.add_argument('--width', type=int, default=128,
help="width of an image (default: 128)")
parser.add_argument('--split-id', type=int, default=0, help="split index")
# CUHK03-specific setting
parser.add_argument('--cuhk03-labeled', action='store_true',
help="whether to use labeled images, if false, detected images are used (default: False)")
parser.add_argument('--cuhk03-classic-split', action='store_true',
help="whether to use classic split by Li et al. CVPR'14 (default: False)")
parser.add_argument('--use-metric-cuhk03', action='store_true',
help="whether to use cuhk03-metric (default: False)")
# Optimization options
parser.add_argument('--labelsmooth', action='store_true', help="label smooth")
parser.add_argument('--optim', type=str, default='adam', help="optimization algorithm (see optimizers.py)")
parser.add_argument('--max-epoch', default=300, type=int,
help="maximum epochs to run")
parser.add_argument('--start-epoch', default=0, type=int,
help="manual epoch number (useful on restarts)")
parser.add_argument('--train-batch', default=32, type=int,
help="train batch size")
parser.add_argument('--test-batch', default=32, type=int, help="test batch size")
parser.add_argument('--lr', '--learning-rate', default=0.0002, type=float,
help="initial learning rate")
parser.add_argument('--stepsize', default=150, type=int,
help="stepsize to decay learning rate (>0 means this is enabled)")
parser.add_argument('--gamma', default=0.1, type=float,
help="learning rate decay")
parser.add_argument('--weight-decay', default=5e-04, type=float,
help="weight decay (default: 5e-04)")
# triplet hard loss
parser.add_argument('--margin', type=float, default=0.3, help="margin for triplet loss")
parser.add_argument('--num-instances', type=int, default=4,
help="number of instances per identity")
parser.add_argument('--htri-only', action='store_true', default=False,
help="if this is True, only htri loss is used in training")
# Architecture
parser.add_argument('-a', '--arch', type=str, default='resnet50', choices=models.get_names())
# Miscs
parser.add_argument('--print-freq', type=int, default=10, help="print frequency")
parser.add_argument('--seed', type=int, default=1, help="manual seed")
parser.add_argument('--resume', type=str, default='', metavar='PATH')
parser.add_argument('--evaluate', action='store_true', help="evaluation only")
parser.add_argument('--eval-step', type=int, default=-1,
help="run evaluation for every N epochs (set to -1 to test after training)")
parser.add_argument('--start-eval', type=int, default=0, help="start to evaluate after specific epoch")
parser.add_argument('--save-dir', type=str, default='log')
parser.add_argument('--use_cpu', action='store_true', help="use cpu")
parser.add_argument('--gpu-devices', default='0', type=str, help='gpu device ids for CUDA_VISIBLE_DEVICES')
parser.add_argument('--reranking',action= 'store_true', help= 'result re_ranking')
parser.add_argument('--test_distance',type = str, default='global', help= 'test distance type')
parser.add_argument('--unaligned',action= 'store_true', help= 'test local feature with unalignment')
args = parser.parse_args()
def main():
use_gpu = torch.cuda.is_available()
if args.use_cpu: use_gpu = False
pin_memory = True if use_gpu else False
if not args.evaluate:
sys.stdout = Logger(osp.join(args.save_dir, 'log_train.txt'))
else:
sys.stdout = Logger(osp.join(args.save_dir, 'log_test.txt'))
print("==========\nArgs:{}\n==========".format(args))
if use_gpu:
print("Currently using GPU {}".format(args.gpu_devices))
os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
cudnn.benchmark = True
torch.cuda.manual_seed_all(args.seed)
else:
print("Currently using CPU (GPU is highly recommended)")
print("Initializing dataset {}".format(args.dataset))
dataset = data_manager.init_img_dataset(
root=args.root, name=args.dataset, split_id=args.split_id,
cuhk03_labeled=args.cuhk03_labeled, cuhk03_classic_split=args.cuhk03_classic_split,
)
# data augmentation
transform_train = T.Compose([
T.Random2DTranslation(args.height, args.width),
T.RandomHorizontalFlip(),
T.ToTensor(),
T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
transform_test = T.Compose([
T.Resize((args.height, args.width)),
T.ToTensor(),
T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
trainloader = DataLoader(
ImageDataset(dataset.train, transform=transform_train),
sampler=RandomIdentitySampler(dataset.train, num_instances=args.num_instances),
batch_size=args.train_batch, num_workers=args.workers,
pin_memory=pin_memory, drop_last=True,
)
queryloader = DataLoader(
ImageDataset(dataset.query, transform=transform_test),
batch_size=args.test_batch, shuffle=False, num_workers=args.workers,
pin_memory=pin_memory, drop_last=False,
)
galleryloader = DataLoader(
ImageDataset(dataset.gallery, transform=transform_test),
batch_size=args.test_batch, shuffle=False, num_workers=args.workers,
pin_memory=pin_memory, drop_last=False,
)
print("Initializing model: {}".format(args.arch))
model = models.init_model(name=args.arch, num_classes=dataset.num_train_pids, loss={'softmax','metric'}, aligned =True, use_gpu=use_gpu)
print("Model size: {:.5f}M".format(sum(p.numel() for p in model.parameters())/1000000.0))
if args.labelsmooth:
criterion_class = CrossEntropyLabelSmooth(num_classes=dataset.num_train_pids, use_gpu=use_gpu)
else:
criterion_class = CrossEntropyLoss(use_gpu=use_gpu)
criterion_metric = TripletLossAlignedReID(margin=args.margin)
optimizer = init_optim(args.optim, model.parameters(), args.lr, args.weight_decay)
if args.stepsize > 0:
scheduler = lr_scheduler.StepLR(optimizer, step_size=args.stepsize, gamma=args.gamma)
start_epoch = args.start_epoch
if args.resume:
print("Loading checkpoint from '{}'".format(args.resume))
checkpoint = torch.load(args.resume)
model.load_state_dict(checkpoint['state_dict'])
start_epoch = checkpoint['epoch']
if use_gpu:
model = nn.DataParallel(model).cuda()
if args.evaluate:
print("Evaluate only")
test(model, queryloader, galleryloader, use_gpu)
return 0
start_time = time.time()
train_time = 0
best_rank1 = -np.inf
best_epoch = 0
print("==> Start training")
for epoch in range(start_epoch, args.max_epoch):
start_train_time = time.time()
train(epoch, model, criterion_class, criterion_metric, optimizer, trainloader, use_gpu)
train_time += round(time.time() - start_train_time)
if args.stepsize > 0: scheduler.step()
if (epoch + 1) > args.start_eval and args.eval_step > 0 and (epoch + 1) % args.eval_step == 0 or (
epoch + 1) == args.max_epoch:
print("==> Test")
rank1 = test(model, queryloader, galleryloader, use_gpu)
is_best = rank1 > best_rank1
if is_best:
best_rank1 = rank1
best_epoch = epoch + 1
if use_gpu:
state_dict = model.module.state_dict()
else:
state_dict = model.state_dict()
save_checkpoint({
'state_dict': state_dict,
'rank1': rank1,
'epoch': epoch,
}, is_best, osp.join(args.save_dir, 'checkpoint_ep' + str(epoch + 1) + '.pth.tar'))
print("==> Best Rank-1 {:.1%}, achieved at epoch {}".format(best_rank1, best_epoch))
elapsed = round(time.time() - start_time)
elapsed = str(datetime.timedelta(seconds=elapsed))
train_time = str(datetime.timedelta(seconds=train_time))
print("Finished. Total elapsed time (h:m:s): {}. Training time (h:m:s): {}.".format(elapsed, train_time))
def train(epoch, model, criterion_class, criterion_metric, optimizer, trainloader, use_gpu):
model.train()
losses = AverageMeter()
batch_time = AverageMeter()
data_time = AverageMeter()
xent_losses = AverageMeter()
global_losses = AverageMeter()
local_losses = AverageMeter()
end = time.time()
for batch_idx, (imgs, pids, _) in enumerate(trainloader):
if use_gpu:
imgs, pids = imgs.cuda(), pids.cuda()
# measure data loading time
data_time.update(time.time() - end)
outputs, features, local_features = model(imgs)
if args.htri_only:
if isinstance(features, tuple):
global_loss, local_loss = DeepSupervision(criterion_metric, features, pids, local_features)
else:
global_loss, local_loss = criterion_metric(features, pids, local_features)
else:
if isinstance(outputs, tuple):
xent_loss = DeepSupervision(criterion_class, outputs, pids)
else:
xent_loss = criterion_class(outputs, pids)
if isinstance(features, tuple):
global_loss, local_loss = DeepSupervision(criterion_metric, features, pids, local_features)
else:
global_loss, local_loss = criterion_metric(features, pids, local_features)
loss = xent_loss + global_loss + local_loss
optimizer.zero_grad()
loss.backward()
optimizer.step()
batch_time.update(time.time() - end)
end = time.time()
losses.update(loss.item(), pids.size(0))
xent_losses.update(xent_loss.item(), pids.size(0))
global_losses.update(global_loss.item(), pids.size(0))
local_losses.update(local_loss.item(), pids.size(0))
if (batch_idx+1) % args.print_freq == 0:
print('Epoch: [{0}][{1}/{2}]\t'
'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
'CLoss {xent_loss.val:.4f} ({xent_loss.avg:.4f})\t'
'GLoss {global_loss.val:.4f} ({global_loss.avg:.4f})\t'
'LLoss {local_loss.val:.4f} ({local_loss.avg:.4f})\t'.format(
epoch+1, batch_idx+1, len(trainloader), batch_time=batch_time,data_time=data_time,
loss=losses,xent_loss=xent_losses, global_loss=global_losses, local_loss = local_losses))
def test(model, queryloader, galleryloader, use_gpu, ranks=[1, 5, 10, 20]):
batch_time = AverageMeter()
model.eval()
with torch.no_grad():
qf, q_pids, q_camids, lqf = [], [], [], []
for batch_idx, (imgs, pids, camids) in enumerate(queryloader):
if use_gpu: imgs = imgs.cuda()
end = time.time()
features, local_features = model(imgs)
batch_time.update(time.time() - end)
features = features.data.cpu()
local_features = local_features.data.cpu()
qf.append(features)
lqf.append(local_features)
q_pids.extend(pids)
q_camids.extend(camids)
qf = torch.cat(qf, 0)
lqf = torch.cat(lqf,0)
q_pids = np.asarray(q_pids)
q_camids = np.asarray(q_camids)
print("Extracted features for query set, obtained {}-by-{} matrix".format(qf.size(0), qf.size(1)))
gf, g_pids, g_camids, lgf = [], [], [], []
end = time.time()
for batch_idx, (imgs, pids, camids) in enumerate(galleryloader):
if use_gpu: imgs = imgs.cuda()
end = time.time()
features, local_features = model(imgs)
batch_time.update(time.time() - end)
features = features.data.cpu()
local_features = local_features.data.cpu()
gf.append(features)
lgf.append(local_features)
g_pids.extend(pids)
g_camids.extend(camids)
gf = torch.cat(gf, 0)
lgf = torch.cat(lgf,0)
g_pids = np.asarray(g_pids)
g_camids = np.asarray(g_camids)
print("Extracted features for gallery set, obtained {}-by-{} matrix".format(gf.size(0), gf.size(1)))
print("==> BatchTime(s)/BatchSize(img): {:.3f}/{}".format(batch_time.avg, args.test_batch))
# feature normalization
qf = 1. * qf / (torch.norm(qf, 2, dim = -1, keepdim=True).expand_as(qf) + 1e-12)
gf = 1. * gf / (torch.norm(gf, 2, dim = -1, keepdim=True).expand_as(gf) + 1e-12)
m, n = qf.size(0), gf.size(0)
distmat = torch.pow(qf, 2).sum(dim=1, keepdim=True).expand(m, n) + \
torch.pow(gf, 2).sum(dim=1, keepdim=True).expand(n, m).t()
distmat.addmm_(qf, gf.t(), beta=1, alpha=-2)  # distmat = distmat - 2 * qf @ gf^T (keyword form for newer PyTorch)
distmat = distmat.numpy()
if not args.test_distance== 'global':
print("Computing local distances")
from util.distance import low_memory_local_dist
lqf = lqf.permute(0,2,1)
lgf = lgf.permute(0,2,1)
local_distmat = low_memory_local_dist(lqf.numpy(),lgf.numpy(),aligned= not args.unaligned)
if args.test_distance== 'local':
print("Only using local branch")
distmat = local_distmat
if args.test_distance == 'global_local':
print("Using global and local branches")
distmat = local_distmat+distmat
print("Computing CMC and mAP")
cmc, mAP = evaluate(distmat, q_pids, g_pids, q_camids, g_camids, use_metric_cuhk03=args.use_metric_cuhk03)
print("Results ----------")
print("mAP: {:.1%}".format(mAP))
print("CMC curve")
for r in ranks:
print("Rank-{:<3}: {:.1%}".format(r, cmc[r - 1]))
print("------------------")
if args.reranking:
from util.re_ranking import re_ranking
if args.test_distance == 'global':
print("Only using global branch for reranking")
distmat = re_ranking(qf,gf,k1=20, k2=6, lambda_value=0.3)
else:
local_qq_distmat = low_memory_local_dist(lqf.numpy(), lqf.numpy(),aligned= not args.unaligned)
local_gg_distmat = low_memory_local_dist(lgf.numpy(), lgf.numpy(),aligned= not args.unaligned)
local_dist = np.concatenate(
[np.concatenate([local_qq_distmat, local_distmat], axis=1),
np.concatenate([local_distmat.T, local_gg_distmat], axis=1)],
axis=0)
if args.test_distance == 'local':
print("Only using local branch for reranking")
distmat = re_ranking(qf,gf,k1=20,k2=6,lambda_value=0.3,local_distmat=local_dist,only_local=True)
elif args.test_distance == 'global_local':
print("Using global and local branches for reranking")
distmat = re_ranking(qf,gf,k1=20,k2=6,lambda_value=0.3,local_distmat=local_dist,only_local=False)
print("Computing CMC and mAP for re_ranking")
cmc, mAP = evaluate(distmat, q_pids, g_pids, q_camids, g_camids, use_metric_cuhk03=args.use_metric_cuhk03)
print("Results ----------")
print("mAP(RK): {:.1%}".format(mAP))
print("CMC curve(RK)")
for r in ranks:
print("Rank-{:<3}: {:.1%}".format(r, cmc[r - 1]))
print("------------------")
return cmc[0]
if __name__ == '__main__':
main()