# encoding: utf-8
"""
@author: liaoxingyu
@contact: sherlockliao01@gmail.com
"""
import torch
import torch.nn as nn
from collections import OrderedDict
import numpy as np
def summary(model, input_size, batch_size=-1, device="cuda"):
def register_hook(module):
def hook(module, input, output):
class_name = str(module.__class__).split(".")[-1].split("'")[0]
module_idx = len(summary)
m_key = "%s-%i" % (class_name, module_idx + 1)
summary[m_key] = OrderedDict()
summary[m_key]["input_shape"] = list(input[0].size())
summary[m_key]["input_shape"][0] = batch_size
if isinstance(output, (list, tuple)):
summary[m_key]["output_shape"] = [
[-1] + list(o.size())[1:] for o in output
]
else:
summary[m_key]["output_shape"] = list(output.size())
summary[m_key]["output_shape"][0] = batch_size
params = 0
if hasattr(module, "weight") and hasattr(module.weight, "size"):
params += torch.prod(torch.LongTensor(list(module.weight.size())))
summary[m_key]["trainable"] = module.weight.requires_grad
if hasattr(module, "bias") and hasattr(module.bias, "size"):
params += torch.prod(torch.LongTensor(list(module.bias.size())))
summary[m_key]["nb_params"] = params
if (
not isinstance(module, nn.Sequential)
and not isinstance(module, nn.ModuleList)
and not (module == model)
):
hooks.append(module.register_forward_hook(hook))
device = device.lower()
assert device in [
"cuda",
"cpu",
], "Input device is not valid, please specify 'cuda' or 'cpu'"
if device == "cuda" and torch.cuda.is_available():
dtype = torch.cuda.FloatTensor
else:
dtype = torch.FloatTensor
# multiple inputs to the network
if isinstance(input_size, tuple):
input_size = [input_size]
# batch_size of 2 for batchnorm
x = [torch.rand(2, *in_size).type(dtype) for in_size in input_size]
# print(type(x[0]))
# create properties
summary = OrderedDict()
hooks = []
# register hook
model.apply(register_hook)
# make a forward pass
# print(x.shape)
model(*x)
# remove these hooks
for h in hooks:
h.remove()
print("----------------------------------------------------------------")
line_new = "{:>20} {:>25} {:>15}".format("Layer (type)", "Output Shape", "Param #")
print(line_new)
print("================================================================")
total_params = 0
total_output = 0
trainable_params = 0
for layer in summary:
# input_shape, output_shape, trainable, nb_params
line_new = "{:>20} {:>25} {:>15}".format(
layer,
str(summary[layer]["output_shape"]),
"{0:,}".format(summary[layer]["nb_params"]),
)
total_params += summary[layer]["nb_params"]
total_output += np.prod(summary[layer]["output_shape"])
if "trainable" in summary[layer]:
if summary[layer]["trainable"] == True:
trainable_params += summary[layer]["nb_params"]
print(line_new)
# assume 4 bytes/number (float on cuda).
total_input_size = abs(np.prod(input_size) * batch_size * 4. / (1024 ** 2.))
total_output_size = abs(2. * total_output * 4. / (1024 ** 2.)) # x2 for gradients
total_params_size = abs(total_params.numpy() * 4. / (1024 ** 2.))
total_size = total_params_size + total_output_size + total_input_size
print("================================================================")
print("Total params: {0:,}".format(total_params))
print("Trainable params: {0:,}".format(trainable_params))
print("Non-trainable params: {0:,}".format(total_params - trainable_params))
print("----------------------------------------------------------------")
print("Input size (MB): %0.2f" % total_input_size)
print("Forward/backward pass size (MB): %0.2f" % total_output_size)
print("Params size (MB): %0.2f" % total_params_size)
print("Estimated Total Size (MB): %0.2f" % total_size)
print("----------------------------------------------------------------")
# return summary
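

if __name__ == "__main__":
    # Minimal usage sketch (illustrative architecture; CPU for portability):
    # summarize a toy CNN for a 3x32x32 input.
    demo = nn.Sequential(
        nn.Conv2d(3, 16, kernel_size=3),   # -> (16, 30, 30)
        nn.ReLU(),
        nn.Flatten(),
        nn.Linear(16 * 30 * 30, 10),
    )
    summary(demo, (3, 32, 32), device="cpu")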
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
# -*- coding: utf-8 -*-
from time import perf_counter
from typing import Optional
class Timer:
"""
A timer which computes the time elapsed since the start/reset of the timer.
"""
def __init__(self):
self.reset()
def reset(self):
"""
Reset the timer.
"""
self._start = perf_counter()
self._paused: Optional[float] = None
self._total_paused = 0
self._count_start = 1
def pause(self):
"""
Pause the timer.
"""
if self._paused is not None:
raise ValueError("Trying to pause a Timer that is already paused!")
self._paused = perf_counter()
def is_paused(self) -> bool:
"""
Returns:
bool: whether the timer is currently paused
"""
return self._paused is not None
def resume(self):
"""
Resume the timer.
"""
if self._paused is None:
raise ValueError("Trying to resume a Timer that is not paused!")
self._total_paused += perf_counter() - self._paused
self._paused = None
self._count_start += 1
def seconds(self) -> float:
"""
Returns:
(float): the total number of seconds since the start/reset of the
timer, excluding the time when the timer is paused.
"""
if self._paused is not None:
end_time: float = self._paused # type: ignore
else:
end_time = perf_counter()
return end_time - self._start - self._total_paused
def avg_seconds(self) -> float:
"""
Returns:
(float): the average number of seconds between every start/reset and
pause.
"""
return self.seconds() / self._count_start
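

if __name__ == "__main__":
    # Minimal usage sketch: measure a workload while excluding a paused span.
    import time

    timer = Timer()
    time.sleep(0.05)    # measured
    timer.pause()
    time.sleep(0.05)    # excluded: the timer is paused
    timer.resume()
    print(f"total: {timer.seconds():.2f}s, avg per span: {timer.avg_seconds():.2f}s")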
# encoding: utf-8
"""
@author: liaoxingyu
@contact: sherlockliao01@gmail.com
"""
import os
import pickle
import random
import matplotlib.pyplot as plt
import numpy as np
import tqdm
from scipy.stats import norm
from sklearn import metrics
from .file_io import PathManager
class Visualizer:
r"""Visualize images(activation map) ranking list of features generated by reid models."""
def __init__(self, dataset):
self.dataset = dataset
def get_model_output(self, all_ap, dist, q_pids, g_pids, q_camids, g_camids):
self.all_ap = all_ap
self.dist = dist
self.sim = 1 - dist
self.q_pids = q_pids
self.g_pids = g_pids
self.q_camids = q_camids
self.g_camids = g_camids
self.indices = np.argsort(dist, axis=1)
self.matches = (g_pids[self.indices] == q_pids[:, np.newaxis]).astype(np.int32)
self.num_query = len(q_pids)
def get_matched_result(self, q_index):
q_pid = self.q_pids[q_index]
q_camid = self.q_camids[q_index]
order = self.indices[q_index]
remove = (self.g_pids[order] == q_pid) & (self.g_camids[order] == q_camid)
keep = np.invert(remove)
cmc = self.matches[q_index][keep]
sort_idx = order[keep]
return cmc, sort_idx
def save_rank_result(self, query_indices, output, max_rank=5, vis_label=False, label_sort='ascending',
actmap=False):
if vis_label:
fig, axes = plt.subplots(2, max_rank + 1, figsize=(3 * max_rank, 12))
else:
fig, axes = plt.subplots(1, max_rank + 1, figsize=(3 * max_rank, 6))
for cnt, q_idx in enumerate(tqdm.tqdm(query_indices)):
all_imgs = []
cmc, sort_idx = self.get_matched_result(q_idx)
query_info = self.dataset[q_idx]
query_img = query_info['images']
cam_id = query_info['camids']
query_name = query_info['img_paths'].split('/')[-1]
all_imgs.append(query_img)
query_img = np.rollaxis(np.asarray(query_img.numpy(), dtype=np.uint8), 0, 3)
plt.clf()
ax = fig.add_subplot(1, max_rank + 1, 1)
ax.imshow(query_img)
ax.set_title('{:.4f}/cam{}'.format(self.all_ap[q_idx], cam_id))
ax.axis("off")
for i in range(max_rank):
if vis_label:
ax = fig.add_subplot(2, max_rank + 1, i + 2)
else:
ax = fig.add_subplot(1, max_rank + 1, i + 2)
g_idx = self.num_query + sort_idx[i]
gallery_info = self.dataset[g_idx]
gallery_img = gallery_info['images']
cam_id = gallery_info['camids']
all_imgs.append(gallery_img)
gallery_img = np.rollaxis(np.asarray(gallery_img, dtype=np.uint8), 0, 3)
if cmc[i] == 1:
label = 'true'
ax.add_patch(plt.Rectangle(xy=(0, 0), width=gallery_img.shape[1] - 1,
height=gallery_img.shape[0] - 1, edgecolor=(1, 0, 0),
fill=False, linewidth=5))
else:
label = 'false'
ax.add_patch(plt.Rectangle(xy=(0, 0), width=gallery_img.shape[1] - 1,
height=gallery_img.shape[0] - 1,
edgecolor=(0, 0, 1), fill=False, linewidth=5))
ax.imshow(gallery_img)
ax.set_title(f'{self.sim[q_idx, sort_idx[i]]:.3f}/{label}/cam{cam_id}')
ax.axis("off")
# if actmap:
# act_outputs = []
#
# def hook_fns_forward(module, input, output):
# act_outputs.append(output.cpu())
#
# all_imgs = np.stack(all_imgs, axis=0) # (b, 3, h, w)
# all_imgs = torch.from_numpy(all_imgs).float()
# # normalize
# all_imgs = all_imgs.sub_(self.mean).div_(self.std)
# sz = list(all_imgs.shape[-2:])
# handle = m.base.register_forward_hook(hook_fns_forward)
# with torch.no_grad():
# _ = m(all_imgs.cuda())
# handle.remove()
# acts = self.get_actmap(act_outputs[0], sz)
# for i in range(top + 1):
# axes.flat[i].imshow(acts[i], alpha=0.3, cmap='jet')
if vis_label:
label_indice = np.where(cmc == 1)[0]
if label_sort == "ascending": label_indice = label_indice[::-1]
label_indice = label_indice[:max_rank]
for i in range(max_rank):
if i >= len(label_indice): break
j = label_indice[i]
g_idx = self.num_query + sort_idx[j]
gallery_info = self.dataset[g_idx]
gallery_img = gallery_info['images']
cam_id = gallery_info['camids']
gallery_img = np.rollaxis(np.asarray(gallery_img, dtype=np.uint8), 0, 3)
ax = fig.add_subplot(2, max_rank + 1, max_rank + 3 + i)
ax.add_patch(plt.Rectangle(xy=(0, 0), width=gallery_img.shape[1] - 1,
height=gallery_img.shape[0] - 1,
edgecolor=(1, 0, 0),
fill=False, linewidth=5))
ax.imshow(gallery_img)
ax.set_title(f'{self.sim[q_idx, sort_idx[j]]:.3f}/cam{cam_id}')
ax.axis("off")
plt.tight_layout()
filepath = os.path.join(output, "{}.jpg".format(cnt))
fig.savefig(filepath)
def vis_rank_list(self, output, vis_label, num_vis=100, rank_sort="ascending", label_sort="ascending", max_rank=5,
actmap=False):
r"""Visualize rank list of query instance
Args:
output (str): a directory to save rank list result.
vis_label (bool): if visualize label of query
num_vis (int):
rank_sort (str): save visualization results by which order,
if rank_sort is ascending, AP from low to high, vice versa.
label_sort (bool):
max_rank (int): maximum number of rank result to visualize
actmap (bool):
"""
assert rank_sort in ['ascending', 'descending'], "{} not match [ascending, descending]".format(rank_sort)
query_indices = np.argsort(self.all_ap)
if rank_sort == 'descending': query_indices = query_indices[::-1]
query_indices = query_indices[:int(num_vis)]
self.save_rank_result(query_indices, output, max_rank, vis_label, label_sort, actmap)
def vis_roc_curve(self, output):
PathManager.mkdirs(output)
pos, neg = [], []
for i, q in enumerate(self.q_pids):
cmc, sort_idx = self.get_matched_result(i) # remove same id in same camera
ind_pos = np.where(cmc == 1)[0]
q_dist = self.dist[i]
pos.extend(q_dist[sort_idx[ind_pos]])
ind_neg = np.where(cmc == 0)[0]
neg.extend(q_dist[sort_idx[ind_neg]])
scores = np.hstack((pos, neg))
labels = np.hstack((np.zeros(len(pos)), np.ones(len(neg))))
fpr, tpr, thresholds = metrics.roc_curve(labels, scores)
self.plot_roc_curve(fpr, tpr)
filepath = os.path.join(output, "roc.jpg")
plt.savefig(filepath)
# self.plot_distribution(pos, neg)
# filepath = os.path.join(output, "pos_neg_dist.jpg")
# plt.savefig(filepath)
return fpr, tpr, pos, neg
@staticmethod
def plot_roc_curve(fpr, tpr, name='model', fig=None):
if fig is None:
fig = plt.figure()
plt.semilogx(np.arange(0, 1, 0.01), np.arange(0, 1, 0.01), 'r', linestyle='--', label='Random guess')
plt.semilogx(fpr, tpr, color=(random.uniform(0, 1), random.uniform(0, 1), random.uniform(0, 1)),
label='ROC curve with {}'.format(name))
plt.title('Receiver Operating Characteristic')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend(loc='best')
return fig
@staticmethod
def plot_distribution(pos, neg, name='model', fig=None):
if fig is None:
fig = plt.figure()
pos_color = (random.uniform(0, 1), random.uniform(0, 1), random.uniform(0, 1))
n, bins, _ = plt.hist(pos, bins=80, alpha=0.7, density=True,
color=pos_color,
label='positive with {}'.format(name))
mu = np.mean(pos)
sigma = np.std(pos)
y = norm.pdf(bins, mu, sigma) # fitting curve
plt.plot(bins, y, color=pos_color) # plot y curve
neg_color = (random.uniform(0, 1), random.uniform(0, 1), random.uniform(0, 1))
n, bins, _ = plt.hist(neg, bins=80, alpha=0.5, density=True,
color=neg_color,
label='negative with {}'.format(name))
mu = np.mean(neg)
sigma = np.std(neg)
y = norm.pdf(bins, mu, sigma) # fitting curve
plt.plot(bins, y, color=neg_color) # plot y curve
plt.xticks(np.arange(0, 1.5, 0.1))
plt.title('positive and negative pairs distribution')
plt.legend(loc='best')
return fig
@staticmethod
def save_roc_info(output, fpr, tpr, pos, neg):
results = {
"fpr": np.asarray(fpr),
"tpr": np.asarray(tpr),
"pos": np.asarray(pos),
"neg": np.asarray(neg),
}
with open(os.path.join(output, "roc_info.pickle"), "wb") as handle:
pickle.dump(results, handle, protocol=pickle.HIGHEST_PROTOCOL)
@staticmethod
def load_roc_info(path):
with open(path, 'rb') as handle: res = pickle.load(handle)
return res
# def plot_camera_dist(self):
# same_cam, diff_cam = [], []
# for i, q in enumerate(self.q_pids):
# q_camid = self.q_camids[i]
#
# order = self.indices[i]
# same = (self.g_pids[order] == q) & (self.g_camids[order] == q_camid)
# diff = (self.g_pids[order] == q) & (self.g_camids[order] != q_camid)
# sameCam_idx = order[same]
# diffCam_idx = order[diff]
#
# same_cam.extend(self.sim[i, sameCam_idx])
# diff_cam.extend(self.sim[i, diffCam_idx])
#
# fig = plt.figure(figsize=(10, 5))
# plt.hist(same_cam, bins=80, alpha=0.7, density=True, color='red', label='same camera')
# plt.hist(diff_cam, bins=80, alpha=0.5, density=True, color='blue', label='diff camera')
# plt.xticks(np.arange(0.1, 1.0, 0.1))
# plt.title('positive and negative pair distribution')
# return fig
# def get_actmap(self, features, sz):
# """
# :param features: (1, 2048, 16, 8) activation map
# :return:
# """
# features = (features ** 2).sum(1) # (1, 16, 8)
# b, h, w = features.size()
# features = features.view(b, h * w)
# features = nn.functional.normalize(features, p=2, dim=1)
# acts = features.view(b, h, w)
# all_acts = []
# for i in range(b):
# act = acts[i].numpy()
# act = cv2.resize(act, (sz[1], sz[0]))
# act = 255 * (act - act.max()) / (act.max() - act.min() + 1e-12)
# act = np.uint8(np.floor(act))
# all_acts.append(act)
# return all_acts
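# Usage sketch (hypothetical driver code): after computing distances and APs
# with a trained model, the visualizer can export rank lists and ROC data.
#
#     vis = Visualizer(test_dataset)
#     vis.get_model_output(all_ap, dist, q_pids, g_pids, q_camids, g_camids)
#     vis.vis_rank_list("logs/vis", vis_label=True, num_vis=50)
#     fpr, tpr, pos, neg = vis.vis_roc_curve("logs/vis")
#     Visualizer.save_roc_info("logs/vis", fpr, tpr, pos, neg)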
# Unique model identifier
modelCode=1818
# Model name
modelName=Fast-ReID_pytorch
# Model description
modelDescription=FastReID is an open-source object re-identification (ReID) toolbox developed by JD AI Research. It focuses on tasks such as person and vehicle re-identification, and supports a variety of deep learning models and optimization techniques.
# Application scenarios
processType=inference,training
# Algorithm category
appScenario=object detection
# Framework type
frameType=pytorch
# Accelerator type
accelerateType=K100AI
# Cross-domain Person Re-Identification
## Introduction
[UDAStrongBaseline](https://github.com/zkcys001/UDAStrongBaseline) is a transitional PyTorch-based framework for both unsupervised learning (USL)
and unsupervised domain adaptation (UDA) in object re-ID tasks. It provides stronger
baselines on these tasks and requires Python >= 3.6 and PyTorch >= 1.1. We are gradually migrating all the code from [UDAStrongBaseline](https://github.com/zkcys001/UDAStrongBaseline) to [fastreid](https://github.com/JDAI-CV/fast-reid) (ongoing).
### Unsupervised domain adaptation (UDA) on Person re-ID
- `Direct Transfer` models are trained on the source-domain datasets
([source_pretrain]()) and tested directly on the target-domain datasets.
- UDA methods (`MMT`, `SpCL`, etc.) starting from ImageNet are trained end-to-end
in a single stage, without source-domain pre-training. `MLT` denotes the implementation of our NeurIPS 2020 submission.
Please note that this is a pre-release repository for the anonymous review process; the official
repository will be released once the paper is published.
#### DukeMTMC-reID -> Market-1501
| Method | Backbone | Pre-trained | mAP(%) | top-1(%) | top-5(%) | top-10(%) | Train time |
| ----- | :------: | :---------: | :----: | :------: | :------: | :-------: | :------: |
| Direct Transfer | ResNet50 | DukeMTMC | 32.2 | 64.9 | 78.7 | 83.4 | ~1h |
| [UDA_TP](https://github.com/open-mmlab/OpenUnReID/) PR'2020| ResNet50 | DukeMTMC | 52.3 | 76.0 | 87.8 | 91.9 | ~2h |
| [MMT](https://github.com/open-mmlab/OpenUnReID/) ICLR'2020| ResNet50 | DukeMTMC | 80.9 | 92.2 | 97.6 | 98.4 | ~6h |
| [SpCL](https://github.com/open-mmlab/OpenUnReID/) NIPS'2020 submission| ResNet50 | DukeMTMC | 78.2 | 90.5 | 96.6 | 97.8 | ~3h |
| [strong_baseline](https://github.com/open-mmlab/OpenUnReID/) | ResNet50 | DukeMTMC | 75.6 | 90.9 | 96.6 | 97.8 | ~3h |
| [Our stronger_baseline](https://github.com/JDAI-CV/fast-reid) | ResNet50 | DukeMTMC | 78.0 | 91.0 | 96.4 | 97.7 | ~3h |
| [MLT] NeurIPS'2020 submission| ResNet50 | DukeMTMC | 81.5| 92.8| 96.8| 97.9 | ~ |
#### Market-1501 -> DukeMTMC-reID
| Method | Backbone | Pre-trained | mAP(%) | top-1(%) | top-5(%) | top-10(%) | Train time |
| ----- | :------: | :---------: | :----: | :------: | :------: | :-------: | :------: |
| Direct Transfer | ResNet50 | Market | 34.1 | 51.3 | 65.3 | 71.7 | ~1h |
| [UDA_TP](https://github.com/open-mmlab/OpenUnReID/) PR'2020| ResNet50 | Market | 45.7 | 65.5 | 78.0 | 81.7 | ~2h |
| [MMT](https://github.com/open-mmlab/OpenUnReID/) ICLR'2020| ResNet50 | Market | 67.7 | 80.3 | 89.9 | 92.9 | ~6h |
| [SpCL](https://github.com/open-mmlab/OpenUnReID/) NIPS'2020 submission | ResNet50 | Market | 70.4 | 83.8 | 91.2 | 93.4 | ~3h |
| [strong_baseline](https://github.com/open-mmlab/OpenUnReID/) | ResNet50 | Market | 60.4 | 75.9 | 86.2 | 89.8 | ~3h |
| [Our stronger_baseline](https://github.com/JDAI-CV/fast-reid) | ResNet50 | Market | 66.7 | 80.0 | 89.2 | 92.2 | ~3h |
| [MLT] NeurIPS'2020 submission| ResNet50 | Market | 71.2 |83.9| 91.5| 93.2| ~ |
### Market1501 -> MSMT17
| Method | Source | Rank@1 | mAP | mINP |
| :---: | :---: | :---: |:---: | :---: |
| DirectTransfer(R50) | Market1501 | 29.8% | 10.3% | 9.3% |
| Our method | DukeMTMC | 56.6% | 26.5% | - |
### DukeMTMC -> MSMT17
| Method | Source | Rank@1 | mAP | mINP |
| :---: | :---: | :---: |:---: | :---: |
| DirectTransfer(R50) | DukeMTMC | 34.8% | 12.5% | 0.3% |
| Our method | DukeMTMC | 59.5% | 27.7% | - |
# Semi-Supervised Domain Generalizable Person Re-Identification (SSKD)
## Introduction
SSKD is implemented based on **FastReID v1.0.0**; see the [sskd GitHub repository](https://github.com/xiaomingzhid/sskd). It provides a semi-supervised feature learning framework to learn domain-general representations. The framework is shown below:

<img src="images/framework.png" width="850" >
## Dataset
**FastHuman** is a very challenging benchmark, as it covers complex application scenarios and provides large-scale training and testing sets. It contains diverse images from different application scenarios, including campus, airport, shopping mall, street, and railway station.
In total, it contains 447,233 labeled images of 40,061 subjects captured by 82 cameras. For details on FastHuman, refer to the [paper](https://arxiv.org/pdf/2108.05045.pdf).

| Source Domain | \#subjects | \#images | \#cameras | collection place |
| ----- | :------: | :---------: | :----: | :------: |
| CUHK03| 1,090 | 14,096 | 2 | campus |
| SAIVT | 152 | 7,150 | 8 | buildings |
| AirportALERT | 9,651 | 30,243 | 6 | airport |
|iLIDS| 300 | 4,515 | 2 | airport |
|PKU | 114 | 1,824 | 2 | campus |
|PRAI | 1,580 | 39,481| 2 | aerial imagery |
|SenseReID | 1,718 | 3,338 | 2 | unknown |
|SYSU | 510 | 30,071 | 4 | campus |
|Thermalworld | 409 | 8,103 | 1 | unknown |
|3DPeS | 193 | 1,012 | 1 | outdoor |
|CAVIARa | 72 | 1,220 | 1 | shopping mall |
|VIPeR | 632 | 1,264 | 2 | unknown |
|Shinpuhkan| 24 | 4,501 | 8 | unknown |
|WildTrack | 313 | 33,979 | 7| outdoor |
|cuhk-sysu | 11,934| 34,574 | 1| street |
|LPW | 2,731 | 30,678 | 4 | street |
|GRID | 1,025 | 1,275 | 8 | underground |
|Total | 31,423| 246,049 | 57 | - |

|Unseen Domain| \#subjects | \#images | \#cameras | collection place |
| ----- | :------: | :---------: | :----: | :------: |
|Market1501 | 1,501 | 32,217 | 6 | campus |
|DukeMTMC | 1,812 | 36,441 | 8 | campus |
|MSMT17 | 4,101 | 126,441| 15| campus |
|PartialREID | 60 | 600| 6|campus |
|PartialiLIDS | 119 | 238 | 2 | airport |
|OccludedREID | 200 | 2,000| 5| campus |
|CrowdREID | 845 | 3,257 | 11 | railway station|
|Total | 8,638 | 201,184| 49 | - |
**YouTube-Human** is an unlabeled human dataset. You can download street-view videos from the YouTube website and then use a human detection algorithm ([centerX](https://github.com/JDAI-CV/centerX)) to obtain the human images.
## Training & Evaluation
The whole training process is divided into two stages:
- To train a student model (r34-ibn) and a teacher model (r101-ibn), run:
```bash
python3 projects/Basic_Project/train_net.py --config-file projects/Basic_Project/configs/r34-ibn.yml --num-gpu 4
python3 projects/Basic_Project/train_net.py --config-file projects/Basic_Project/configs/r101-ibn.yml --num-gpu 4
```
- To train the student model on the unlabeled dataset with SSKD, run:
```bash
python3 projects/SSKD/train_net.py --config-file projects/SSKD/configs/sskd.yml --num-gpu 4
```
### Results
<img src="images/result1.png" width="550" >
<img src="images/result2.png" width="500" >
More experimental results can be found in our [arXiv paper](https://arxiv.org/pdf/2108.05045.pdf).
## Reference Project
- [fastreid](https://github.com/JDAI-CV/fast-reid)
- [centerX](https://github.com/JDAI-CV/centerX)
## Citation
If you use **fastreid** or **sskd** in your research, please give credit to the following papers:
```BibTeX
@article{he2020fastreid,
title={FastReID: A Pytorch Toolbox for General Instance Re-identification},
author={He, Lingxiao and Liao, Xingyu and Liu, Wu and Liu, Xinchen and Cheng, Peng and Mei, Tao},
journal={arXiv preprint arXiv:2006.02631},
year={2020}
}
```
```BibTeX
@article{he2021semi,
title={Semi-Supervised Domain Generalizable Person Re-Identification},
author={He, Lingxiao and Liu, Wu and Liang, Jian and Zheng, Kecheng and Liao, Xingyu and Cheng, Peng and Mei, Tao},
journal={arXiv preprint arXiv:2108.05045},
year={2021}
}
```
# FastAttr in FastReID
This project provides a strong baseline for pedestrian attribute recognition.
## Datasets Preparation
We use `PA100k` to evaluate the model's performance.
You can download the dataset from [HydraPlus-Net](https://github.com/xh-liu/HydraPlus-Net).
## Usage
The training config files can be found in `projects/FastAttr/configs`, which you can use to reproduce the results of this repo.
For example:
```bash
python3 projects/FastAttr/train_net.py --config-file projects/FastAttr/configs/pa100.yml --num-gpus 4
```
## Experiment Results
We use [A Strong Baseline of Pedestrian Attribute Recognition](https://github.com/valencebond/Strong_Baseline_of_Pedestrian_Attribute_Recognition/tree/master) as our baseline method and conduct the experiments
with 4 GPUs.
More details can be found in the config file and code.
### PA100k
| Method | Pretrained | mA | Accu | Prec | Recall | F1 |
| :---: | :---: | :---: |:---: | :---: | :---: | :---: |
| attribute baseline | ImageNet | 80.50 | 78.84 | 87.24 | 87.12 | 86.78 |
| FastAttr | ImageNet | 77.57 | 78.03 | 88.39 | 84.98 | 86.65 |
MODEL:
META_ARCHITECTURE: AttrBaseline
BACKBONE:
NAME: build_resnet_backbone
NORM: BN
DEPTH: 50x
LAST_STRIDE: 2
FEAT_DIM: 2048
WITH_IBN: False
PRETRAIN: True
PRETRAIN_PATH: /export/home/lxy/.cache/torch/checkpoints/resnet50-19c8e357.pth
HEADS:
NAME: AttrHead
WITH_BNNECK: True
POOL_LAYER: FastGlobalAvgPool
CLS_LAYER: Linear
NUM_CLASSES: 26
LOSSES:
NAME: ("BinaryCrossEntropyLoss",)
BCE:
WEIGHT_ENABLED: True
SCALE: 1.
INPUT:
SIZE_TRAIN: [ 256, 192 ]
SIZE_TEST: [ 256, 192 ]
FLIP:
ENABLED: True
PADDING:
ENABLED: True
DATALOADER:
SAMPLER_TRAIN: TrainingSampler
NUM_WORKERS: 8
SOLVER:
MAX_EPOCH: 30
OPT: SGD
BASE_LR: 0.04
BIAS_LR_FACTOR: 2.
HEADS_LR_FACTOR: 10.
WEIGHT_DECAY: 0.0005
WEIGHT_DECAY_BIAS: 0.0005
IMS_PER_BATCH: 256
NESTEROV: False
SCHED: MultiStepLR
STEPS: [ 15, 20, 25 ]
WARMUP_FACTOR: 0.1
WARMUP_ITERS: 1000
CHECKPOINT_PERIOD: 10
TEST:
EVAL_PERIOD: 10
IMS_PER_BATCH: 256
CUDNN_BENCHMARK: True
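
# The following project configs inherit this base file through the _BASE_ key,
# overriding only the dataset names, the number of attribute classes, and the
# output directory.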
_BASE_: Base-attribute.yml
DATASETS:
NAMES: ("DukeMTMCAttr",)
TESTS: ("DukeMTMCAttr",)
MODEL:
HEADS:
NUM_CLASSES: 23
OUTPUT_DIR: projects/FastAttr/logs/dukemtmc/strong_baseline
_BASE_: Base-attribute.yml
DATASETS:
NAMES: ("Market1501Attr",)
TESTS: ("Market1501Attr",)
MODEL:
HEADS:
NUM_CLASSES: 27
OUTPUT_DIR: projects/FastAttr/logs/market1501/strong_baseline
_BASE_: Base-attribute.yml
DATASETS:
NAMES: ("PA100K",)
TESTS: ("PA100K",)
OUTPUT_DIR: projects/FastAttr/logs/pa100k/strong_baseline
# encoding: utf-8
"""
@author: xingyu liao
@contact: sherlockliao01@gmail.com
"""
from .attr_evaluation import AttrEvaluator
from .config import add_attr_config
from .datasets import *
from .modeling import *
from .attr_dataset import AttrDataset
# encoding: utf-8
"""
@author: liaoxingyu
@contact: sherlockliao01@gmail.com
"""
import torch
from torch.utils.data import Dataset
from fastreid.data.data_utils import read_image
class AttrDataset(Dataset):
"""Image Person Attribute Dataset"""
def __init__(self, img_items, transform, attr_dict):
self.img_items = img_items
self.transform = transform
self.attr_dict = attr_dict
def __len__(self):
return len(self.img_items)
def __getitem__(self, index):
img_path, labels = self.img_items[index]
img = read_image(img_path)
if self.transform is not None: img = self.transform(img)
labels = torch.as_tensor(labels)
return {
"images": img,
"targets": labels,
"img_paths": img_path,
}
@property
def num_classes(self):
return len(self.attr_dict)
@property
def sample_weights(self):
sample_weights = torch.zeros(self.num_classes, dtype=torch.float32)
for _, attr in self.img_items:
sample_weights += torch.as_tensor(attr)
sample_weights /= len(self)
return sample_weights
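

# Usage sketch (hypothetical names): `sample_weights` holds the positive-label
# frequency of each attribute over the dataset, which is typically used to
# re-weight a binary cross-entropy loss on imbalanced attributes, e.g.:
#
#     dataset = AttrDataset(img_items, transform, attr_dict)
#     pos_ratio = dataset.sample_weights      # shape: (num_classes,)
#     weight = torch.exp(1.0 - pos_ratio)     # one plausible re-weighting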
# encoding: utf-8
"""
@author: liaoxingyu
@contact: sherlockliao01@gmail.com
"""
import copy
import logging
from collections import OrderedDict
import torch
from fastreid.evaluation.evaluator import DatasetEvaluator
from fastreid.utils import comm
logger = logging.getLogger("fastreid.attr_evaluation")
class AttrEvaluator(DatasetEvaluator):
def __init__(self, cfg, attr_dict, thres=0.5, output_dir=None):
self.cfg = cfg
self.attr_dict = attr_dict
self.thres = thres
self._output_dir = output_dir
self._cpu_device = torch.device("cpu")
self.pred_logits = []
self.gt_labels = []
def reset(self):
self.pred_logits = []
self.gt_labels = []
def process(self, inputs, outputs):
self.gt_labels.extend(inputs["targets"].to(self._cpu_device))
self.pred_logits.extend(outputs.to(self._cpu_device, torch.float32))
@staticmethod
def get_attr_metrics(gt_labels, pred_logits, thres):
eps = 1e-20
pred_labels = copy.deepcopy(pred_logits)
pred_labels[pred_logits < thres] = 0
pred_labels[pred_logits >= thres] = 1
# Compute label-based metric
overlaps = pred_labels * gt_labels
correct_pos = overlaps.sum(axis=0)
real_pos = gt_labels.sum(axis=0)
inv_overlaps = (1 - pred_labels) * (1 - gt_labels)
correct_neg = inv_overlaps.sum(axis=0)
real_neg = (1 - gt_labels).sum(axis=0)
# Compute instance-based accuracy
pred_labels = pred_labels.astype(bool)
gt_labels = gt_labels.astype(bool)
intersect = (pred_labels & gt_labels).astype(float)
union = (pred_labels | gt_labels).astype(float)
ins_acc = (intersect.sum(axis=1) / (union.sum(axis=1) + eps)).mean()
ins_prec = (intersect.sum(axis=1) / (pred_labels.astype(float).sum(axis=1) + eps)).mean()
ins_rec = (intersect.sum(axis=1) / (gt_labels.astype(float).sum(axis=1) + eps)).mean()
ins_f1 = (2 * ins_prec * ins_rec) / (ins_prec + ins_rec + eps)
term1 = correct_pos / (real_pos + eps)
term2 = correct_neg / (real_neg + eps)
label_mA_verbose = (term1 + term2) * 0.5
label_mA = label_mA_verbose.mean()
results = OrderedDict()
results["Accu"] = ins_acc * 100
results["Prec"] = ins_prec * 100
results["Recall"] = ins_rec * 100
results["F1"] = ins_f1 * 100
results["mA"] = label_mA * 100
results["metric"] = label_mA * 100
return results
def evaluate(self):
if comm.get_world_size() > 1:
comm.synchronize()
pred_logits = comm.gather(self.pred_logits)
pred_logits = sum(pred_logits, [])
gt_labels = comm.gather(self.gt_labels)
gt_labels = sum(gt_labels, [])
if not comm.is_main_process():
return {}
else:
pred_logits = self.pred_logits
gt_labels = self.gt_labels
pred_logits = torch.stack(pred_logits, dim=0).numpy()
gt_labels = torch.stack(gt_labels, dim=0).numpy()
# Pedestrian attribute metrics
thres = self.cfg.TEST.THRES
self._results = self.get_attr_metrics(gt_labels, pred_logits, thres)
return copy.deepcopy(self._results)
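

# Toy example of the metric computation (illustrative values only):
#
#     import numpy as np
#     gt = np.array([[1., 0., 1.],
#                    [0., 1., 1.]])
#     logits = np.array([[0.9, 0.2, 0.7],
#                        [0.4, 0.8, 0.3]])
#     AttrEvaluator.get_attr_metrics(gt, logits, thres=0.5)
#     # predictions are [[1, 0, 1], [0, 1, 0]]: one of four true labels is
#     # missed, so instance accuracy and recall drop to 75 while precision
#     # stays at 100.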
# encoding: utf-8
"""
@author: xingyu liao
@contact: sherlockliao01@gmail.com
"""
from fastreid.config import CfgNode as CN
def add_attr_config(cfg):
_C = cfg
_C.MODEL.LOSSES.BCE = CN({"WEIGHT_ENABLED": True})
_C.MODEL.LOSSES.BCE.SCALE = 1.
_C.TEST.THRES = 0.5
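

# Usage sketch (mirrors the usual fastreid project setup; the config path is
# illustrative):
#
#     from fastreid.config import get_cfg
#
#     cfg = get_cfg()
#     add_attr_config(cfg)
#     cfg.merge_from_file("projects/FastAttr/configs/pa100.yml")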
# encoding: utf-8
"""
@author: xingyu liao
@contact: sherlockliao01@gmail.com
"""
# Attributed datasets
from .pa100k import PA100K
from .market1501attr import Market1501Attr
from .dukemtmcattr import DukeMTMCAttr
# encoding: utf-8
"""
@author: xingyu liao
@contact: sherlockliao01@gmail.com
"""
import copy
import logging
import os
from tabulate import tabulate
from termcolor import colored
logger = logging.getLogger("fastreid.attr_dataset")
class Dataset(object):
def __init__(
self,
train,
val,
test,
attr_dict,
mode='train',
verbose=True,
**kwargs,
):
self.train = train
self.val = val
self.test = test
self._attr_dict = attr_dict
self._num_attrs = len(self.attr_dict)
if mode == 'train':
self.data = self.train
elif mode == 'val':
self.data = self.val
else:
self.data = self.test
@property
def num_attrs(self):
return self._num_attrs
@property
def attr_dict(self):
return self._attr_dict
def __len__(self):
return len(self.data)
def __getitem__(self, index):
raise NotImplementedError
def check_before_run(self, required_files):
"""Checks if required files exist before going deeper.
Args:
required_files (str or list): string file name(s).
"""
if isinstance(required_files, str):
required_files = [required_files]
for fpath in required_files:
if not os.path.exists(fpath):
raise RuntimeError('"{}" is not found'.format(fpath))
def combine_all(self):
"""Combines train, val and test in a dataset for training."""
combined = copy.deepcopy(self.train)
def _combine_data(data):
for img_path, pid, camid in data:
if pid in self._junk_pids:
continue
pid = self.dataset_name + "_" + str(pid)
camid = self.dataset_name + "_" + str(camid)
combined.append((img_path, pid, camid))
_combine_data(self.query)
_combine_data(self.gallery)
self.train = combined
self.num_train_pids = self.get_num_pids(self.train)
def show_train(self):
num_train = len(self.train)
num_val = len(self.val)
num_total = num_train + num_val
headers = ['subset', '# images']
csv_results = [
['train', num_train],
['val', num_val],
['total', num_total],
]
# tabulate it
table = tabulate(
csv_results,
tablefmt="pipe",
headers=headers,
numalign="left",
)
logger.info(f"=> Loaded {self.__class__.__name__} in csv format: \n" + colored(table, "cyan"))
logger.info("attributes:")
for label, attr in self.attr_dict.items():
logger.info('{:3d}: {}'.format(label, attr))
logger.info("------------------------------")
logger.info("# attributes: {}".format(len(self.attr_dict)))
def show_test(self):
num_test = len(self.test)
headers = ['subset', '# images']
csv_results = [
['test', num_test],
]
# tabulate it
table = tabulate(
csv_results,
tablefmt="pipe",
headers=headers,
numalign="left",
)
logger.info(f"=> Loaded {self.__class__.__name__} in csv format: \n" + colored(table, "cyan"))
# encoding: utf-8
"""
@author: liaoxingyu
@contact: liaoxingyu2@jd.com
"""
import glob
import os.path as osp
import re
import mat4py
import numpy as np
from fastreid.data.datasets import DATASET_REGISTRY
from .bases import Dataset
@DATASET_REGISTRY.register()
class DukeMTMCAttr(Dataset):
"""DukeMTMCAttr.
Reference:
Lin, Yutian, et al. "Improving person re-identification by attribute and identity learning."
Pattern Recognition 95 (2019): 151-161.
URL: `<https://github.com/vana77/DukeMTMC-attribute>`_
The folder structure should be:
DukeMTMC-reID/
bounding_box_train/ # images
bounding_box_test/ # images
duke_attribute.mat
"""
dataset_dir = 'DukeMTMC-reID'
dataset_url = 'http://vision.cs.duke.edu/DukeMTMC/data/misc/DukeMTMC-reID.zip'
dataset_name = "dukemtmc"
def __init__(self, root='datasets', **kwargs):
self.root = root
self.dataset_dir = osp.join(self.root, self.dataset_dir)
self.train_dir = osp.join(self.dataset_dir, 'bounding_box_train')
self.test_dir = osp.join(self.dataset_dir, 'bounding_box_test')
required_files = [
self.dataset_dir,
self.train_dir,
self.test_dir,
]
self.check_before_run(required_files)
duke_attr = mat4py.loadmat(osp.join(self.dataset_dir, 'duke_attribute.mat'))['duke_attribute']
sorted_attrs = sorted(duke_attr['train'].keys())
sorted_attrs.remove('image_index')
attr_dict = {i: str(attr) for i, attr in enumerate(sorted_attrs)}
train = self.process_dir(self.train_dir, duke_attr['train'], sorted_attrs)
test = val = self.process_dir(self.test_dir, duke_attr['test'], sorted_attrs)
super(DukeMTMCAttr, self).__init__(train, val, test, attr_dict=attr_dict, **kwargs)
def process_dir(self, dir_path, annotation, sorted_attrs):
img_paths = glob.glob(osp.join(dir_path, '*.jpg'))
pattern = re.compile(r'([-\d]+)_c(\d)')
data = []
for img_path in img_paths:
pid, camid = map(int, pattern.search(img_path).groups())
assert 1 <= camid <= 8
img_index = annotation['image_index'].index(str(pid).zfill(4))
attrs = np.array([int(annotation[i][img_index]) - 1 for i in sorted_attrs], dtype=np.float32)
data.append((img_path, attrs))
return data
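
# Filename convention, e.g. "0001_c2_f0046182.jpg" -> pid=1, camid=2. The
# attribute annotations in duke_attribute.mat take values in {1, 2}, hence
# the "- 1" shift to {0, 1} in process_dir above.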
# encoding: utf-8
"""
@author: sherlock
@contact: sherlockliao01@gmail.com
"""
import glob
import os.path as osp
import re
import warnings
import mat4py
import numpy as np
from fastreid.data.datasets import DATASET_REGISTRY
from .bases import Dataset
@DATASET_REGISTRY.register()
class Market1501Attr(Dataset):
"""Market1501Attr.
Reference:
Lin, Yutian, et al. "Improving person re-identification by attribute and identity learning."
Pattern Recognition 95 (2019): 151-161.
URL: `<https://github.com/vana77/Market-1501_Attribute>`_
The folder structure should be:
Market-1501-v15.09.15/
bounding_box_train/ # images
bounding_box_test/ # images
market_attribute.mat
"""
_junk_pids = [0, -1]
dataset_dir = ''
dataset_url = 'http://188.138.127.15:81/Datasets/Market-1501-v15.09.15.zip'
dataset_name = "market1501"
def __init__(self, root='datasets', market1501_500k=False, **kwargs):
self.root = root
self.dataset_dir = osp.join(self.root, self.dataset_dir)
# allow alternative directory structure
self.data_dir = self.dataset_dir
data_dir = osp.join(self.data_dir, 'Market-1501-v15.09.15')
if osp.isdir(data_dir):
self.data_dir = data_dir
else:
warnings.warn('The current data structure is deprecated. Please '
'put data folders such as "bounding_box_train" under '
'"Market-1501-v15.09.15".')
self.train_dir = osp.join(self.data_dir, 'bounding_box_train')
self.test_dir = osp.join(self.data_dir, 'bounding_box_test')
required_files = [
self.data_dir,
self.train_dir,
self.test_dir,
]
self.check_before_run(required_files)
market_attr = mat4py.loadmat(osp.join(self.data_dir, 'market_attribute.mat'))['market_attribute']
sorted_attrs = sorted(market_attr['train'].keys())
sorted_attrs.remove('image_index')
attr_dict = {i: str(attr) for i, attr in enumerate(sorted_attrs)}
train = self.process_dir(self.train_dir, market_attr['train'], sorted_attrs)
test = val = self.process_dir(self.test_dir, market_attr['test'], sorted_attrs)
super(Market1501Attr, self).__init__(train, val, test, attr_dict=attr_dict, **kwargs)
def process_dir(self, dir_path, annotation, sorted_attrs):
img_paths = glob.glob(osp.join(dir_path, '*.jpg'))
pattern = re.compile(r'([-\d]+)_c(\d)')
data = []
for img_path in img_paths:
pid, camid = map(int, pattern.search(img_path).groups())
if pid == -1 or pid == 0:
continue # junk images are just ignored
assert 0 <= pid <= 1501 # pid == 0 means background
assert 1 <= camid <= 6
img_index = annotation['image_index'].index(str(pid).zfill(4))
attrs = np.array([int(annotation[i][img_index])-1 for i in sorted_attrs], dtype=np.float32)
data.append((img_path, attrs))
return data
# encoding: utf-8
"""
@author: xingyu liao
@contact: sherlockliao01@gmail.com
"""
import os.path as osp
import numpy as np
from scipy.io import loadmat
from fastreid.data.datasets import DATASET_REGISTRY
from .bases import Dataset
@DATASET_REGISTRY.register()
class PA100K(Dataset):
"""Pedestrian attribute dataset.
    80k training images + 10k validation images + 10k test images.
The folder structure should be:
pa100k/
data/ # images
annotation.mat
"""
dataset_dir = 'PA-100K'
def __init__(self, root='', **kwargs):
self.root = root
self.dataset_dir = osp.join(self.root, self.dataset_dir)
self.data_dir = osp.join(self.dataset_dir, "data")
self.anno_mat_path = osp.join(
self.dataset_dir, "annotation.mat"
)
required_files = [self.data_dir, self.anno_mat_path]
self.check_before_run(required_files)
train, val, test, attr_dict = self.extract_data()
super(PA100K, self).__init__(train, val, test, attr_dict=attr_dict, **kwargs)
def extract_data(self):
# anno_mat is a dictionary with keys: ['test_images_name', 'val_images_name',
# 'train_images_name', 'val_label', 'attributes', 'test_label', 'train_label']
anno_mat = loadmat(self.anno_mat_path)
def _extract(key_name, key_label):
names = anno_mat[key_name]
labels = anno_mat[key_label]
num_imgs = names.shape[0]
data = []
for i in range(num_imgs):
name = names[i, 0][0]
attrs = labels[i, :].astype(np.float32)
img_path = osp.join(self.data_dir, name)
data.append((img_path, attrs))
return data
train = _extract('train_images_name', 'train_label')
val = _extract('val_images_name', 'val_label')
test = _extract('test_images_name', 'test_label')
attrs = anno_mat['attributes']
attr_dict = {i: str(attr[0][0]) for i, attr in enumerate(attrs)}
return train, val, test, attr_dict
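

# Usage sketch: once registered, the dataset can be built by name through
# fastreid's dataset registry (the root path is illustrative):
#
#     dataset = DATASET_REGISTRY.get("PA100K")(root="datasets")
#     print(dataset.num_attrs)    # 26 attributes in PA-100K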