Commit 79a9d52f authored by chenych's avatar chenych
Browse files

Modify README and add gen_data.py

parent 6a00812f
......@@ -4,10 +4,11 @@
## 模型结构
CenterFace是一种人脸检测算法,采用了轻量级网络mobileNetV2作为主干网络,结合特征金字塔网络(FPN)实现anchor free的人脸检测。
![Architecture of the CenterFace](Architecture of the CenterFace.png)
## 算法原理
CenterFace模型是一种基于单阶段人脸检测算法,作者借鉴了CenterNet的思想,将人脸检测转换为标准点问题,根据人脸中心点来回归人脸框的大小和五个标志点。
![Architecture of the CenterFace](Architecture of the CenterFace.png)
## 环境配置
### Docker(方法一)
......@@ -51,7 +52,7 @@ pip3 install -r requirements.txt
## 数据集
[WIDER_FACE](http://shuoyang1213.me/WIDERFACE/index.html)
WIDER_FACE: http://shuoyang1213.me/WIDERFACE/index.html
![datasets](datasets.png)
......@@ -63,25 +64,76 @@ pip3 install -r requirements.txt
[WIDER Face Testing Images(Tencent Drive)](https://share.weiyun.com/5vSUomP)
annotation文件使用的是coco的格式,可以通过百度网盘下载
[Baidu](https://pan.baidu.com/s/1j_2wggZ3bvCuOAfZvjWqTg) 提取码:f9hh
数据集全部解压后的目录结构如下:
```
├── WIDER_train
│ ├── images
├── WIDER_test
│ ├── images
├── WIDER_val
│ ├── images
├── wider_face: 存放数据集根目录
│ ├── WIDER_train: 训练集解压后的文件目录
│ └── images:
│ ├── 0--Parade: 对应该类别的所有图片
│ ├── ........
│ └── 61--Street_Battle: 对应该类别的所有图片
│ ├── WIDER_val: 验证集解压后的文件目录
│ └── images:
│ ├── 0--Parade: 对应该类别的所有图片
│ ├── ........
│ └── 61--Street_Battle: 对应该类别的所有图片
│ ├── WIDER_test: 测试集解压后的文件目录
│ └── images:
│ ├── 0--Parade: 对应该类别的所有图片
│ ├── ........
│ └── 61--Street_Battle: 对应该类别的所有图片
```
解压完成后执行以下步骤:
1. 将训练图片放置于 ./datasets/images/train的目录下,验证数据放置于./datasets/images/val目录下,存放目录结构如下
```
├── images
│ ├── train
│ ├── 0--Parade
│ ├── ........
│ └── 61--Street_Battle
│ ├── val
│ ├── 0--Parade
│ ├── ........
│ └── 61--Street_Battle
```
2. 如果是使用WIDER_train数据, 可直接将./datasets/labels/train下的train_wider_face.json重命名为train_face.json即可,无需进行标注文件格式转换;反之,需要将训练图片对应的人脸关键点标注信息文件(xxxx.txt),放置于 ./datasets/annotations/下(train存放训练图片的标注文件,val存放验证图片的标注文件),存放目录结构如下:
```
├── annotations
│ ├── train_wider_face.json
│ ├── val_wider_face.json
│ ├── train
│ ├── xxx.txt
│ ├── val
│ ├── xxx.txt
```
特别地,标注信息的格式为:
```
# img_file/image_name # #+空格+img_file/image_name
x, y, w, h, left_eye_x, left_eye_y, flag, right_eye_x, right_eye_y, flag, nose_x, nose_y, flag, left_mouth_x, left_mouth_y, flag, right_mouth_x, right_mouth_y, flag, confidence # x和y是检测框左上角的坐标
```
举个例子:
```
# 0--Parade/0_Parade_marchingband_1_849.jpg
449 330 122 149 488.906 373.643 0.0 542.089 376.442 0.0 515.031 412.83 0.0 485.174 425.893 0.0 538.357 431.491 0.0 0.82
```
3. 生成训练所需的json格式标注数据:
```
cd ./datasets
python gen_data.py --mode train
```
执行完成后会在./datasets/labels下生成训练数据的标注文件 train_face.json
## 训练
### 单机单卡
......@@ -110,7 +162,7 @@ python test_wider_face.py
### 精度
WIDER_FACE验证集上的测试结果如下
| Method | Easy | Medium | Hard|
| Method | Easy(p) | Medium(p) | Hard(p)|
|:--------:| :--------:| :---------:| :------:|
| ours(one scale) | 0.9264 | 0.9133 | 0.7479 |
| original | 0.922 | 0.911 | 0.782|
......
datasets.png

28.6 KB | W: | H:

datasets.png

29.3 KB | W: | H:

datasets.png
datasets.png
datasets.png
datasets.png
  • 2-up
  • Swipe
  • Onion skin
# -*- coding:utf-8 -*-
"""只需要按照实际改写images/annotations/categories另外两个字段其实可以忽略
在keypoints/categories内容是固定的不需修改
"""
import os
import json
import re
import cv2
import argparse
from tqdm import tqdm
import numpy as np
def get_landmark(txt_path, save_path):
    """Filter raw WIDER-face landmark annotations and rewrite them for conversion.

    The input file alternates an image header line ("# path/to/img.jpg") with
    one line per face: "x y w h" optionally followed by five "(kx ky flag)"
    keypoint triplets and a trailing confidence value.  Faces with a bbox side
    shorter than ``min_bbox`` or confidence below ``blur_value`` are dropped;
    keypoint flags are normalized to COCO visibility values (2 = visible,
    1 = labelled/occluded, 0/0/0 triplet = not labelled).

    Output format per kept image: the image path, the number of kept faces,
    then the normalized face lines.

    :param txt_path: path of the raw annotation txt file.
    :param save_path: path of the filtered landmark txt file to write.
    """
    min_bbox = 10     # drop faces whose bbox width or height is below this
    blur_value = 0.3  # drop faces whose trailing confidence is below this

    img_path = None
    bbox_landmarks = []

    def _flush(out):
        # Emit the accumulated faces of the current image; images with no
        # surviving faces are silently skipped.
        if img_path is not None and bbox_landmarks:
            out.write(img_path + '\n')
            out.write(str(len(bbox_landmarks)) + '\n')
            for lm in bbox_landmarks:
                out.write(str(lm) + '\n')

    with open(txt_path) as annotationfile, open(save_path, 'w') as txt_write:
        for raw in annotationfile:
            line = raw.strip()
            if not line:
                continue
            if re.search('jpg', line):
                # New image header: flush the previous image first.  (The
                # original implementation lost the final image entirely and,
                # after an image with zero kept faces, kept attributing the
                # following faces to the stale img_path.)
                _flush(txt_write)
                img_path = line.split('# ')[1]
                bbox_landmarks = []
                continue
            values = line.split()
            bbox = values[:4]
            if min(int(bbox[2]), int(bbox[3])) < min_bbox:
                continue
            if len(values) > 4:
                # values[19] is the detection confidence / blur score.
                if float(values[19]) < blur_value:
                    continue
                # Flags live at indices 6, 9, 12, 15, 18 (one per keypoint).
                for li in range(5):
                    flag = float(values[(li + 2) * 3])
                    if flag == 0:    # labelled and visible -> COCO v=2
                        values[(li + 2) * 3] = str(2)
                    elif flag == 1:  # labelled but occluded -> COCO v=1
                        values[(li + 2) * 3] = str(1)
                    else:            # not labelled -> zero the whole triplet
                        values[3 * li + 4] = str(0)
                        values[3 * li + 5] = str(0)
                        values[3 * li + 6] = str(0)
            bbox_landmarks.append(' '.join(values))
        # EOF: don't lose the last image's annotations.
        _flush(txt_write)
class COCO(object):
    """Template provider for the sections of a COCO-format annotation file.

    Each method returns a freshly-built dict (or list) pre-filled with the
    placeholder values from the COCO reference examples; callers overwrite
    the fields they actually need before serializing.
    """

    def info(self):
        # Dataset-level metadata section.
        return {
            "version": "1.0",
            "year": 2020,
            "contributor": "Mr.yang",
            "date_created": "2018/08/21",
            "github": "https://github.com/bleakie",
        }

    def licenses(self):
        # A single CC BY-NC-SA license entry.
        license_entry = {
            "url": "http://creativecommons.org/licenses/by-nc-sa/2.0/",
            "name": "Attribution-NonCommercial-ShareAlike License",
            "id": 1,
        }
        return [license_entry]

    def image(self):
        # Per-image record; file_name / height / width / id are replaced
        # later with the real image's values.
        return {
            "license": 4,
            "file_name": "000000397133.jpg",
            "coco_url": "http://images.cocodataset.org/val2017/000000397133.jpg",
            "height": 427,
            "width": 640,
            "date_captured": "2013-11-14 17:02:52",
            "flickr_url": "http://farm7.staticflickr.com/6116/6255196340_da26cf2c9e_z.jpg",
            "id": 397133,
        }

    def annotation(self):
        # Per-face record.  keypoints holds five (x, y, v) triplets where
        # v=0 means not labelled (x=y=0), v=1 labelled but not visible,
        # v=2 labelled and visible.
        zero = 0.
        return {
            "segmentation": [[zero] * 8],  # bounding polygon (4 x/y pairs)
            "num_keypoints": 5,
            "keypoints": [0] * 15,
            "area": 0.,
            "iscrowd": 0,
            "image_id": 397133,           # id of the owning image
            "bbox": [0., 0., 0., 0.],     # [x, y, w, h]
            "category_id": 1,
            "id": 82445,                  # unique per-annotation id
        }

    def categorie(self):
        # Single "face" category with its five named keypoints.
        return {
            "supercategory": "face",
            "id": 1,  # category id (0 is reserved for background)
            "name": "face",
            "keypoints": ["left_eye", "right_eye", "nose", "left_mouth", "right_mouth"],
            # "skeleton": [[1, 3], [2, 3], [3, 4], [3, 5]]
        }
class Keypoints2COCO(COCO):
    """Convert a landmark txt file (the output of get_landmark) into a
    COCO-format keypoint-annotation json.

    The input file alternates: an image path line ending in ".jpg", a line
    holding the number of faces, then that many face lines of
    "x y w h <5x (kx ky v)> confidence".
    """

    def __init__(self, label_path, save_json_path, images_path):
        # Landmark label file produced by get_landmark(); kept open for the
        # duration of the conversion.
        self.label = open(label_path, )
        self.save_json_path = save_json_path  # final json file to write
        self.images_path = images_path  # directory holding the original images
        self.images = []       # accumulated COCO "images" entries
        self.annotations = []  # accumulated COCO "annotations" entries
        # self.label = []
        self.annID = 1   # running unique id for each face annotation
        self.height = 0
        self.width = 0
        self.num = 1     # running unique id for each image
        self.keypoints = ["left_eye", "right_eye",
                          "nose", "left_mouth", "right_mouth"]
        self.num_keypoints = 5

    def __call__(self):
        """Parse the label file and dump the COCO json to save_json_path."""
        while (True):
            # NOTE(review): [:-1] assumes every line ends in '\n'; a final
            # line without a trailing newline would lose its last character.
            img_path = self.label.readline()[:-1]
            if not img_path:
                break
            if img_path.endswith('.jpg'):
                img_full_path = os.path.join(self.images_path, img_path)
                if not os.path.exists(img_full_path):
                    # print("img not exist", img_full_path)
                    # Skipping here leaves this image's count/face lines to be
                    # read (and ignored, since they don't end in .jpg) by the
                    # following loop iterations.
                    continue
                # print("img_full_path", img_full_path)
                # init image
                image = self.image()
                image["file_name"] = img_path
                image["id"] = self.num
                img = cv2.imread(img_full_path)
                if img is None:
                    continue
                image["height"] = img.shape[0]
                image["width"] = img.shape[1]
                line = self.label.readline()[:-1]
                if not line:
                    break
                facenum = (int)(line)  # number of face lines that follow
                # print("facenum", facenum)
                # init annotation
                annotation = self.annotation()
                for _ in range(facenum):
                    line = [float(x)
                            for x in self.label.readline().strip().split()]
                    # print("***", line)
                    bbox = list(line[:4])
                    if len(line) > 4:
                        # Indices 6, 9, 12, 15, 18 hold the visibility flag of
                        # the five keypoints; cast them to int.
                        line[6], line[9], line[12], line[15], line[18] = int(line[6]), int(
                            line[9]), int(line[12]), int(line[15]), int(line[18])
                        index = [line[6], line[9],
                                 line[12], line[15], line[18]]
                        # NOTE(review): len(np.minimum(index, 1)) is always 5
                        # for a 5-element list — this looks like it was meant
                        # to COUNT labelled keypoints (e.g. the sum of
                        # min(flag, 1)); confirm the intended behavior.
                        self.num_keypoints = len(np.minimum(index, 1))
                        # (x, y, v) triplets, trailing confidence dropped;
                        # flags were already normalized by get_landmark.
                        annotation['keypoints'] = line[4:-1]
                    annotation['num_keypoints'] = self.num_keypoints
                    annotation["image_id"] = self.num
                    annotation["id"] = self.annID
                    annotation["bbox"] = bbox
                    annotation['area'] = bbox[2]*bbox[3]
                    # Segmentation stored as the two opposite bbox corners.
                    annotation['segmentation'] = [
                        bbox[0], bbox[1], bbox[0]+bbox[2], bbox[1]+bbox[3]]
                    self.annotations.append(annotation)
                    self.annID += 1  # advance the per-face id
                    annotation = self.annotation()  # fresh record for the next face
                self.num += 1  # advance the per-image id
                self.images.append(image)
        jsdata = {"info": self.info(), "licenses": self.licenses(), "images": self.images,
                  "annotations": self.annotations, "categories": [self.categorie()]}
        # default=float is required on python3, otherwise numpy scalar types
        # fail to serialize.
        json.dump(jsdata, open(self.save_json_path, 'w'), indent=4,
                  default=float)
# Command-line options are parsed at import time so that `opt` is available
# to the __main__ block below; importing this module therefore consumes
# sys.argv.  NOTE(review): consider moving the parsing under the __main__
# guard so the module can be imported without side effects.
parser = argparse.ArgumentParser()
# basic experiment setting
parser.add_argument('--mode', default='train',
                    help='Please input train or val')
opt = parser.parse_args()
if __name__ == "__main__":
ROOT_PATH = './'
label_type = opt.mode
img_path = os.path.join(ROOT_PATH, 'images', f'{label_type}')
txt_path = os.path.join(ROOT_PATH, 'annotations', f'{label_type}/{label_type}.txt')
save_landmark_path = txt_path[:txt_path.rfind('/')]
save_landmark_path = os.path.join(
save_landmark_path, f'landmark_{label_type}.txt')
get_landmark(txt_path, save_landmark_path) # 处理关键点标注数据
save_label_path = os.path.join(ROOT_PATH, 'labels')
if not os.path.exists(save_label_path):
os.makedirs(save_label_path)
save_label_path = os.path.join(
save_label_path, f'./{label_type}_face.json')
Keypoints2COCO(save_landmark_path, save_label_path, img_path)()
print("dealing labels end.")
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment