Commit ea13d43e authored by lijian6

First commit


Signed-off-by: lijian <lijian6@sugon.com>
# This file is used to ignore files which are generated
# ----------------------------------------------------------------------------
build/*
depend/*
.vscode/*
core.*
# qtcreator generated files
*.pro.user*
# VS
*.sdf
*.opensdf
*.ilk
*.pdb
*.exp
# Compiled Object files
*.slo
*.lo
*.o
*.obj
# Precompiled Headers
*.gch
*.pch
# Compiled Dynamic libraries
*.dylib
#*.so
# Fortran module files
*.mod
*.smod
# Compiled Static libraries
*.lai
*.la
*.a
# Executables
*.exe
*.out
*.app
# xemacs temporary files
*.flc
# Vim temporary files
.*.swp
# g++ main.cpp -std=c++17 -I/opt/dtk/include -I/usr/local/include -L/opt/dtk/lib -L/usr/local/lib -lopencv_core -lopencv_imgcodecs -lopencv_dnn -lmigraphx -lmigraphx_device -lmigraphx_gpu -lmigraphx_onnx -o ViT_MIGraphX
#
CC = g++
CFLAGS = -std=c++17
INC_P = -I/opt/dtk/include -I/usr/local/include
LIB_P = -L/opt/dtk/lib -L/usr/local/lib
LDLIBS = -lopencv_core -lopencv_imgcodecs -lopencv_dnn -lmigraphx -lmigraphx_device -lmigraphx_gpu -lmigraphx_onnx
SRC_F = src/main.cpp
EXEC = ViT_MIGraphX
.PHONY: all
all: $(EXEC)
$(EXEC): $(SRC_F)
	$(CC) $< $(CFLAGS) $(INC_P) $(LIB_P) $(LDLIBS) -o $@
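# Optional convenience target (assumption: only the binary needs cleaning up)
.PHONY: clean
clean:
	rm -f $(EXEC)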
<!--
* @Author: lijian6
* @email: lijian6@sugon.com
* @Date: 2023-06-06
* @LastEditTime: 2023-06-06
* @FilePath: \lpr\README.md
-->
# Vision Transformer (ViT)
## Model Introduction
ViT applies the Transformer architecture to computer vision as an alternative to traditional convolutional neural network (CNN) models.
## Model Structure
The Vision Transformer consists of three main parts: the patch embedding, the Transformer encoder, and the MLP head. ViT splits the input image into patches, projects each patch into a fixed-length vector, and feeds the resulting sequence to the Transformer; the encoder is identical to the one in the original Transformer. Because the task is image classification, a special class token is added to the input sequence, and the output corresponding to that token serves as the final class prediction.
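The forward pass can be summarized with a minimal PyTorch sketch. This is illustrative only: PyTorch's stock `nn.TransformerEncoder` stands in for the pre-norm `Block` implemented in `vit_model.py` below, and the shapes are those of ViT-B/16.

```python
import torch
import torch.nn as nn

# ViT-B/16 shapes: a 224x224 image gives 14*14 = 196 patches of size 16x16
patch_embed = nn.Conv2d(3, 768, kernel_size=16, stride=16)  # patch embedding as a strided conv
cls_token = nn.Parameter(torch.zeros(1, 1, 768))            # learnable class token
pos_embed = nn.Parameter(torch.zeros(1, 197, 768))          # position embedding for 196 patches + class token
encoder_layer = nn.TransformerEncoderLayer(d_model=768, nhead=12, batch_first=True)
encoder = nn.TransformerEncoder(encoder_layer, num_layers=12)
head = nn.Linear(768, 5)                                    # MLP head (5 flower classes in this project)

x = torch.rand(1, 3, 224, 224)
x = patch_embed(x).flatten(2).transpose(1, 2)               # [1, 196, 768]
x = torch.cat((cls_token.expand(x.shape[0], -1, -1), x), dim=1) + pos_embed
x = encoder(x)                                              # [1, 197, 768]
logits = head(x[:, 0])                                      # classify from the class-token output
```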
## Dataset
The model is pretrained on the ImageNet dataset and then fine-tuned on [flower_photos](https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz).
## Training
### Environment Setup
Docker images for training and inference can be pulled from [光源](https://www.sourcefind.cn/#/service-details). The recommended image is:
* Training image: docker pull image.sourcefind.cn:5000/dcu/admin/base/pytorch:1.10.0-centos7.6-dtk-22.10.1-py37-latest
MIGraphX can be downloaded from the [光合开发者社区](https://cancon.hpccube.com:65024/4/main/).
### Fine-tuning
The training entry point is train.py. The pretrained model is [base_patch16_224_in21k](https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_base_patch16_224_in21k-e5005f0a.pth); download it first. Start fine-tuning with:
```
python train.py
```
The number of training epochs can be adjusted with the `--epochs` argument during fine-tuning.
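For example, to fine-tune with explicit settings (all flags below are defined in train.py's argument parser):
```
python train.py --data-path ./flower_photos --weights ./weights/jx_vit_base_patch16_224_in21k-e5005f0a.pth --epochs 20 --batch-size 8 --lr 0.001
```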
### Pretrained Models
The weights folder provides a pretrained model along with the corresponding fine-tuned model and ONNX model.
### Inference
Run inference on the trained model with infer_pytroch.py and infer_migraphx.py:
```
python infer_pytroch.py ./flower_photos/daisy/
python infer_migraphx.py --imgpath=./flower_photos/daisy/
```
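infer_migraphx.py also accepts a `--model` flag (default `models/model.onnx`) to select the ONNX file:
```
python infer_migraphx.py --model=models/model.onnx --imgpath=./flower_photos/daisy/
```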
## Usage Overview
1. Download the dataset. The code defaults to the flower classification dataset, available at [https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz](https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz)
2. In the `train.py` script, set `--data-path` to the absolute path of the extracted `flower_photos` folder.
3. Download the pretrained weights. `vit_model.py` lists a download link for each model; download the weights matching the model you use.
4. In the `train.py` script, set the `--weights` argument to the path of the downloaded pretrained weights.
5. Once the dataset path `--data-path` and the pretrained weights path `--weights` are set, training can be started with `train.py` (a `class_indices.json` file is generated automatically during training).
6. In the `predict.py` script, import the same model as in the training script and set `model_weight_path` to the path of the trained weights (saved under the weights folder by default).
7. In the `predict.py` script, set `img_path` to the absolute path of the image you want to predict.
8. Once the weights path `model_weight_path` and the image path `img_path` are set, predictions can be run with `predict.py`.
9. To use your own dataset, arrange it with the same file structure as the flower classification dataset (one folder per class, as shown below) and set `num_classes` in the training and prediction scripts to the number of classes in your data.
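For reference, the flower dataset unpacks into one folder per class:
```
flower_photos/
├── daisy/
├── dandelion/
├── roses/
├── sunflowers/
└── tulips/
```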
import torch
from fvcore.nn import FlopCountAnalysis
from vit_model import Attention
def main():
# Self-Attention
a1 = Attention(dim=512, num_heads=1)
a1.proj = torch.nn.Identity() # remove Wo
# Multi-Head Attention
a2 = Attention(dim=512, num_heads=8)
# [batch_size, num_tokens, total_embed_dim]
t = (torch.rand(32, 1024, 512),)
flops1 = FlopCountAnalysis(a1, t)
print("Self-Attention FLOPs:", flops1.total())
flops2 = FlopCountAnalysis(a2, t)
print("Multi-Head Attention FLOPs:", flops2.total())
if __name__ == '__main__':
main()
import json
import cv2
import numpy as np
import migraphx
import argparse
import os
img_count = 0
match_cnt = 0
def Vit_Preprocess(image):
img = cv2.imread(image)
# blobFromImage already computes (pixel - 127.5) / 127.5, mapping pixels to [-1, 1]
# and matching the PyTorch ToTensor + Normalize([0.5]*3, [0.5]*3) pipeline
img = cv2.dnn.blobFromImage(img, scalefactor=1/127.5, size=(224,224), mean=[127.5, 127.5, 127.5], swapRB=True, crop=True, ddepth=cv2.CV_32F)
return img
def Vit_Postprocess(infer_res, class_indict, imgpath):
global img_count
global match_cnt
# softmax over the logits
infer_res_exp = np.exp(infer_res)
predict = infer_res_exp / infer_res_exp.sum(axis=1)
for i in range(len(predict[0])):
if predict[0][i] >= 0.5:
print("class: {:10} prob: {:.3}".format(class_indict[str(i)], predict[0][i]))
# str.find returns -1 when the substring is absent, so compare explicitly
if imgpath.find(class_indict[str(i)]) != -1:
match_cnt += 1
def Vit_Inference(args):
model = migraphx.parse_onnx(args.model)
model.compile(t=migraphx.get_target("gpu"), device_id=0)
json_path = 'models/class_indices.json'
assert os.path.exists(json_path), "file: '{}' does not exist.".format(json_path)
with open(json_path, "r") as f:
class_indict = json.load(f)
global img_count
global match_cnt
if os.path.isdir(args.imgpath):
images = os.listdir(args.imgpath)
for image in images:
img_count += 1
img = Vit_Preprocess(os.path.join(args.imgpath, image))
inputName = model.get_parameter_names()[0]
inputShape = model.get_parameter_shapes()[inputName].lens()
results = model.run({inputName: migraphx.argument(img)})
Vit_Postprocess(np.array(results[0]), class_indict, args.imgpath)
print("Img_cnt: {:<5} match_cnt: {:<5} acc:{:.3}".format(img_count, match_cnt, match_cnt/img_count))
else:
img = Vit_Preprocess(args.imgpath)
inputName=model.get_parameter_names()[0]
inputShape=model.get_parameter_shapes()[inputName].lens()
results = model.run({inputName: migraphx.argument(img)})
Vit_Postprocess(np.array(results[0]), class_indict, args.imgpath)
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='parameters to validate net')
parser.add_argument('--model', default='models/model.onnx', help='model path to inference')
parser.add_argument('--imgpath', default='', help='the image path')
args = parser.parse_args()
Vit_Inference(args)
from sys import argv
import os
import json
import torch
from PIL import Image
from torchvision import transforms
from vit_model import vit_base_patch16_224_in21k as create_model
def main(intputdir):
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
data_transform = transforms.Compose(
[transforms.Resize(256),
transforms.CenterCrop(224),
transforms.ToTensor(),
transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])])
# read class_indict
json_path = './models/class_indices.json'
assert os.path.exists(json_path), "file: '{}' does not exist.".format(json_path)
with open(json_path, "r") as f:
class_indict = json.load(f)
# create model
model = create_model(num_classes=5, has_logits=False).to(device)
# load model weights
model_weight_path = "./models/model.pth"
model.load_state_dict(torch.load(model_weight_path, map_location=device))
# load image
Img_cnt = 0
match_cnt = 0
for filename in os.listdir(intputdir):
Img_cnt += 1
img_path = os.path.join(intputdir, filename)
assert os.path.exists(img_path), "file: '{}' does not exist.".format(img_path)
img = Image.open(img_path)
# [N, C, H, W]
img = data_transform(img)
# expand batch dimension
img = torch.unsqueeze(img, dim=0)
model.eval()
with torch.no_grad():
# predict class
output = torch.squeeze(model(img.to(device))).cpu()
predict = torch.softmax(output, dim=0)
for i in range(len(predict)):
if predict[i].numpy() >= 0.5:
print("class: {:10} prob: {:.3}".format(class_indict[str(i)], predict[i].numpy()))
if intputdir.find(class_indict[str(i)]) != -1:
match_cnt += 1
print("Img_cnt: {:<5} match_cnt: {:<5} acc:{:.3}".format(Img_cnt, match_cnt, match_cnt/Img_cnt))
if __name__ == '__main__':
main(argv[1])
from sys import argv
import os
import json
import numpy as np
import migraphx
import torch
from PIL import Image
from torchvision import transforms
from vit_model import vit_base_patch16_224_in21k as create_model
def main(intputdir):
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
data_transform = transforms.Compose(
[transforms.Resize(256),
transforms.CenterCrop(224),
transforms.ToTensor(),
transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])])
# read class_indict
json_path = './models/class_indices.json'
assert os.path.exists(json_path), "file: '{}' does not exist.".format(json_path)
with open(json_path, "r") as f:
class_indict = json.load(f)
# create pth model
model = create_model(num_classes=5, has_logits=False).to(device)
# load model weights
model_weight_path = "./models/model.pth"
model.load_state_dict(torch.load(model_weight_path, map_location=device))
# create onnx model
model1 = migraphx.parse_onnx('./models/model.onnx')
model1.compile(t=migraphx.get_target("gpu"), device_id=1)
# load image
Img_cnt = 0
pytorch_match_cnt = 0
migraphx_match_cnt = 0
for filename in os.listdir(intputdir):
Img_cnt += 1
img_path = os.path.join(intputdir, filename)
assert os.path.exists(img_path), "file: '{}' does not exist.".format(img_path)
img = Image.open(img_path)
# [N, C, H, W]
img = data_transform(img)
# expand batch dimension
img = torch.unsqueeze(img, dim=0)
# pytorch eval
model.eval()
# migraphx run
img1 = img.numpy()
inputName = model1.get_parameter_names()[0]
inputShape = model1.get_parameter_shapes()[inputName].lens()
results = model1.run({inputName: migraphx.argument(img1)})
# migraphx postprocess
infer_res_exp = np.exp(np.array(results[0]))
predict = infer_res_exp / infer_res_exp.sum(axis=1)
for i in range(len(predict[0])):
if predict[0][i] >= 0.5:
print("migraphx class: {:10} prob: {:.3}".format(class_indict[str(i)], predict[0][i]))
if intputdir.find(class_indict[str(i)]) != -1:
migraphx_match_cnt += 1
# pytorch postprocess
with torch.no_grad():
# predict class
output = torch.squeeze(model(img.to(device))).cpu()
predict = torch.softmax(output, dim=0)
for i in range(len(predict)):
if predict[i].numpy() >= 0.5:
print("pytorch class: {:10} prob: {:.3}".format(class_indict[str(i)], predict[i].numpy()))
if intputdir.find(class_indict[str(i)]) != -1:
pytorch_match_cnt += 1
print("Pythorch Img_cnt: {:<5} match_cnt: {:<5} acc:{:.3}".format(Img_cnt, pytorch_match_cnt, pytorch_match_cnt/Img_cnt))
print("Migraphx Img_cnt: {:<5} match_cnt: {:<5} acc:{:.3}".format(Img_cnt, migraphx_match_cnt, migraphx_match_cnt/Img_cnt))
if __name__ == '__main__':
main(argv[1])
from PIL import Image
import torch
from torch.utils.data import Dataset
class MyDataSet(Dataset):
"""自定义数据集"""
def __init__(self, images_path: list, images_class: list, transform=None):
self.images_path = images_path
self.images_class = images_class
self.transform = transform
def __len__(self):
return len(self.images_path)
def __getitem__(self, item):
img = Image.open(self.images_path[item])
# 'RGB' means a color image and 'L' a grayscale image
if img.mode != 'RGB':
raise ValueError("image: {} isn't RGB mode.".format(self.images_path[item]))
label = self.images_class[item]
if self.transform is not None:
img = self.transform(img)
return img, label
@staticmethod
def collate_fn(batch):
# For the official default_collate implementation, see
# https://github.com/pytorch/pytorch/blob/67b7e751e6b5931a9f45274653f4f653a4e6cdf6/torch/utils/data/_utils/collate.py
images, labels = tuple(zip(*batch))
images = torch.stack(images, dim=0)
labels = torch.as_tensor(labels)
return images, labels
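# Minimal usage sketch: build a one-image dataset and batch it the same way
# train.py does (it passes collate_fn=MyDataSet.collate_fn to its DataLoaders).
# Assumes the flower_photos dataset has been extracted next to this script.
if __name__ == '__main__':
    from torchvision import transforms
    from torch.utils.data import DataLoader

    dataset = MyDataSet(images_path=["flower_photos/roses/10090824183_d02c613f10_m.jpg"],
                        images_class=[0],
                        transform=transforms.Compose([transforms.Resize(256),
                                                      transforms.CenterCrop(224),
                                                      transforms.ToTensor()]))
    loader = DataLoader(dataset, batch_size=1, collate_fn=MyDataSet.collate_fn)
    images, labels = next(iter(loader))
    print(images.shape, labels)  # torch.Size([1, 3, 224, 224]) tensor([0])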
import os
import json
import torch
from PIL import Image
from torchvision import transforms
import matplotlib.pyplot as plt
from vit_model import vit_base_patch16_224_in21k as create_model
def main():
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
data_transform = transforms.Compose(
[transforms.Resize(256),
transforms.CenterCrop(224),
transforms.ToTensor(),
transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])])
# load image
img_path = "flower_photos/roses/10090824183_d02c613f10_m.jpg"
assert os.path.exists(img_path), "file: '{}' does not exist.".format(img_path)
img = Image.open(img_path)
plt.imshow(img)
# [N, C, H, W]
img = data_transform(img)
# expand batch dimension
img = torch.unsqueeze(img, dim=0)
# read class_indict
json_path = './class_indices.json'
assert os.path.exists(json_path), "file: '{}' does not exist.".format(json_path)
with open(json_path, "r") as f:
class_indict = json.load(f)
# create model
model = create_model(num_classes=5, has_logits=False).to(device)
# load model weights
model_weight_path = "./weights/model.pth"
model.load_state_dict(torch.load(model_weight_path, map_location=device))
model.eval()
torch.onnx.export(model, img.to(device), "./weights/model.onnx", opset_version=12, input_names=['input'], output_names=['output'])
with torch.no_grad():
# predict class
output = torch.squeeze(model(img.to(device))).cpu()
predict = torch.softmax(output, dim=0)
predict_cla = torch.argmax(predict).numpy()
print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_cla)], predict[predict_cla].numpy())
plt.title(print_res)
for i in range(len(predict)):
print("class: {:10} prob: {:.3}".format(class_indict[str(i)], predict[i].numpy()))
plt.show()
if __name__ == '__main__':
main()
import os
import math
import argparse
import torch
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms
from my_dataset import MyDataSet
from vit_model import vit_base_patch16_224_in21k as create_model
from utils import read_split_data, train_one_epoch, evaluate
def main(args):
device = torch.device(args.device if torch.cuda.is_available() else "cpu")
if os.path.exists("./weights") is False:
os.makedirs("./weights")
tb_writer = SummaryWriter()
train_images_path, train_images_label, val_images_path, val_images_label = read_split_data(args.data_path)
data_transform = {
"train": transforms.Compose([transforms.RandomResizedCrop(224),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]),
"val": transforms.Compose([transforms.Resize(256),
transforms.CenterCrop(224),
transforms.ToTensor(),
transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])])}
# instantiate the training dataset
train_dataset = MyDataSet(images_path=train_images_path,
images_class=train_images_label,
transform=data_transform["train"])
# instantiate the validation dataset
val_dataset = MyDataSet(images_path=val_images_path,
images_class=val_images_label,
transform=data_transform["val"])
batch_size = args.batch_size
nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8]) # number of workers
print('Using {} dataloader workers per process'.format(nw))
train_loader = torch.utils.data.DataLoader(train_dataset,
batch_size=batch_size,
shuffle=True,
pin_memory=True,
num_workers=nw,
collate_fn=train_dataset.collate_fn)
val_loader = torch.utils.data.DataLoader(val_dataset,
batch_size=batch_size,
shuffle=False,
pin_memory=True,
num_workers=nw,
collate_fn=val_dataset.collate_fn)
model = create_model(num_classes=args.num_classes, has_logits=False).to(device)
if args.weights != "":
assert os.path.exists(args.weights), "weights file: '{}' does not exist.".format(args.weights)
weights_dict = torch.load(args.weights, map_location=device)
# remove weights that are not needed
del_keys = ['head.weight', 'head.bias'] if model.has_logits \
else ['pre_logits.fc.weight', 'pre_logits.fc.bias', 'head.weight', 'head.bias']
for k in del_keys:
del weights_dict[k]
print(model.load_state_dict(weights_dict, strict=False))
if args.freeze_layers:
for name, para in model.named_parameters():
# freeze all weights except head and pre_logits
if "head" not in name and "pre_logits" not in name:
para.requires_grad_(False)
else:
print("training {}".format(name))
pg = [p for p in model.parameters() if p.requires_grad]
optimizer = optim.SGD(pg, lr=args.lr, momentum=0.9, weight_decay=5E-5)
# Scheduler https://arxiv.org/pdf/1812.01187.pdf
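# lf(0) = 1.0 and lf(args.epochs) = args.lrf, so the learning rate decays cosinely from args.lr down to args.lr * args.lrf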
lf = lambda x: ((1 + math.cos(x * math.pi / args.epochs)) / 2) * (1 - args.lrf) + args.lrf # cosine
scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
for epoch in range(args.epochs):
# train
train_loss, train_acc = train_one_epoch(model=model,
optimizer=optimizer,
data_loader=train_loader,
device=device,
epoch=epoch)
scheduler.step()
# validate
val_loss, val_acc = evaluate(model=model,
data_loader=val_loader,
device=device,
epoch=epoch)
tags = ["train_loss", "train_acc", "val_loss", "val_acc", "learning_rate"]
tb_writer.add_scalar(tags[0], train_loss, epoch)
tb_writer.add_scalar(tags[1], train_acc, epoch)
tb_writer.add_scalar(tags[2], val_loss, epoch)
tb_writer.add_scalar(tags[3], val_acc, epoch)
tb_writer.add_scalar(tags[4], optimizer.param_groups[0]["lr"], epoch)
torch.save(model.state_dict(), "./weights/model-{}.pth".format(epoch))
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--num_classes', type=int, default=5)
parser.add_argument('--epochs', type=int, default=20)
parser.add_argument('--batch-size', type=int, default=8)
parser.add_argument('--lr', type=float, default=0.001)
parser.add_argument('--lrf', type=float, default=0.01)
# dataset root directory
# https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz
parser.add_argument('--data-path', type=str, default="./flower_photos")
parser.add_argument('--model-name', default='', help='create model name')
# path to the pretrained weights; set to an empty string to skip loading them
parser.add_argument('--weights', type=str, default='./weights/jx_vit_base_patch16_224_in21k-e5005f0a.pth',
help='initial weights path')
# whether to freeze weights (note: with type=bool, argparse treats any non-empty string as True)
parser.add_argument('--freeze-layers', type=bool, default=True)
parser.add_argument('--device', default='cuda:0', help='device id (i.e. 0 or 0,1 or cpu)')
opt = parser.parse_args()
main(opt)
import os
import sys
import json
import pickle
import random
import torch
from tqdm import tqdm
import matplotlib.pyplot as plt
def read_split_data(root: str, val_rate: float = 0.2):
random.seed(0)  # make the random split reproducible
assert os.path.exists(root), "dataset root: {} does not exist.".format(root)
# traverse the folders; each folder corresponds to one class
flower_class = [cla for cla in os.listdir(root) if os.path.isdir(os.path.join(root, cla))]
# sort to keep ordering consistent across platforms
flower_class.sort()
# build the class-name -> numeric-index mapping
class_indices = dict((k, v) for v, k in enumerate(flower_class))
json_str = json.dumps(dict((val, key) for key, val in class_indices.items()), indent=4)
with open('class_indices.json', 'w') as json_file:
json_file.write(json_str)
train_images_path = []  # paths of all training images
train_images_label = []  # class indices of the training images
val_images_path = []  # paths of all validation images
val_images_label = []  # class indices of the validation images
every_class_num = []  # number of samples per class
supported = [".jpg", ".JPG", ".png", ".PNG"]  # supported file extensions
# traverse the files in each class folder
for cla in flower_class:
cla_path = os.path.join(root, cla)
# collect all file paths with a supported extension
images = [os.path.join(root, cla, i) for i in os.listdir(cla_path)
if os.path.splitext(i)[-1] in supported]
# sort to keep ordering consistent across platforms
images.sort()
# numeric index of this class
image_class = class_indices[cla]
# record the number of samples in this class
every_class_num.append(len(images))
# randomly sample validation images at the given ratio
val_path = random.sample(images, k=int(len(images) * val_rate))
for img_path in images:
if img_path in val_path:  # if sampled for validation, store in the validation set
val_images_path.append(img_path)
val_images_label.append(image_class)
else:  # otherwise store in the training set
train_images_path.append(img_path)
train_images_label.append(image_class)
print("{} images were found in the dataset.".format(sum(every_class_num)))
print("{} images for training.".format(len(train_images_path)))
print("{} images for validation.".format(len(val_images_path)))
assert len(train_images_path) > 0, "number of training images must be greater than 0."
assert len(val_images_path) > 0, "number of validation images must be greater than 0."
plot_image = False
if plot_image:
# plot a bar chart of the sample count per class
plt.bar(range(len(flower_class)), every_class_num, align='center')
# replace the x ticks 0,1,2,3,4 with the class names
plt.xticks(range(len(flower_class)), flower_class)
# add value labels on top of the bars
for i, v in enumerate(every_class_num):
plt.text(x=i, y=v + 5, s=str(v), ha='center')
# x-axis label
plt.xlabel('image class')
# y-axis label
plt.ylabel('number of images')
# chart title
plt.title('flower class distribution')
plt.show()
return train_images_path, train_images_label, val_images_path, val_images_label
def plot_data_loader_image(data_loader):
batch_size = data_loader.batch_size
plot_num = min(batch_size, 4)
json_path = './class_indices.json'
assert os.path.exists(json_path), json_path + " does not exist."
with open(json_path, 'r') as json_file:
class_indices = json.load(json_file)
for data in data_loader:
images, labels = data
for i in range(plot_num):
# [C, H, W] -> [H, W, C]
img = images[i].numpy().transpose(1, 2, 0)
# undo the Normalize transform (this repo normalizes with mean 0.5 and std 0.5 per channel)
img = (img * [0.5, 0.5, 0.5] + [0.5, 0.5, 0.5]) * 255
label = labels[i].item()
plt.subplot(1, plot_num, i+1)
plt.xlabel(class_indices[str(label)])
plt.xticks([])  # remove x-axis ticks
plt.yticks([])  # remove y-axis ticks
plt.imshow(img.astype('uint8'))
plt.show()
def write_pickle(list_info: list, file_name: str):
with open(file_name, 'wb') as f:
pickle.dump(list_info, f)
def read_pickle(file_name: str) -> list:
with open(file_name, 'rb') as f:
info_list = pickle.load(f)
return info_list
def train_one_epoch(model, optimizer, data_loader, device, epoch):
model.train()
loss_function = torch.nn.CrossEntropyLoss()
accu_loss = torch.zeros(1).to(device)  # accumulated loss
accu_num = torch.zeros(1).to(device)  # accumulated number of correct predictions
optimizer.zero_grad()
sample_num = 0
data_loader = tqdm(data_loader, file=sys.stdout)
for step, data in enumerate(data_loader):
images, labels = data
sample_num += images.shape[0]
pred = model(images.to(device))
pred_classes = torch.max(pred, dim=1)[1]
accu_num += torch.eq(pred_classes, labels.to(device)).sum()
loss = loss_function(pred, labels.to(device))
loss.backward()
accu_loss += loss.detach()
data_loader.desc = "[train epoch {}] loss: {:.3f}, acc: {:.3f}".format(epoch,
accu_loss.item() / (step + 1),
accu_num.item() / sample_num)
if not torch.isfinite(loss):
print('WARNING: non-finite loss, ending training ', loss)
sys.exit(1)
optimizer.step()
optimizer.zero_grad()
return accu_loss.item() / (step + 1), accu_num.item() / sample_num
@torch.no_grad()
def evaluate(model, data_loader, device, epoch):
loss_function = torch.nn.CrossEntropyLoss()
model.eval()
accu_num = torch.zeros(1).to(device)  # accumulated number of correct predictions
accu_loss = torch.zeros(1).to(device)  # accumulated loss
sample_num = 0
data_loader = tqdm(data_loader, file=sys.stdout)
for step, data in enumerate(data_loader):
images, labels = data
sample_num += images.shape[0]
pred = model(images.to(device))
pred_classes = torch.max(pred, dim=1)[1]
accu_num += torch.eq(pred_classes, labels.to(device)).sum()
loss = loss_function(pred, labels.to(device))
accu_loss += loss
data_loader.desc = "[valid epoch {}] loss: {:.3f}, acc: {:.3f}".format(epoch,
accu_loss.item() / (step + 1),
accu_num.item() / sample_num)
return accu_loss.item() / (step + 1), accu_num.item() / sample_num
"""
original code from rwightman:
https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py
"""
from functools import partial
from collections import OrderedDict
import torch
import torch.nn as nn
def drop_path(x, drop_prob: float = 0., training: bool = False):
"""
Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
This is the same as the DropConnect impl I created for EfficientNet, etc networks, however,
the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for
changing the layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use
'survival rate' as the argument.
"""
if drop_prob == 0. or not training:
return x
keep_prob = 1 - drop_prob
shape = (x.shape[0],) + (1,) * (x.ndim - 1) # work with diff dim tensors, not just 2D ConvNets
random_tensor = keep_prob + torch.rand(shape, dtype=x.dtype, device=x.device)
random_tensor.floor_() # binarize
output = x.div(keep_prob) * random_tensor
return output
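# Example: with drop_prob = 0.1, each sample's residual branch is zeroed with
# probability 0.1 and scaled by 1 / 0.9 otherwise, leaving the expected output unchanged.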
class DropPath(nn.Module):
"""
Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
"""
def __init__(self, drop_prob=None):
super(DropPath, self).__init__()
self.drop_prob = drop_prob
def forward(self, x):
return drop_path(x, self.drop_prob, self.training)
class PatchEmbed(nn.Module):
"""
2D Image to Patch Embedding
"""
def __init__(self, img_size=224, patch_size=16, in_c=3, embed_dim=768, norm_layer=None):
super().__init__()
img_size = (img_size, img_size)
patch_size = (patch_size, patch_size)
self.img_size = img_size
self.patch_size = patch_size
self.grid_size = (img_size[0] // patch_size[0], img_size[1] // patch_size[1])
self.num_patches = self.grid_size[0] * self.grid_size[1]
self.proj = nn.Conv2d(in_c, embed_dim, kernel_size=patch_size, stride=patch_size)
self.norm = norm_layer(embed_dim) if norm_layer else nn.Identity()
def forward(self, x):
B, C, H, W = x.shape
assert H == self.img_size[0] and W == self.img_size[1], \
f"Input image size ({H}*{W}) doesn't match model ({self.img_size[0]}*{self.img_size[1]})."
# flatten: [B, C, H, W] -> [B, C, HW]
# transpose: [B, C, HW] -> [B, HW, C]
x = self.proj(x).flatten(2).transpose(1, 2)
x = self.norm(x)
return x
class Attention(nn.Module):
def __init__(self,
dim,  # dimension of the input tokens
num_heads=8,
qkv_bias=False,
qk_scale=None,
attn_drop_ratio=0.,
proj_drop_ratio=0.):
super(Attention, self).__init__()
self.num_heads = num_heads
head_dim = dim // num_heads
self.scale = qk_scale or head_dim ** -0.5
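# default scale is 1/sqrt(head_dim), the scaled dot-product factor from "Attention Is All You Need"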
self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
self.attn_drop = nn.Dropout(attn_drop_ratio)
self.proj = nn.Linear(dim, dim)
self.proj_drop = nn.Dropout(proj_drop_ratio)
def forward(self, x):
# [batch_size, num_patches + 1, total_embed_dim]
B, N, C = x.shape
# qkv(): -> [batch_size, num_patches + 1, 3 * total_embed_dim]
# reshape: -> [batch_size, num_patches + 1, 3, num_heads, embed_dim_per_head]
# permute: -> [3, batch_size, num_heads, num_patches + 1, embed_dim_per_head]
qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4)
# [batch_size, num_heads, num_patches + 1, embed_dim_per_head]
q, k, v = qkv[0], qkv[1], qkv[2] # make torchscript happy (cannot use tensor as tuple)
# transpose: -> [batch_size, num_heads, embed_dim_per_head, num_patches + 1]
# @: multiply -> [batch_size, num_heads, num_patches + 1, num_patches + 1]
attn = (q @ k.transpose(-2, -1)) * self.scale
attn = attn.softmax(dim=-1)
attn = self.attn_drop(attn)
# @: multiply -> [batch_size, num_heads, num_patches + 1, embed_dim_per_head]
# transpose: -> [batch_size, num_patches + 1, num_heads, embed_dim_per_head]
# reshape: -> [batch_size, num_patches + 1, total_embed_dim]
x = (attn @ v).transpose(1, 2).reshape(B, N, C)
x = self.proj(x)
x = self.proj_drop(x)
return x
class Mlp(nn.Module):
"""
MLP as used in Vision Transformer, MLP-Mixer and related networks
"""
def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.):
super().__init__()
out_features = out_features or in_features
hidden_features = hidden_features or in_features
self.fc1 = nn.Linear(in_features, hidden_features)
self.act = act_layer()
self.fc2 = nn.Linear(hidden_features, out_features)
self.drop = nn.Dropout(drop)
def forward(self, x):
x = self.fc1(x)
x = self.act(x)
x = self.drop(x)
x = self.fc2(x)
x = self.drop(x)
return x
class Block(nn.Module):
def __init__(self,
dim,
num_heads,
mlp_ratio=4.,
qkv_bias=False,
qk_scale=None,
drop_ratio=0.,
attn_drop_ratio=0.,
drop_path_ratio=0.,
act_layer=nn.GELU,
norm_layer=nn.LayerNorm):
super(Block, self).__init__()
self.norm1 = norm_layer(dim)
self.attn = Attention(dim, num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale,
attn_drop_ratio=attn_drop_ratio, proj_drop_ratio=drop_ratio)
# NOTE: drop path for stochastic depth, we shall see if this is better than dropout here
self.drop_path = DropPath(drop_path_ratio) if drop_path_ratio > 0. else nn.Identity()
self.norm2 = norm_layer(dim)
mlp_hidden_dim = int(dim * mlp_ratio)
self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop_ratio)
def forward(self, x):
x = x + self.drop_path(self.attn(self.norm1(x)))
x = x + self.drop_path(self.mlp(self.norm2(x)))
return x
class VisionTransformer(nn.Module):
def __init__(self, img_size=224, patch_size=16, in_c=3, num_classes=1000,
embed_dim=768, depth=12, num_heads=12, mlp_ratio=4.0, qkv_bias=True,
qk_scale=None, representation_size=None, distilled=False, drop_ratio=0.,
attn_drop_ratio=0., drop_path_ratio=0., embed_layer=PatchEmbed, norm_layer=None,
act_layer=None):
"""
Args:
img_size (int, tuple): input image size
patch_size (int, tuple): patch size
in_c (int): number of input channels
num_classes (int): number of classes for classification head
embed_dim (int): embedding dimension
depth (int): depth of transformer
num_heads (int): number of attention heads
mlp_ratio (int): ratio of mlp hidden dim to embedding dim
qkv_bias (bool): enable bias for qkv if True
qk_scale (float): override default qk scale of head_dim ** -0.5 if set
representation_size (Optional[int]): enable and set representation layer (pre-logits) to this value if set
distilled (bool): model includes a distillation token and head as in DeiT models
drop_ratio (float): dropout rate
attn_drop_ratio (float): attention dropout rate
drop_path_ratio (float): stochastic depth rate
embed_layer (nn.Module): patch embedding layer
norm_layer: (nn.Module): normalization layer
"""
super(VisionTransformer, self).__init__()
self.num_classes = num_classes
self.num_features = self.embed_dim = embed_dim # num_features for consistency with other models
self.num_tokens = 2 if distilled else 1
norm_layer = norm_layer or partial(nn.LayerNorm, eps=1e-6)
act_layer = act_layer or nn.GELU
self.patch_embed = embed_layer(img_size=img_size, patch_size=patch_size, in_c=in_c, embed_dim=embed_dim)
num_patches = self.patch_embed.num_patches
self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
self.dist_token = nn.Parameter(torch.zeros(1, 1, embed_dim)) if distilled else None
self.pos_embed = nn.Parameter(torch.zeros(1, num_patches + self.num_tokens, embed_dim))
self.pos_drop = nn.Dropout(p=drop_ratio)
dpr = [x.item() for x in torch.linspace(0, drop_path_ratio, depth)] # stochastic depth decay rule
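# e.g. drop_path_ratio=0.1 with depth=12 gives dpr = [0.0, 0.009, ..., 0.1]: deeper blocks are dropped more often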
self.blocks = nn.Sequential(*[
Block(dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale,
drop_ratio=drop_ratio, attn_drop_ratio=attn_drop_ratio, drop_path_ratio=dpr[i],
norm_layer=norm_layer, act_layer=act_layer)
for i in range(depth)
])
self.norm = norm_layer(embed_dim)
# Representation layer
if representation_size and not distilled:
self.has_logits = True
self.num_features = representation_size
self.pre_logits = nn.Sequential(OrderedDict([
("fc", nn.Linear(embed_dim, representation_size)),
("act", nn.Tanh())
]))
else:
self.has_logits = False
self.pre_logits = nn.Identity()
# Classifier head(s)
self.head = nn.Linear(self.num_features, num_classes) if num_classes > 0 else nn.Identity()
self.head_dist = None
if distilled:
self.head_dist = nn.Linear(self.embed_dim, self.num_classes) if num_classes > 0 else nn.Identity()
# Weight init
nn.init.trunc_normal_(self.pos_embed, std=0.02)
if self.dist_token is not None:
nn.init.trunc_normal_(self.dist_token, std=0.02)
nn.init.trunc_normal_(self.cls_token, std=0.02)
self.apply(_init_vit_weights)
def forward_features(self, x):
# [B, C, H, W] -> [B, num_patches, embed_dim]
x = self.patch_embed(x) # [B, 196, 768]
# [1, 1, 768] -> [B, 1, 768]
cls_token = self.cls_token.expand(x.shape[0], -1, -1)
if self.dist_token is None:
x = torch.cat((cls_token, x), dim=1) # [B, 197, 768]
else:
x = torch.cat((cls_token, self.dist_token.expand(x.shape[0], -1, -1), x), dim=1)
x = self.pos_drop(x + self.pos_embed)
x = self.blocks(x)
x = self.norm(x)
if self.dist_token is None:
return self.pre_logits(x[:, 0])
else:
return x[:, 0], x[:, 1]
def forward(self, x):
x = self.forward_features(x)
if self.head_dist is not None:
x, x_dist = self.head(x[0]), self.head_dist(x[1])
if self.training and not torch.jit.is_scripting():
return x, x_dist
else:
# during inference, return the average of both classifier predictions
return (x + x_dist) / 2
else:
x = self.head(x)
return x
def _init_vit_weights(m):
"""
ViT weight initialization
:param m: module
"""
if isinstance(m, nn.Linear):
nn.init.trunc_normal_(m.weight, std=.01)
if m.bias is not None:
nn.init.zeros_(m.bias)
elif isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode="fan_out")
if m.bias is not None:
nn.init.zeros_(m.bias)
elif isinstance(m, nn.LayerNorm):
nn.init.zeros_(m.bias)
nn.init.ones_(m.weight)
def vit_base_patch16_224(num_classes: int = 1000):
"""
ViT-Base model (ViT-B/16) from original paper (https://arxiv.org/abs/2010.11929).
ImageNet-1k weights @ 224x224, source https://github.com/google-research/vision_transformer.
weights ported from official Google JAX impl:
link: https://pan.baidu.com/s/1zqb08naP0RPqqfSXfkB2EA  password: eu9f
"""
model = VisionTransformer(img_size=224,
patch_size=16,
embed_dim=768,
depth=12,
num_heads=12,
representation_size=None,
num_classes=num_classes)
return model
def vit_base_patch16_224_in21k(num_classes: int = 21843, has_logits: bool = True):
"""
ViT-Base model (ViT-B/16) from original paper (https://arxiv.org/abs/2010.11929).
ImageNet-21k weights @ 224x224, source https://github.com/google-research/vision_transformer.
weights ported from official Google JAX impl:
https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_base_patch16_224_in21k-e5005f0a.pth
"""
model = VisionTransformer(img_size=224,
patch_size=16,
embed_dim=768,
depth=12,
num_heads=12,
representation_size=768 if has_logits else None,
num_classes=num_classes)
return model
def vit_base_patch32_224(num_classes: int = 1000):
"""
ViT-Base model (ViT-B/32) from original paper (https://arxiv.org/abs/2010.11929).
ImageNet-1k weights @ 224x224, source https://github.com/google-research/vision_transformer.
weights ported from official Google JAX impl:
link: https://pan.baidu.com/s/1hCv0U8pQomwAtHBYc4hmZg  password: s5hl
"""
model = VisionTransformer(img_size=224,
patch_size=32,
embed_dim=768,
depth=12,
num_heads=12,
representation_size=None,
num_classes=num_classes)
return model
def vit_base_patch32_224_in21k(num_classes: int = 21843, has_logits: bool = True):
"""
ViT-Base model (ViT-B/32) from original paper (https://arxiv.org/abs/2010.11929).
ImageNet-21k weights @ 224x224, source https://github.com/google-research/vision_transformer.
weights ported from official Google JAX impl:
https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_base_patch32_224_in21k-8db57226.pth
"""
model = VisionTransformer(img_size=224,
patch_size=32,
embed_dim=768,
depth=12,
num_heads=12,
representation_size=768 if has_logits else None,
num_classes=num_classes)
return model
def vit_large_patch16_224(num_classes: int = 1000):
"""
ViT-Large model (ViT-L/16) from original paper (https://arxiv.org/abs/2010.11929).
ImageNet-1k weights @ 224x224, source https://github.com/google-research/vision_transformer.
weights ported from official Google JAX impl:
link: https://pan.baidu.com/s/1cxBgZJJ6qUWPSBNcE4TdRQ  password: qqt8
"""
model = VisionTransformer(img_size=224,
patch_size=16,
embed_dim=1024,
depth=24,
num_heads=16,
representation_size=None,
num_classes=num_classes)
return model
def vit_large_patch16_224_in21k(num_classes: int = 21843, has_logits: bool = True):
"""
ViT-Large model (ViT-L/16) from original paper (https://arxiv.org/abs/2010.11929).
ImageNet-21k weights @ 224x224, source https://github.com/google-research/vision_transformer.
weights ported from official Google JAX impl:
https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_large_patch16_224_in21k-606da67d.pth
"""
model = VisionTransformer(img_size=224,
patch_size=16,
embed_dim=1024,
depth=24,
num_heads=16,
representation_size=1024 if has_logits else None,
num_classes=num_classes)
return model
def vit_large_patch32_224_in21k(num_classes: int = 21843, has_logits: bool = True):
"""
ViT-Large model (ViT-L/32) from original paper (https://arxiv.org/abs/2010.11929).
ImageNet-21k weights @ 224x224, source https://github.com/google-research/vision_transformer.
weights ported from official Google JAX impl:
https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_large_patch32_224_in21k-9046d2e7.pth
"""
model = VisionTransformer(img_size=224,
patch_size=32,
embed_dim=1024,
depth=24,
num_heads=16,
representation_size=1024 if has_logits else None,
num_classes=num_classes)
return model
def vit_huge_patch14_224_in21k(num_classes: int = 21843, has_logits: bool = True):
"""
ViT-Huge model (ViT-H/14) from original paper (https://arxiv.org/abs/2010.11929).
ImageNet-21k weights @ 224x224, source https://github.com/google-research/vision_transformer.
NOTE: converted weights not currently available, too large for github release hosting.
"""
model = VisionTransformer(img_size=224,
patch_size=14,
embed_dim=1280,
depth=32,
num_heads=16,
representation_size=1280 if has_logits else None,
num_classes=num_classes)
return model
# ViT_MIGraphX
## Table of Contents
- [Directory Structure](#directory-structure)
- [Project Introduction](#project-introduction)
- [Environment Setup](#environment-setup)
- [Build and Run](#build-and-run)
- [Accuracy](#accuracy)
- [Previous Versions](#previous-versions)
## Directory Structure
```
├── Images
├── Makefile
├── Models
│   └── model.onnx
├── Python
├── README.md
└── src
└── main.cpp
```
## Project Introduction
ViT applies the Transformer architecture to computer vision; this project is a classification inference example for the ViT model on the MIGraphX framework.
## Environment Setup
Running in Docker is recommended; the image below can be pulled from [光源](https://www.sourcefind.cn/#/service-list):
```
docker pull image.sourcefind.cn:5000/dcu/admin/base/custom:decode-ffmpeg-dtk23.04
```
## Build and Run
### Build
```
git clone https://developer.hpccube.com/codes/modelzoo/vit_migraphx.git
cd vit_migraphx
make
```
### Run
Download the inference data:
```
wget https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz
tar -zxvf flower_photos.tgz
```
```
./ViT_MIGraphX
```
Follow the printed usage hint to choose the example to run, for example:
```
./ViT_MIGraphX --models=Models/model.onnx --input=flower_photos/daisy/
```
This runs the ViT model and classifies the daisy images.
## Accuracy
The test data is [flower_photos](https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz) and the accelerator card used is a DCU Z100.
| Engine | Model Path | Data | Accuracy (%) |
| :------: | :------: | :------: | :------: |
| MIGraphX | models/model.onnx | daisy | 98.4 |
| MIGraphX | models/model.onnx | dandelion | 98.1 |
| MIGraphX | models/model.onnx | roses | 91.3 |
| MIGraphX | models/model.onnx | sunflowers | 97.4 |
| MIGraphX | models/model.onnx | tulips | 94.1 |
## Previous Versions
https://developer.hpccube.com/codes/modelzoo/vit_migraphx.git
# Model name
modelName=Vision_Transformer
# Model description
modelDescription=ViT is a Transformer-based image classification model
# Application scenarios (multiple tags separated by commas)
appScenario=MIGraphX,PyTorch,image classification,train,inference,C++
# Framework type (multiple tags separated by commas)
frameType=MIGraphX,PyTorch
#include <cmath>
#include <cstring>
#include <limits>
#include <stdio.h>
#include <stdlib.h>
#include <string>
#include <vector>
#include <getopt.h>
#include <sys/stat.h>
#include <opencv2/opencv.hpp>
#include <migraphx/onnx.hpp>
#include <migraphx/gpu/target.hpp>
#include <migraphx/gpu/hip.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/quantization.hpp>
using namespace std;
using namespace cv;
using namespace cv::dnn;
static struct option long_options[] = {
{"models", required_argument, NULL, 'm'},
{"input", required_argument, NULL, 'i'},
{NULL, 0, NULL, 0}
};
struct Classifier
{
migraphx::program net;
cv::Size inputSize;
std::string inputName;
migraphx::shape inputShape;
};
std::vector<float> ComputeSoftmax(const std::vector<float>& results)
{
// subtract the max logit before exponentiating for numerical stability
float maxValue = std::numeric_limits<float>::lowest();
for (size_t i = 0; i < results.size(); ++i)
{
if (results[i] > maxValue)
{
maxValue = results[i];
}
}
std::vector<float> softmaxResults(results.size());
float sum = 0.0f;
for (size_t i = 0; i < results.size(); ++i)
{
softmaxResults[i] = exp(results[i] - maxValue);
sum += softmaxResults[i];
}
for (size_t i = 0; i < results.size(); ++i)
{
softmaxResults[i] /= sum;
}
return softmaxResults;
}
void InitVit(std::string Model, struct Classifier *classifier)
{
// parse onnx
(*classifier).net = migraphx::parse_onnx(Model);
std::pair<std::string, migraphx::shape> inputAttribute = *((*classifier).net.get_parameter_shapes().begin());
(*classifier).inputName = inputAttribute.first;
(*classifier).inputShape = inputAttribute.second;
(*classifier).inputSize = cv::Size((*classifier).inputShape.lens()[3], (*classifier).inputShape.lens()[2]);
// compile net
migraphx::target gpuTarget = migraphx::gpu::target{};
migraphx::compile_options options;
options.device_id = 0;
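// offload_copy = true: eval() can take host-side buffers and MIGraphX performs the host/device copies internally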
options.offload_copy = true;
(*classifier).net.compile(gpuTarget,options);
fprintf(stdout, "succeed to compile model: %s\n", Model.c_str());
// run once for warmup
migraphx::parameter_map inputData;
inputData[(*classifier).inputName] = migraphx::generate_argument((*classifier).inputShape);
(*classifier).net.eval(inputData);
}
migraphx::parameter_map preprocess(cv::Mat srcImage, struct Classifier classifier)
{
cv::Mat inputBlob;
migraphx::parameter_map inputData;
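// blobFromImage resizes to inputSize, swaps BGR->RGB, and maps pixels to [-1, 1] via (pixel - 127.5) / 127.5, producing an NCHW float blob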
cv::dnn::blobFromImage(srcImage, inputBlob, 1/127.5, classifier.inputSize, {127.5, 127.5, 127.5}, true, false);
inputData[classifier.inputName] = migraphx::argument{classifier.inputShape, (float*)inputBlob.data};
return inputData;
}
void postprocess(migraphx::argument result, int *n, string inputdir)
{
const char* labels[] = {"daisy", "dandelion", "roses", "sunflowers", "tulips"};
migraphx::shape outputShape = result.get_shape();
float *logits = (float *)result.data();
std::vector<float> logit;
for(int j=0; j<outputShape.elements(); ++j)
{
logit.push_back(logits[j]);
}
std::vector<float> probs = ComputeSoftmax(logit);
for (int j = 0; j < outputShape.elements(); ++j)
{
if (probs[j] >= 0.5)
{
fprintf(stdout, "labels: %s, confidence: %.3f\n", labels[j], probs[j]);
// std::string::find returns npos when the substring is absent, so compare explicitly
if (inputdir.find(labels[j]) != std::string::npos)
(*n) += 1;
}
}
}
int main(int argc, char *argv[])
{
if (argc != 3)
{
fprintf(stdout, "Two args are required: %s --models=/path_to_model --input=/path_to_imgs\n", argv[0]);
return -1;
}
int opt;
std::string Model, inputs;
while ((opt = getopt_long(argc, argv, "m:i:", long_options, NULL)) != -1)
{
switch (opt)
{
case 'm':
Model = optarg;
fprintf(stdout, "Run Model: %s\n", Model.c_str());
break;
case 'i':
inputs = optarg;
fprintf(stdout, "Image Path: %s\n", inputs.c_str());
break;
case '?':
fprintf(stdout, "argvs is wrong, use: ./a --models=/path_to_model --input=/path_to_imgs\n");
return 0;
default:
return 0;
}
}
struct Classifier classifier;
InitVit(Model, &classifier);
fprintf(stdout, "succeed to Init classifier net.\n");
struct stat s;
if (stat(inputs.c_str(), &s) != 0)
{
fprintf(stderr, "failed to stat input path: %s\n", inputs.c_str());
return -1;
}
if (S_IFDIR & s.st_mode)
{
vector<String> srcImages;
glob(inputs, srcImages, false);
int n = 0; size_t i;
for (i = 0; i < srcImages.size(); i++)
{
fprintf(stdout, "Inference for image[%d]:\n", i);
cv::Mat srcImage = cv::imread(srcImages[i], 1);
migraphx::parameter_map inputData = preprocess(srcImage, classifier);
std::vector<migraphx::argument> results = classifier.net.eval(inputData);
postprocess(results[0], &n, inputs);
}
printf("All images:%d, match images:%d, Accuracy: %.3f%\n", i, n, ((float)n/i)*100);
}
if (S_IFREG & s.st_mode)
{
int n = 0;
cv::Mat srcImage = cv::imread(inputs.c_str(), 1);
migraphx::parameter_map inputData = preprocess(srcImage, classifier);
std::vector<migraphx::argument> results = classifier.net.eval(inputData);
postprocess(results[0], &n, inputs);
}
return 0;
}