import os
import argparse

import cv2
import numpy as np
import torch

import util.misc as utils
from datasets import build_dataset, get_coco_api_from_dataset
from datasets.coco_eval import CocoEvaluator
from models import build_test_model


def test_img(args, model, postprocessors, save_path):
    device = torch.device(args.device)

    dataset_test = build_dataset(
        image_set="val",
        args=args,
        eval_in_training_set=False,
    )
    sampler_test = torch.utils.data.SequentialSampler(dataset_test)
    data_loader_test = torch.utils.data.DataLoader(
        dataset_test,
        batch_size=1,
        sampler=sampler_test,
        drop_last=False,
        collate_fn=utils.collate_fn,
        num_workers=0,
        pin_memory=True,
    )
    base_ds = get_coco_api_from_dataset(dataset_test)

    # Build the evaluator once, before the loop, so it accumulates results
    # over the whole validation set rather than only the last image.
    iou_types = tuple(k for k in ("segm", "bbox") if k in postprocessors.keys())
    coco_evaluator = CocoEvaluator(base_ds, iou_types)

    for img_data, target in data_loader_test:
        img_data = img_data.to(device)
        target = [{k: v.to(device) for k, v in t.items()} for t in target]

        # Model inference
        with torch.no_grad():
            outputs = model(img_data)

        # Post-process the raw outputs into boxes at the original image size
        orig_target_sizes = torch.stack([t["orig_size"] for t in target], dim=0)
        result = postprocessors['bbox'](outputs, orig_target_sizes)
        res = {t['image_id'].item(): output for t, output in zip(target, result)}

        if coco_evaluator is not None:
            coco_evaluator.update(res)

        res = res[target[0]['image_id'].item()]

        # Draw the detections on the image and save it
        min_score = 0.65
        img_name = dataset_test.coco.loadImgs(target[0]['image_id'].item())[0]['file_name']
        img = cv2.imread(os.path.join(args.coco_path, 'images/val2017', img_name))
        draw_img = img.copy()
        save_status = False
        scores = res['scores']
        for i in range(len(scores)):
            if float(scores[i]) > min_score:
                save_status = True
                score = float(scores[i])
                label = int(res['labels'][i].cpu().numpy())
                bbox = res['boxes'][i].cpu().numpy().tolist()
                print("***", label, bbox)
                cv2.putText(
                    draw_img,
                    "{} | {}".format(label, str(score)[:3]),
                    (int(bbox[0]), int(bbox[1]) - 2),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.5,
                    (255, 255, 255),
                    1,
                )
                cv2.rectangle(
                    draw_img,
                    (int(bbox[0]), int(bbox[1])),
                    (int(bbox[2]), int(bbox[3])),
                    (0, 0, 255),
                    1,
                )
        if save_status:
            cv2.imwrite("{}/{}".format(save_path, img_name), draw_img)

    if coco_evaluator is not None:
        coco_evaluator.synchronize_between_processes()
        coco_evaluator.accumulate()
        coco_evaluator.summarize()
        print(coco_evaluator)


def get_parser():
    parser = argparse.ArgumentParser("HDETR Detector", add_help=False)
    parser.add_argument("--lr_backbone", default=2e-5, type=float)
    parser.add_argument("--two_stage", default=False, action="store_true")
    parser.add_argument("--dataset_file", default="coco")
    parser.add_argument("--coco_path", default="/home/datasets/COCO2017", type=str)
    parser.add_argument("--save_path", default="./result_img", type=str)
    parser.add_argument(
        "--cache_mode",
        default=False,
        action="store_true",
        help="whether to cache images in memory",
    )
    parser.add_argument("--pre_trained_model", default="")

    # * Segmentation
    parser.add_argument(
        "--masks",
        action="store_true",
        help="Train segmentation head if the flag is provided",
    )

    # * Backbone
    parser.add_argument(
        "--backbone",
        default="resnet50",
        type=str,
        help="Name of the convolutional backbone to use",
    )
    parser.add_argument(
        "--dilation",
        action="store_true",
        help="If true, we replace stride with dilation in the last convolutional block (DC5)",
    )
    parser.add_argument(
        "--position_embedding",
        default="sine",
        type=str,
        choices=("sine", "learned"),
        help="Type of positional embedding to use on top of the image features",
    )
    parser.add_argument(
        "--position_embedding_scale",
        default=2 * np.pi,
        type=float,
        help="position / size * scale",
    )
    parser.add_argument(
        "--num_feature_levels", default=4, type=int, help="number of feature levels"
    )
    # swin backbone
    parser.add_argument(
        "--pretrained_backbone_path",
        default="./swin_tiny_patch4_window7_224.pkl",
        type=str,
    )
    parser.add_argument("--drop_path_rate", default=0.2, type=float)

    # * Transformer
    parser.add_argument(
        "--enc_layers",
        default=6,
        type=int,
        help="Number of encoding layers in the transformer",
    )
    parser.add_argument(
        "--dec_layers",
        default=6,
        type=int,
        help="Number of decoding layers in the transformer",
    )
    parser.add_argument(
        "--dim_feedforward",
        default=2048,
        type=int,
        help="Intermediate size of the feedforward layers in the transformer blocks",
    )
    parser.add_argument(
        "--hidden_dim",
        default=256,
        type=int,
        help="Size of the embeddings (dimension of the transformer)",
    )
    parser.add_argument(
        "--dropout", default=0.1, type=float, help="Dropout applied in the transformer"
    )
    parser.add_argument(
        "--nheads",
        default=8,
        type=int,
        help="Number of attention heads inside the transformer's attentions",
    )
    parser.add_argument(
        "--num_queries_one2one",
        default=300,
        type=int,
        help="Number of query slots for one-to-one matching",
    )
    parser.add_argument(
        "--num_queries_one2many",
        default=0,
        type=int,
        help="Number of query slots for one-to-many matching",
    )
    parser.add_argument("--dec_n_points", default=4, type=int)
    parser.add_argument("--enc_n_points", default=4, type=int)

    # Deformable DETR tricks
    parser.add_argument("--mixed_selection", action="store_true", default=False)
    parser.add_argument("--look_forward_twice", action="store_true", default=False)

    # hybrid branch
    parser.add_argument("--k_one2many", default=5, type=int)
    parser.add_argument("--lambda_one2many", default=1.0, type=float)

    parser.add_argument("--device", default="cuda", help="device to use for testing")

    # * eval technologies
    parser.add_argument("--eval", action="store_true")
    # eval in training set
    parser.add_argument("--eval_in_training_set", default=False, action="store_true")
    # topk for eval
    parser.add_argument("--topk", default=100, type=int)

    # * training technologies
    parser.add_argument("--use_fp16", default=False, action="store_true")
    parser.add_argument("--use_checkpoint", default=False, action="store_true")

    return parser


if __name__ == "__main__":
    args = get_parser().parse_args()
    device = torch.device(args.device)

    # Make sure the output directory for the rendered images exists
    if not os.path.exists(args.save_path):
        os.makedirs(args.save_path)

    # Build the model and load the pretrained checkpoint
    model, postprocessors = build_test_model(args)
    model.to(device)
    checkpoint = torch.load(args.pre_trained_model, map_location='cpu')
    model.load_state_dict(checkpoint["model"], strict=False)

    # Keep only the one-to-one queries at test time; the one-to-many branch
    # is used during training only.
    model.num_queries = model.num_queries_one2one
    model.transformer.two_stage_num_proposals = model.num_queries

    model.eval()
    test_img(args, model, postprocessors, args.save_path)
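
# Example invocation, as a sketch: the script name "test.py" and the
# checkpoint path are illustrative placeholders, not files shipped with the
# repo; --coco_path and --save_path match the defaults defined above.
#
#   python test.py \
#       --coco_path /home/datasets/COCO2017 \
#       --pre_trained_model ./checkpoints/checkpoint.pth \
#       --save_path ./result_img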