Commit 64b02fb6 authored by liangjing's avatar liangjing
Browse files

version 1

parents
Pipeline #176 failed with stages
in 0 seconds
classdef CocoUtils
% Utility functions for testing and validation of COCO code.
%
% The following utility functions are defined:
% convertPascalGt - Convert ground truth for PASCAL to COCO format.
% convertImageNetGt - Convert ground truth for ImageNet to COCO format.
% convertPascalDt - Convert detections on PASCAL to COCO format.
% convertImageNetDt - Convert detections on ImageNet to COCO format.
% validateOnPascal - Validate COCO eval code against PASCAL code.
% validateOnImageNet - Validate COCO eval code against ImageNet code.
% generateFakeDt - Generate fake detections from ground truth.
% validateMaskApi - Validate MaskApi against Matlab functions.
% gasonSplit - Split JSON file into multiple JSON files.
% gasonMerge - Merge JSON files into single JSON file.
% Help on each functions can be accessed by: "help CocoUtils>function".
%
% See also CocoApi MaskApi CocoEval CocoUtils>convertPascalGt
% CocoUtils>convertImageNetGt CocoUtils>convertPascalDt
% CocoUtils>convertImageNetDt CocoUtils>validateOnPascal
% CocoUtils>validateOnImageNet CocoUtils>generateFakeDt
% CocoUtils>validateMaskApi CocoUtils>gasonSplit CocoUtils>gasonMerge
%
% Microsoft COCO Toolbox. version 2.0
% Data, paper, and tutorials available at: http://mscoco.org/
% Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
% Licensed under the Simplified BSD License [see coco/license.txt]
methods( Static )
function convertPascalGt( dataDir, year, split, annFile )
% Convert ground truth for PASCAL to COCO format.
%
% USAGE
% CocoUtils.convertPascalGt( dataDir, year, split, annFile )
%
% INPUTS
% dataDir - dir containing VOCdevkit/
% year - dataset year (e.g. '2007')
% split - dataset split (e.g. 'val')
% annFile - annotation file for writing results
% skip conversion if the output annotation file already exists
if(exist(annFile,'file')), return; end
fprintf('Converting PASCAL VOC dataset... '); clk=tic;
dev=[dataDir '/VOCdevkit/']; addpath(genpath([dev '/VOCcode']));
VOCinit; C=VOCopts.classes'; catsMap=containers.Map(C,1:length(C));
f=fopen([dev '/VOC' year '/ImageSets/Main/' split '.txt']);
is=textscan(f,'%s %*s'); is=is{1}; fclose(f); n=length(is);
data=CocoUtils.initData(C,n);
for i=1:n, nm=[is{i} '.jpg'];
f=[dev '/VOC' year '/Annotations/' is{i} '.xml'];
R=PASreadrecord(f); hw=R.imgsize([2 1]); O=R.objects;
% numeric image id: filename with underscores removed
% (e.g. '2007_000027' -> 2007000027)
id=is{i}; id(id=='_')=[]; id=str2double(id);
% PASCAL "difficult" objects are marked as ignore; none are crowds
ignore=[O.difficult]; bbs=cat(1,O.bbox);
t=catsMap.values({O.class}); catIds=[t{:}]; iscrowd=ignore*0;
data=CocoUtils.addData(data,nm,id,hw,catIds,ignore,iscrowd,bbs);
end
f=fopen(annFile,'w'); fwrite(f,gason(data)); fclose(f);
fprintf('DONE (t=%0.2fs).\n',toc(clk));
end
function convertImageNetGt( dataDir, year, split, annFile )
% Convert ground truth for ImageNet to COCO format.
%
% USAGE
% CocoUtils.convertImageNetGt( dataDir, year, split, annFile )
%
% INPUTS
% dataDir - dir containing ILSVRC*/ folders
% year - dataset year (e.g. '2013')
% split - dataset split (e.g. 'val')
% annFile - annotation file for writing results
if(exist(annFile,'file')), return; end
fprintf('Converting ImageNet dataset... '); clk=tic;
dev=[dataDir '/ILSVRC' year '_devkit/'];
addpath(genpath([dev '/evaluation/']));
t=[dev '/data/meta_det.mat'];
t=load(t); synsets=t.synsets(1:200); catNms={synsets.name};
catsMap=containers.Map({synsets.WNID},1:length(catNms));
% the validation split ships a blacklist of (image index, WNID) pairs
% to exclude from evaluation; other splits get an empty blacklist
if(~strcmp(split,'val')), blacklist=cell(1,2); else
f=[dev '/data/' 'ILSVRC' year '_det_validation_blacklist.txt'];
f=fopen(f); blacklist=textscan(f,'%d %s'); fclose(f);
t=catsMap.values(blacklist{2}); blacklist{2}=[t{:}];
end
if(strcmp(split,'train'))
dl=@(i) [dev '/data/det_lists/' split '_pos_' int2str(i) '.txt'];
is=cell(1,200); for i=1:200, f=fopen(dl(i));
is{i}=textscan(f,'%s %*s'); is{i}=is{i}{1}; fclose(f); end
is=unique(cat(1,is{:})); n=length(is);
else
f=fopen([dev '/data/det_lists/' split '.txt']);
is=textscan(f,'%s %*s'); is=is{1}; fclose(f); n=length(is);
end
data=CocoUtils.initData(catNms,n);
for i=1:n
f=[dataDir '/ILSVRC' year '_DET_bbox_' split '/' is{i} '.xml'];
R=VOCreadxml(f); R=R.annotation; nm=[is{i} '.JPEG'];
hw=str2double({R.size.height R.size.width});
if(~isfield(R,'object')), catIds=[]; bbs=[]; else
O=R.object; t=catsMap.values({O.name}); catIds=[t{:}];
b=[O.bndbox]; bbs=str2double({b.xmin; b.ymin; b.xmax; b.ymax})';
end
% prepend one full-image crowd region per category blacklisted for
% this image, and mark all annotations of those categories as ignore
j=blacklist{2}(blacklist{1}==i); m=numel(j); b=[0 0 hw(2) hw(1)];
catIds=[j catIds]; bbs=[repmat(b,m,1); bbs]; %#ok<AGROW>
ignore=ismember(catIds,j); iscrowd=ignore*0; iscrowd(1:m)=1;
data=CocoUtils.addData(data,nm,i,hw,catIds,ignore,iscrowd,bbs);
end
f=fopen(annFile,'w'); fwrite(f,gason(data)); fclose(f);
fprintf('DONE (t=%0.2fs).\n',toc(clk));
end
function convertPascalDt( srcFiles, tarFile )
% Convert detections on PASCAL to COCO format.
%
% USAGE
% CocoUtils.convertPascalDt( srcFiles, tarFile )
%
% INPUTS
% srcFiles - source detection file(s) in PASCAL format
% tarFile - target detection file in COCO format
if(exist(tarFile,'file')), return; end; R=[];
% one source file per category; file index i doubles as category id
for i=1:length(srcFiles), f=fopen(srcFiles{i},'r');
R1=textscan(f,'%d %f %f %f %f %f'); fclose(f);
% convert 1-indexed [x0 y0 x1 y1] corners to 0-indexed [x y w h]
[~,~,x0,y0,x1,y1]=deal(R1{:}); b=[x0-1 y0-1 x1-x0+1 y1-y0+1];
b(:,3:4)=max(b(:,3:4),1); b=mat2cell(b,ones(1,size(b,1)),4);
R=[R; struct('image_id',num2cell(R1{1}),'bbox',b,...
'category_id',i,'score',num2cell(R1{2}))]; %#ok<AGROW>
end
f=fopen(tarFile,'w'); fwrite(f,gason(R)); fclose(f);
end
function convertImageNetDt( srcFile, tarFile )
% Convert detections on ImageNet to COCO format.
%
% USAGE
% CocoUtils.convertImageNetDt( srcFile, tarFile )
%
% INPUTS
% srcFile - source detection file in ImageNet format
% tarFile - target detection file in COCO format
if(exist(tarFile,'file')), return; end; f=fopen(srcFile,'r');
R=textscan(f,'%d %d %f %f %f %f %f'); fclose(f);
% convert 1-indexed corners to 0-indexed [x y w h] (width/height >= 1)
[~,~,~,x0,y0,x1,y1]=deal(R{:}); b=[x0-1 y0-1 x1-x0+1 y1-y0+1];
b(:,3:4)=max(b(:,3:4),1); bbox=mat2cell(b,ones(1,size(b,1)),4);
R=struct('image_id',num2cell(R{1}),'bbox',bbox,...
'category_id',num2cell(R{2}),'score',num2cell(R{3}));
f=fopen(tarFile,'w'); fwrite(f,gason(R)); fclose(f);
end
function validateOnPascal( dataDir )
% Validate COCO eval code against PASCAL code.
%
% USAGE
% CocoUtils.validateOnPascal( dataDir )
%
% INPUTS
% dataDir - dir containing VOCdevkit/
split='val'; year='2007'; thrs=0:.001:1; T=length(thrs);
dev=[dataDir '/VOCdevkit/']; addpath(genpath([dev '/VOCcode/']));
d=pwd; cd(dev); VOCinit; cd(d); O=VOCopts; O.testset=split;
O.detrespath=[O.detrespath(1:end-10) split '_%s.txt'];
catNms=O.classes; K=length(catNms); ap=zeros(K,1);
% compute per-class AP with PASCAL's own code, averaged over recall thrs
for i=1:K, [R,P]=VOCevaldet(O,'comp3',catNms{i},0); R1=[R; inf];
P1=[P; 0]; for t=1:T, ap(i)=ap(i)+max(P1(R1>=thrs(t)))/T; end; end
srcFile=[dev '/results/VOC' year '/Main/comp3_det_' split];
resFile=[srcFile '.json']; annFile=[dev '/VOC2007/' split '.json'];
sfs=cell(1,K); for i=1:K, sfs{i}=[srcFile '_' catNms{i} '.txt']; end
CocoUtils.convertPascalGt(dataDir,year,split,annFile);
CocoUtils.convertPascalDt(sfs,resFile);
% re-evaluate with the COCO eval code and compare per-class AP
D=CocoApi(annFile); R=D.loadRes(resFile); E=CocoEval(D,R);
p=E.params; p.recThrs=thrs; p.iouThrs=.5; p.areaRng=[0 inf];
p.useSegm=0; p.maxDets=inf; E.params=p; E.evaluate(); E.accumulate();
apCoco=squeeze(mean(E.eval.precision,2)); deltas=abs(apCoco-ap);
fprintf('AP delta: mean=%.2e median=%.2e max=%.2e\n',...
mean(deltas),median(deltas),max(deltas))
if(max(deltas)>1e-2), msg='FAILED'; else msg='PASSED'; end
warning(['Eval code *' msg '* validation!']);
end
function validateOnImageNet( dataDir )
% Validate COCO eval code against ImageNet code.
%
% USAGE
% CocoUtils.validateOnImageNet( dataDir )
%
% INPUTS
% dataDir - dir containing ILSVRC*/ folders
warning(['Set pixelTolerance=0 in line 30 of eval_detection.m '...
'(and delete cache) otherwise AP will differ by >1e-4!']);
year='2013'; dev=[dataDir '/ILSVRC' year '_devkit/'];
fs = { [dev 'evaluation/demo.val.pred.det.txt']
[dataDir '/ILSVRC' year '_DET_bbox_val/']
[dev 'data/meta_det.mat']
[dev 'data/det_lists/val.txt']
[dev 'data/ILSVRC' year '_det_validation_blacklist.txt']
[dev 'data/ILSVRC' year '_det_validation_cache.mat'] };
addpath(genpath([dev 'evaluation/']));
% reference AP from the official ImageNet evaluation code
ap=eval_detection(fs{:})';
resFile=[fs{1}(1:end-3) 'json'];
annFile=[dev 'data/ILSVRC' year '_val.json'];
CocoUtils.convertImageNetDt(fs{1},resFile);
CocoUtils.convertImageNetGt(dataDir,year,'val',annFile)
D=CocoApi(annFile); R=D.loadRes(resFile); E=CocoEval(D,R);
p=E.params; p.recThrs=0:.0001:1; p.iouThrs=.5; p.areaRng=[0 inf];
p.useSegm=0; p.maxDets=inf; E.params=p; E.evaluate(); E.accumulate();
apCoco=squeeze(mean(E.eval.precision,2)); deltas=abs(apCoco-ap);
fprintf('AP delta: mean=%.2e median=%.2e max=%.2e\n',...
mean(deltas),median(deltas),max(deltas))
if(max(deltas)>1e-4), msg='FAILED'; else msg='PASSED'; end
warning(['Eval code *' msg '* validation!']);
end
function generateFakeDt( coco, dtFile, varargin )
% Generate fake detections from ground truth.
%
% USAGE
% CocoUtils.generateFakeDt( coco, dtFile, varargin )
%
% INPUTS
% coco - instance of CocoApi containing ground truth
% dtFile - target file for writing detection results
% params - parameters (struct or name/value pairs)
% .n - [100] number images for which to generate dets
% .fn - [.20] false negative rate (0<fn<1)
% .fp - [.10] false positive rate (0<fp<fn)
% .sigma - [.10] translation noise (relative to object width)
% .seed - [0] random seed for reproducibility
% .type - ['bbox'] can be 'bbox', 'segm', or 'keypoints'
fprintf('Generating fake detection data... '); clk=tic;
def={'n',100,'fn',.20,'fp',.10,'sigma',.10,'seed',0,'type','bbox'};
opts=getPrmDflt(varargin,def,1); n=opts.n;
if(strcmp(opts.type,'segm')), opts.type='segmentation'; end
assert(any(strcmp(opts.type,{'bbox','segmentation','keypoints'})));
% seeded stream so results are reproducible; k is an upper bound on dets
rstream = RandStream('mrg32k3a','Seed',opts.seed); k=n*100;
R=struct('image_id',[],'category_id',[],opts.type,[],'score',[]);
imgIds=sort(coco.getImgIds()); imgIds=imgIds(1:n); R=repmat(R,1,k);
imgs=coco.loadImgs(imgIds); catIds=coco.getCatIds(); k=0;
for i=1:n
A=coco.loadAnns(coco.getAnnIds('imgIds',imgIds(i),'iscrowd',0));
m=length(A); h=imgs(i).height; w=imgs(i).width;
% t<fp: keep det but randomize category (false positive);
% fp<=t<fn: drop det (false negative); else: keep true category
for j=1:m, t=rand(rstream);
if(t<opts.fp), catId=catIds(randi(rstream,length(catIds)));
elseif(t<opts.fn), continue; else catId=A(j).category_id; end
% dx: horizontal jitter proportional to object width
bb=A(j).bbox; dx=round(randn(rstream)*opts.sigma*bb(3));
if( strcmp(opts.type,'bbox') )
x0=max(0,bb(1)+dx); x1=min(w-1,bb(1)+bb(3)+dx-1);
bb(1)=x0; bb(3)=x1-x0+1; if(bb(3)==0), continue; end; o=bb;
elseif( strcmp(opts.type,'segmentation') )
% shift the rasterized mask horizontally by dx, clipped to image
M=MaskApi.decode(MaskApi.frPoly(A(j).segmentation,h,w)); T=M*0;
T(:,max(1,1+dx):min(w,w+dx))=M(:,max(1,1-dx):min(w,w-dx));
if(nnz(T)==0), continue; end; o=MaskApi.encode(T);
elseif( strcmp(opts.type,'keypoints') )
o=A(j).keypoints; v=o(3:3:end)>0; if(~any(v)), continue; end
x=o(1:3:end); y=o(2:3:end); x(~v)=mean(x(v)); y(~v)=mean(y(v));
x=max(0,min(w-1,x+dx)); o(1:3:end)=x; o(2:3:end)=y;
end
k=k+1; R(k).image_id=imgIds(i); R(k).category_id=catId;
R(k).(opts.type)=o; R(k).score=round(rand(rstream)*1000)/1000;
end
end
R=R(1:k); f=fopen(dtFile,'w'); fwrite(f,gason(R)); fclose(f);
fprintf('DONE (t=%0.2fs).\n',toc(clk));
end
function validateMaskApi( coco )
% Validate MaskApi against Matlab functions.
%
% USAGE
% CocoUtils.validateMaskApi( coco )
%
% INPUTS
% coco - instance of CocoApi containing ground truth
S=coco.data.annotations; S=S(~[S.iscrowd]); S={S.segmentation};
h=1000; n=1000; Z=cell(1,n); A=Z; B=Z; M=Z; IB=zeros(1,n);
fprintf('Running MaskApi implementations... '); clk=tic;
for i=1:n, A{i}=MaskApi.frPoly(S{i},h,h); end
Ia=MaskApi.iou(A{1},[A{:}]);
fprintf('DONE (t=%0.2fs).\n',toc(clk));
fprintf('Running Matlab implementations... '); clk=tic;
% poly2mask reference: +.5 compensates for 0-indexed polygon coords
for i=1:n, M1=0; for j=1:length(S{i}), x=S{i}{j}+.5;
M1=M1+poly2mask(x(1:2:end),x(2:2:end),h,h); end
M{i}=uint8(M1>0); B{i}=MaskApi.encode(M{i});
IB(i)=sum(sum(M{1}&M{i}))/sum(sum(M{1}|M{i}));
end
fprintf('DONE (t=%0.2fs).\n',toc(clk));
if(isequal(A,B)&&isequal(Ia,IB)),
msg='PASSED'; else msg='FAILED'; end
warning(['MaskApi *' msg '* validation!']);
end
function gasonSplit( name, k )
% Split JSON file into multiple JSON files.
%
% Splits file 'name.json' into multiple files 'name-*.json'. Only
% works for JSON arrays. Memory efficient. Inverted by gasonMerge().
%
% USAGE
% CocoUtils.gasonSplit( name, k )
%
% INPUTS
% name - file containing JSON array (w/o '.json' ext)
% k - number of files to split JSON into
s=gasonMex('split',fileread([name '.json']),k); k=length(s);
for i=1:k, f=fopen(sprintf('%s-%06i.json',name,i),'w');
fwrite(f,s{i}); fclose(f); end
end
function gasonMerge( name )
% Merge JSON files into single JSON file.
%
% Merge files 'name-*.json' into single file 'name.json'. Only works
% for JSON arrays. Memory efficient. Inverted by gasonSplit().
%
% USAGE
% CocoUtils.gasonMerge( name )
%
% INPUTS
% name - files containing JSON arrays (w/o '.json' ext)
s=dir([name '-*.json']); s=sort({s.name}); k=length(s);
p=fileparts(name); for i=1:k, s{i}=fullfile(p,s{i}); end
for i=1:k, s{i}=fileread(s{i}); end; s=gasonMex('merge',s);
f=fopen([name '.json'],'w'); fwrite(f,s); fclose(f);
end
end
methods( Static, Access=private )
function data = initData( catNms, n )
% Helper for convert() functions: init annotations.
% Preallocates n image slots and n*20 annotation slots; nImgs/nAnns
% track how many slots are filled (removed by addData when complete).
m=length(catNms); ms=num2cell(1:m);
I = struct('file_name',0,'height',0,'width',0,'id',0);
C = struct('supercategory','none','id',ms,'name',catNms);
A = struct('segmentation',0,'area',0,'iscrowd',0,...
'image_id',0,'bbox',0,'category_id',0,'id',0,'ignore',0);
I=repmat(I,1,n); A=repmat(A,1,n*20);
data = struct('images',I,'type','instances',...
'annotations',A,'categories',C,'nImgs',0,'nAnns',0);
end
function data = addData( data,nm,id,hw,catIds,ignore,iscrowd,bbs )
% Helper for convert() functions: add annotations.
data.nImgs=data.nImgs+1;
data.images(data.nImgs)=struct('file_name',nm,...
'height',hw(1),'width',hw(2),'id',id);
for j=1:length(catIds), data.nAnns=data.nAnns+1; k=data.nAnns;
% convert 1-indexed [x1 y1 x2 y2] corners to 0-indexed [x y w h]
b=bbs(j,:); b=b-1; b(3:4)=b(3:4)-b(1:2)+1;
% segmentation: rectangle polygon traced from the bbox corners
x1=b(1); x2=b(1)+b(3); y1=b(2); y2=b(2)+b(4);
S={{[x1 y1 x1 y2 x2 y2 x2 y1]}}; a=b(3)*b(4);
data.annotations(k)=struct('segmentation',S,'area',a,...
'iscrowd',iscrowd(j),'image_id',id,'bbox',b,...
'category_id',catIds(j),'id',k,'ignore',ignore(j));
end
% after the last image: trim unused slots and drop the counters
if( data.nImgs == length(data.images) )
data.annotations=data.annotations(1:data.nAnns);
data=rmfield(data,{'nImgs','nAnns'});
end
end
end
end
classdef MaskApi
% Interface for manipulating masks stored in RLE format.
%
% RLE is a simple yet efficient format for storing binary masks. RLE
% first divides a vector (or vectorized image) into a series of piecewise
% constant regions and then for each piece simply stores the length of
% that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would
% be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1]
% (note that the odd counts are always the numbers of zeros). Instead of
% storing the counts directly, additional compression is achieved with a
% variable bitrate representation based on a common scheme called LEB128.
%
% Compression is greatest given large piecewise constant regions.
% Specifically, the size of the RLE is proportional to the number of
% *boundaries* in M (or for an image the number of boundaries in the y
% direction). Assuming fairly simple shapes, the RLE representation is
% O(sqrt(n)) where n is number of pixels in the object. Hence space usage
% is substantially lower, especially for large simple objects (large n).
%
% Many common operations on masks can be computed directly using the RLE
% (without need for decoding). This includes computations such as area,
% union, intersection, etc. All of these operations are linear in the
% size of the RLE, in other words they are O(sqrt(n)) where n is the area
% of the object. Computing these operations on the original mask is O(n).
% Thus, using the RLE can result in substantial computational savings.
%
% The following API functions are defined:
% encode - Encode binary masks using RLE.
% decode - Decode binary masks encoded via RLE.
% merge - Compute union or intersection of encoded masks.
% iou - Compute intersection over union between masks.
% nms - Compute non-maximum suppression between ordered masks.
% area - Compute area of encoded masks.
% toBbox - Get bounding boxes surrounding encoded masks.
% frBbox - Convert bounding boxes to encoded masks.
% frPoly - Convert polygon to encoded mask.
%
% Usage:
% Rs = MaskApi.encode( masks )
% masks = MaskApi.decode( Rs )
% R = MaskApi.merge( Rs, [intersect=false] )
% o = MaskApi.iou( dt, gt, [iscrowd=false] )
% keep = MaskApi.nms( dt, thr )
% a = MaskApi.area( Rs )
% bbs = MaskApi.toBbox( Rs )
% Rs = MaskApi.frBbox( bbs, h, w )
% R = MaskApi.frPoly( poly, h, w )
%
% In the API the following formats are used:
% R,Rs - [struct] Run-length encoding of binary mask(s)
% masks - [hxwxn] Binary mask(s) (must have type uint8)
% bbs - [nx4] Bounding box(es) stored as [x y w h]
% poly - Polygon stored as {[x1 y1 x2 y2...],[x1 y1 ...],...}
% dt,gt - May be either bounding boxes or encoded masks
% Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel).
%
% Finally, a note about the intersection over union (iou) computation.
% The standard iou of a ground truth (gt) and detected (dt) object is
% iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt))
% For "crowd" regions, we use a modified criteria. If a gt object is
% marked as "iscrowd", we allow a dt to match any subregion of the gt.
% Choosing gt' in the crowd gt that best matches the dt can be done using
% gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing
% iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt)
% For crowd gt regions we use this modified criteria above for the iou.
%
% To compile use the following (some precompiled binaries are included):
% mex('CFLAGS=\$CFLAGS -Wall -std=c99','-largeArrayDims',...
% 'private/maskApiMex.c','../common/maskApi.c',...
% '-I../common/','-outdir','private');
% Please do not contact us for help with compiling.
%
% Microsoft COCO Toolbox. version 2.0
% Data, paper, and tutorials available at: http://mscoco.org/
% Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
% Licensed under the Simplified BSD License [see coco/license.txt]
% All methods are thin wrappers around the maskApiMex C implementation;
% see the class help above for argument formats.
methods( Static )
function Rs = encode( masks )
% Encode binary masks (uint8 hxwxn) using RLE.
Rs = maskApiMex( 'encode', masks );
end
function masks = decode( Rs )
% Decode RLE-encoded masks back to binary masks.
masks = maskApiMex( 'decode', Rs );
end
function R = merge( Rs, varargin )
% Union (default) or intersection of encoded masks.
R = maskApiMex( 'merge', Rs, varargin{:} );
end
function o = iou( dt, gt, varargin )
% Intersection over union; dt/gt transposed for the mex call.
o = maskApiMex( 'iou', dt', gt', varargin{:} );
end
function keep = nms( dt, thr )
% Non-maximum suppression between ordered masks/boxes.
keep = maskApiMex('nms',dt',thr);
end
function a = area( Rs )
% Area (pixel count) of encoded masks.
a = maskApiMex( 'area', Rs );
end
function bbs = toBbox( Rs )
% Tight bounding boxes [x y w h] around encoded masks.
bbs = maskApiMex( 'toBbox', Rs )';
end
function Rs = frBbox( bbs, h, w )
% Convert nx4 bounding boxes to encoded masks of size hxw.
Rs = maskApiMex( 'frBbox', bbs', h, w );
end
function R = frPoly( poly, h, w )
% Rasterize polygon(s) into a single encoded mask of size hxw.
R = maskApiMex( 'frPoly', poly, h , w );
end
end
end
%% Demo for the CocoApi (see CocoApi.m)
%% initialize COCO api (please specify dataType/annType below)
annTypes = { 'instances', 'captions', 'person_keypoints' };
dataType='val2014'; annType=annTypes{1}; % specify dataType/annType
annFile=sprintf('../annotations/%s_%s.json',annType,dataType);
coco=CocoApi(annFile);
%% display COCO categories and supercategories
% (captions annotations have no categories, hence the guard)
if( ~strcmp(annType,'captions') )
cats = coco.loadCats(coco.getCatIds());
nms={cats.name}; fprintf('COCO categories: ');
fprintf('%s, ',nms{:}); fprintf('\n');
nms=unique({cats.supercategory}); fprintf('COCO supercategories: ');
fprintf('%s, ',nms{:}); fprintf('\n');
end
%% get all images containing given categories, select one at random
catIds = coco.getCatIds('catNms',{'person','dog','skateboard'});
imgIds = coco.getImgIds('catIds',catIds);
imgId = imgIds(randi(length(imgIds)));
%% load and display image
img = coco.loadImgs(imgId);
I = imread(sprintf('../images/%s/%s',dataType,img.file_name));
figure(1); imagesc(I); axis('image'); set(gca,'XTick',[],'YTick',[])
%% load and display annotations
annIds = coco.getAnnIds('imgIds',imgId,'catIds',catIds,'iscrowd',[]);
anns = coco.loadAnns(annIds); coco.showAnns(anns);
%% Demo demonstrating the algorithm result formats for COCO
%% select results type for demo (either bbox or segm)
type = {'segm','bbox','keypoints'}; type = type{1}; % specify type here
fprintf('Running demo for *%s* results.\n\n',type);
%% initialize COCO ground truth api
dataDir='../'; prefix='instances'; dataType='val2014';
if(strcmp(type,'keypoints')), prefix='person_keypoints'; end
annFile=sprintf('%s/annotations/%s_%s.json',dataDir,prefix,dataType);
cocoGt=CocoApi(annFile);
%% initialize COCO detections api
resFile='%s/results/%s_%s_fake%s100_results.json';
resFile=sprintf(resFile,dataDir,prefix,dataType,type);
cocoDt=cocoGt.loadRes(resFile);
%% visualize gt and dt side by side
imgIds=sort(cocoGt.getImgIds()); imgIds=imgIds(1:100);
imgId = imgIds(randi(100)); img = cocoGt.loadImgs(imgId);
I = imread(sprintf('%s/images/val2014/%s',dataDir,img.file_name));
figure(1); subplot(1,2,1); imagesc(I); axis('image'); axis off;
annIds = cocoGt.getAnnIds('imgIds',imgId); title('ground truth')
anns = cocoGt.loadAnns(annIds); cocoGt.showAnns(anns);
figure(1); subplot(1,2,2); imagesc(I); axis('image'); axis off;
annIds = cocoDt.getAnnIds('imgIds',imgId); title('results')
anns = cocoDt.loadAnns(annIds); cocoDt.showAnns(anns);
%% load raw JSON and show exact format for results
fprintf('results structure have the following format:\n');
res = gason(fileread(resFile)); disp(res)
%% the following command can be used to save the results back to disk
if(0), f=fopen(resFile,'w'); fwrite(f,gason(res)); fclose(f); end
%% run COCO evaluation code (see CocoEval.m)
cocoEval=CocoEval(cocoGt,cocoDt,type);
cocoEval.params.imgIds=imgIds;
cocoEval.evaluate();
cocoEval.accumulate();
cocoEval.summarize();
%% generate Derek Hoiem style analysis of false positives (slow)
if(0), cocoEval.analyze(); end
function out = gason( in )
% Convert between JSON strings and corresponding JSON objects.
%
% This parser is based on Gason written and maintained by Ivan Vashchaev:
% https://github.com/vivkin/gason
% Gason is a "lightweight and fast JSON parser for C++". Please see the
% above link for license information and additional details about Gason.
%
% Given a JSON string, gason calls the C++ parser and converts the output
% into an appropriate Matlab structure. As the parsing is performed in mex
% the resulting parser is blazingly fast. Large JSON structs (100MB+) take
% only a few seconds to parse (compared to hours for pure Matlab parsers).
%
% Given a JSON object, gason calls the C++ encoder to convert the object
% back into a JSON string representation. Nearly any Matlab struct, cell
% array, or numeric array represent a valid JSON object. Note that gason()
% can be used to go both from JSON string to JSON object and back.
%
% Gason requires C++11 to compile (for GCC this requires version 4.7 or
% later). The following command compiles the parser (may require tweaking):
% mex('CXXFLAGS=\$CXXFLAGS -std=c++11 -Wall','-largeArrayDims',...
% 'private/gasonMex.cpp','../common/gason.cpp',...
% '-I../common/','-outdir','private');
% Note the use of the "-std=c++11" flag. A number of precompiled binaries
% are included, please do not contact us for help with compiling. If needed
% you can specify a compiler by adding the option 'CXX="/usr/bin/g++"'.
%
% Note that by default JSON arrays that contain only numbers are stored as
% regular Matlab arrays. Likewise, JSON arrays that contain only objects of
% the same type are stored as Matlab struct arrays. This is much faster and
% can use considerably less memory than always using Matlab cell arrays.
%
% USAGE
% object = gason( string )
% string = gason( object )
%
% INPUTS/OUTPUTS
% string - JSON string
% object - JSON object
%
% EXAMPLE
% o = struct('first',{'piotr','ty'},'last',{'dollar','lin'})
% s = gason( o ) % convert JSON object -> JSON string
% p = gason( s ) % convert JSON string -> JSON object
%
% See also
%
% Microsoft COCO Toolbox. version 2.0
% Data, paper, and tutorials available at: http://mscoco.org/
% Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
% Licensed under the Simplified BSD License [see coco/license.txt]
% delegate to the mex implementation; direction is inferred from the input
out = gasonMex( 'convert', in );
/**************************************************************************
* Microsoft COCO Toolbox. version 2.0
* Data, paper, and tutorials available at: http://mscoco.org/
* Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
* Licensed under the Simplified BSD License [see coco/license.txt]
**************************************************************************/
#include "gason.h"
#include "mex.h"
#include "string.h"
#include "math.h"
#include <cstdint>
#include <iomanip>
#include <sstream>
typedef std::ostringstream ostrm;
typedef unsigned long siz;
typedef unsigned short ushort;
siz length( const JsonValue &a ) {
  // get number of elements in JSON_ARRAY or JSON_OBJECT
  siz count = 0;
  for( auto node = a.toNode(); node; node = node->next ) count++;
  return count;
}
bool isRegularObjArray( const JsonValue &a ) {
  // check if all JSON_OBJECTs in JSON_ARRAY have the same fields
  // (same number of keys, in the same order, as the first element)
  JsonValue o=a.toNode()->value; siz k, n; const char **keys;
  n=length(o); keys=new const char*[n];
  k=0; for(auto j:o) keys[k++]=j->key;
  // fix: the original returned early without freeing keys (memory leak);
  // track the result and fall through to a single delete [] on all paths
  bool regular=true;
  for( auto i:a ) {
    if(length(i->value)!=n) { regular=false; break; }
    k=0;
    for(auto j:i->value)
      if(strcmp(j->key,keys[k++])) { regular=false; break; }
    if(!regular) break;
  }
  delete [] keys; return regular;
}
mxArray* json( const JsonValue &o ) {
// convert JsonValue to Matlab mxArray
siz k, m, n; mxArray *M; const char **keys;
switch( o.getTag() ) {
case JSON_NUMBER:
// scalar number -> 1x1 double
return mxCreateDoubleScalar(o.toNumber());
case JSON_STRING:
return mxCreateString(o.toString());
case JSON_ARRAY: {
// empty array -> 1x0 double matrix
if(!o.toNode()) return mxCreateDoubleMatrix(1,0,mxREAL);
JsonValue o0=o.toNode()->value; JsonTag tag=o0.getTag();
n=length(o); bool isRegular=true;
for(auto i:o) isRegular=isRegular && i->value.getTag()==tag;
if( isRegular && tag==JSON_OBJECT && isRegularObjArray(o) ) {
// array of objects with identical keys -> 1xn struct array
m=length(o0); keys=new const char*[m];
k=0; for(auto j:o0) keys[k++]=j->key;
M = mxCreateStructMatrix(1,n,m,keys);
k=0; for(auto i:o) { m=0; for(auto j:i->value)
mxSetFieldByNumber(M,k,m++,json(j->value)); k++; }
delete [] keys; return M;
} else if( isRegular && tag==JSON_NUMBER ) {
// array of numbers -> 1xn double matrix
M = mxCreateDoubleMatrix(1,n,mxREAL); double *p=mxGetPr(M);
k=0; for(auto i:o) p[k++]=i->value.toNumber(); return M;
} else {
// heterogeneous array -> 1xn cell array (recursive conversion)
M = mxCreateCellMatrix(1,n);
k=0; for(auto i:o) mxSetCell(M,k++,json(i->value));
return M;
}
}
case JSON_OBJECT:
// object -> 1x1 struct with one field per key
if(!o.toNode()) return mxCreateStructMatrix(1,0,0,NULL);
n=length(o); keys=new const char*[n];
k=0; for(auto i:o) keys[k++]=i->key;
M = mxCreateStructMatrix(1,1,n,keys); k=0;
for(auto i:o) mxSetFieldByNumber(M,0,k++,json(i->value));
delete [] keys; return M;
case JSON_TRUE:
return mxCreateDoubleScalar(1);
case JSON_FALSE:
return mxCreateDoubleScalar(0);
case JSON_NULL:
// null -> 0x0 empty matrix
return mxCreateDoubleMatrix(0,0,mxREAL);
default: return NULL;
}
}
template<class T, class C> ostrm& json( ostrm &S, T *A, siz n ) {
  // convert numeric array to JSON string, casting each element to C
  // (a lone element is written as a bare scalar, without brackets)
  if( n==0 ) { S << "[]"; return S; }
  if( n==1 ) { S << C(A[0]); return S; }
  S << "[";
  for( siz i=0; i<n; i++ ) {
    S << C(A[i]);
    if( i+1<n ) S << ",";
  }
  S << "]";
  return S;
}
template<class T> ostrm& json( ostrm &S, T *A, siz n ) {
// convert numeric array to JSON string without casting
// (delegates to the casting variant with C=T, i.e. an identity cast)
return json<T,T>(S,A,n);
}
ostrm& json( ostrm &S, const char *A ) {
// convert char array to JSON string (handle escape characters)
// NOTE(review): the loop condition *A>0 stops at the terminating NUL but
// also at any byte with the high bit set (char is signed here), so
// non-ASCII/UTF-8 text would be truncated — presumably inputs are ASCII;
// confirm before feeding this multi-byte encoded strings.
#define RPL(a,b) case a: { S << b; A++; break; }
S << "\""; while( *A>0 ) switch( *A ) {
RPL('"',"\\\""); RPL('\\',"\\\\"); RPL('/',"\\/"); RPL('\b',"\\b");
RPL('\f',"\\f"); RPL('\n',"\\n"); RPL('\r',"\\r"); RPL('\t',"\\t");
default: S << *A; A++;
}
S << "\""; return S;
}
ostrm& json( ostrm& S, const JsonValue *o ) {
  // serialize a parsed JsonValue back to its JSON string form
  JsonTag tag = o->getTag();
  if( tag==JSON_NUMBER ) { S << o->toNumber(); return S; }
  if( tag==JSON_TRUE )   { S << "true";  return S; }
  if( tag==JSON_FALSE )  { S << "false"; return S; }
  if( tag==JSON_NULL )   { S << "null";  return S; }
  if( tag==JSON_STRING ) return json(S,o->toString());
  if( tag==JSON_ARRAY ) {
    // elements separated by commas, no trailing separator
    S << "[";
    for( auto i:*o ) json(S,&i->value) << (i->next ? "," : "");
    S << "]";
    return S;
  }
  if( tag==JSON_OBJECT ) {
    // "key":value pairs separated by commas
    S << "{";
    for( auto i:*o ) {
      json(S,i->key) << ":";
      json(S,&i->value) << (i->next ? "," : "");
    }
    S << "}";
    return S;
  }
  return S;
}
ostrm& json( ostrm& S, const mxArray *M ) {
  // convert Matlab mxArray to JSON string
  siz i, j, m, n=mxGetNumberOfElements(M);
  void *A=mxGetData(M); ostrm *nms;
  switch( mxGetClassID(M) ) {
    // numeric classes: dispatch to the array serializer; 8-bit types are
    // widened to 32-bit so they print as numbers rather than characters
    case mxDOUBLE_CLASS: return json(S,(double*) A,n);
    case mxSINGLE_CLASS: return json(S,(float*) A,n);
    case mxINT64_CLASS: return json(S,(int64_t*) A,n);
    case mxUINT64_CLASS: return json(S,(uint64_t*) A,n);
    case mxINT32_CLASS: return json(S,(int32_t*) A,n);
    case mxUINT32_CLASS: return json(S,(uint32_t*) A,n);
    case mxINT16_CLASS: return json(S,(int16_t*) A,n);
    case mxUINT16_CLASS: return json(S,(uint16_t*) A,n);
    case mxINT8_CLASS: return json<int8_t,int32_t>(S,(int8_t*) A,n);
    case mxUINT8_CLASS: return json<uint8_t,uint32_t>(S,(uint8_t*) A,n);
    case mxLOGICAL_CLASS: return json<uint8_t,uint32_t>(S,(uint8_t*) A,n);
    case mxCHAR_CLASS: {
      // fix: free the buffer returned by mxArrayToString (was leaked)
      char *str=mxArrayToString(M);
      json(S,str); mxFree(str); return S;
    }
    case mxCELL_CLASS:
      // fix: guard n==0 — with unsigned siz the original bound n-1
      // wrapped around and read past the end of an empty cell array
      S << "[";
      for(i=0; n>0 && i<n-1; i++) json(S,mxGetCell(M,i)) << ",";
      if(n>0) json(S,mxGetCell(M,n-1));
      S << "]"; return S;
    case mxSTRUCT_CLASS:
      if(n==0) { S<<"{}"; return S; } m=mxGetNumberOfFields(M);
      if(m==0) {
        // fix: emit n empty objects without a trailing comma
        // (original produced invalid JSON such as "[{},{},]")
        S<<"[";
        for(i=0; i<n; i++) { S<<"{}"; if(i<n-1) S<<","; }
        S<<"]"; return S;
      }
      // struct array: pre-serialize field names once, then emit each
      // element as an object; arrays of length 1 are emitted unwrapped
      if(n>1) S<<"["; nms=new ostrm[m];
      for(j=0; j<m; j++) json(nms[j],mxGetFieldNameByNumber(M,j));
      for(i=0; i<n; i++) for(j=0; j<m; j++) {
        if(j==0) S << "{"; S << nms[j].str() << ":";
        json(S,mxGetFieldByNumber(M,i,j)) << ((j<m-1) ? "," : "}");
        if(j==m-1 && i<n-1) S<<",";
      }
      if(n>1) S<<"]"; delete [] nms; return S;
    default:
      mexErrMsgTxt( "Unknown type." ); return S;
  }
}
mxArray* mxCreateStringRobust( const char* str ) {
  // convert char* to Matlab string (robust version of mxCreateString)
  mwSize dims[2]; dims[0]=1; dims[1]=strlen(str);
  mxArray *M = mxCreateCharArray(2,dims);
  // copy bytes into the mxChar (16-bit) buffer one at a time
  ushort *dst = (ushort*) mxGetData(M);
  for( siz i=0; i<dims[1]; i++ ) dst[i]=str[i];
  return M;
}
char* mxArrayToStringRobust( const mxArray *M ) {
// convert Matlab string to char* (robust version of mxArrayToString)
if(!mxIsChar(M)) mexErrMsgTxt("String expected.");
ushort *c=(ushort*) mxGetData(M); char* str; siz n;
n=mxGetNumberOfElements(M); str=(char*) mxMalloc(n+1);
for( siz i=0; i<n; i++ ) str[i]=c[i]; str[n]=0; return str;
}
void mexFunction( int nl, mxArray *pl[], int nr, const mxArray *pr[] )
{
// entry point: pr[0] selects the action ('convert', 'split', or 'merge')
char action[1024]; if(!nr) mexErrMsgTxt("Inputs expected.");
mxGetString(pr[0],action,1024); nr--; pr++;
char *endptr; JsonValue val; JsonAllocator allocator;
if( nl>1 ) mexErrMsgTxt("One output expected.");
if(!strcmp(action,"convert")) {
if( nr!=1 ) mexErrMsgTxt("One input expected.");
if( mxGetClassID(pr[0])==mxCHAR_CLASS ) {
// object = mexFunction( string ): parse JSON text into an mxArray
char *str = mxArrayToStringRobust(pr[0]);
int status = jsonParse(str, &endptr, &val, allocator);
if( status != JSON_OK) mexErrMsgTxt(jsonStrError(status));
pl[0] = json(val); mxFree(str);
} else {
// string = mexFunction( object ): serialize an mxArray to JSON text
ostrm S; S << std::setprecision(12); json(S,pr[0]);
pl[0]=mxCreateStringRobust(S.str().c_str());
}
} else if(!strcmp(action,"split")) {
// strings = mexFunction( string, k ): split a JSON array into k chunks
if( nr!=2 ) mexErrMsgTxt("Two input expected.");
char *str = mxArrayToStringRobust(pr[0]);
int status = jsonParse(str, &endptr, &val, allocator);
if( status != JSON_OK) mexErrMsgTxt(jsonStrError(status));
if( val.getTag()!=JSON_ARRAY ) mexErrMsgTxt("Array expected");
// clamp k to [1,n], then round so chunks have near-equal sizes
siz i=0, t=0, n=length(val), k=(siz) mxGetScalar(pr[1]);
k=(k>n)?n:(k<1)?1:k; k=ceil(n/ceil(double(n)/k));
pl[0]=mxCreateCellMatrix(1,k); ostrm S; S<<std::setprecision(12);
// t counts elements remaining in the current chunk; when it hits 0 the
// chunk is closed with "]" and written to the output cell array
for(auto o:val) {
if(!t) { S.str(std::string()); S << "["; t=ceil(double(n)/k); }
json(S,&o->value); t--; if(!o->next) t=0; S << (t ? "," : "]");
if(!t) mxSetCell(pl[0],i++,mxCreateStringRobust(S.str().c_str()));
}
} else if(!strcmp(action,"merge")) {
// string = mexFunction( strings ): concatenate JSON arrays into one
if( nr!=1 ) mexErrMsgTxt("One input expected.");
if(!mxIsCell(pr[0])) mexErrMsgTxt("Cell array expected.");
siz n = mxGetNumberOfElements(pr[0]);
ostrm S; S << std::setprecision(12); S << "[";
for( siz i=0; i<n; i++ ) {
char *str = mxArrayToStringRobust(mxGetCell(pr[0],i));
int status = jsonParse(str, &endptr, &val, allocator);
if( status != JSON_OK) mexErrMsgTxt(jsonStrError(status));
if( val.getTag()!=JSON_ARRAY ) mexErrMsgTxt("Array expected");
// re-serialize each element of this input array into the merged output
for(auto j:val) json(S,&j->value) << (j->next ? "," : "");
mxFree(str); if(i<n-1) S<<",";
}
S << "]"; pl[0]=mxCreateStringRobust(S.str().c_str());
} else mexErrMsgTxt("Invalid action.");
}
function varargout = getPrmDflt( prm, dfs, checkExtra )
% Helper to set default values (if not already set) of parameter struct.
%
% Takes input parameters and a list of 'name'/default pairs, and for each
% 'name' for which prm has no value (prm.(name) is not a field or 'name'
% does not appear in prm list), getPrmDflt assigns the given default
% value. If default value for variable 'name' is 'REQ', and value for
% 'name' is not given, an error is thrown. See below for usage details.
%
% USAGE (nargout==1)
%  prm = getPrmDflt( prm, dfs, [checkExtra] )
%
% USAGE (nargout>1)
%  [ param1 ... paramN ] = getPrmDflt( prm, dfs, [checkExtra] )
%
% INPUTS
%  prm        - param struct or cell of form {'name1' v1 'name2' v2 ...}
%  dfs        - cell of form {'name1' def1 'name2' def2 ...}
%  checkExtra - [0] if 1 throw error if prm contains params not in dfs
%               if -1 if prm contains params not in dfs adds them
%
% OUTPUTS (nargout==1)
%  prm    - parameter struct with fields 'name1' through 'nameN' assigned
%
% OUTPUTS (nargout>1)
%  param1 - value assigned to parameter with 'name1'
%   ...
%  paramN - value assigned to parameter with 'nameN'
%
% EXAMPLE
%  dfs = { 'x','REQ', 'y',0, 'z',[], 'eps',1e-3 };
%  prm = getPrmDflt( struct('x',1,'y',1), dfs )
%  [ x y z eps ] = getPrmDflt( {'x',2,'y',1}, dfs )
%
% See also INPUTPARSER
%
% Piotr's Computer Vision Matlab Toolbox Version 2.60
% Copyright 2014 Piotr Dollar. [pdollar-at-gmail.com]
% Licensed under the Simplified BSD License [see external/bsd.txt]
if( mod(length(dfs),2) ), error('odd number of default parameters'); end
if nargin<=2, checkExtra = 0; end
% get the input parameters as two cell arrays: prmVal and prmField
if iscell(prm) && length(prm)==1, prm=prm{1}; end
if iscell(prm)
  if(mod(length(prm),2)), error('odd number of parameters in prm'); end
  prmField = prm(1:2:end); prmVal = prm(2:2:end);
else
  if(~isstruct(prm)), error('prm must be a struct or a cell'); end
  prmVal = struct2cell(prm); prmField = fieldnames(prm);
end
% get and update default values using quick for loop
dfsField = dfs(1:2:end); dfsVal = dfs(2:2:end);
if checkExtra>0
  % checkExtra==1: any parameter not listed in dfs is an error
  for i=1:length(prmField)
    j = find(strcmp(prmField{i},dfsField));
    if isempty(j), error('parameter %s is not valid', prmField{i}); end
    dfsVal(j) = prmVal(i);
  end
elseif checkExtra<0
  % checkExtra==-1: unknown parameters are appended rather than rejected
  for i=1:length(prmField)
    j = find(strcmp(prmField{i},dfsField));
    if isempty(j), j=length(dfsVal)+1; dfsField{j}=prmField{i}; end
    dfsVal(j) = prmVal(i);
  end
else
  % checkExtra==0: unknown parameters are silently ignored (no match ->
  % the logical index selects nothing and the assignment is a no-op)
  for i=1:length(prmField)
    dfsVal(strcmp(prmField{i},dfsField)) = prmVal(i);
  end
end
% check for missing values: any default still equal to 'REQ' was required
if any(strcmp('REQ',dfsVal))
  cmpArray = find(strcmp('REQ',dfsVal));
  error(['Required field ''' dfsField{cmpArray(1)} ''' not specified.'] );
end
% set output: one struct, or one output variable per default in dfs order
if nargout==1
  varargout{1} = cell2struct( dfsVal, dfsField, 2 );
else
  varargout = dfsVal;
end
# Declare targets as phony: they name actions, not files, so they must run
# even if files called "all" or "install" ever appear in this directory.
.PHONY: all install

all:
	# install pycocotools locally
	python setup.py build_ext --inplace
	rm -rf build

install:
	# install pycocotools to the Python site-packages
	python setup.py build_ext install
	rm -rf build
\ No newline at end of file
__author__ = 'tylin'
__version__ = '2.0'
# Interface for accessing the Microsoft COCO dataset.
# Microsoft COCO is a large image dataset designed for object detection,
# segmentation, and caption generation. pycocotools is a Python API that
# assists in loading, parsing and visualizing the annotations in COCO.
# Please visit http://mscoco.org/ for more information on COCO, including
# for the data, paper, and tutorials. The exact format of the annotations
# is also described on the COCO website. For example usage of the pycocotools
# please see pycocotools_demo.ipynb. In addition to this API, please download both
# the COCO images and annotations in order to run the demo.
# An alternative to using the API is to load the annotations directly
# into a Python dictionary.
# Using the API provides additional utility functions. Note that this API
# supports both *instance* and *caption* annotations. In the case of
# captions not all functions are defined (e.g. categories are undefined).
# The following API functions are defined:
# COCO - COCO api class that loads COCO annotation file and prepare data structures.
# decodeMask - Decode binary mask M encoded via run-length encoding.
# encodeMask - Encode binary mask M using run-length encoding.
# getAnnIds - Get ann ids that satisfy given filter conditions.
# getCatIds - Get cat ids that satisfy given filter conditions.
# getImgIds - Get img ids that satisfy given filter conditions.
# loadAnns - Load anns with the specified ids.
# loadCats - Load cats with the specified ids.
# loadImgs - Load imgs with the specified ids.
# annToMask - Convert segmentation in an annotation to binary mask.
# showAnns - Display the specified annotations.
# loadRes - Load algorithm results and create API for accessing them.
# download - Download COCO images from mscoco.org server.
# Throughout the API "ann"=annotation, "cat"=category, and "img"=image.
# Help on each function can be accessed by: "help COCO>function".
# See also COCO>decodeMask,
# COCO>encodeMask, COCO>getAnnIds, COCO>getCatIds,
# COCO>getImgIds, COCO>loadAnns, COCO>loadCats,
# COCO>loadImgs, COCO>annToMask, COCO>showAnns
# Microsoft COCO Toolbox. version 2.0
# Data, paper, and tutorials available at: http://mscoco.org/
# Code written by Piotr Dollar and Tsung-Yi Lin, 2014.
# Licensed under the Simplified BSD License [see bsd.txt]
try:
import ujson as json
except:
import json
print("NOTE! Installing ujson may make loading annotations faster.")
import time
import matplotlib.pyplot as plt
from matplotlib.collections import PatchCollection
from matplotlib.patches import Polygon
import numpy as np
import copy
import itertools
from . import mask as maskUtils
import os
from collections import defaultdict
import sys
PYTHON_VERSION = sys.version_info[0]
if PYTHON_VERSION == 2:
from urllib import urlretrieve
elif PYTHON_VERSION == 3:
from urllib.request import urlretrieve
import ext
import multiprocessing as mp
def mp_pool_task(func, args):
    """Invoke ``func`` with ``args`` unpacked; only evaluation returns a value.

    The result is propagated back to the caller solely for
    ``ext.cpp_evaluate``; every other extension call is executed for its
    side effects and yields None.
    """
    # (OMP_PROC_BIND/OMP_PLACES thread pinning was considered and left out.)
    out = func(*args)
    return out if func is ext.cpp_evaluate else None
def mp_pool_wrapper(proc_pid_map, func, args):
    """Dispatch one C++-extension call, patching its arguments per process.

    ``proc_pid_map`` maps an OS pid to a worker id; when it is None or
    empty, the current process is treated as worker 0.
    NOTE(review): if a non-empty map lacks the current pid this raises
    KeyError -- presumably multi-process callers always populate it; verify.
    Only ``ext.cpp_evaluate`` produces a return value.
    """
    current = os.getpid()
    if not proc_pid_map:
        proc_pid_map = {current: 0}
    worker_id = proc_pid_map[current]
    if func is ext.cpp_create_index:
        # cpp_create_index expects the worker id as its third argument
        patched = list(args)
        patched.insert(2, worker_id)
        mp_pool_task(func, tuple(patched))
    if func is ext.cpp_load_res_numpy:
        # args[0] is the path of an ndarray saved by loadRes()
        loaded = np.load(args[0], allow_pickle=True)
        mp_pool_task(func, (loaded, args[1]))
    if func is ext.cpp_load_res_json:
        mp_pool_task(func, args)
    if func is ext.cpp_evaluate:
        return mp_pool_task(func, args)
def _isArrayLike(obj):
return hasattr(obj, '__iter__') and hasattr(obj, '__len__')
class COCO:
    """COCO helper class for reading, indexing and visualizing annotations.

    When ``use_ext`` is True, indexing, result loading and evaluation are
    delegated to the C++ ``ext`` module (optionally across multiple
    processes); otherwise everything is built with pure-Python dicts.
    """
    def __init__(self, annotation_file=None, use_ext=False, multi_procs=(1, None), num_threads=1):
        """
        Constructor of Microsoft COCO helper class for reading and visualizing annotations.
        :param annotation_file (str): location of annotation file
        :param use_ext (bool): delegate heavy work to the C++ extension
        :param multi_procs (tuple): (num processes, {pid: proc_id} map) for multi-process runs
        :param num_threads (int): OpenMP thread count used by the extension
        :return:
        """
        # load dataset
        self.dataset, self.anns, self.cats, self.imgs = dict(), dict(), dict(), dict()
        self.imgToAnns, self.catToImgs = defaultdict(list), defaultdict(list)
        self.annotation_file = annotation_file
        self.use_ext = use_ext
        self.num_procs, self.proc_pid_map = multi_procs
        self.num_threads = num_threads
        if self.annotation_file is not None:
            print('Loading annotations into memory...')
            tic = time.time()
            # context manager so the file handle is closed promptly (the
            # previous open() call leaked it)
            with open(annotation_file, 'r') as f:
                dataset = json.load(f)
            assert type(dataset) == dict, 'annotation file format {} not supported'.format(type(dataset))
            self.dataset = dataset
            print('Done (t={:0.2f}s)'.format(time.time() - tic))
            self.createIndex()

    def createIndex(self, use_ext=False):
        """Build the lookup tables (anns, imgs, cats, imgToAnns, catToImgs) from self.dataset."""
        print('Creating index...')
        if self.use_ext or use_ext:
            # the C++ extension re-reads the annotation file and indexes it itself
            tic = time.time()
            input_iter = (self.proc_pid_map, ext.cpp_create_index,
                          (self.annotation_file, self.num_procs, self.num_threads))
            mp_pool_wrapper(*input_iter)
            print('Done (t={:0.2f}s)'.format(time.time() - tic))
            return
        anns, cats, imgs = {}, {}, {}
        imgToAnns, catToImgs = defaultdict(list), defaultdict(list)
        if 'annotations' in self.dataset:
            for ann in self.dataset['annotations']:
                imgToAnns[ann['image_id']].append(ann)
                anns[ann['id']] = ann
        if 'images' in self.dataset:
            for img in self.dataset['images']:
                imgs[img['id']] = img
        if 'categories' in self.dataset:
            for cat in self.dataset['categories']:
                cats[cat['id']] = cat
        if 'annotations' in self.dataset and 'categories' in self.dataset:
            for ann in self.dataset['annotations']:
                catToImgs[ann['category_id']].append(ann['image_id'])
        print('index created!')
        # create class members
        self.anns = anns
        self.imgToAnns = imgToAnns
        self.catToImgs = catToImgs
        self.imgs = imgs
        self.cats = cats

    def info(self):
        """
        Print information about the annotation file.
        :return:
        """
        for key, value in self.dataset['info'].items():
            print('{}: {}'.format(key, value))

    def getimgToAnns(self):
        """Accessor for the image-id -> annotation-list mapping."""
        return self.imgToAnns

    def getAnnIds(self, imgIds=[], catIds=[], areaRng=[], iscrowd=None):
        """
        Get ann ids that satisfy given filter conditions. default skips that filter
        :param imgIds (int array) : get anns for given imgs
               catIds (int array) : get anns for given cats
               areaRng (float array) : get anns for given area range (e.g. [0 inf])
               iscrowd (boolean) : get anns for given crowd label (False or True)
        :return: ids (int array) : integer array of ann ids
        """
        imgIds = imgIds if _isArrayLike(imgIds) else [imgIds]
        catIds = catIds if _isArrayLike(catIds) else [catIds]
        if len(imgIds) == len(catIds) == len(areaRng) == 0:
            anns = self.dataset['annotations']
        else:
            if not len(imgIds) == 0:
                lists = [self.imgToAnns[imgId] for imgId in imgIds if imgId in self.imgToAnns]
                anns = list(itertools.chain.from_iterable(lists))
            else:
                anns = self.dataset['annotations']
            anns = anns if len(catIds) == 0 else [ann for ann in anns if ann['category_id'] in catIds]
            anns = anns if len(areaRng) == 0 else [ann for ann in anns if ann['area'] > areaRng[0] and ann['area'] < areaRng[1]]
        if iscrowd is not None:
            ids = [ann['id'] for ann in anns if ann['iscrowd'] == iscrowd]
        else:
            ids = [ann['id'] for ann in anns]
        return ids

    def getCatIds(self, catNms=[], supNms=[], catIds=[]):
        """
        filtering parameters. default skips that filter.
        :param catNms (str array) : get cats for given cat names
        :param supNms (str array) : get cats for given supercategory names
        :param catIds (int array) : get cats for given cat ids
        :return: ids (int array) : integer array of cat ids
        """
        catNms = catNms if _isArrayLike(catNms) else [catNms]
        supNms = supNms if _isArrayLike(supNms) else [supNms]
        catIds = catIds if _isArrayLike(catIds) else [catIds]
        if len(catNms) == len(supNms) == len(catIds) == 0:
            cats = self.dataset['categories']
        else:
            cats = self.dataset['categories']
            cats = cats if len(catNms) == 0 else [cat for cat in cats if cat['name'] in catNms]
            cats = cats if len(supNms) == 0 else [cat for cat in cats if cat['supercategory'] in supNms]
            cats = cats if len(catIds) == 0 else [cat for cat in cats if cat['id'] in catIds]
        ids = [cat['id'] for cat in cats]
        return ids

    def getImgIds(self, imgIds=[], catIds=[]):
        '''
        Get img ids that satisfy given filter conditions.
        :param imgIds (int array) : get imgs for given ids
        :param catIds (int array) : get imgs with all given cats
        :return: ids (int array) : integer array of img ids
        '''
        imgIds = imgIds if _isArrayLike(imgIds) else [imgIds]
        catIds = catIds if _isArrayLike(catIds) else [catIds]
        if len(imgIds) == len(catIds) == 0:
            ids = self.imgs.keys()
        else:
            ids = set(imgIds)
            for i, catId in enumerate(catIds):
                # first category seeds the set when no imgIds were given;
                # subsequent categories intersect (imgs must have ALL cats)
                if i == 0 and len(ids) == 0:
                    ids = set(self.catToImgs[catId])
                else:
                    ids &= set(self.catToImgs[catId])
        return list(ids)

    def getAnns(self):
        """Accessor for the ann-id -> annotation mapping."""
        return self.anns

    def loadAnns(self, ids=[]):
        """
        Load anns with the specified ids.
        :param ids (int array) : integer ids specifying anns
        :return: anns (object array) : loaded ann objects
        """
        if _isArrayLike(ids):
            return [self.anns[id] for id in ids]
        elif type(ids) == int:
            return [self.anns[ids]]

    def loadCats(self, ids=[]):
        """
        Load cats with the specified ids.
        :param ids (int array) : integer ids specifying cats
        :return: cats (object array) : loaded cat objects
        """
        if _isArrayLike(ids):
            return [self.cats[id] for id in ids]
        elif type(ids) == int:
            return [self.cats[ids]]

    def loadImgs(self, ids=[]):
        """
        Load imgs with the specified ids.
        :param ids (int array) : integer ids specifying img
        :return: imgs (object array) : loaded img objects
        """
        if _isArrayLike(ids):
            return [self.imgs[id] for id in ids]
        elif type(ids) == int:
            return [self.imgs[ids]]

    def showAnns(self, anns):
        """
        Display the specified annotations on the current matplotlib axes.
        :param anns (array of object): annotations to display
        :return: None (0 when anns is empty)
        """
        if len(anns) == 0:
            return 0
        if 'segmentation' in anns[0] or 'keypoints' in anns[0]:
            datasetType = 'instances'
        elif 'caption' in anns[0]:
            datasetType = 'captions'
        else:
            raise Exception('datasetType not supported')
        if datasetType == 'instances':
            ax = plt.gca()
            ax.set_autoscale_on(False)
            polygons = []
            color = []
            for ann in anns:
                # random light color per annotation
                c = (np.random.random((1, 3))*0.6+0.4).tolist()[0]
                if 'segmentation' in ann:
                    if type(ann['segmentation']) == list:
                        # polygon
                        for seg in ann['segmentation']:
                            poly = np.array(seg).reshape((int(len(seg)/2), 2))
                            polygons.append(Polygon(poly))
                            color.append(c)
                    else:
                        # mask (RLE, possibly uncompressed)
                        t = self.imgs[ann['image_id']]
                        if type(ann['segmentation']['counts']) == list:
                            rle = maskUtils.frPyObjects([ann['segmentation']], t['height'], t['width'])
                        else:
                            rle = [ann['segmentation']]
                        m = maskUtils.decode(rle)
                        img = np.ones((m.shape[0], m.shape[1], 3))
                        if ann['iscrowd'] == 1:
                            color_mask = np.array([2.0, 166.0, 101.0])/255
                        if ann['iscrowd'] == 0:
                            color_mask = np.random.random((1, 3)).tolist()[0]
                        for i in range(3):
                            img[:, :, i] = color_mask[i]
                        ax.imshow(np.dstack((img, m*0.5)))
                if 'keypoints' in ann and type(ann['keypoints']) == list:
                    # turn skeleton into zero-based index
                    sks = np.array(self.loadCats(ann['category_id'])[0]['skeleton'])-1
                    kp = np.array(ann['keypoints'])
                    x = kp[0::3]
                    y = kp[1::3]
                    v = kp[2::3]
                    for sk in sks:
                        if np.all(v[sk] > 0):
                            plt.plot(x[sk], y[sk], linewidth=3, color=c)
                    plt.plot(x[v > 0], y[v > 0], 'o', markersize=8, markerfacecolor=c, markeredgecolor='k', markeredgewidth=2)
                    plt.plot(x[v > 1], y[v > 1], 'o', markersize=8, markerfacecolor=c, markeredgecolor=c, markeredgewidth=2)
            p = PatchCollection(polygons, facecolor=color, linewidths=0, alpha=0.4)
            ax.add_collection(p)
            p = PatchCollection(polygons, facecolor='none', edgecolors=color, linewidths=2)
            ax.add_collection(p)
        elif datasetType == 'captions':
            for ann in anns:
                print(ann['caption'])

    def loadRes(self, resFile, use_ext=False):
        """
        Load result file and return a result api object.
        :param resFile (str/ndarray/list) : result file name, Nx7 ndarray, or list of anns
        :param use_ext (bool): delegate loading to the C++ extension
        :return: res (obj) : result api object
        """
        print('Loading and preparing results...')
        tic = time.time()
        res = COCO()
        if self.use_ext or use_ext:
            if type(resFile) == np.ndarray:
                # hand the array to the extension through a shared-memory file
                resfilename = ''.join(['/dev/shm/resfile_', str(os.getpid()), '.npy'])
                np.save(resfilename, resFile)
                input_iter = (self.proc_pid_map, ext.cpp_load_res_numpy,
                              (resfilename, self.num_threads))
            elif type(resFile) == str:
                input_iter = (self.proc_pid_map, ext.cpp_load_res_json,
                              (resFile, self.num_threads))
            else:
                # previously this only printed a message and then crashed on
                # the undefined input_iter below; fail fast with a clear error
                raise TypeError('loadRes only supports numpy array or json file name as its input')
            mp_pool_wrapper(*input_iter)
            print('DONE (t={:0.2f}s)'.format(time.time() - tic))
            return res
        res.dataset['images'] = [img for img in self.dataset['images']]
        if type(resFile) == str:
            # context manager so the result file handle is closed promptly
            with open(resFile) as f:
                anns = json.load(f)
        elif type(resFile) == np.ndarray:
            anns = self.loadNumpyAnnotations(resFile)
        else:
            anns = resFile
        assert type(anns) == list, 'results is not an array of objects'
        annsImgIds = [ann['image_id'] for ann in anns]
        assert set(annsImgIds) == (set(annsImgIds) & set(self.getImgIds())), \
            'Results do not correspond to current coco set'
        if 'caption' in anns[0]:
            imgIds = set([img['id'] for img in res.dataset['images']]) & set([ann['image_id'] for ann in anns])
            res.dataset['images'] = [img for img in res.dataset['images'] if img['id'] in imgIds]
            for id, ann in enumerate(anns):
                ann['id'] = id+1
        elif 'bbox' in anns[0] and not anns[0]['bbox'] == []:
            res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
            for id, ann in enumerate(anns):
                bb = ann['bbox']
                x1, x2, y1, y2 = [bb[0], bb[0]+bb[2], bb[1], bb[1]+bb[3]]
                if 'segmentation' not in ann:
                    # synthesize a rectangular polygon from the box
                    ann['segmentation'] = [[x1, y1, x1, y2, x2, y2, x2, y1]]
                ann['area'] = bb[2]*bb[3]
                ann['id'] = id+1
                ann['iscrowd'] = 0
        elif 'segmentation' in anns[0]:
            res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
            for id, ann in enumerate(anns):
                # now only support compressed RLE format as segmentation results
                ann['area'] = maskUtils.area(ann['segmentation'])
                if 'bbox' not in ann:
                    ann['bbox'] = maskUtils.toBbox(ann['segmentation'])
                ann['id'] = id+1
                ann['iscrowd'] = 0
        elif 'keypoints' in anns[0]:
            res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
            for id, ann in enumerate(anns):
                s = ann['keypoints']
                x = s[0::3]
                y = s[1::3]
                x0, x1, y0, y1 = np.min(x), np.max(x), np.min(y), np.max(y)
                ann['area'] = (x1-x0)*(y1-y0)
                ann['id'] = id + 1
                ann['bbox'] = [x0, y0, x1-x0, y1-y0]
        print('DONE (t={:0.2f}s)'.format(time.time() - tic))
        res.dataset['annotations'] = anns
        res.createIndex(self.use_ext)
        return res

    def download(self, tarDir=None, imgIds=[]):
        '''
        Download COCO images from mscoco.org server.
        :param tarDir (str): COCO results directory name
               imgIds (list): images to be downloaded
        :return: None (-1 when tarDir is missing)
        '''
        if tarDir is None:
            print('Please specify target directory')
            return -1
        if len(imgIds) == 0:
            imgs = self.imgs.values()
        else:
            imgs = self.loadImgs(imgIds)
        N = len(imgs)
        if not os.path.exists(tarDir):
            os.makedirs(tarDir)
        for i, img in enumerate(imgs):
            tic = time.time()
            fname = os.path.join(tarDir, img['file_name'])
            if not os.path.exists(fname):
                urlretrieve(img['coco_url'], fname)
            # i+1 so the progress line reads 1/N .. N/N instead of 0/N .. N-1/N
            print('downloaded {}/{} images (t={:0.1f}s)'.format(i+1, N, time.time() - tic))

    def loadNumpyAnnotations(self, data):
        """
        Convert result data from a numpy array [Nx7] where each row contains {imageID,x1,y1,w,h,score,class}
        :param data (numpy.ndarray)
        :return: annotations (python nested list)
        """
        print('Converting ndarray to lists...')
        assert (type(data) == np.ndarray)
        print(data.shape)
        assert (data.shape[1] == 7)
        N = data.shape[0]
        ann = []
        for i in range(N):
            if i % 1000000 == 0:
                print('{}/{}'.format(i, N))
            ann += [{
                'image_id': int(data[i, 0]),
                'bbox': [data[i, 1], data[i, 2], data[i, 3], data[i, 4]],
                'score': data[i, 5],
                'category_id': int(data[i, 6]),
            }]
        return ann

    def getImgs(self):
        """Accessor for the img-id -> image mapping."""
        return self.imgs

    def annToRLE(self, ann):
        """
        Convert annotation which can be polygons, uncompressed RLE to RLE.
        :return: rle (compressed RLE dict)
        """
        t = self.imgs[ann['image_id']]
        h, w = t['height'], t['width']
        segm = ann['segmentation']
        if type(segm) == list:
            # polygon -- a single object might consist of multiple parts
            # we merge all parts into one mask rle code
            rles = maskUtils.frPyObjects(segm, h, w)
            rle = maskUtils.merge(rles)
        elif type(segm['counts']) == list:
            # uncompressed RLE
            rle = maskUtils.frPyObjects(segm, h, w)
        else:
            # rle
            rle = ann['segmentation']
        return rle

    def annToMask(self, ann):
        """
        Convert annotation which can be polygons, uncompressed RLE, or RLE to binary mask.
        :return: binary mask (numpy 2D array)
        """
        rle = self.annToRLE(ann)
        m = maskUtils.decode(rle)
        return m
__author__ = 'tsungyi'
import numpy as np
import datetime
import time
from collections import defaultdict
from . import mask as maskUtils
import copy
import ext
from .coco import mp_pool_wrapper, mp_pool_task
class COCOeval:
# Interface for evaluating detection on the Microsoft COCO dataset.
#
# The usage for CocoEval is as follows:
# cocoGt=..., cocoDt=... # load dataset and results
# E = CocoEval(cocoGt,cocoDt); # initialize CocoEval object
# E.params.recThrs = ...; # set parameters as desired
# E.evaluate(); # run per image evaluation
# E.accumulate(); # accumulate per image results
# E.summarize(); # display summary metrics of results
# For example usage see evalDemo.m and http://mscoco.org/.
#
# The evaluation parameters are as follows (defaults in brackets):
# imgIds - [all] N img ids to use for evaluation
# catIds - [all] K cat ids to use for evaluation
# iouThrs - [.5:.05:.95] T=10 IoU thresholds for evaluation
# recThrs - [0:.01:1] R=101 recall thresholds for evaluation
# areaRng - [...] A=4 object area ranges for evaluation
# maxDets - [1 10 100] M=3 thresholds on max detections per image
# iouType - ['segm'] set iouType to 'segm', 'bbox' or 'keypoints'
# iouType replaced the now DEPRECATED useSegm parameter.
# useCats - [1] if true use category labels for evaluation
# Note: if useCats=0 category labels are ignored as in proposal scoring.
# Note: multiple areaRngs [Ax2] and maxDets [Mx1] can be specified.
#
# evaluate(): evaluates detections on every image and every category and
# concats the results into the "evalImgs" with fields:
# dtIds - [1xD] id for each of the D detections (dt)
# gtIds - [1xG] id for each of the G ground truths (gt)
# dtMatches - [TxD] matching gt id at each IoU or 0
# gtMatches - [TxG] matching dt id at each IoU or 0
# dtScores - [1xD] confidence of each dt
# gtIgnore - [1xG] ignore flag for each gt
# dtIgnore - [TxD] ignore flag for each dt at each IoU
#
# accumulate(): accumulates the per-image, per-category evaluation
# results in "evalImgs" into the dictionary "eval" with fields:
# params - parameters used for evaluation
# date - date evaluation was performed
# counts - [T,R,K,A,M] parameter dimensions (see above)
# precision - [TxRxKxAxM] precision for every evaluation setting
# recall - [TxKxAxM] max recall for every evaluation setting
# Note: precision and recall==-1 for settings with no gt objects.
#
# See also coco, mask, pycocoDemo, pycocoEvalDemo
#
# Microsoft COCO Toolbox. version 2.0
# Data, paper, and tutorials available at: http://mscoco.org/
# Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
# Licensed under the Simplified BSD License [see coco/license.txt]
def __init__(self, cocoGt=None, cocoDt=None, iouType='segm', use_ext=False, multi_procs=(1, None), num_threads=1):
    '''
    Initialize CocoEval using coco APIs for gt and dt
    :param cocoGt: coco object with ground truth annotations
    :param cocoDt: coco object with detection results
    :param iouType: 'segm', 'bbox' or 'keypoints'
    :param use_ext: evaluate via the C++ extension instead of pure Python
    :param multi_procs: (num processes, {pid: proc_id} map) for multi-process runs
    :param num_threads: OpenMP thread count used by the C++ extension
    :return: None
    '''
    if not iouType:
        print('iouType not specified. use default iouType segm')
    self.cocoGt = cocoGt                # ground truth COCO API
    self.cocoDt = cocoDt                # detections COCO API
    self.evalImgs = defaultdict(list)   # per-image per-category evaluation results [KxAxI] elements
    self.eval = {}                      # accumulated evaluation results
    self._gts = defaultdict(list)       # gt for evaluation
    self._dts = defaultdict(list)       # dt for evaluation
    # evaluation parameters; the previous `self.params = {}` placeholder was
    # dead code, immediately overwritten by this assignment
    self.params = Params(iouType=iouType)
    self._paramsEval = {}               # parameters for evaluation
    self.stats = []                     # result summarization
    self.ious = {}                      # ious between all gts and dts
    self.use_ext = use_ext              # use c++ extension
    self.num_threads = num_threads      # number of OpenMP threads
    self.num_procs, self.proc_pid_map = multi_procs
    if not self.use_ext:
        if cocoGt is not None:
            self.params.imgIds = sorted(cocoGt.getImgIds())
            self.params.catIds = sorted(cocoGt.getCatIds())
def _prepare(self):
    '''
    Prepare ._gts and ._dts for evaluation based on params
    :return: None
    '''
    def _toMask(anns, coco):
        # modify ann['segmentation'] by reference
        for ann in anns:
            rle = coco.annToRLE(ann)
            ann['segmentation'] = rle
    p = self.params
    if p.useCats:
        gts=self.cocoGt.loadAnns(self.cocoGt.getAnnIds(imgIds=p.imgIds, catIds=p.catIds))
        dts=self.cocoDt.loadAnns(self.cocoDt.getAnnIds(imgIds=p.imgIds, catIds=p.catIds))
    else:
        gts=self.cocoGt.loadAnns(self.cocoGt.getAnnIds(imgIds=p.imgIds))
        dts=self.cocoDt.loadAnns(self.cocoDt.getAnnIds(imgIds=p.imgIds))
    # convert ground truth to mask if iouType == 'segm'
    if p.iouType == 'segm':
        _toMask(gts, self.cocoGt)
        _toMask(dts, self.cocoDt)
    # set ignore flag
    for gt in gts:
        gt['ignore'] = gt['ignore'] if 'ignore' in gt else 0
        # NOTE(review): the next line unconditionally overwrites the value
        # set just above, so a gt's own 'ignore' field is discarded and only
        # crowd status decides the flag -- confirm this is intentional
        gt['ignore'] = 'iscrowd' in gt and gt['iscrowd']
        if p.iouType == 'keypoints':
            # keypoint eval additionally ignores gts with no labeled keypoints
            gt['ignore'] = (gt['num_keypoints'] == 0) or gt['ignore']
    self._gts = defaultdict(list)       # gt for evaluation, keyed by (image_id, category_id)
    self._dts = defaultdict(list)       # dt for evaluation, keyed by (image_id, category_id)
    for gt in gts:
        self._gts[gt['image_id'], gt['category_id']].append(gt)
    for dt in dts:
        self._dts[dt['image_id'], dt['category_id']].append(dt)
    self.evalImgs = defaultdict(list)   # per-image per-category evaluation results
    self.eval = {}                      # accumulated evaluation results
def evaluate(self):
    '''
    Run per image evaluation on given images and store results (a list of dict) in self.evalImgs
    :return: None
    '''
    tic = time.time()
    print('Running per image evaluation...')
    p = self.params
    # add backward compatibility if useSegm is specified in params
    if not p.useSegm is None:
        p.iouType = 'segm' if p.useSegm == 1 else 'bbox'
        print('useSegm (deprecated) is not None. Running {} evaluation'.format(p.iouType))
    print('Evaluate annotation type *{}*'.format(p.iouType))
    if not self.use_ext:
        # de-duplicate ids only on the pure-Python path; the C++ extension
        # manages its own id lists
        p.imgIds = list(np.unique(p.imgIds))
        if p.useCats:
            p.catIds = list(np.unique(p.catIds))
        p.maxDets = sorted(p.maxDets)
    self.params=p
    if self.use_ext:
        # C++ extension path: prepare/IoU/matching all happen inside ext
        p.areaRng=np.array(p.areaRng)
        p.maxDets=np.array(p.maxDets,dtype=np.int32)
        input_iter = (self.proc_pid_map, ext.cpp_evaluate,
                      (p.useCats,p.areaRng,p.iouThrs,p.maxDets,p.recThrs,p.iouType,self.num_threads))
        outputs = mp_pool_wrapper(*input_iter)
        # if self.num_procs >1, outputs are only results from one proc;
        # please average coco_eval.stats after summarize() call
        p.imgIds, p.catIds, self.eval = outputs
        print('DONE (t={:0.2f}s).'.format(time.time()-tic))
        return
    self._prepare()
    # loop through images, area range, max detection number
    catIds = p.catIds if p.useCats else [-1]
    # keypoint eval reuses the same machinery with OKS in place of IoU
    if p.iouType == 'segm' or p.iouType == 'bbox':
        computeIoU = self.computeIoU
    elif p.iouType == 'keypoints':
        computeIoU = self.computeOks
    self.ious = {(imgId, catId): computeIoU(imgId, catId) \
                 for imgId in p.imgIds
                 for catId in catIds}
    evaluateImg = self.evaluateImg
    maxDet = p.maxDets[-1]
    # flat list indexed as [catId][areaRng][imgId] (see accumulate's Nk/Na math)
    self.evalImgs = [evaluateImg(imgId, catId, areaRng, maxDet)
                     for catId in catIds
                     for areaRng in p.areaRng
                     for imgId in p.imgIds
                     ]
    # snapshot the parameters actually used, for accumulate()
    self._paramsEval = copy.deepcopy(self.params)
    toc = time.time()
    print('DONE (t={:0.2f}s).'.format(toc-tic))
def computeIoU(self, imgId, catId):
    """Compute the IoU matrix (detections x ground truths) for one image/category.

    Detections are sorted by descending score (stable mergesort) and capped
    at the largest maxDets threshold before IoU computation. Returns [] when
    the pair has neither ground truths nor detections.
    """
    p = self.params
    if p.useCats:
        gts = self._gts[imgId, catId]
        dts = self._dts[imgId, catId]
    else:
        # category-agnostic mode: pool annotations across all categories
        gts = [g for cId in p.catIds for g in self._gts[imgId, cId]]
        dts = [d for cId in p.catIds for d in self._dts[imgId, cId]]
    if not gts and not dts:
        return []
    order = np.argsort([-d['score'] for d in dts], kind='mergesort')
    cap = p.maxDets[-1]
    dts = [dts[i] for i in order[:cap]]
    if p.iouType == 'segm':
        key = 'segmentation'
    elif p.iouType == 'bbox':
        key = 'bbox'
    else:
        raise Exception('unknown iouType for iou computation')
    g = [gt[key] for gt in gts]
    d = [dt[key] for dt in dts]
    # crowd regions may overlap any number of detections
    iscrowd = [int(gt['iscrowd']) for gt in gts]
    return maskUtils.iou(d, g, iscrowd)
def computeOks(self, imgId, catId):
    """Compute the OKS (object keypoint similarity) matrix (dts x gts) for one image/category."""
    p = self.params
    # dimension here should be Nxm
    gts = self._gts[imgId, catId]
    dts = self._dts[imgId, catId]
    # sort detections by descending score (stable) and cap at max detections
    inds = np.argsort([-d['score'] for d in dts], kind='mergesort')
    dts = [dts[i] for i in inds]
    if len(dts) > p.maxDets[-1]:
        dts = dts[0:p.maxDets[-1]]
    # if len(gts) == 0 and len(dts) == 0:
    if len(gts) == 0 or len(dts) == 0:
        return []
    ious = np.zeros((len(dts), len(gts)))
    # hard-coded per-keypoint falloff constants for the 17 COCO person keypoints
    sigmas = np.array([.26, .25, .25, .35, .35, .79, .79, .72, .72, .62,.62, 1.07, 1.07, .87, .87, .89, .89])/10.0
    vars = (sigmas * 2)**2
    k = len(sigmas)
    # compute oks between each detection and ground truth object
    for j, gt in enumerate(gts):
        # create bounds for ignore regions(double the gt bbox)
        g = np.array(gt['keypoints'])
        xg = g[0::3]; yg = g[1::3]; vg = g[2::3]   # x, y, visibility triplets
        k1 = np.count_nonzero(vg > 0)              # number of labeled keypoints
        bb = gt['bbox']
        x0 = bb[0] - bb[2]; x1 = bb[0] + bb[2] * 2
        y0 = bb[1] - bb[3]; y1 = bb[1] + bb[3] * 2
        for i, dt in enumerate(dts):
            d = np.array(dt['keypoints'])
            xd = d[0::3]; yd = d[1::3]
            if k1>0:
                # measure the per-keypoint distance if keypoints visible
                dx = xd - xg
                dy = yd - yg
            else:
                # measure minimum distance to keypoints in (x0,y0) & (x1,y1)
                z = np.zeros((k))
                dx = np.max((z, x0-xd),axis=0)+np.max((z, xd-x1),axis=0)
                dy = np.max((z, y0-yd),axis=0)+np.max((z, yd-y1),axis=0)
            # normalized squared distance; np.spacing(1) guards area==0
            e = (dx**2 + dy**2) / vars / (gt['area']+np.spacing(1)) / 2
            if k1 > 0:
                # only labeled keypoints contribute when any are visible
                e=e[vg > 0]
            ious[i, j] = np.sum(np.exp(-e)) / e.shape[0]
    return ious
def evaluateImg(self, imgId, catId, aRng, maxDet):
    '''
    perform evaluation for single category and image
    :return: dict (single image results)
    '''
    p = self.params
    if p.useCats:
        gt = self._gts[imgId,catId]
        dt = self._dts[imgId,catId]
    else:
        # category-agnostic mode: pool annotations across all categories
        gt = [_ for cId in p.catIds for _ in self._gts[imgId,cId]]
        dt = [_ for cId in p.catIds for _ in self._dts[imgId,cId]]
    if len(gt) == 0 and len(dt) ==0:
        return None
    # flag gts whose area falls outside the evaluated range as ignored
    for g in gt:
        if g['ignore'] or (g['area']<aRng[0] or g['area']>aRng[1]):
            g['_ignore'] = 1
        else:
            g['_ignore'] = 0
    # sort dt highest score first, sort gt ignore last
    gtind = np.argsort([g['_ignore'] for g in gt], kind='mergesort')
    gt = [gt[i] for i in gtind]
    dtind = np.argsort([-d['score'] for d in dt], kind='mergesort')
    dt = [dt[i] for i in dtind[0:maxDet]]
    iscrowd = [int(o['iscrowd']) for o in gt]
    # load computed ious (columns reordered to match the gt sort above)
    ious = self.ious[imgId, catId][:, gtind] if len(self.ious[imgId, catId]) > 0 else self.ious[imgId, catId]
    T = len(p.iouThrs)
    G = len(gt)
    D = len(dt)
    gtm = np.zeros((T,G))   # gtm[t,g]: id of dt matched to gt g at threshold t (0 = unmatched)
    dtm = np.zeros((T,D))   # dtm[t,d]: id of gt matched to dt d at threshold t (0 = unmatched)
    gtIg = np.array([g['_ignore'] for g in gt])
    dtIg = np.zeros((T,D))  # dt ignore flags, inherited from matched gt or area range
    if not len(ious)==0:
        # greedy matching: per threshold, each dt (best score first) takes
        # the best still-available gt; crowd gts may be matched repeatedly
        for tind, t in enumerate(p.iouThrs):
            for dind, d in enumerate(dt):
                # information about best match so far (m=-1 -> unmatched)
                iou = min([t,1-1e-10])
                m = -1
                for gind, g in enumerate(gt):
                    # if this gt already matched, and not a crowd, continue
                    if gtm[tind,gind]>0 and not iscrowd[gind]:
                        continue
                    # if dt matched to reg gt, and on ignore gt, stop
                    # (gts are sorted ignore-last, so the rest are all ignored)
                    if m>-1 and gtIg[m]==0 and gtIg[gind]==1:
                        break
                    # continue to next gt unless better match made
                    if ious[dind,gind] < iou:
                        continue
                    # if match successful and best so far, store appropriately
                    iou=ious[dind,gind]
                    m=gind
                # if match made store id of match for both dt and gt
                if m ==-1:
                    continue
                dtIg[tind,dind] = gtIg[m]
                dtm[tind,dind] = gt[m]['id']
                gtm[tind,m] = d['id']
    # set unmatched detections outside of area range to ignore
    a = np.array([d['area']<aRng[0] or d['area']>aRng[1] for d in dt]).reshape((1, len(dt)))
    dtIg = np.logical_or(dtIg, np.logical_and(dtm==0, np.repeat(a,T,0)))
    # store results for given image and category
    return {
            'image_id': imgId,
            'category_id': catId,
            'aRng': aRng,
            'maxDet': maxDet,
            'dtIds': [d['id'] for d in dt],
            'gtIds': [g['id'] for g in gt],
            'dtMatches': dtm,
            'gtMatches': gtm,
            'dtScores': [d['score'] for d in dt],
            'gtIgnore': gtIg,
            'dtIgnore': dtIg,
        }
def accumulate(self, p = None):
    '''
    Accumulate per image evaluation results and store the result in self.eval
    as precision/recall/scores arrays of shape [TxRxKxAxM] (recall: [TxKxAxM]).
    :param p: input params for evaluation (defaults to self.params)
    :return: None
    '''
    print('Accumulating evaluation results...')
    tic = time.time()
    # NOTE(review): the evalImgs guard is intentionally left disabled in this
    # variant; with use_ext the per-image evaluation happens in the extension.
    #if not self.evalImgs:
    #    print('Please run evaluate() first')
    # when the compiled extension is used, accumulation already happened there
    if self.use_ext:
        toc = time.time()
        print('DONE (t={:0.2f}s).'.format( toc-tic))
        return
    # allows input customized parameters
    if p is None:
        p = self.params
    p.catIds = p.catIds if p.useCats == 1 else [-1]
    T = len(p.iouThrs)                      # number of IoU thresholds
    R = len(p.recThrs)                      # number of recall thresholds
    K = len(p.catIds) if p.useCats else 1   # number of categories
    A = len(p.areaRng)                      # number of area ranges
    M = len(p.maxDets)                      # number of max-detection limits
    precision = -np.ones((T,R,K,A,M)) # -1 for the precision of absent categories
    recall = -np.ones((T,K,A,M))
    scores = -np.ones((T,R,K,A,M))
    # create dictionary for future indexing
    _pe = self._paramsEval
    catIds = _pe.catIds if _pe.useCats else [-1]
    setK = set(catIds)
    setA = set(map(tuple, _pe.areaRng))
    setM = set(_pe.maxDets)
    setI = set(_pe.imgIds)
    # get inds to evaluate (only entries present in the original eval params)
    k_list = [n for n, k in enumerate(p.catIds) if k in setK]
    m_list = [m for n, m in enumerate(p.maxDets) if m in setM]
    a_list = [n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng)) if a in setA]
    i_list = [n for n, i in enumerate(p.imgIds) if i in setI]
    I0 = len(_pe.imgIds)
    A0 = len(_pe.areaRng)
    # retrieve E at each category, area range, and max number of detections
    for k, k0 in enumerate(k_list):
        Nk = k0*A0*I0
        for a, a0 in enumerate(a_list):
            Na = a0*I0
            for m, maxDet in enumerate(m_list):
                # evalImgs is laid out as [cat][area][img]; index accordingly
                E = [self.evalImgs[Nk + Na + i] for i in i_list]
                E = [e for e in E if e is not None]
                if len(E) == 0:
                    continue
                dtScores = np.concatenate([e['dtScores'][0:maxDet] for e in E])
                # different sorting method generates slightly different results.
                # mergesort is used to be consistent as Matlab implementation.
                inds = np.argsort(-dtScores, kind='mergesort')
                dtScoresSorted = dtScores[inds]
                dtm = np.concatenate([e['dtMatches'][:,0:maxDet] for e in E], axis=1)[:,inds]
                dtIg = np.concatenate([e['dtIgnore'][:,0:maxDet] for e in E], axis=1)[:,inds]
                gtIg = np.concatenate([e['gtIgnore'] for e in E])
                npig = np.count_nonzero(gtIg==0)
                if npig == 0:
                    continue
                # a TP is a matched, non-ignored detection; an FP is unmatched and non-ignored
                tps = np.logical_and(               dtm,  np.logical_not(dtIg))
                fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg))
                # np.float was removed in NumPy 1.20+/1.24; use np.float64
                tp_sum = np.cumsum(tps, axis=1).astype(dtype=np.float64)
                fp_sum = np.cumsum(fps, axis=1).astype(dtype=np.float64)
                for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)):
                    tp = np.array(tp)
                    fp = np.array(fp)
                    nd = len(tp)
                    rc = tp / npig
                    pr = tp / (fp+tp+np.spacing(1))
                    q = np.zeros((R,))
                    ss = np.zeros((R,))
                    recall[t,k,a,m] = rc[-1] if nd else 0
                    # numpy is slow without cython optimization for accessing elements
                    # use python array gets significant speed improvement
                    pr = pr.tolist(); q = q.tolist()
                    # make precision monotonically non-increasing (right to left)
                    for i in range(nd-1, 0, -1):
                        if pr[i] > pr[i-1]:
                            pr[i-1] = pr[i]
                    inds = np.searchsorted(rc, p.recThrs, side='left')
                    try:
                        for ri, pi in enumerate(inds):
                            q[ri] = pr[pi]
                            ss[ri] = dtScoresSorted[pi]
                    except IndexError:
                        # recall thresholds beyond the max achieved recall keep their zeros
                        pass
                    precision[t,:,k,a,m] = np.array(q)
                    scores[t,:,k,a,m] = np.array(ss)
    self.eval = {
        'params': p,
        'counts': [T, R, K, A, M],
        'date': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
        'precision': precision,
        'recall': recall,
        'scores': scores,
    }
    toc = time.time()
    print('DONE (t={:0.2f}s).'.format( toc-tic))
def summarize(self):
    '''
    Compute and display summary metrics for evaluation results.
    Note this function can *only* be applied on the default parameter setting
    '''
    def _summarize( ap=1, iouThr=None, areaRng='all', maxDets=100 ):
        # Print and return one summary value: the mean of all precision (ap=1)
        # or recall (ap=0) entries matching the requested IoU threshold, area
        # range label, and max-detections limit. Entries of -1 (absent
        # categories) are excluded from the mean.
        p = self.params
        iStr = ' {:<18} {} @[ IoU={:<9} | area={:>6s} | maxDets={:>3d} ] = {:0.5f}'
        titleStr = 'Average Precision' if ap == 1 else 'Average Recall'
        typeStr = '(AP)' if ap==1 else '(AR)'
        iouStr = '{:0.2f}:{:0.2f}'.format(p.iouThrs[0], p.iouThrs[-1]) \
            if iouThr is None else '{:0.2f}'.format(iouThr)
        # indices of the requested area-range label and max-detections value
        aind = [i for i, aRng in enumerate(p.areaRngLbl) if aRng == areaRng]
        mind = [i for i, mDet in enumerate(p.maxDets) if mDet == maxDets]
        if ap == 1:
            # dimension of precision: [TxRxKxAxM]
            s = self.eval['precision']
            # IoU
            # NOTE(review): the axis order used below (iouThr index applied to
            # axis 3, aind/mind to axes 1/2) does not match the [T,R,K,A,M]
            # layout written by the pure-Python accumulate() in this file;
            # presumably it matches the layout produced by the use_ext
            # extension path — confirm before relying on use_ext=False.
            if iouThr is not None:
                t = np.where(iouThr == p.iouThrs)[0]
                s = s[:,:,:,t,:]
            s = s[:,aind,mind,:,:]
        else:
            # dimension of recall: [TxKxAxM]
            s = self.eval['recall']
            if iouThr is not None:
                t = np.where(iouThr == p.iouThrs)[0]
                s = s[:,:,:,t]
            s = s[:,aind,mind,:]
        if len(s[s>-1])==0:
            # no valid (non -1) entries: report -1 for this metric
            mean_s = -1
        else:
            mean_s = np.mean(s[s>-1])
        print(iStr.format(titleStr, typeStr, iouStr, areaRng, maxDets, mean_s))
        return mean_s
    def _summarizeDets():
        # The 12 standard COCO detection metrics: AP, AP50, AP75, AP by size,
        # then AR at 1/10/100 detections and AR by size.
        stats = np.zeros((12,))
        stats[0] = _summarize(1)
        stats[1] = _summarize(1, iouThr=.5, maxDets=self.params.maxDets[2])
        stats[2] = _summarize(1, iouThr=.75, maxDets=self.params.maxDets[2])
        stats[3] = _summarize(1, areaRng='small', maxDets=self.params.maxDets[2])
        stats[4] = _summarize(1, areaRng='medium', maxDets=self.params.maxDets[2])
        stats[5] = _summarize(1, areaRng='large', maxDets=self.params.maxDets[2])
        stats[6] = _summarize(0, maxDets=self.params.maxDets[0])
        stats[7] = _summarize(0, maxDets=self.params.maxDets[1])
        stats[8] = _summarize(0, maxDets=self.params.maxDets[2])
        stats[9] = _summarize(0, areaRng='small', maxDets=self.params.maxDets[2])
        stats[10] = _summarize(0, areaRng='medium', maxDets=self.params.maxDets[2])
        stats[11] = _summarize(0, areaRng='large', maxDets=self.params.maxDets[2])
        return stats
    def _summarizeKps():
        # The 10 standard COCO keypoint metrics; maxDets is fixed at 20.
        stats = np.zeros((10,))
        stats[0] = _summarize(1, maxDets=20)
        stats[1] = _summarize(1, maxDets=20, iouThr=.5)
        stats[2] = _summarize(1, maxDets=20, iouThr=.75)
        stats[3] = _summarize(1, maxDets=20, areaRng='medium')
        stats[4] = _summarize(1, maxDets=20, areaRng='large')
        stats[5] = _summarize(0, maxDets=20)
        stats[6] = _summarize(0, maxDets=20, iouThr=.5)
        stats[7] = _summarize(0, maxDets=20, iouThr=.75)
        stats[8] = _summarize(0, maxDets=20, areaRng='medium')
        stats[9] = _summarize(0, maxDets=20, areaRng='large')
        return stats
    if not self.eval:
        raise Exception('Please run accumulate() first')
    # choose the metric set based on the evaluation type
    iouType = self.params.iouType
    if iouType == 'segm' or iouType == 'bbox':
        summarize = _summarizeDets
    elif iouType == 'keypoints':
        summarize = _summarizeKps
    self.stats = summarize()
def __str__(self):
    # __str__ must return a str; the original returned None (the result of
    # summarize(), which only prints), so str(evalObj) raised a TypeError.
    # Printing is kept for backward compatibility; an empty string is
    # returned so str() and print(evalObj) are safe.
    self.summarize()
    return ''
class Params:
    '''
    Params for coco evaluation api
    '''
    def __init__(self, iouType='segm'):
        # Select the default parameter set for the requested evaluation type.
        if iouType in ('segm', 'bbox'):
            self.setDetParams()
        elif iouType == 'keypoints':
            self.setKpParams()
        else:
            raise Exception('iouType not supported')
        self.iouType = iouType
        # useSegm is deprecated
        self.useSegm = None

    def _setCommonParams(self):
        # Settings shared by detection and keypoint evaluation.
        self.imgIds = []
        self.catIds = []
        # np.linspace is used instead of np.arange because arange's floating
        # point step accumulation can overshoot the intended endpoints
        self.iouThrs = np.linspace(.5, 0.95, int(np.round((0.95 - .5) / .05) + 1), endpoint=True)
        self.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .01) + 1), endpoint=True)
        self.useCats = 1

    def setDetParams(self):
        # Defaults for bounding-box / segmentation evaluation.
        self._setCommonParams()
        self.maxDets = [1, 10, 100]
        self.areaRng = [[0 ** 2, 1e5 ** 2], [0 ** 2, 32 ** 2], [32 ** 2, 96 ** 2], [96 ** 2, 1e5 ** 2]]
        self.areaRngLbl = ['all', 'small', 'medium', 'large']

    def setKpParams(self):
        # Defaults for keypoint (OKS-based) evaluation.
        self._setCommonParams()
        self.maxDets = [20]
        self.areaRng = [[0 ** 2, 1e5 ** 2], [32 ** 2, 96 ** 2], [96 ** 2, 1e5 ** 2]]
        self.areaRngLbl = ['all', 'medium', 'large']
__author__ = 'tsungyi'
import pycocotools._mask as _mask
# Interface for manipulating masks stored in RLE format.
#
# RLE is a simple yet efficient format for storing binary masks. RLE
# first divides a vector (or vectorized image) into a series of piecewise
# constant regions and then for each piece simply stores the length of
# that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would
# be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1]
# (note that the odd counts are always the numbers of zeros). Instead of
# storing the counts directly, additional compression is achieved with a
# variable bitrate representation based on a common scheme called LEB128.
#
# Compression is greatest given large piecewise constant regions.
# Specifically, the size of the RLE is proportional to the number of
# *boundaries* in M (or for an image the number of boundaries in the y
# direction). Assuming fairly simple shapes, the RLE representation is
# O(sqrt(n)) where n is number of pixels in the object. Hence space usage
# is substantially lower, especially for large simple objects (large n).
#
# Many common operations on masks can be computed directly using the RLE
# (without need for decoding). This includes computations such as area,
# union, intersection, etc. All of these operations are linear in the
# size of the RLE, in other words they are O(sqrt(n)) where n is the area
# of the object. Computing these operations on the original mask is O(n).
# Thus, using the RLE can result in substantial computational savings.
#
# The following API functions are defined:
# encode - Encode binary masks using RLE.
# decode - Decode binary masks encoded via RLE.
# merge - Compute union or intersection of encoded masks.
# iou - Compute intersection over union between masks.
# area - Compute area of encoded masks.
# toBbox - Get bounding boxes surrounding encoded masks.
# frPyObjects - Convert polygon, bbox, and uncompressed RLE to encoded RLE mask.
#
# Usage:
# Rs = encode( masks )
# masks = decode( Rs )
# R = merge( Rs, intersect=false )
# o = iou( dt, gt, iscrowd )
# a = area( Rs )
# bbs = toBbox( Rs )
# Rs = frPyObjects( [pyObjects], h, w )
#
# In the API the following formats are used:
# Rs - [dict] Run-length encoding of binary masks
# R - dict Run-length encoding of binary mask
# masks - [hxwxn] Binary mask(s) (must have type np.ndarray(dtype=uint8) in column-major order)
# iscrowd - [nx1] list of np.ndarray. 1 indicates corresponding gt image has crowd region to ignore
# bbs - [nx4] Bounding box(es) stored as [x y w h]
# poly - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list)
# dt,gt - May be either bounding boxes or encoded masks
# Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel).
#
# Finally, a note about the intersection over union (iou) computation.
# The standard iou of a ground truth (gt) and detected (dt) object is
# iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt))
# For "crowd" regions, we use a modified criteria. If a gt object is
# marked as "iscrowd", we allow a dt to match any subregion of the gt.
# Choosing gt' in the crowd gt that best matches the dt can be done using
# gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing
# iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt)
# For crowd gt regions we use this modified criteria above for the iou.
#
# To compile run "python setup.py build_ext --inplace"
# Please do not contact us for help with compiling.
#
# Microsoft COCO Toolbox. version 2.0
# Data, paper, and tutorials available at: http://mscoco.org/
# Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
# Licensed under the Simplified BSD License [see coco/license.txt]
# Direct re-exports from the compiled C extension; these take/return the
# RLE dict format described in the comment block above.
iou = _mask.iou
merge = _mask.merge
frPyObjects = _mask.frPyObjects
def encode(bimask, paste_args=None):
    # Encode binary mask(s) into compressed RLE.
    #
    # bimask: np.ndarray(dtype=uint8), either HxWxN (returns a list of RLE
    #   dicts, one per mask) or HxW (returns a single RLE dict; it is
    #   reshaped column-major here before encoding).
    # paste_args: optional dict of extra keyword args forwarded to
    #   _mask.encode (None means no extra args). A None default is used
    #   instead of {} to avoid sharing one mutable dict across calls.
    if paste_args is None:
        paste_args = {}
    if len(bimask.shape) == 3:
        return _mask.encode(bimask, **paste_args)
    elif len(bimask.shape) == 2:
        h, w = bimask.shape
        return _mask.encode(bimask.reshape((h, w, 1), order='F'), **paste_args)[0]
    # previously any other rank silently returned None; fail loudly instead
    raise ValueError('bimask must be a 2D or 3D binary mask array')
def decode(rleObjs):
    # Decode RLE-encoded mask(s) back to binary uint8 arrays.
    #
    # rleObjs: a list of RLE dicts (returns an HxWxN array) or a single
    #   RLE dict (returns an HxW array).
    # isinstance is preferred over type(...) == list: idiomatic, and it
    # also accepts list subclasses (backward compatible).
    if isinstance(rleObjs, list):
        return _mask.decode(rleObjs)
    return _mask.decode([rleObjs])[:, :, 0]
def area(rleObjs):
    # Compute the pixel area of encoded mask(s).
    #
    # rleObjs: a list of RLE dicts (returns an array of areas) or a single
    #   RLE dict (returns a scalar area).
    # isinstance is preferred over type(...) == list: idiomatic, and it
    # also accepts list subclasses (backward compatible).
    if isinstance(rleObjs, list):
        return _mask.area(rleObjs)
    return _mask.area([rleObjs])[0]
def toBbox(rleObjs):
    # Get bounding box(es) [x, y, w, h] surrounding encoded mask(s).
    #
    # rleObjs: a list of RLE dicts (returns an nx4 array) or a single RLE
    #   dict (returns a single 4-element box).
    # isinstance is preferred over type(...) == list: idiomatic, and it
    # also accepts list subclasses (backward compatible).
    if isinstance(rleObjs, list):
        return _mask.toBbox(rleObjs)
    return _mask.toBbox([rleObjs])[0]
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment