Commit f55a786e authored by luopl

Initial commit

["accordion", "aeroplane", "airconditioner", "antenna", "artillery", "ashtray", "atrium", "babycarriage", "bag", "ball", "balloon", "bambooweaving", "barrel", "baseballbat", "basket", "basketballbackboard", "bathtub", "bed", "bedclothes", "beer", "bell", "bench", "bicycle", "binoculars", "bird", "birdcage", "birdfeeder", "birdnest", "blackboard", "board", "boat", "bone", "book", "bottle", "bottleopener", "bowl", "box", "bracelet", "brick", "bridge", "broom", "brush", "bucket", "building", "bus", "cabinet", "cabinetdoor", "cage", "cake", "calculator", "calendar", "camel", "camera", "cameralens", "can", "candle", "candleholder", "cap", "car", "card", "cart", "case", "casetterecorder", "cashregister", "cat", "cd", "cdplayer", "ceiling", "cellphone", "cello", "chain", "chair", "chessboard", "chicken", "chopstick", "clip", "clippers", "clock", "closet", "cloth", "clothestree", "coffee", "coffeemachine", "comb", "computer", "concrete", "cone", "container", "controlbooth", "controller", "cooker", "copyingmachine", "coral", "cork", "corkscrew", "counter", "court", "cow", "crabstick", "crane", "crate", "cross", "crutch", "cup", "curtain", "cushion", "cuttingboard", "dais", "disc", "disccase", "dishwasher", "dock", "dog", "dolphin", "door", "drainer", "dray", "drinkdispenser", "drinkingmachine", "drop", "drug", "drum", "drumkit", "duck", "dumbbell", "earphone", "earrings", "egg", "electricfan", "electriciron", "electricpot", "electricsaw", "electronickeyboard", "engine", "envelope", "equipment", "escalator", "exhibitionbooth", "extinguisher", "eyeglass", "fan", "faucet", "faxmachine", "fence", "ferriswheel", "fireextinguisher", "firehydrant", "fireplace", "fish", "fishtank", "fishbowl", "fishingnet", "fishingpole", "flag", "flagstaff", "flame", "flashlight", "floor", "flower", "fly", "foam", "food", "footbridge", "forceps", "fork", "forklift", "fountain", "fox", "frame", "fridge", "frog", "fruit", "funnel", "furnace", "gamecontroller", "gamemachine", "gascylinder", "gashood", "gasstove", "giftbox", "glass", "glassmarble", "globe", "glove", "goal", "grandstand", "grass", "gravestone", "ground", "guardrail", "guitar", "gun", "hammer", "handcart", "handle", "handrail", "hanger", "harddiskdrive", "hat", "hay", "headphone", "heater", "helicopter", "helmet", "holder", "hook", "horse", "horse-drawncarriage", "hot-airballoon", "hydrovalve", "ice", "inflatorpump", "ipod", "iron", "ironingboard", "jar", "kart", "kettle", "key", "keyboard", "kitchenrange", "kite", "knife", "knifeblock", "ladder", "laddertruck", "ladle", "laptop", "leaves", "lid", "lifebuoy", "light", "lightbulb", "lighter", "line", "lion", "lobster", "lock", "machine", "mailbox", "mannequin", "map", "mask", "mat", "matchbook", "mattress", "menu", "metal", "meterbox", "microphone", "microwave", "mirror", "missile", "model", "money", "monkey", "mop", "motorbike", "mountain", "mouse", "mousepad", "musicalinstrument", "napkin", "net", "newspaper", "oar", "ornament", "outlet", "oven", "oxygenbottle", "pack", "pan", "paper", "paperbox", "papercutter", "parachute", "parasol", "parterre", "patio", "pelage", "pen", "pencontainer", "pencil", "person", "photo", "piano", "picture", "pig", "pillar", "pillow", "pipe", "pitcher", "plant", "plastic", "plate", "platform", "player", "playground", "pliers", "plume", "poker", "pokerchip", "pole", "pooltable", "postcard", "poster", "pot", "pottedplant", "printer", "projector", "pumpkin", "rabbit", "racket", "radiator", "radio", "rail", "rake", "ramp", "rangehood", "receiver", "recorder", "recreationalmachines", 
"remotecontrol", "road", "robot", "rock", "rocket", "rockinghorse", "rope", "rug", "ruler", "runway", "saddle", "sand", "saw", "scale", "scanner", "scissors", "scoop", "screen", "screwdriver", "sculpture", "scythe", "sewer", "sewingmachine", "shed", "sheep", "shell", "shelves", "shoe", "shoppingcart", "shovel", "sidecar", "sidewalk", "sign", "signallight", "sink", "skateboard", "ski", "sky", "sled", "slippers", "smoke", "snail", "snake", "snow", "snowmobiles", "sofa", "spanner", "spatula", "speaker", "speedbump", "spicecontainer", "spoon", "sprayer", "squirrel", "stage", "stair", "stapler", "stick", "stickynote", "stone", "stool", "stove", "straw", "stretcher", "sun", "sunglass", "sunshade", "surveillancecamera", "swan", "sweeper", "swimring", "swimmingpool", "swing", "switch", "table", "tableware", "tank", "tap", "tape", "tarp", "telephone", "telephonebooth", "tent", "tire", "toaster", "toilet", "tong", "tool", "toothbrush", "towel", "toy", "toycar", "track", "train", "trampoline", "trashbin", "tray", "tree", "tricycle", "tripod", "trophy", "truck", "tube", "turtle", "tvmonitor", "tweezers", "typewriter", "umbrella", "unknown", "vacuumcleaner", "vendingmachine", "videocamera", "videogameconsole", "videoplayer", "videotape", "violin", "wakeboard", "wall", "wallet", "wardrobe", "washingmachine", "watch", "water", "waterdispenser", "waterpipe", "waterskateboard", "watermelon", "whale", "wharf", "wheel", "wheelchair", "window", "windowblinds", "wineglass", "wire", "wood", "wool"]
["aeroplane", "bag", "bed", "bedclothes", "bench", "bicycle", "bird", "boat", "book", "bottle", "building", "bus", "cabinet", "car", "cat", "ceiling", "chair", "cloth", "computer", "cow", "cup", "curtain", "dog", "door", "fence", "floor", "flower", "food", "grass", "ground", "horse", "keyboard", "light", "motorbike", "mountain", "mouse", "person", "plate", "platform", "pottedplant", "road", "rock", "sheep", "shelves", "sidewalk", "sign", "sky", "snow", "sofa", "diningtable", "track", "train", "tree", "truck", "tvmonitor", "wall", "water", "window", "wood"]
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright (c) Facebook, Inc. and its affiliates.
import os
from pathlib import Path
import numpy as np
import tqdm
from PIL import Image
def convert(input, output):
img = np.asarray(Image.open(input))
assert img.dtype == np.uint8
img = img - 1 # 0 (ignore) becomes 255. others are shifted by 1
Image.fromarray(img).save(output)
if __name__ == "__main__":
dataset_dir = Path(os.getenv("DETECTRON2_DATASETS", "datasets")) / "ADEChallengeData2016"
for name in ["validation"]:
annotation_dir = dataset_dir / "annotations" / name
output_dir = dataset_dir / "annotations_detectron2" / name
output_dir.mkdir(parents=True, exist_ok=True)
for file in tqdm.tqdm(list(annotation_dir.iterdir())):
output_file = output_dir / file.name
convert(file, output_file)
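# A possible invocation (the script path below is an assumption; only the "validation"
# split is converted, matching the loop above):
#   DETECTRON2_DATASETS=/path/to/datasets python datasets/prepare_ade20k_sem_seg.py
# Each uint8 mask is shifted by -1, so the original ignore label 0 wraps around to 255
# and classes 1-150 become train ids 0-149.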
import os
import os.path as osp
from pathlib import Path
import tqdm
from glob import glob
import numpy as np
from PIL import Image
COCO_CATEGORIES = [{'color': [220, 20, 60], 'isthing': 1, 'id': 0, 'name': 'person', 'trainId': 0},
{'color': [119, 11, 32], 'isthing': 1, 'id': 1, 'name': 'bicycle', 'trainId': 1},
{'color': [0, 0, 142], 'isthing': 1, 'id': 2, 'name': 'car', 'trainId': 2},
{'color': [0, 0, 230], 'isthing': 1, 'id': 3, 'name': 'motorcycle', 'trainId': 3},
{'color': [106, 0, 228], 'isthing': 1, 'id': 4, 'name': 'airplane', 'trainId': 4},
{'color': [0, 60, 100], 'isthing': 1, 'id': 5, 'name': 'bus', 'trainId': 5},
{'color': [0, 80, 100], 'isthing': 1, 'id': 6, 'name': 'train', 'trainId': 6},
{'color': [0, 0, 70], 'isthing': 1, 'id': 7, 'name': 'truck', 'trainId': 7},
{'color': [0, 0, 192], 'isthing': 1, 'id': 8, 'name': 'boat', 'trainId': 8},
{'color': [250, 170, 30], 'isthing': 1, 'id': 9, 'name': 'traffic light', 'trainId': 9},
{'color': [100, 170, 30], 'isthing': 1, 'id': 10, 'name': 'fire hydrant', 'trainId': 10},
{'color': [220, 220, 0], 'isthing': 1, 'id': 12, 'name': 'stop sign', 'trainId': 11},
{'color': [175, 116, 175], 'isthing': 1, 'id': 13, 'name': 'parking meter', 'trainId': 12},
{'color': [250, 0, 30], 'isthing': 1, 'id': 14, 'name': 'bench', 'trainId': 13},
{'color': [165, 42, 42], 'isthing': 1, 'id': 15, 'name': 'bird', 'trainId': 14},
{'color': [255, 77, 255], 'isthing': 1, 'id': 16, 'name': 'cat', 'trainId': 15},
{'color': [0, 226, 252], 'isthing': 1, 'id': 17, 'name': 'dog', 'trainId': 16},
{'color': [182, 182, 255], 'isthing': 1, 'id': 18, 'name': 'horse', 'trainId': 17},
{'color': [0, 82, 0], 'isthing': 1, 'id': 19, 'name': 'sheep', 'trainId': 18},
{'color': [120, 166, 157], 'isthing': 1, 'id': 20, 'name': 'cow', 'trainId': 19},
{'color': [110, 76, 0], 'isthing': 1, 'id': 21, 'name': 'elephant', 'trainId': 20},
{'color': [174, 57, 255], 'isthing': 1, 'id': 22, 'name': 'bear', 'trainId': 21},
{'color': [199, 100, 0], 'isthing': 1, 'id': 23, 'name': 'zebra', 'trainId': 22},
{'color': [72, 0, 118], 'isthing': 1, 'id': 24, 'name': 'giraffe', 'trainId': 23},
{'color': [255, 179, 240], 'isthing': 1, 'id': 26, 'name': 'backpack', 'trainId': 24},
{'color': [0, 125, 92], 'isthing': 1, 'id': 27, 'name': 'umbrella', 'trainId': 25},
{'color': [209, 0, 151], 'isthing': 1, 'id': 30, 'name': 'handbag', 'trainId': 26},
{'color': [188, 208, 182], 'isthing': 1, 'id': 31, 'name': 'tie', 'trainId': 27},
{'color': [0, 220, 176], 'isthing': 1, 'id': 32, 'name': 'suitcase', 'trainId': 28},
{'color': [255, 99, 164], 'isthing': 1, 'id': 33, 'name': 'frisbee', 'trainId': 29},
{'color': [92, 0, 73], 'isthing': 1, 'id': 34, 'name': 'skis', 'trainId': 30},
{'color': [133, 129, 255], 'isthing': 1, 'id': 35, 'name': 'snowboard', 'trainId': 31},
{'color': [78, 180, 255], 'isthing': 1, 'id': 36, 'name': 'sports ball', 'trainId': 32},
{'color': [0, 228, 0], 'isthing': 1, 'id': 37, 'name': 'kite', 'trainId': 33},
{'color': [174, 255, 243], 'isthing': 1, 'id': 38, 'name': 'baseball bat', 'trainId': 34},
{'color': [45, 89, 255], 'isthing': 1, 'id': 39, 'name': 'baseball glove', 'trainId': 35},
{'color': [134, 134, 103], 'isthing': 1, 'id': 40, 'name': 'skateboard', 'trainId': 36},
{'color': [145, 148, 174], 'isthing': 1, 'id': 41, 'name': 'surfboard', 'trainId': 37},
{'color': [255, 208, 186], 'isthing': 1, 'id': 42, 'name': 'tennis racket', 'trainId': 38},
{'color': [197, 226, 255], 'isthing': 1, 'id': 43, 'name': 'bottle', 'trainId': 39},
{'color': [171, 134, 1], 'isthing': 1, 'id': 45, 'name': 'wine glass', 'trainId': 40},
{'color': [109, 63, 54], 'isthing': 1, 'id': 46, 'name': 'cup', 'trainId': 41},
{'color': [207, 138, 255], 'isthing': 1, 'id': 47, 'name': 'fork', 'trainId': 42},
{'color': [151, 0, 95], 'isthing': 1, 'id': 48, 'name': 'knife', 'trainId': 43},
{'color': [9, 80, 61], 'isthing': 1, 'id': 49, 'name': 'spoon', 'trainId': 44},
{'color': [84, 105, 51], 'isthing': 1, 'id': 50, 'name': 'bowl', 'trainId': 45},
{'color': [74, 65, 105], 'isthing': 1, 'id': 51, 'name': 'banana', 'trainId': 46},
{'color': [166, 196, 102], 'isthing': 1, 'id': 52, 'name': 'apple', 'trainId': 47},
{'color': [208, 195, 210], 'isthing': 1, 'id': 53, 'name': 'sandwich', 'trainId': 48},
{'color': [255, 109, 65], 'isthing': 1, 'id': 54, 'name': 'orange', 'trainId': 49},
{'color': [0, 143, 149], 'isthing': 1, 'id': 55, 'name': 'broccoli', 'trainId': 50},
{'color': [179, 0, 194], 'isthing': 1, 'id': 56, 'name': 'carrot', 'trainId': 51},
{'color': [209, 99, 106], 'isthing': 1, 'id': 57, 'name': 'hot dog', 'trainId': 52},
{'color': [5, 121, 0], 'isthing': 1, 'id': 58, 'name': 'pizza', 'trainId': 53},
{'color': [227, 255, 205], 'isthing': 1, 'id': 59, 'name': 'donut', 'trainId': 54},
{'color': [147, 186, 208], 'isthing': 1, 'id': 60, 'name': 'cake', 'trainId': 55},
{'color': [153, 69, 1], 'isthing': 1, 'id': 61, 'name': 'chair', 'trainId': 56},
{'color': [3, 95, 161], 'isthing': 1, 'id': 62, 'name': 'couch', 'trainId': 57},
{'color': [163, 255, 0], 'isthing': 1, 'id': 63, 'name': 'potted plant', 'trainId': 58},
{'color': [119, 0, 170], 'isthing': 1, 'id': 64, 'name': 'bed', 'trainId': 59},
{'color': [0, 182, 199], 'isthing': 1, 'id': 66, 'name': 'dining table', 'trainId': 60},
{'color': [0, 165, 120], 'isthing': 1, 'id': 69, 'name': 'toilet', 'trainId': 61},
{'color': [183, 130, 88], 'isthing': 1, 'id': 71, 'name': 'tv', 'trainId': 62},
{'color': [95, 32, 0], 'isthing': 1, 'id': 72, 'name': 'laptop', 'trainId': 63},
{'color': [130, 114, 135], 'isthing': 1, 'id': 73, 'name': 'mouse', 'trainId': 64},
{'color': [110, 129, 133], 'isthing': 1, 'id': 74, 'name': 'remote', 'trainId': 65},
{'color': [166, 74, 118], 'isthing': 1, 'id': 75, 'name': 'keyboard', 'trainId': 66},
{'color': [219, 142, 185], 'isthing': 1, 'id': 76, 'name': 'cell phone', 'trainId': 67},
{'color': [79, 210, 114], 'isthing': 1, 'id': 77, 'name': 'microwave', 'trainId': 68},
{'color': [178, 90, 62], 'isthing': 1, 'id': 78, 'name': 'oven', 'trainId': 69},
{'color': [65, 70, 15], 'isthing': 1, 'id': 79, 'name': 'toaster', 'trainId': 70},
{'color': [127, 167, 115], 'isthing': 1, 'id': 80, 'name': 'sink', 'trainId': 71},
{'color': [59, 105, 106], 'isthing': 1, 'id': 81, 'name': 'refrigerator', 'trainId': 72},
{'color': [142, 108, 45], 'isthing': 1, 'id': 83, 'name': 'book', 'trainId': 73},
{'color': [196, 172, 0], 'isthing': 1, 'id': 84, 'name': 'clock', 'trainId': 74},
{'color': [95, 54, 80], 'isthing': 1, 'id': 85, 'name': 'vase', 'trainId': 75},
{'color': [128, 76, 255], 'isthing': 1, 'id': 86, 'name': 'scissors', 'trainId': 76},
{'color': [201, 57, 1], 'isthing': 1, 'id': 87, 'name': 'teddy bear', 'trainId': 77},
{'color': [246, 0, 122], 'isthing': 1, 'id': 88, 'name': 'hair drier', 'trainId': 78},
{'color': [191, 162, 208], 'isthing': 1, 'id': 89, 'name': 'toothbrush', 'trainId': 79},
{'id': 91, 'name': 'banner', 'supercategory': 'textile', 'trainId': 80},
{'id': 92, 'name': 'blanket', 'supercategory': 'textile', 'trainId': 81},
{'id': 93, 'name': 'branch', 'supercategory': 'plant', 'trainId': 82},
{'id': 94, 'name': 'bridge', 'supercategory': 'building', 'trainId': 83},
{'id': 95, 'name': 'building-other', 'supercategory': 'building', 'trainId': 84},
{'id': 96, 'name': 'bush', 'supercategory': 'plant', 'trainId': 85},
{'id': 97, 'name': 'cabinet', 'supercategory': 'furniture-stuff', 'trainId': 86},
{'id': 98, 'name': 'cage', 'supercategory': 'structural', 'trainId': 87},
{'id': 99, 'name': 'cardboard', 'supercategory': 'raw-material', 'trainId': 88},
{'id': 100, 'name': 'carpet', 'supercategory': 'floor', 'trainId': 89},
{'id': 101, 'name': 'ceiling-other', 'supercategory': 'ceiling', 'trainId': 90},
{'id': 102, 'name': 'ceiling-tile', 'supercategory': 'ceiling', 'trainId': 91},
{'id': 103, 'name': 'cloth', 'supercategory': 'textile', 'trainId': 92},
{'id': 104, 'name': 'clothes', 'supercategory': 'textile', 'trainId': 93},
{'id': 105, 'name': 'clouds', 'supercategory': 'sky', 'trainId': 94},
{'id': 106, 'name': 'counter', 'supercategory': 'furniture-stuff', 'trainId': 95},
{'id': 107, 'name': 'cupboard', 'supercategory': 'furniture-stuff', 'trainId': 96},
{'id': 108, 'name': 'curtain', 'supercategory': 'textile', 'trainId': 97},
{'id': 109, 'name': 'desk-stuff', 'supercategory': 'furniture-stuff', 'trainId': 98},
{'id': 110, 'name': 'dirt', 'supercategory': 'ground', 'trainId': 99},
{'id': 111, 'name': 'door-stuff', 'supercategory': 'furniture-stuff', 'trainId': 100},
{'id': 112, 'name': 'fence', 'supercategory': 'structural', 'trainId': 101},
{'id': 113, 'name': 'floor-marble', 'supercategory': 'floor', 'trainId': 102},
{'id': 114, 'name': 'floor-other', 'supercategory': 'floor', 'trainId': 103},
{'id': 115, 'name': 'floor-stone', 'supercategory': 'floor', 'trainId': 104},
{'id': 116, 'name': 'floor-tile', 'supercategory': 'floor', 'trainId': 105},
{'id': 117, 'name': 'floor-wood', 'supercategory': 'floor', 'trainId': 106},
{'id': 118, 'name': 'flower', 'supercategory': 'plant', 'trainId': 107},
{'id': 119, 'name': 'fog', 'supercategory': 'water', 'trainId': 108},
{'id': 120, 'name': 'food-other', 'supercategory': 'food-stuff', 'trainId': 109},
{'id': 121, 'name': 'fruit', 'supercategory': 'food-stuff', 'trainId': 110},
{'id': 122, 'name': 'furniture-other', 'supercategory': 'furniture-stuff', 'trainId': 111},
{'id': 123, 'name': 'grass', 'supercategory': 'plant', 'trainId': 112},
{'id': 124, 'name': 'gravel', 'supercategory': 'ground', 'trainId': 113},
{'id': 125, 'name': 'ground-other', 'supercategory': 'ground', 'trainId': 114},
{'id': 126, 'name': 'hill', 'supercategory': 'solid', 'trainId': 115},
{'id': 127, 'name': 'house', 'supercategory': 'building', 'trainId': 116},
{'id': 128, 'name': 'leaves', 'supercategory': 'plant', 'trainId': 117},
{'id': 129, 'name': 'light', 'supercategory': 'furniture-stuff', 'trainId': 118},
{'id': 130, 'name': 'mat', 'supercategory': 'textile', 'trainId': 119},
{'id': 131, 'name': 'metal', 'supercategory': 'raw-material', 'trainId': 120},
{'id': 132, 'name': 'mirror-stuff', 'supercategory': 'furniture-stuff', 'trainId': 121},
{'id': 133, 'name': 'moss', 'supercategory': 'plant', 'trainId': 122},
{'id': 134, 'name': 'mountain', 'supercategory': 'solid', 'trainId': 123},
{'id': 135, 'name': 'mud', 'supercategory': 'ground', 'trainId': 124},
{'id': 136, 'name': 'napkin', 'supercategory': 'textile', 'trainId': 125},
{'id': 137, 'name': 'net', 'supercategory': 'structural', 'trainId': 126},
{'id': 138, 'name': 'paper', 'supercategory': 'raw-material', 'trainId': 127},
{'id': 139, 'name': 'pavement', 'supercategory': 'ground', 'trainId': 128},
{'id': 140, 'name': 'pillow', 'supercategory': 'textile', 'trainId': 129},
{'id': 141, 'name': 'plant-other', 'supercategory': 'plant', 'trainId': 130},
{'id': 142, 'name': 'plastic', 'supercategory': 'raw-material', 'trainId': 131},
{'id': 143, 'name': 'platform', 'supercategory': 'ground', 'trainId': 132},
{'id': 144, 'name': 'playingfield', 'supercategory': 'ground', 'trainId': 133},
{'id': 145, 'name': 'railing', 'supercategory': 'structural', 'trainId': 134},
{'id': 146, 'name': 'railroad', 'supercategory': 'ground', 'trainId': 135},
{'id': 147, 'name': 'river', 'supercategory': 'water', 'trainId': 136},
{'id': 148, 'name': 'road', 'supercategory': 'ground', 'trainId': 137},
{'id': 149, 'name': 'rock', 'supercategory': 'solid', 'trainId': 138},
{'id': 150, 'name': 'roof', 'supercategory': 'building', 'trainId': 139},
{'id': 151, 'name': 'rug', 'supercategory': 'textile', 'trainId': 140},
{'id': 152, 'name': 'salad', 'supercategory': 'food-stuff', 'trainId': 141},
{'id': 153, 'name': 'sand', 'supercategory': 'ground', 'trainId': 142},
{'id': 154, 'name': 'sea', 'supercategory': 'water', 'trainId': 143},
{'id': 155, 'name': 'shelf', 'supercategory': 'furniture-stuff', 'trainId': 144},
{'id': 156, 'name': 'sky-other', 'supercategory': 'sky', 'trainId': 145},
{'id': 157, 'name': 'skyscraper', 'supercategory': 'building', 'trainId': 146},
{'id': 158, 'name': 'snow', 'supercategory': 'ground', 'trainId': 147},
{'id': 159, 'name': 'solid-other', 'supercategory': 'solid', 'trainId': 148},
{'id': 160, 'name': 'stairs', 'supercategory': 'furniture-stuff', 'trainId': 149},
{'id': 161, 'name': 'stone', 'supercategory': 'solid', 'trainId': 150},
{'id': 162, 'name': 'straw', 'supercategory': 'plant', 'trainId': 151},
{'id': 163, 'name': 'structural-other', 'supercategory': 'structural', 'trainId': 152},
{'id': 164, 'name': 'table', 'supercategory': 'furniture-stuff', 'trainId': 153},
{'id': 165, 'name': 'tent', 'supercategory': 'building', 'trainId': 154},
{'id': 166, 'name': 'textile-other', 'supercategory': 'textile', 'trainId': 155},
{'id': 167, 'name': 'towel', 'supercategory': 'textile', 'trainId': 156},
{'id': 168, 'name': 'tree', 'supercategory': 'plant', 'trainId': 157},
{'id': 169, 'name': 'vegetable', 'supercategory': 'food-stuff', 'trainId': 158},
{'id': 170, 'name': 'wall-brick', 'supercategory': 'wall', 'trainId': 159},
{'id': 171, 'name': 'wall-concrete', 'supercategory': 'wall', 'trainId': 160},
{'id': 172, 'name': 'wall-other', 'supercategory': 'wall', 'trainId': 161},
{'id': 173, 'name': 'wall-panel', 'supercategory': 'wall', 'trainId': 162},
{'id': 174, 'name': 'wall-stone', 'supercategory': 'wall', 'trainId': 163},
{'id': 175, 'name': 'wall-tile', 'supercategory': 'wall', 'trainId': 164},
{'id': 176, 'name': 'wall-wood', 'supercategory': 'wall', 'trainId': 165},
{'id': 177, 'name': 'water-other', 'supercategory': 'water', 'trainId': 166},
{'id': 178, 'name': 'waterdrops', 'supercategory': 'water', 'trainId': 167},
{'id': 179, 'name': 'window-blind', 'supercategory': 'window', 'trainId': 168},
{'id': 180, 'name': 'window-other', 'supercategory': 'window', 'trainId': 169},
{'id': 181, 'name': 'wood', 'supercategory': 'solid', 'trainId': 170}]
if __name__ == "__main__":
dataset_dir = Path(os.getenv("DETECTRON2_DATASETS", "datasets")) / "coco-stuff"
id_map = {}
for cat in COCO_CATEGORIES:
id_map[cat["id"]] = cat["trainId"]
for name in ["train2017", "val2017"]:
annotation_dir = dataset_dir / "annotations" / name
output_dir = dataset_dir / "annotations_detectron2" / name
output_dir.mkdir(parents=True, exist_ok=True)
for file in tqdm.tqdm(list(annotation_dir.iterdir())):
output_file = output_dir / file.name
lab = np.asarray(Image.open(file))
assert lab.dtype == np.uint8
output = np.zeros_like(lab, dtype=np.uint8) + 255
for obj_id in np.unique(lab):
if obj_id in id_map:
output[lab == obj_id] = id_map[obj_id]
Image.fromarray(output).save(output_file)
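# Pixels whose COCO-Stuff id does not appear in COCO_CATEGORIES stay at 255 and are
# therefore ignored during training/evaluation; all other ids are compressed to the
# contiguous trainId range 0-170. A possible invocation (script path is an assumption):
#   DETECTRON2_DATASETS=/path/to/datasets python datasets/prepare_coco_stuff_sem_seg.py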
# Copyright (c) Facebook, Inc. and its affiliates.
# Copyright (c) Meta Platforms, Inc. All Rights Reserved
import tqdm
import os
import os.path as osp
from pathlib import Path
import numpy as np
from PIL import Image
import scipy.io
def convert_pc459(mask_path, new_mask_path):
mat = scipy.io.loadmat(mask_path)
mask = mat['LabelMap']
mask = mask - 1
min_value = np.amin(mask)
assert min_value >= 0, min_value
Image.fromarray(mask).save(new_mask_path, "TIFF")
if __name__ == "__main__":
dataset_dir = Path(os.getenv("DETECTRON2_DATASETS", "datasets"))
print('Caution: we only generate the validation set!')
pc_path = dataset_dir / "VOCdevkit/VOC2010"
val_list = open(pc_path / "pascalcontext_val.txt", "r")
pc459_labels = open(pc_path / "labels.txt", "r")
pc459_dict = {}
for line in pc459_labels.readlines():
if ':' in line:
idx, name = line.split(':')
idx = int(idx.strip())
name = name.strip()
pc459_dict[name] = idx
pc459_dir = pc_path / "annotations_detectron2" / "pc459_val"
pc459_dir.mkdir(parents=True, exist_ok=True)
for line in tqdm.tqdm(val_list.readlines()):
fileid = line.strip()
ori_mask = f'{pc_path}/trainval/{fileid}.mat'
pc459_dst = f'{pc459_dir}/{fileid}.tif'
if osp.exists(ori_mask):
convert_pc459(ori_mask, pc459_dst)
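# The remapped PASCAL-Context-459 labels span 0-458, which does not fit in an 8-bit PNG,
# so the masks are written as TIFF to preserve the wider integer range. A possible
# invocation (script path is an assumption):
#   DETECTRON2_DATASETS=/path/to/datasets python datasets/prepare_pascal_context_459.py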
import os
import tqdm
import json
import numpy as np
from pathlib import Path
from PIL import Image
from pycocotools import mask as m
_mapping = np.sort(
np.array([
0, 2, 259, 260, 415, 324, 9, 258, 144, 18, 19, 22, 23, 397, 25, 284,
158, 159, 416, 33, 162, 420, 454, 295, 296, 427, 44, 45, 46, 308, 59,
440, 445, 31, 232, 65, 354, 424, 68, 326, 72, 458, 34, 207, 80, 355,
85, 347, 220, 349, 360, 98, 187, 104, 105, 366, 189, 368, 113, 115
]))
_key = np.array(range(len(_mapping))).astype('uint8')
_key = _key - 1
_map = {}
for (k, v) in zip(_mapping, _key):
_map[k] = v
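# Note: because _key is uint8, subtracting 1 makes the entry for background (the leading
# 0 in _mapping) wrap around to 255, i.e. the ignore label, while the remaining 59
# PASCAL-Context classes receive contiguous train ids 0-58.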
def generate_labels(img_id, anno, out_dir):
def _class_to_index(mask, _map):
out = np.ones_like(mask, dtype=np.uint8) * 255
for k, v in _map.items():
out[mask == k] = v
return out
mask = Image.fromarray(
_class_to_index(anno, _map))
#_class_to_index(detail.getMask(img_id), _map))
filename = img_id['file_name']
mask.save(os.path.join(out_dir, filename.replace('jpg', 'png')))
return os.path.splitext(os.path.basename(filename))[0]
if __name__ == '__main__':
dataset_dir = Path(os.getenv("DETECTRON2_DATASETS", "datasets"))
voc_path = dataset_dir / "VOCdevkit" / "VOC2010"
out_dir = voc_path / "annotations_detectron2" / "pc59_val"
json_path = voc_path / "trainval_merged.json"
os.makedirs(out_dir, exist_ok=True)
img_dir = out_dir / "JPEGImages"
print("loading annotations...")
data = json.load(open(json_path, 'r'))
val_images = {d['image_id'] : d for d in data['images'] if d['phase'] == "val"}
annos = {}
print("building annotations...")
for ann in data['annos_segmentation']:
key = ann['image_id']
if key in val_images.keys():
if key in annos.keys():
annos[key].append(ann)
else:
annos[key] = [ann]
for k, v in annos.items():
mask = np.zeros((val_images[k]['height'], val_images[k]['width']))
for c in v:
x = m.decode(c['segmentation'])
mask[np.nonzero(x)] = c['category_id']
annos[k] = mask
print("converting annotations...")
for id, dat in tqdm.tqdm(val_images.items()):
generate_labels(dat, annos[id], out_dir=out_dir)
print("done")
# Copyright (c) Facebook, Inc. and its affiliates.
# Copyright (c) Meta Platforms, Inc. All Rights Reserved
# Modified by Feng Liang from https://github.com/MendelXu/zsseg.baseline/blob/master/datasets/prepare_voc_sem_seg.py
# Modified by Heeseong Shin from https://github.com/facebookresearch/ov-seg/blob/main/datasets/prepare_voc_sem_seg.py
import os
import os.path as osp
from pathlib import Path
import tqdm
import numpy as np
from PIL import Image
clsID_to_trID = {
0: 255,
1: 0,
2: 1,
3: 2,
4: 3,
5: 4,
6: 5,
7: 6,
8: 7,
9: 8,
10: 9,
11: 10,
12: 11,
13: 12,
14: 13,
15: 14,
16: 15,
17: 16,
18: 17,
19: 18,
20: 19,
255: 255,
}
clsID_to_trID_bg = clsID_to_trID.copy()
clsID_to_trID_bg[0] = 20
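# clsID_to_trID maps the 20 VOC object classes to train ids 0-19 and sends background (0)
# and the void label (255) to 255 (ignore); the _bg variant instead keeps background as an
# extra 21st class with train id 20.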
def convert_to_trainID(
maskpath, out_mask_dir, is_train, clsID_to_trID=clsID_to_trID, suffix=""
):
mask = np.array(Image.open(maskpath))
mask_copy = np.ones_like(mask, dtype=np.uint8) * 255
for clsID, trID in clsID_to_trID.items():
mask_copy[mask == clsID] = trID
seg_filename = (
osp.join(out_mask_dir, "train" + suffix, osp.basename(maskpath))
if is_train
else osp.join(out_mask_dir, "val" + suffix, osp.basename(maskpath))
)
if len(np.unique(mask_copy)) == 1 and np.unique(mask_copy)[0] == 255:
return
Image.fromarray(mask_copy).save(seg_filename, "PNG")
if __name__ == "__main__":
dataset_dir = Path(os.getenv("DETECTRON2_DATASETS", "datasets"))
print('Caution: we only generate the validation set!')
voc_path = dataset_dir / "VOCdevkit" / "VOC2012"
out_mask_dir = voc_path / "annotations_detectron2"
out_mask_dir_bg = voc_path / "annotations_detectron2_bg"
#out_image_dir = voc_path / "images_detectron2"
for name in ["val"]:
os.makedirs((out_mask_dir / name), exist_ok=True)
os.makedirs((out_mask_dir_bg / name), exist_ok=True)
#os.makedirs((out_image_dir / name), exist_ok=True)
val_list = [
osp.join(voc_path, "SegmentationClassAug", f + ".png")
for f in np.loadtxt(osp.join(voc_path, "ImageSets/Segmentation/val.txt"), dtype=str).tolist()
]
for file in tqdm.tqdm(val_list):
convert_to_trainID(file, out_mask_dir, is_train=False)
convert_to_trainID(file, out_mask_dir_bg, is_train=False, clsID_to_trID=clsID_to_trID_bg)
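# Only the val split is generated here, and masks that contain nothing but the ignore
# label are skipped (see the early return above). A possible invocation (the script path,
# and the source of SegmentationClassAug, commonly the SBD-augmented annotations, are
# assumptions):
#   DETECTRON2_DATASETS=/path/to/datasets python datasets/prepare_voc_sem_seg.py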
["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]
["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor", "bag", "bed", "bench", "book", "building", "cabinet", "ceiling", "cloth", "computer", "cup", "door", "fence", "floor", "flower", "food", "grass", "ground", "keyboard", "light", "mountain", "mouse", "curtain", "platform", "sign", "plate", "road", "rock", "shelves", "sidewalk", "sky", "snow", "bedclothes", "track", "tree", "truck", "wall", "water", "window", "wood"]
# Copyright (c) Facebook, Inc. and its affiliates.
# Modified by Bowen Cheng from: https://github.com/facebookresearch/detectron2/blob/master/demo/demo.py
import argparse
import glob
import multiprocessing as mp
import os
# fmt: off
import sys
sys.path.insert(1, os.path.join(sys.path[0], '..'))
# fmt: on
import tempfile
import time
import warnings
import cv2
import numpy as np
import tqdm
from detectron2.config import get_cfg
from detectron2.data.detection_utils import read_image
from detectron2.projects.deeplab import add_deeplab_config
from detectron2.utils.logger import setup_logger
from sed import add_sed_config
# from predictor import VisualizationDemo
from visualizer import VisualizationGt
from PIL import Image
# constants
WINDOW_NAME = "MaskFormer demo"
def setup_cfg(args):
# load config from file and command-line arguments
cfg = get_cfg()
add_deeplab_config(cfg)
add_sed_config(cfg)
cfg.merge_from_file(args.config_file)
cfg.merge_from_list(args.opts)
cfg.freeze()
return cfg
def get_parser():
parser = argparse.ArgumentParser(description="Detectron2 demo for builtin configs")
parser.add_argument(
"--config-file",
default="configs/ade20k-150/maskformer_R50_bs16_160k.yaml",
metavar="FILE",
help="path to config file",
)
parser.add_argument("--webcam", action="store_true", help="Take inputs from webcam.")
parser.add_argument("--video-input", help="Path to video file.")
parser.add_argument(
"--input",
nargs="+",
help="A list of space separated input images; "
"or a single glob pattern such as 'directory/*.jpg'",
)
# parser.add_argument(
# "--gt",
# nargs="+",
# help="A list of space seperated ground truth images;"
# "or a single glob pattern such as 'directory/*.png'"
# )
parser.add_argument(
"--gt",
# type="str",
help="ground truth path of segmentation"
)
parser.add_argument(
"--output",
help="A file or directory to save output visualizations. "
"If not given, will show output in an OpenCV window.",
)
parser.add_argument(
"--confidence-threshold",
type=float,
default=0.5,
help="Minimum score for instance predictions to be shown",
)
parser.add_argument(
"--opts",
help="Modify config options using the command-line 'KEY VALUE' pairs",
default=[],
nargs=argparse.REMAINDER,
)
return parser
def test_opencv_video_format(codec, file_ext):
with tempfile.TemporaryDirectory(prefix="video_format_test") as dir:
filename = os.path.join(dir, "test_file" + file_ext)
writer = cv2.VideoWriter(
filename=filename,
fourcc=cv2.VideoWriter_fourcc(*codec),
fps=float(30),
frameSize=(10, 10),
isColor=True,
)
[writer.write(np.zeros((10, 10, 3), np.uint8)) for _ in range(30)]
writer.release()
if os.path.isfile(filename):
return True
return False
if __name__ == "__main__":
mp.set_start_method("spawn", force=True)
args = get_parser().parse_args()
setup_logger(name="fvcore")
logger = setup_logger()
logger.info("Arguments: " + str(args))
cfg = setup_cfg(args)
demo = VisualizationGt(cfg)
gt_path = args.gt
if args.input:
if len(args.input) == 1:
args.input = glob.glob(os.path.expanduser(args.input[0]))
assert args.input, "The input path(s) was not found"
for path in tqdm.tqdm(args.input, disable=not args.output):
# use PIL, to be consistent with evaluation
img = read_image(path, format="BGR")
start_time = time.time()
predictions = {}
gt_file = os.path.join(gt_path, os.path.splitext(os.path.basename(path))[0] + '.png')
# import pdb; pdb.set_trace()
predictions['sem_seg'] = np.asarray(Image.open(gt_file))
predictions, visualized_output = demo.run_on_image(img, predictions)
logger.info(
"{}: {} in {:.2f}s".format(
path,
"detected {} instances".format(len(predictions["instances"]))
if "instances" in predictions
else "finished",
time.time() - start_time,
)
)
if args.output:
if os.path.isdir(args.output):
assert os.path.isdir(args.output), args.output
out_filename = os.path.join(args.output, os.path.basename(path))
else:
assert len(args.input) == 1, "Please specify a directory with args.output"
out_filename = args.output
visualized_output.save(out_filename)
else:
cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)
cv2.imshow(WINDOW_NAME, visualized_output.get_image()[:, :, ::-1])
if cv2.waitKey(0) == 27:
break # esc to quit
elif args.webcam:
assert args.input is None, "Cannot have both --input and --webcam!"
assert args.output is None, "output not yet supported with --webcam!"
cam = cv2.VideoCapture(0)
for vis in tqdm.tqdm(demo.run_on_video(cam)):
cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)
cv2.imshow(WINDOW_NAME, vis)
if cv2.waitKey(1) == 27:
break # esc to quit
cam.release()
cv2.destroyAllWindows()
elif args.video_input:
video = cv2.VideoCapture(args.video_input)
width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
frames_per_second = video.get(cv2.CAP_PROP_FPS)
num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
basename = os.path.basename(args.video_input)
codec, file_ext = (
("x264", ".mkv") if test_opencv_video_format("x264", ".mkv") else ("mp4v", ".mp4")
)
if codec == "mp4v":
warnings.warn("x264 codec not available, switching to mp4v")
if args.output:
if os.path.isdir(args.output):
output_fname = os.path.join(args.output, basename)
output_fname = os.path.splitext(output_fname)[0] + file_ext
else:
output_fname = args.output
assert not os.path.isfile(output_fname), output_fname
output_file = cv2.VideoWriter(
filename=output_fname,
# some installation of opencv may not support x264 (due to its license),
# you can try other format (e.g. MPEG)
fourcc=cv2.VideoWriter_fourcc(*codec),
fps=float(frames_per_second),
frameSize=(width, height),
isColor=True,
)
assert os.path.isfile(args.video_input)
for vis_frame in tqdm.tqdm(demo.run_on_video(video), total=num_frames):
if args.output:
output_file.write(vis_frame)
else:
cv2.namedWindow(basename, cv2.WINDOW_NORMAL)
cv2.imshow(basename, vis_frame)
if cv2.waitKey(1) == 27:
break # esc to quit
video.release()
if args.output:
output_file.release()
else:
cv2.destroyAllWindows()
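# A possible invocation of this ground-truth visualization demo (the script name and all
# paths below are illustrative, not taken from the repository):
#   python demo/visualize_gt.py --config-file configs/convnextB_768.yaml \
#       --input "datasets/VOCdevkit/VOC2012/JPEGImages/*.jpg" \
#       --gt datasets/VOCdevkit/VOC2012/annotations_detectron2/val \
#       --output vis_gt/
# --gt should point at a directory of PNG label maps whose basenames match the inputs.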
# Copyright (c) Facebook, Inc. and its affiliates.
# Modified by Bowen Cheng from: https://github.com/facebookresearch/detectron2/blob/master/demo/demo.py
import argparse
import glob
import multiprocessing as mp
import os
# fmt: off
import sys
sys.path.insert(1, os.path.join(sys.path[0], '..'))
# fmt: on
import tempfile
import time
import warnings
import cv2
import numpy as np
import tqdm
from detectron2.config import get_cfg
from detectron2.data.detection_utils import read_image
from detectron2.projects.deeplab import add_deeplab_config
from detectron2.utils.logger import setup_logger
from sed import add_sed_config
from predictor import VisualizationDemo
# constants
WINDOW_NAME = "MaskFormer demo"
def setup_cfg(args):
# load config from file and command-line arguments
cfg = get_cfg()
add_deeplab_config(cfg)
add_sed_config(cfg)
cfg.merge_from_file(args.config_file)
cfg.merge_from_list(args.opts)
cfg.freeze()
return cfg
def get_parser():
parser = argparse.ArgumentParser(description="Detectron2 demo for builtin configs")
parser.add_argument(
"--config-file",
#default="configs/ade20k-150/maskformer_R50_bs16_160k.yaml",
default='configs/convnextB_768.yaml',
metavar="FILE",
help="path to config file",
)
parser.add_argument("--webcam", action="store_true", help="Take inputs from webcam.")
parser.add_argument("--video-input", help="Path to video file.")
parser.add_argument(
"--input",
nargs="+",
help="A list of space separated input images; "
"or a single glob pattern such as 'directory/*.jpg'",
)
parser.add_argument(
"--output",
help="A file or directory to save output visualizations. "
"If not given, will show output in an OpenCV window.",
)
parser.add_argument(
"--confidence-threshold",
type=float,
default=0.5,
help="Minimum score for instance predictions to be shown",
)
parser.add_argument(
"--opts",
help="Modify config options using the command-line 'KEY VALUE' pairs",
default=[],
nargs=argparse.REMAINDER,
)
return parser
def test_opencv_video_format(codec, file_ext):
with tempfile.TemporaryDirectory(prefix="video_format_test") as dir:
filename = os.path.join(dir, "test_file" + file_ext)
writer = cv2.VideoWriter(
filename=filename,
fourcc=cv2.VideoWriter_fourcc(*codec),
fps=float(30),
frameSize=(10, 10),
isColor=True,
)
[writer.write(np.zeros((10, 10, 3), np.uint8)) for _ in range(30)]
writer.release()
if os.path.isfile(filename):
return True
return False
if __name__ == "__main__":
mp.set_start_method("spawn", force=True)
args = get_parser().parse_args()
setup_logger(name="fvcore")
logger = setup_logger()
logger.info("Arguments: " + str(args))
cfg = setup_cfg(args)
demo = VisualizationDemo(cfg)
if args.input:
if len(args.input) == 1:
args.input = glob.glob(os.path.expanduser(args.input[0]))
assert args.input, "The input path(s) was not found"
for path in tqdm.tqdm(args.input, disable=not args.output):
# use PIL, to be consistent with evaluation
img = read_image(path, format="BGR")
start_time = time.time()
predictions, visualized_output = demo.run_on_image(img)
logger.info(
"{}: {} in {:.2f}s".format(
path,
"detected {} instances".format(len(predictions["instances"]))
if "instances" in predictions
else "finished",
time.time() - start_time,
)
)
if args.output:
if os.path.isdir(args.output):
assert os.path.isdir(args.output), args.output
out_filename = os.path.join(args.output, os.path.basename(path))
else:
assert len(args.input) == 1, "Please specify a directory with args.output"
out_filename = args.output
visualized_output.save(out_filename)
else:
cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)
cv2.imshow(WINDOW_NAME, visualized_output.get_image()[:, :, ::-1])
if cv2.waitKey(0) == 27:
break # esc to quit
elif args.webcam:
assert args.input is None, "Cannot have both --input and --webcam!"
assert args.output is None, "output not yet supported with --webcam!"
cam = cv2.VideoCapture(0)
for vis in tqdm.tqdm(demo.run_on_video(cam)):
cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)
cv2.imshow(WINDOW_NAME, vis)
if cv2.waitKey(1) == 27:
break # esc to quit
cam.release()
cv2.destroyAllWindows()
elif args.video_input:
video = cv2.VideoCapture(args.video_input)
width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
frames_per_second = video.get(cv2.CAP_PROP_FPS)
num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
basename = os.path.basename(args.video_input)
codec, file_ext = (
("x264", ".mkv") if test_opencv_video_format("x264", ".mkv") else ("mp4v", ".mp4")
)
if codec == "mp4v":
warnings.warn("x264 codec not available, switching to mp4v")
if args.output:
if os.path.isdir(args.output):
output_fname = os.path.join(args.output, basename)
output_fname = os.path.splitext(output_fname)[0] + file_ext
else:
output_fname = args.output
assert not os.path.isfile(output_fname), output_fname
output_file = cv2.VideoWriter(
filename=output_fname,
# some installation of opencv may not support x264 (due to its license),
# you can try other format (e.g. MPEG)
fourcc=cv2.VideoWriter_fourcc(*codec),
fps=float(frames_per_second),
frameSize=(width, height),
isColor=True,
)
assert os.path.isfile(args.video_input)
for vis_frame in tqdm.tqdm(demo.run_on_video(video), total=num_frames):
if args.output:
output_file.write(vis_frame)
else:
cv2.namedWindow(basename, cv2.WINDOW_NORMAL)
cv2.imshow(basename, vis_frame)
if cv2.waitKey(1) == 27:
break # esc to quit
video.release()
if args.output:
output_file.release()
else:
cv2.destroyAllWindows()
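# A possible invocation for visualizing model predictions (paths and the checkpoint file
# are illustrative; MODEL.WEIGHTS is the standard detectron2 key for loading weights):
#   python demo/demo.py --config-file configs/convnextB_768.yaml \
#       --input "images/*.jpg" --output vis_pred/ \
#       --opts MODEL.WEIGHTS /path/to/checkpoint.pth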
# Copyright (c) Facebook, Inc. and its affiliates.
# Modified by Bowen Cheng from: https://github.com/facebookresearch/detectron2/blob/master/demo/demo.py
import argparse
import glob
import multiprocessing as mp
import os
# fmt: off
import sys
sys.path.insert(1, os.path.join(sys.path[0], '..'))
# fmt: on
import tempfile
import time
import warnings
import cv2
import numpy as np
import tqdm
from detectron2.config import get_cfg
from detectron2.data.detection_utils import read_image
from detectron2.projects.deeplab import add_deeplab_config
from detectron2.utils.logger import setup_logger
from mask_former import add_mask_former_config
# from predictor import VisualizationDemo
from visualizer import VisualizationGt
from PIL import Image
# constants
WINDOW_NAME = "MaskFormer demo"
def setup_cfg(args):
# load config from file and command-line arguments
cfg = get_cfg()
add_deeplab_config(cfg)
add_mask_former_config(cfg)
cfg.merge_from_file(args.config_file)
cfg.merge_from_list(args.opts)
cfg.freeze()
return cfg
def get_parser():
parser = argparse.ArgumentParser(description="Detectron2 demo for builtin configs")
parser.add_argument(
"--config-file",
default="configs/ade20k-150/maskformer_R50_bs16_160k.yaml",
metavar="FILE",
help="path to config file",
)
parser.add_argument("--webcam", action="store_true", help="Take inputs from webcam.")
parser.add_argument("--video-input", help="Path to video file.")
parser.add_argument(
"--input",
nargs="+",
help="A list of space separated input images; "
"or a single glob pattern such as 'directory/*.jpg'",
)
# parser.add_argument(
# "--gt",
# nargs="+",
# help="A list of space seperated ground truth images;"
# "or a single glob pattern such as 'directory/*.png'"
# )
parser.add_argument(
"--gt",
# type="str",
help="ground truth path of segmentation"
)
parser.add_argument(
"--output",
help="A file or directory to save output visualizations. "
"If not given, will show output in an OpenCV window.",
)
parser.add_argument(
"--confidence-threshold",
type=float,
default=0.5,
help="Minimum score for instance predictions to be shown",
)
parser.add_argument(
"--opts",
help="Modify config options using the command-line 'KEY VALUE' pairs",
default=[],
nargs=argparse.REMAINDER,
)
return parser
def test_opencv_video_format(codec, file_ext):
with tempfile.TemporaryDirectory(prefix="video_format_test") as dir:
filename = os.path.join(dir, "test_file" + file_ext)
writer = cv2.VideoWriter(
filename=filename,
fourcc=cv2.VideoWriter_fourcc(*codec),
fps=float(30),
frameSize=(10, 10),
isColor=True,
)
[writer.write(np.zeros((10, 10, 3), np.uint8)) for _ in range(30)]
writer.release()
if os.path.isfile(filename):
return True
return False
if __name__ == "__main__":
mp.set_start_method("spawn", force=True)
args = get_parser().parse_args()
setup_logger(name="fvcore")
logger = setup_logger()
logger.info("Arguments: " + str(args))
cfg = setup_cfg(args)
demo = VisualizationGt(cfg)
gt_path = args.gt
if args.input:
if len(args.input) == 1:
args.input = glob.glob(os.path.expanduser(args.input[0]))
assert args.input, "The input path(s) was not found"
for path in tqdm.tqdm(args.input, disable=not args.output):
# use PIL, to be consistent with evaluation
img = read_image(path, format="BGR")
start_time = time.time()
predictions = {}
gt_file = os.path.join(gt_path, os.path.splitext(os.path.basename(path))[0] + '.png')
# import pdb; pdb.set_trace()
predictions['sem_seg'] = np.asarray(Image.open(gt_file))
predictions, visualized_output = demo.run_on_image(img, predictions)
logger.info(
"{}: {} in {:.2f}s".format(
path,
"detected {} instances".format(len(predictions["instances"]))
if "instances" in predictions
else "finished",
time.time() - start_time,
)
)
if args.output:
if os.path.isdir(args.output):
assert os.path.isdir(args.output), args.output
out_filename = os.path.join(args.output, os.path.basename(path))
else:
assert len(args.input) == 1, "Please specify a directory with args.output"
out_filename = args.output
visualized_output.save(out_filename)
else:
cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)
cv2.imshow(WINDOW_NAME, visualized_output.get_image()[:, :, ::-1])
if cv2.waitKey(0) == 27:
break # esc to quit
elif args.webcam:
assert args.input is None, "Cannot have both --input and --webcam!"
assert args.output is None, "output not yet supported with --webcam!"
cam = cv2.VideoCapture(0)
for vis in tqdm.tqdm(demo.run_on_video(cam)):
cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)
cv2.imshow(WINDOW_NAME, vis)
if cv2.waitKey(1) == 27:
break # esc to quit
cam.release()
cv2.destroyAllWindows()
elif args.video_input:
video = cv2.VideoCapture(args.video_input)
width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
frames_per_second = video.get(cv2.CAP_PROP_FPS)
num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
basename = os.path.basename(args.video_input)
codec, file_ext = (
("x264", ".mkv") if test_opencv_video_format("x264", ".mkv") else ("mp4v", ".mp4")
)
if codec == "mp4v":
warnings.warn("x264 codec not available, switching to mp4v")
if args.output:
if os.path.isdir(args.output):
output_fname = os.path.join(args.output, basename)
output_fname = os.path.splitext(output_fname)[0] + file_ext
else:
output_fname = args.output
assert not os.path.isfile(output_fname), output_fname
output_file = cv2.VideoWriter(
filename=output_fname,
# some installation of opencv may not support x264 (due to its license),
# you can try other format (e.g. MPEG)
fourcc=cv2.VideoWriter_fourcc(*codec),
fps=float(frames_per_second),
frameSize=(width, height),
isColor=True,
)
assert os.path.isfile(args.video_input)
for vis_frame in tqdm.tqdm(demo.run_on_video(video), total=num_frames):
if args.output:
output_file.write(vis_frame)
else:
cv2.namedWindow(basename, cv2.WINDOW_NORMAL)
cv2.imshow(basename, vis_frame)
if cv2.waitKey(1) == 27:
break # esc to quit
video.release()
if args.output:
output_file.release()
else:
cv2.destroyAllWindows()
# Copyright (c) Facebook, Inc. and its affiliates.
# Copied from: https://github.com/facebookresearch/detectron2/blob/master/demo/predictor.py
import atexit
import bisect
import multiprocessing as mp
from collections import deque
import cv2
import torch
from detectron2.data import MetadataCatalog
from detectron2.engine.defaults import DefaultPredictor
from detectron2.utils.video_visualizer import VideoVisualizer
from detectron2.utils.visualizer import ColorMode, Visualizer
class VisualizationDemo(object):
def __init__(self, cfg, instance_mode=ColorMode.IMAGE, parallel=False):
"""
Args:
cfg (CfgNode):
instance_mode (ColorMode):
parallel (bool): whether to run the model in different processes from visualization.
Useful since the visualization logic can be slow.
"""
self.metadata = MetadataCatalog.get(
cfg.DATASETS.TEST[0] if len(cfg.DATASETS.TEST) else "__unused"
)
self.cpu_device = torch.device("cpu")
self.instance_mode = instance_mode
self.parallel = parallel
if parallel:
num_gpu = torch.cuda.device_count()
self.predictor = AsyncPredictor(cfg, num_gpus=num_gpu)
else:
self.predictor = DefaultPredictor(cfg)
def run_on_image(self, image):
"""
Args:
image (np.ndarray): an image of shape (H, W, C) (in BGR order).
This is the format used by OpenCV.
Returns:
predictions (dict): the output of the model.
vis_output (VisImage): the visualized image output.
"""
vis_output = None
predictions = self.predictor(image)
# Convert image from OpenCV BGR format to Matplotlib RGB format.
image = image[:, :, ::-1]
visualizer = Visualizer(image, self.metadata, instance_mode=self.instance_mode)
if "panoptic_seg" in predictions:
panoptic_seg, segments_info = predictions["panoptic_seg"]
vis_output = visualizer.draw_panoptic_seg_predictions(
panoptic_seg.to(self.cpu_device), segments_info
)
else:
if "sem_seg" in predictions:
vis_output = visualizer.draw_sem_seg(
predictions["sem_seg"].argmax(dim=0).to(self.cpu_device),
alpha=0.4,
)
if "instances" in predictions:
instances = predictions["instances"].to(self.cpu_device)
vis_output = visualizer.draw_instance_predictions(predictions=instances)
return predictions, vis_output
def _frame_from_video(self, video):
while video.isOpened():
success, frame = video.read()
if success:
yield frame
else:
break
def run_on_video(self, video):
"""
Visualizes predictions on frames of the input video.
Args:
video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be
either a webcam or a video file.
Yields:
ndarray: BGR visualizations of each video frame.
"""
video_visualizer = VideoVisualizer(self.metadata, self.instance_mode)
def process_predictions(frame, predictions):
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
if "panoptic_seg" in predictions:
panoptic_seg, segments_info = predictions["panoptic_seg"]
vis_frame = video_visualizer.draw_panoptic_seg_predictions(
frame, panoptic_seg.to(self.cpu_device), segments_info
)
elif "instances" in predictions:
predictions = predictions["instances"].to(self.cpu_device)
vis_frame = video_visualizer.draw_instance_predictions(frame, predictions)
elif "sem_seg" in predictions:
vis_frame = video_visualizer.draw_sem_seg(
frame, predictions["sem_seg"].argmax(dim=0).to(self.cpu_device)
)
# Converts Matplotlib RGB format to OpenCV BGR format
vis_frame = cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR)
return vis_frame
frame_gen = self._frame_from_video(video)
if self.parallel:
buffer_size = self.predictor.default_buffer_size
frame_data = deque()
for cnt, frame in enumerate(frame_gen):
frame_data.append(frame)
self.predictor.put(frame)
if cnt >= buffer_size:
frame = frame_data.popleft()
predictions = self.predictor.get()
yield process_predictions(frame, predictions)
while len(frame_data):
frame = frame_data.popleft()
predictions = self.predictor.get()
yield process_predictions(frame, predictions)
else:
for frame in frame_gen:
yield process_predictions(frame, self.predictor(frame))
class AsyncPredictor:
"""
A predictor that runs the model asynchronously, possibly on >1 GPUs.
Because rendering the visualization takes a considerable amount of time,
this helps improve throughput a little bit when rendering videos.
"""
class _StopToken:
pass
class _PredictWorker(mp.Process):
def __init__(self, cfg, task_queue, result_queue):
self.cfg = cfg
self.task_queue = task_queue
self.result_queue = result_queue
super().__init__()
def run(self):
predictor = DefaultPredictor(self.cfg)
while True:
task = self.task_queue.get()
if isinstance(task, AsyncPredictor._StopToken):
break
idx, data = task
result = predictor(data)
self.result_queue.put((idx, result))
def __init__(self, cfg, num_gpus: int = 1):
"""
Args:
cfg (CfgNode):
num_gpus (int): if 0, will run on CPU
"""
num_workers = max(num_gpus, 1)
self.task_queue = mp.Queue(maxsize=num_workers * 3)
self.result_queue = mp.Queue(maxsize=num_workers * 3)
self.procs = []
for gpuid in range(max(num_gpus, 1)):
cfg = cfg.clone()
cfg.defrost()
cfg.MODEL.DEVICE = "cuda:{}".format(gpuid) if num_gpus > 0 else "cpu"
self.procs.append(
AsyncPredictor._PredictWorker(cfg, self.task_queue, self.result_queue)
)
self.put_idx = 0
self.get_idx = 0
self.result_rank = []
self.result_data = []
for p in self.procs:
p.start()
atexit.register(self.shutdown)
def put(self, image):
self.put_idx += 1
self.task_queue.put((self.put_idx, image))
def get(self):
self.get_idx += 1 # the index needed for this request
if len(self.result_rank) and self.result_rank[0] == self.get_idx:
res = self.result_data[0]
del self.result_data[0], self.result_rank[0]
return res
while True:
# make sure the results are returned in the correct order
idx, res = self.result_queue.get()
if idx == self.get_idx:
return res
insert = bisect.bisect(self.result_rank, idx)
self.result_rank.insert(insert, idx)
self.result_data.insert(insert, res)
def __len__(self):
return self.put_idx - self.get_idx
def __call__(self, image):
self.put(image)
return self.get()
def shutdown(self):
for _ in self.procs:
self.task_queue.put(AsyncPredictor._StopToken())
@property
def default_buffer_size(self):
return len(self.procs) * 5
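# Minimal usage sketch for VisualizationDemo (illustrative; the config file path is an
# assumption, and project-specific config additions such as add_sed_config must be
# registered before merge_from_file, as in the demo scripts above):
#   from detectron2.config import get_cfg
#   from detectron2.data.detection_utils import read_image
#   cfg = get_cfg(); ...; cfg.merge_from_file("configs/convnextB_768.yaml"); cfg.freeze()
#   demo = VisualizationDemo(cfg)
#   predictions, vis_output = demo.run_on_image(read_image("example.jpg", format="BGR"))
#   vis_output.save("example_vis.jpg")
# With parallel=True, AsyncPredictor runs one DefaultPredictor worker per GPU and uses
# the put/get indices to return results in submission order.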
# Copyright (c) Facebook, Inc. and its affiliates.
# Copied from: https://github.com/facebookresearch/detectron2/blob/master/demo/predictor.py
import atexit
import bisect
import multiprocessing as mp
from collections import deque
import cv2
import torch
from detectron2.data import MetadataCatalog
from detectron2.engine.defaults import DefaultPredictor
from detectron2.utils.video_visualizer import VideoVisualizer
from detectron2.utils.visualizer import ColorMode, Visualizer
class VisualizationGt(object):
def __init__(self, cfg, instance_mode=ColorMode.IMAGE, parallel=False):
"""
Args:
cfg (CfgNode):
instance_mode (ColorMode):
parallel (bool): whether to run the model in different processes from visualization.
Useful since the visualization logic can be slow.
"""
self.metadata = MetadataCatalog.get(
cfg.DATASETS.TEST[0] if len(cfg.DATASETS.TEST) else "__unused"
)
self.cpu_device = torch.device("cpu")
self.instance_mode = instance_mode
self.parallel = parallel
if parallel:
num_gpu = torch.cuda.device_count()
self.predictor = AsyncPredictor(cfg, num_gpus=num_gpu)
else:
self.predictor = DefaultPredictor(cfg)
def run_on_image(self, image, predictions):
"""
Args:
image (np.ndarray): an image of shape (H, W, C) (in BGR order).
This is the format used by OpenCV.
Returns:
predictions (dict): the output of the model.
vis_output (VisImage): the visualized image output.
"""
vis_output = None
# predictions = self.predictor(image)
# Convert image from OpenCV BGR format to Matplotlib RGB format.
image = image[:, :, ::-1]
visualizer = Visualizer(image, self.metadata, instance_mode=self.instance_mode)
if "panoptic_seg" in predictions:
panoptic_seg, segments_info = predictions["panoptic_seg"]
vis_output = visualizer.draw_panoptic_seg_predictions(
panoptic_seg.to(self.cpu_device), segments_info
)
else:
if "sem_seg" in predictions:
vis_output = visualizer.draw_sem_seg(
predictions["sem_seg"]
)
if "instances" in predictions:
instances = predictions["instances"].to(self.cpu_device)
vis_output = visualizer.draw_instance_predictions(predictions=instances)
return predictions, vis_output
def _frame_from_video(self, video):
while video.isOpened():
success, frame = video.read()
if success:
yield frame
else:
break
def run_on_video(self, video):
"""
Visualizes predictions on frames of the input video.
Args:
video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be
either a webcam or a video file.
Yields:
ndarray: BGR visualizations of each video frame.
"""
video_visualizer = VideoVisualizer(self.metadata, self.instance_mode)
def process_predictions(frame, predictions):
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
if "panoptic_seg" in predictions:
panoptic_seg, segments_info = predictions["panoptic_seg"]
vis_frame = video_visualizer.draw_panoptic_seg_predictions(
frame, panoptic_seg.to(self.cpu_device), segments_info
)
elif "instances" in predictions:
predictions = predictions["instances"].to(self.cpu_device)
vis_frame = video_visualizer.draw_instance_predictions(frame, predictions)
elif "sem_seg" in predictions:
vis_frame = video_visualizer.draw_sem_seg(
frame, predictions["sem_seg"].argmax(dim=0).to(self.cpu_device)
)
# Converts Matplotlib RGB format to OpenCV BGR format
vis_frame = cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR)
return vis_frame
frame_gen = self._frame_from_video(video)
if self.parallel:
buffer_size = self.predictor.default_buffer_size
frame_data = deque()
for cnt, frame in enumerate(frame_gen):
frame_data.append(frame)
self.predictor.put(frame)
if cnt >= buffer_size:
frame = frame_data.popleft()
predictions = self.predictor.get()
yield process_predictions(frame, predictions)
while len(frame_data):
frame = frame_data.popleft()
predictions = self.predictor.get()
yield process_predictions(frame, predictions)
else:
for frame in frame_gen:
yield process_predictions(frame, self.predictor(frame))
class AsyncPredictor:
"""
A predictor that runs the model asynchronously, possibly on >1 GPUs.
Because rendering the visualization takes a considerable amount of time,
this helps improve throughput a little bit when rendering videos.
"""
class _StopToken:
pass
class _PredictWorker(mp.Process):
def __init__(self, cfg, task_queue, result_queue):
self.cfg = cfg
self.task_queue = task_queue
self.result_queue = result_queue
super().__init__()
def run(self):
predictor = DefaultPredictor(self.cfg)
while True:
task = self.task_queue.get()
if isinstance(task, AsyncPredictor._StopToken):
break
idx, data = task
result = predictor(data)
self.result_queue.put((idx, result))
def __init__(self, cfg, num_gpus: int = 1):
"""
Args:
cfg (CfgNode):
num_gpus (int): if 0, will run on CPU
"""
num_workers = max(num_gpus, 1)
self.task_queue = mp.Queue(maxsize=num_workers * 3)
self.result_queue = mp.Queue(maxsize=num_workers * 3)
self.procs = []
for gpuid in range(max(num_gpus, 1)):
cfg = cfg.clone()
cfg.defrost()
cfg.MODEL.DEVICE = "cuda:{}".format(gpuid) if num_gpus > 0 else "cpu"
self.procs.append(
AsyncPredictor._PredictWorker(cfg, self.task_queue, self.result_queue)
)
self.put_idx = 0
self.get_idx = 0
self.result_rank = []
self.result_data = []
for p in self.procs:
p.start()
atexit.register(self.shutdown)
def put(self, image):
self.put_idx += 1
self.task_queue.put((self.put_idx, image))
def get(self):
self.get_idx += 1 # the index needed for this request
if len(self.result_rank) and self.result_rank[0] == self.get_idx:
res = self.result_data[0]
del self.result_data[0], self.result_rank[0]
return res
while True:
# make sure the results are returned in the correct order
idx, res = self.result_queue.get()
if idx == self.get_idx:
return res
insert = bisect.bisect(self.result_rank, idx)
self.result_rank.insert(insert, idx)
self.result_data.insert(insert, res)
def __len__(self):
return self.put_idx - self.get_idx
def __call__(self, image):
self.put(image)
return self.get()
def shutdown(self):
for _ in self.procs:
self.task_queue.put(AsyncPredictor._StopToken())
@property
def default_buffer_size(self):
return len(self.procs) * 5
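# Unlike VisualizationDemo, run_on_image here never calls the predictor: the caller passes
# in a predictions dict (e.g. {"sem_seg": <H x W label array>} loaded from a ground-truth
# PNG, as in the visualization demo above) and the class only renders it with Visualizer.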