Commit e395f013 authored by A. Unique TensorFlower's avatar A. Unique TensorFlower
Browse files

Internal change

PiperOrigin-RevId: 473275427
parent f0e7fa62
...@@ -17,49 +17,93 @@ ...@@ -17,49 +17,93 @@
#! /usr/bin/env python3 #! /usr/bin/env python3
import json import json
from absl import app
from absl import flags
import numpy as np import numpy as np
import pandas as pd import pandas as pd
# Define the flags
FLAGS = flags.FLAGS
# path to annotated JSON file whose distribution needs to be plotted def data_creation(path: str) -> pd.DataFrame:
_PATH = flags.DEFINE_string( """Create a dataframe with the occurrences of images and categories.
'path', None, 'path to the annotated JSON file', required=True)
def visualize_annotation_file(path: str) -> None:
"""Plot a bar graph showing the category distribution.
Args: Args:
path: path to the annotated JSON file. path: path to the annotated JSON file.
Returns:
dataset consisting of the counts of images and categories.
""" """
# get annotation file data into a variable # get annotation file data into a variable
with open(path) as json_file: with open(path) as json_file:
data = json.load(json_file) data = json.load(json_file)
# count the occurrence of each category in the annotation file # count the occurrence of each category and an image in the annotation file
category_names = [i['name'] for i in data['categories']] category_names = [i['name'] for i in data['categories']]
category_ids = [i['category_id'] for i in data['annotations']] category_ids = [i['category_id'] for i in data['annotations']]
values, counts = np.unique(category_ids, return_counts=True) image_ids = [i['image_id'] for i in data['annotations']]
# create a dataframe with all possible values # create a dataframe
# with their counts and visualize it. df = pd.DataFrame(
df = pd.DataFrame(counts, index=values, columns=['counts']) list(zip(category_ids, image_ids)), columns=['category_ids', 'image_ids'])
df = df.reindex(range(1, len(data['categories']) + 1), fill_value=0) df = df.groupby('category_ids').agg(
df.index = category_names object_count=('category_ids', 'count'),
df.plot.bar( image_count=('image_ids', 'nunique'))
figsize=(20, 5), df = df.reindex(range(1, len(data['categories']) + 1), fill_value=0)
width=0.5, df.index = category_names
xlabel='Material types', return df
ylabel='count of material types')
def main(_): def visualize_detailed_counts_horizontally(path: str) -> None:
visualize_annotation_file(_PATH.value) """Plot a vertical bar graph showing the counts of images & categories.
Args:
path: path to the annotated JSON file.
"""
df = data_creation(path)
ax = df.plot(
kind='bar',
figsize=(40, 10),
xlabel='Categories',
ylabel='Counts',
width=0.8,
linewidth=1,
edgecolor='white') # rot = 0 for horizontal labeling
for p in ax.patches:
ax.annotate(
text=np.round(p.get_height()),
xy=(p.get_x() + p.get_width() / 2., p.get_height()),
ha='center',
va='top',
xytext=(4, 14),
textcoords='offset points')
if __name__ == '__main__': def visualize_detailed_counts_vertically(path: str) -> None:
app.run(main) """Plot a horizontal bar graph showing the counts of images & categories.
Args:
path: path to the annotated JSON file.
"""
df = data_creation(path)
ax = df.plot(
kind='barh',
figsize=(15, 40),
xlabel='Categories',
ylabel='Counts',
width=0.6)
for p in ax.patches:
ax.annotate(
str(p.get_width()), (p.get_x() + p.get_width(), p.get_y()),
xytext=(4, 6),
textcoords='offset points')
def visualize_annotation_file(path: str) -> None:
    """Draw a bar chart of the per-category object counts.

    The counts are taken from the 'object_count' column of the dataframe
    built by `data_creation`.

    Args:
      path: path to the annotated JSON file.
    """
    # Only the per-category object counts are plotted here; the image counts
    # produced by data_creation are left out of this chart.
    object_counts = data_creation(path)['object_count']
    bar_options = dict(
        figsize=(20, 5),
        width=0.5,
        xlabel='Material types',
        ylabel='count of material types',
    )
    object_counts.plot.bar(**bar_options)
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment