Internal change

PiperOrigin-RevId: 473275427

Internal change
PiperOrigin-RevId: 473275427
e395f013 · A. Unique TensorFlower · f0e7fa62 · e395f013
Commit e395f013 authored Sep 09, 2022 by A. Unique TensorFlower
Hide whitespace changes
Inline Side-by-side

Showing with 73 additions and 29 deletions

official/projects/waste_identification_ml/pre_processing/config/visualization.py +73 -29

No files found.
--- a/official/projects/waste_identification_ml/pre_processing/config/visualization.py
+++ b/official/projects/waste_identification_ml/pre_processing/config/visualization.py
@@ -17,49 +17,93 @@
 #! /usr/bin/env python3
 import json
-from absl import app
-from absl import flags
 import numpy as np
 import pandas as pd
-# Define the flags
-FLAGS = flags.FLAGS
-# path to annotated JSON file whose distribution needs to be plotted
+def data_creation(path: str) -> pd.DataFrame:
-_PATH = flags.DEFINE_string(
+  """Create a dataframe with the occurrences of images and categories.
-    'path', None, 'path to the annotated JSON file', required=True)
-def visualize_annotation_file(path: str) -> None:
-  """Plot a bar graph showing the category distribution.
  Args:
    path: path to the annotated JSON file.
+  Returns:
+    dataset consisting of the counts of images and categories.
  """
  # get annotation file data into a variable
  with open(path) as json_file:
    data = json.load(json_file)
-    # count the occurance of each category in the annotation file
+  # count the occurrences of each category and image in the annotation file
-    category_names = [i['name'] for i in data['categories']]
+  category_names = [i['name'] for i in data['categories']]
-    category_ids = [i['category_id'] for i in data['annotations']]
+  category_ids = [i['category_id'] for i in data['annotations']]
-    values, counts = np.unique(category_ids, return_counts=True)
+  image_ids = [i['image_id'] for i in data['annotations']]
-    # create a dataframe with all possible values
+  # create a dataframe
-    # with their counts and visualize it.
+  df = pd.DataFrame(
-    df = pd.DataFrame(counts, index=values, columns=['counts'])
+      list(zip(category_ids, image_ids)), columns=['category_ids', 'image_ids'])
-    df = df.reindex(range(1, len(data['categories']) + 1), fill_value=0)
+  df = df.groupby('category_ids').agg(
-    df.index = category_names
+      object_count=('category_ids', 'count'),
-    df.plot.bar(
+      image_count=('image_ids', 'nunique'))
-        figsize=(20, 5),
+  df = df.reindex(range(1, len(data['categories']) + 1), fill_value=0)
-        width=0.5,
+  df.index = category_names
-        xlabel='Material types',
+  return df
-        ylabel='count of material types')
-def main(_):
+def visualize_detailed_counts_horizontally(path: str) -> None:
-  visualize_annotation_file(_PATH.value)
+  """Plot a vertical bar graph showing the counts of images & categories.
+  Args:
+    path: path to the annotated JSON file.
+  """
+  df = data_creation(path)
+  ax = df.plot(
+      kind='bar',
+      figsize=(40, 10),
+      xlabel='Categories',
+      ylabel='Counts',
+      width=0.8,
+      linewidth=1,
+      edgecolor='white')  # rot = 0 for horizontal labeling
+  for p in ax.patches:
+    ax.annotate(
+        text=np.round(p.get_height()),
+        xy=(p.get_x() + p.get_width() / 2., p.get_height()),
+        ha='center',
+        va='top',
+        xytext=(4, 14),
+        textcoords='offset points')
-if __name__ == '__main__':
+def visualize_detailed_counts_vertically(path: str) -> None:
-  app.run(main)
+  """Plot a horizontal bar graph showing the counts of images & categories.
+  Args:
+    path: path to the annotated JSON file.
+  """
+  df = data_creation(path)
+  ax = df.plot(
+      kind='barh',
+      figsize=(15, 40),
+      xlabel='Categories',
+      ylabel='Counts',
+      width=0.6)
+  for p in ax.patches:
+    ax.annotate(
+        str(p.get_width()), (p.get_x() + p.get_width(), p.get_y()),
+        xytext=(4, 6),
+        textcoords='offset points')
+def visualize_annotation_file(path: str) -> None:
+  """Plot a bar graph showing the category distribution.
+  Args:
+    path: path to the annotated JSON file.
+  """
+  df = data_creation(path)
+  df['object_count'].plot.bar(
+      figsize=(20, 5),
+      width=0.5,
+      xlabel='Material types',
+      ylabel='count of material types')