docs: Benchmarking guide interpreting results (#701)

Co-authored-by: Ziqi Fan <ziqif@nvidia.com>

docs: Benchmarking guide interpreting results (#701)
Co-authored-by: Ziqi Fan <ziqif@nvidia.com>
fc5ddd2f · Jacky · GitHub · d0d364e3 · fc5ddd2f · fc5ddd2f
Unverified Commit fc5ddd2f authored Jun 12, 2025 by Jacky Committed by GitHub Jun 12, 2025
5 changed files
--- a/benchmarks/llm/perf.sh
+++ b/benchmarks/llm/perf.sh
@@ -14,7 +14,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-model=neuralmagic/DeepSeek-R1-Distill-Llama-70B-FP8-dynamic
+
+# Default settings; each can be overridden by a command line argument
+model="neuralmagic/DeepSeek-R1-Distill-Llama-70B-FP8-dynamic"
+url="http://localhost:8000"
+mode="aggregated"
+artifacts_root_dir="artifacts_root"
+deployment_kind="dynamo"

 # Input Sequence Length (isl) 3000 and Output Sequence Length (osl) 150 are
 # selected for chat use case. Note that for other use cases, the results and
@@ -22,6 +28,135 @@ model=neuralmagic/DeepSeek-R1-Distill-Llama-70B-FP8-dynamic
 isl=3000
 osl=150

+tp=0
+dp=0
+prefill_tp=0
+prefill_dp=0
+decode_tp=0
+decode_dp=0
+
+# The defaults can be overridden by command line arguments.
+while [[ $# -gt 0 ]]; do
+  case $1 in
+    --tensor-parallelism)
+      tp="$2"
+      shift 2
+      ;;
+    --data-parallelism)
+      dp="$2"
+      shift 2
+      ;;
+    --prefill-tensor-parallelism)
+      prefill_tp="$2"
+      shift 2
+      ;;
+    --prefill-data-parallelism)
+      prefill_dp="$2"
+      shift 2
+      ;;
+    --decode-tensor-parallelism)
+      decode_tp="$2"
+      shift 2
+      ;;
+    --decode-data-parallelism)
+      decode_dp="$2"
+      shift 2
+      ;;
+      --model)
+      model="$2"
+      shift 2
+      ;;
+    --input-sequence-length)
+      isl="$2"
+      shift 2
+      ;;
+    --output-sequence-length)
+      osl="$2"
+      shift 2
+      ;;
+    --url)
+      url="$2"
+      shift 2
+      ;;
+    --mode)
+      mode="$2"
+      shift 2
+      ;;
+    --artifacts-root-dir)
+      artifacts_root_dir="$2"
+      shift 2
+      ;;
+    --deployment-kind)
+      deployment_kind="$2"
+      shift 2
+      ;;
+    *)
+      echo "Unknown option: $1"
+      exit 1
+      ;;
+  esac
+done
+
+if [ "${mode}" == "aggregated" ]; then
+  if [ "${tp}" == "0" ] && [ "${dp}" == "0" ]; then
+    echo "--tensor-parallelism and --data-parallelism must be set for aggregated mode."
+    exit 1
+  fi
+  echo "Starting benchmark for the deployment service with the following configuration:"
+  echo "  - Tensor Parallelism: ${tp}"
+  echo "  - Data Parallelism: ${dp}"
+elif [ "${mode}" == "disaggregated" ]; then
+  if [ "${prefill_tp}" == "0" ] && [ "${prefill_dp}" == "0" ] && [ "${decode_tp}" == "0" ] && [ "${decode_dp}" == "0" ]; then
+    echo "--prefill-tensor-parallelism, --prefill-data-parallelism, --decode-tensor-parallelism and --decode-data-parallelism must be set for disaggregated mode."
+    exit 1
+  fi
+  echo "Starting benchmark for the deployment service with the following configuration:"
+  echo "  - Prefill Tensor Parallelism: ${prefill_tp}"
+  echo "  - Prefill Data Parallelism: ${prefill_dp}"
+  echo "  - Decode Tensor Parallelism: ${decode_tp}"
+  echo "  - Decode Data Parallelism: ${decode_dp}"
+else
+  echo "Unknown mode: ${mode}. Only 'aggregated' and 'disaggregated' modes are supported."
+  exit 1
+fi
+
+echo "--------------------------------"
+echo "WARNING: This script does not validate tensor_parallelism=${tp} and data_parallelism=${dp} settings."
+echo "         The user is responsible for ensuring these match the deployment configuration being benchmarked."
+echo "         Incorrect settings may lead to misleading benchmark results."
+echo "--------------------------------"
+
+
+# Create artifacts root directory if it doesn't exist
+if [ ! -d "${artifacts_root_dir}" ]; then
+    mkdir -p "${artifacts_root_dir}"
+fi
+
+# Find the next available artifacts directory index
+index=0
+while [ -d "${artifacts_root_dir}/artifacts_${index}" ]; do
+    index=$((index + 1))
+done
+
+# Create the new artifacts directory
+artifact_dir="${artifacts_root_dir}/artifacts_${index}"
+mkdir -p "${artifact_dir}"
+
+# Print warning about existing artifacts directories
+if [ $index -gt 0 ]; then
+    echo "--------------------------------"
+    echo "WARNING: Found ${index} existing artifacts directories:"
+    for ((i=0; i<index; i++)); do
+        if [ -f "${artifacts_root_dir}/artifacts_${i}/deployment_config.json" ]; then
+            echo "artifacts_${i}:"
+            cat "${artifacts_root_dir}/artifacts_${i}/deployment_config.json"
+            echo "--------------------------------"
+        fi
+    done
+    echo "Creating new artifacts directory: artifacts_${index}"
+    echo "--------------------------------"
+fi
+
 # Concurrency levels to test
 for concurrency in 1 2 4 8 16 32 64 128 256; do

@@ -33,7 +168,7 @@ for concurrency in 1 2 4 8 16 32 64 128 256; do
    --endpoint-type chat \
    --endpoint /v1/chat/completions \
    --streaming \
-    --url http://localhost:8000 \
+    --url ${url} \
    --synthetic-input-tokens-mean ${isl} \
    --synthetic-input-tokens-stddev 0 \
    --output-tokens-mean ${osl} \
@@ -47,6 +182,7 @@ for concurrency in 1 2 4 8 16 32 64 128 256; do
    --warmup-request-count $(($concurrency*2)) \
    --num-dataset-entries $(($concurrency*12)) \
    --random-seed 100 \
+    --artifact-dir ${artifact_dir} \
    -- \
    -v \
    --max-threads 256 \
@@ -54,3 +190,31 @@ for concurrency in 1 2 4 8 16 32 64 128 256; do
    -H 'Accept: text/event-stream'

 done
+
+# The configuration is dumped to a JSON file which hold details of the OAI service
+# being benchmarked.
+deployment_config=$(cat << EOF
+{
+  "kind": "${deployment_kind}",
+  "model": "${model}",
+  "input_sequence_length": ${isl},
+  "output_sequence_length": ${osl},
+  "tensor_parallelism": ${tp},
+  "data_parallelism": ${dp},
+  "prefill_tensor_parallelism": ${prefill_tp},
+  "prefill_data_parallelism": ${prefill_dp},
+  "decode_tensor_parallelism": ${decode_tp},
+  "decode_data_parallelism": ${decode_dp},
+  "mode": "${mode}"
+}
+EOF
+)
+
+mkdir -p "${artifact_dir}"
+if [ -f "${artifact_dir}/deployment_config.json" ]; then
+  echo "Deployment configuration already exists. Overwriting..."
+  rm -f "${artifact_dir}/deployment_config.json"
+fi
+echo "${deployment_config}" > "${artifact_dir}/deployment_config.json"
+
+echo "Benchmarking Successful!!!"
--- a/benchmarks/llm/plot_pareto.py
+++ b/benchmarks/llm/plot_pareto.py
+#!/usr/bin/env python3
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import json
+import os
+import re
+
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+import seaborn as sns
+from matplotlib.ticker import MultipleLocator
+
+
+def get_json_paths(search_paths):
+    genai_perf_profile_export_json_paths = []
+    deployment_config_json_paths = []
+    for search_path in search_paths:
+        deployment_config_json_path = os.path.join(
+            search_path, "deployment_config.json"
+        )
+        if not os.path.exists(deployment_config_json_path):
+            raise Exception(f"deployment_config.json not found in {search_path}")
+        for root, dirs, files in os.walk(search_path):
+            for file in files:
+                if file == "profile_export_genai_perf.json":
+                    genai_perf_profile_export_json_paths.append(
+                        os.path.join(root, file)
+                    )
+                    deployment_config_json_paths.append(deployment_config_json_path)
+
+    return genai_perf_profile_export_json_paths, deployment_config_json_paths
+
+
+# search for -concurrency<number> in the name
+def parse_concurrency(name):
+    matches = re.findall(r"-concurrency(\d+)", name)
+    if len(matches) != 1:
+        raise Exception(f"non-unique matches: {matches}")
+    concurrency = 0
+    for c in matches:
+        concurrency += int(c)
+    return concurrency
+
+
+# Get the number of GPUs from the deployment config
+def parse_gpus(deployment_config_json_path):
+    with open(deployment_config_json_path, "r") as f:
+        deployment_config = json.load(f)
+    if deployment_config.get("mode") == "aggregated":
+        return deployment_config.get("tensor_parallelism") * deployment_config.get(
+            "data_parallelism"
+        )
+    else:
+        return deployment_config.get(
+            "prefill_tensor_parallelism"
+        ) * deployment_config.get("prefill_data_parallelism") + deployment_config.get(
+            "decode_tensor_parallelism"
+        ) * deployment_config.get(
+            "decode_data_parallelism"
+        )
+
+
+def parse_kind_and_mode(deployment_config_json_path):
+    with open(deployment_config_json_path, "r") as f:
+        deployment_config = json.load(f)
+    return deployment_config.get("kind"), deployment_config.get("mode")
+
+
+def extract_val_and_concurrency(
+    genai_perf_profile_export_json_paths, deployment_config_json_paths, stat_value="avg"
+):
+    results = []
+    for genai_perf_profile_export_json_path, deployment_config_json_path in zip(
+        genai_perf_profile_export_json_paths, deployment_config_json_paths
+    ):
+        with open(genai_perf_profile_export_json_path, "r") as f:
+            data = json.load(f)
+            # output_token_throughput contains only avg
+            output_token_throughput = data.get("output_token_throughput", {}).get("avg")
+            output_token_throughput_per_user = data.get(
+                "output_token_throughput_per_user", {}
+            ).get(stat_value)
+            time_to_first_token = data.get("time_to_first_token", {}).get(stat_value)
+            inter_token_latency = data.get("inter_token_latency", {}).get(stat_value)
+            # request_throughput contains only avg
+            request_throughput = data.get("request_throughput", {}).get("avg")
+
+        concurrency = parse_concurrency(genai_perf_profile_export_json_path)
+        num_gpus = parse_gpus(deployment_config_json_path)
+        kind, mode = parse_kind_and_mode(deployment_config_json_path)
+
+        # Handle the case of num_gpus=0 to avoid division by zero
+        if num_gpus > 0 and output_token_throughput is not None:
+            output_token_throughput_per_gpu = output_token_throughput / num_gpus
+        else:
+            output_token_throughput_per_gpu = 0.0
+
+        if num_gpus > 0 and request_throughput is not None:
+            request_throughput_per_gpu = request_throughput / num_gpus
+        else:
+            request_throughput_per_gpu = 0.0
+
+        results.append(
+            {
+                "configuration": genai_perf_profile_export_json_path,
+                "kind": kind,
+                "mode": mode,
+                "num_gpus": num_gpus,
+                "concurrency": float(concurrency),
+                "output_token_throughput_avg": output_token_throughput,
+                f"output_token_throughput_per_user_{stat_value}": output_token_throughput_per_user,
+                "output_token_throughput_per_gpu_avg": output_token_throughput_per_gpu,
+                f"time_to_first_token_{stat_value}": time_to_first_token,
+                f"inter_token_latency_{stat_value}": inter_token_latency,
+                "request_throughput_per_gpu_avg": request_throughput_per_gpu,
+            }
+        )
+    return results
+
+
+def create_pareto_graph(results, title="", stat_value="avg"):
+    data_points = [
+        {
+            "label": f"{result['kind']}_{result['mode']}",
+            "configuration": result["configuration"],
+            "concurrency": float(result["concurrency"]),
+            f"output_token_throughput_per_user_{stat_value}": result[
+                f"output_token_throughput_per_user_{stat_value}"
+            ],
+            "output_token_throughput_per_gpu_avg": result[
+                "output_token_throughput_per_gpu_avg"
+            ],
+            f"time_to_first_token_{stat_value}": result[
+                f"time_to_first_token_{stat_value}"
+            ],
+            f"inter_token_latency_{stat_value}": result[
+                f"inter_token_latency_{stat_value}"
+            ],
+            "is_pareto_efficient": False,
+        }
+        for result in results
+    ]
+    df = pd.DataFrame(data_points)
+
+    def pareto_efficient(ids, points):
+        """
+        Mark Pareto-efficient points.
+        A point p is dominated if there's another q
+        such that q is >= p in all dimensions.
+        """
+        points = np.array(points)
+        pareto_points = []
+        for i, (point_id, point) in enumerate(zip(ids, points)):
+            dominated = False
+            for j, other_point in enumerate(points):
+                if i != j and all(other_point >= point):
+                    dominated = True
+                    break
+            if not dominated:
+                pareto_points.append(point)
+                df.at[point_id, "is_pareto_efficient"] = True
+        return np.array(pareto_points)
+
+    sns.set(style="whitegrid")
+    fig, ax = plt.subplots(figsize=(14, 6), constrained_layout=True)
+
+    labels = df["label"].unique()
+
+    for label in labels:
+        group = df[df["label"] == label]
+        # Scatter all points
+        ax.scatter(
+            group[f"output_token_throughput_per_user_{stat_value}"],
+            group["output_token_throughput_per_gpu_avg"],
+            label=f"Label {label}",
+        )
+
+        # Identify and mark Pareto frontier
+        pareto_points = pareto_efficient(
+            group.index,
+            group[
+                [
+                    f"output_token_throughput_per_user_{stat_value}",
+                    "output_token_throughput_per_gpu_avg",
+                ]
+            ].values,
+        )
+        # Sort by x-value for a clean line
+        pareto_points = pareto_points[np.argsort(pareto_points[:, 0])]
+        ax.plot(
+            pareto_points[:, 0],
+            pareto_points[:, 1],
+            linestyle="--",
+            label=f"Pareto Frontier {label}",
+        )
+
+    # Save CSV
+    if stat_value == "avg":
+        df_file_name = "results.csv"
+    else:
+        df_file_name = f"results_{stat_value}.csv"
+    df.to_csv(df_file_name)
+
+    # Axis labels and tick intervals
+    ax.set_xlabel(f"tokens/s/user {stat_value}")
+    ax.set_ylabel("tokens/s/gpu avg")
+    ax.set_title(f"Pareto - {title}")
+    ax.legend(bbox_to_anchor=(1.02, 1), loc="upper left")
+
+    ax.grid(True)
+    x_interval = 5
+    y_interval = 5
+    ax.xaxis.set_major_locator(MultipleLocator(x_interval))
+    ax.yaxis.set_major_locator(MultipleLocator(y_interval))
+
+    if stat_value == "avg":
+        file_name = "pareto_plot.png"
+    else:
+        file_name = f"pareto_plot_{stat_value}.png"
+    plt.savefig(file_name, dpi=300)
+    plt.close()
+
+
+if __name__ == "__main__":
+    import argparse
+    import glob
+    import os
+
+    parser = argparse.ArgumentParser(
+        description="Plot Pareto graph from GenAI-Perf artifacts"
+    )
+    parser.add_argument(
+        "--artifacts-root-dir",
+        required=True,
+        help="Root directory containing artifact directories to search for profile_export_genai_perf.json files",
+    )
+    parser.add_argument(
+        "--title",
+        default="Single Node",
+        help="Title for the Pareto graph",
+    )
+    args = parser.parse_args()
+
+    # Find all artifacts directories under the root
+    artifacts_dirs = glob.glob(os.path.join(args.artifacts_root_dir, "artifacts_*"))
+    if not artifacts_dirs:
+        raise ValueError(f"No artifacts directories found in {args.artifacts_root_dir}")
+
+    genai_perf_profile_export_json_paths, deployment_config_json_paths = get_json_paths(
+        artifacts_dirs
+    )
+
+    if len(genai_perf_profile_export_json_paths) != len(deployment_config_json_paths):
+        raise ValueError(
+            f"Number of genai_perf_profile_export_json_paths ({len(genai_perf_profile_export_json_paths)}) does not match number of deployment_config_json_paths ({len(deployment_config_json_paths)})"
+        )
+
+    extracted_values = extract_val_and_concurrency(
+        genai_perf_profile_export_json_paths, deployment_config_json_paths
+    )
+    create_pareto_graph(extracted_values, title=args.title)
--- a/examples/llm/benchmarks/README.md
+++ b/examples/llm/benchmarks/README.md
@@ -98,8 +98,16 @@ With the Dynamo repository, benchmarking image and model available, and **NATS a
    > [!Tip]
    > Check the `disagg.log` to make sure the service is fully started before collecting performance numbers.

- 3. Collect the performance numbers as shown on the [Collecting Performance Numbers](#collecting-performance-numbers) section below.
+ 3. Collect the performance numbers:

+ ```bash
+ bash -x /workspace/benchmarks/llm/perf.sh --mode disaggregated --deployment-kind dynamo_vllm --prefill-tensor-parallelism 1 --prefill-data-parallelism 4 --decode-tensor-parallelism 4 --decode-data-parallelism 1
+ ```
+
+ > [!Important]
+ > Be careful when specifying these options to the `perf.sh` script: they should closely reflect the deployment configuration that is being benchmarked. See `perf.sh --help` to learn more about these options. The command above describes a deployment that uses disaggregated serving in Dynamo with the vLLM backend, with 4 prefill workers at TP=1 and 1 decode worker at TP=4.
+
+For more information see [Collecting Performance Numbers](#collecting-performance-numbers) section below.

 ## Disaggregated Multinode Benchmarking

@@ -155,7 +163,16 @@ With the Dynamo repository, benchmarking image and model available, and **NATS a
    > [!Tip]
    > Check the `prefill_multinode.log` to make sure the service is fully started before collecting performance numbers.

- 5. Collect the performance numbers as shown on the [Collecting Performance Numbers](#collecting-performance-numbers) section above.
+ 5. Collect the performance numbers:
+
+ ```bash
+ bash -x /workspace/benchmarks/llm/perf.sh --mode disaggregated --deployment-kind dynamo_vllm --prefill-tensor-parallelism 1 --prefill-data-parallelism 8 --decode-tensor-parallelism 8 --decode-data-parallelism 1
+ ```
+
+ > [!Important]
+ > Be careful when specifying these options to the `perf.sh` script: they should closely reflect the deployment configuration that is being benchmarked. See `perf.sh --help` to learn more about these options. The command above describes a deployment that uses disaggregated serving in Dynamo with the vLLM backend, with 8 prefill workers at TP=1 and 1 decode worker at TP=8.
+
+For more information see [Collecting Performance Numbers](#collecting-performance-numbers) section below.


 ## vLLM Aggregated Baseline Benchmarking
@@ -211,22 +228,79 @@ With the Dynamo repository and the benchmarking image available, perform the fol
    > [!Note]
    > If benchmarking over 2 nodes, the `upstream` configuration will need to be updated to link to the `vllm serve` on the second node.

- 4. Collect the performance numbers as shown on the [Collecting Performance Numbers](#collecting-performance-numbers) section below.
+ 4. Collect the performance numbers:
+
+Single-Node
+
+ ```bash
+ bash -x /workspace/benchmarks/llm/perf.sh --mode aggregated --deployment-kind vllm_serve --tensor-parallelism 4 --data-parallelism 2
+ ```
+
+ Two Nodes

+ ```bash
+ bash -x /workspace/benchmarks/llm/perf.sh --mode aggregated --deployment-kind vllm_serve --tensor-parallelism 8 --data-parallelism 2
+ ```
+
+ > [!Important]
+ > Be careful when specifying these options to the `perf.sh` script: they should closely reflect the deployment configuration that is being benchmarked. See `perf.sh --help` to learn more about these options. The commands above describe a deployment that uses aggregated serving with `vllm serve`, with 2 workers at TP=4 (or TP=8 for two nodes).
+
+For more information see [Collecting Performance Numbers](#collecting-performance-numbers) section below.

 ## Collecting Performance Numbers

-Run the benchmarking script
+Currently, there is no consistent way of obtaining the configuration of the deployment service, so this information must be provided to the script as command line arguments. The benchmarking script `/workspace/benchmarks/llm/perf.sh` uses the GenAI-Perf tool to collect performance numbers at a range of request concurrencies. The `perf.sh` script can be run multiple times to collect numbers for different deployments. Each execution creates a new artifacts directory in `artifacts_root` and writes its numbers there. See [Plotting Pareto Graphs](#plotting-pareto-graphs) to learn how to turn the data in `artifacts_root` into Pareto graphs of the performance.

-```bash
-bash -x /workspace/benchmarks/llm/perf.sh
-```
+Note: Because every `perf.sh` run adds a new artifacts directory to `artifacts_root`, start each experiment with a clean `artifacts_root` so that only the runs you want to compare are included.

 > [!Tip]
 > See [GenAI-Perf tutorial](https://github.com/triton-inference-server/perf_analyzer/blob/main/genai-perf/docs/tutorial.md)
 > @ [GitHub](https://github.com/triton-inference-server/perf_analyzer) for additional information about how to run GenAI-Perf
 > and how to interpret results.

+## Interpreting Results
+
+### Plotting Pareto Graphs
+
+The `artifacts_root` directory produced by the benchmarking runs contains the raw performance numbers collected by GenAI-Perf.
+
+Using the benchmarking image, install the dependencies for plotting Pareto graph
+```bash
+pip3 install matplotlib seaborn
+```
+At the directory where the artifacts are located, plot the Pareto graph
+
+Single-Node:
+
+```bash
+python3 /workspace/benchmarks/llm/plot_pareto.py --artifacts-root-dir artifacts_root
+```
+
+Two Nodes:
+
+```bash
+python3 /workspace/benchmarks/llm/plot_pareto.py --artifacts-root-dir artifacts_root --title "Two Nodes"
+```
+The graph will be saved to the current directory and named `pareto_plot.png`.
+
+### Interpreting Pareto Graphs
+
+The question we want to answer in this comparison is how much Output Token Throughput can be improved by switching from
+aggregated to disaggregated serving when both are performing under similar Inter Token Latency.
+
+Each benchmarked concurrency produces one latency and throughput value pair. The x-axis of the Pareto graph is
+latency expressed as tokens/s/user, so a higher value means lower latency. The y-axis of the Pareto graph is throughput
+(tokens/s/gpu). Each latency and throughput value pair forms a dot on the Pareto graph, and a line (Pareto Frontier) is
+formed when the dots from different concurrency values are plotted on the graph.
+
+With the Pareto Frontiers of the baseline and the disaggregated results plotted on the graph, we can look for the
+greatest increase in throughput (along the y-axis) between the baseline and the disaggregated result Pareto Frontier,
+over different latencies (along the x-axis).
+
+For example, at 45 tokens/s/user, the increase in tokens/s/gpu is `145 - 80 = 65`, from the orange baseline to the
+blue disaggregated line, so the improvement is around 1.44x speed up:
+![Example Pareto Plot](./example_plots/single_node_pareto_plot.png)
+Note: The above example was collected over a single benchmarking run; the actual numbers may vary between runs, configurations and hardware.

 ## Supporting Additional Models


--- a/examples/llm/benchmarks/example_plots/single_node_pareto_plot.png
+++ b/examples/llm/benchmarks/example_plots/single_node_pareto_plot.png
--- a/examples/llm/benchmarks/example_plots/two_node_pareto_plot.png
+++ b/examples/llm/benchmarks/example_plots/two_node_pareto_plot.png