import argparse
import os

# Parse arguments to identify the path to the logs from
# the performance runs
parser = argparse.ArgumentParser()
parser.add_argument(
    "--results_dir",
    "-r",
    help="Specifies the path to the corresponding results directory that contains the performance subdirectories containing the submission logs, i.e. inference_results_v0.7/closed/NVIDIA/results/T4x8/resnet/Offline.",
    required=True,
)
parser.add_argument(
    "--compliance_dir",
    "-c",
    help="Specifies the path to the directory containing the logs from the compliance test run.",
    required=True,
)
parser.add_argument(
    "--output_dir",
    "-o",
    help="Specifies the path to the output directory where compliance logs will be uploaded from, i.e. inference_results_v0.7/closed/NVIDIA/compliance/T4x8/resnet/Offline.",
    required=True,
)
args = parser.parse_args()
print("Parsing arguments.")
results_dir = args.results_dir
compliance_dir = args.compliance_dir
output_dir = os.path.join(args.output_dir, "TEST04")

# Run verify performance
verify_performance_binary = os.path.join(
    os.path.dirname(__file__), "verify_performance.py"
)
verify_performance_command = (
    "python3 "
    + verify_performance_binary
    + " -r "
    + results_dir
    + "/performance/run_1/mlperf_log_summary.txt"
    + " -t "
    + compliance_dir
    + "/mlperf_log_summary.txt | tee verify_performance.txt"
)
os.system(verify_performance_command)
# Test 06 - Verify consistency of the Llama-v2-70b output
This repository provides the config files and scripts to run and verify TEST 06 - Verify consistency of the Llama-v2-70b output.
# Table of contents
1. [Introduction](#introduction)
2. [Requisites](#requisites)
3. [Instructions](#instructions)
## Introduction
The purpose of this test is to ensure the consistency of the output of the LLM models (Llama2 and Mixtral) and to guard against a potential EOS exploit. The test performs a performance run limited to 100 samples, logging them into `mlperf_log_accuracy.json`. To achieve a passing result, three criteria must be met (a sketch of the second check follows the list):
- In case the first token is reported independently (not applicable for the Offline scenario), it must match the first token of the model output for every query.
- For each query, the model output must end with at most one EOS token.
- The number of reported tokens must match the length of the output sequence.
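To illustrate the second criterion, the sketch below walks the entries of `mlperf_log_accuracy.json` and counts trailing EOS tokens. It assumes token IDs are serialized as hex-encoded int32 data and that the default EOS id is 2; it is an illustration, not the official verification script.

```python
import json
import numpy as np

def trailing_eos_ok(accuracy_log="mlperf_log_accuracy.json", eos_token_id=2):
    """Check that each query's output ends with at most one EOS token.

    Assumes token IDs are stored as hex-encoded int32 data and that
    eos_token_id matches the tokenizer in use.
    """
    with open(accuracy_log) as f:
        entries = json.load(f)
    for entry in entries:
        tokens = np.frombuffer(bytes.fromhex(entry["data"]), dtype=np.int32)
        # Count consecutive EOS tokens at the end of the sequence.
        n_trailing_eos = 0
        for tok in tokens[::-1]:
            if tok != eos_token_id:
                break
            n_trailing_eos += 1
        if n_trailing_eos > 1:
            return False
    return True
```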
## Requisites
For this test, you need to be able to run the `Llama2-70b` benchmark, so all of its requirements also apply here. Additionally, you need to have `numpy` installed:
```
pip install numpy
```
## Instructions
### Part I
Run the benchmark with the provided `audit.config` in the corresponding subdirectory. Note that `audit.config` must be copied to the directory the benchmark is run from. You can verify that `audit.config` was properly read by checking that loadgen reports finding it in `mlperf_log_detail.txt`.
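For example, a quick check of the detail log can be done along these lines; this is a minimal sketch, and matching on the substring `audit.config` (rather than loadgen's exact message wording) is an assumption.

```python
# Return True if mlperf_log_detail.txt mentions audit.config,
# which indicates loadgen picked up the audit configuration.
def audit_config_found(detail_log="mlperf_log_detail.txt"):
    with open(detail_log) as f:
        return any("audit.config" in line for line in f)

print(audit_config_found())
```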
- COMPLIANCE_DIR: Specifies the path to the directory containing the logs from the compliance test run.
- OUTPUT_DIR: Specifies the path to the output directory where compliance logs will be uploaded from, i.e. `inference_results_v0.7/closed/NVIDIA/compliance/TEST06/llama2-70b/Offline`
- SCENARIO: Specifies the scenario in which the benchmark was run. One of ["Offline", "Server", "SingleStream", "MultiStream"].
help="Specifies the path to the directory containing the logs from the compliance test run.",
required=True,
)
parser.add_argument(
"--output_dir",
"-o",
help="Specifies the path to the output directory where compliance logs will be uploaded from, i.e. inference_results_v0.7/closed/NVIDIA/compliance/T4x8/resnet/Offline.",
required=True,
)
parser.add_argument(
"--eos_token_id","-e",default=2,help="EOS token id of the tokenizer"
The benchmark implementation run command will automatically download the validation and calibration datasets and do the necessary preprocessing. In case you want to download only the datasets, you can use the below commands.
=== "Full Dataset"
R-GAT validation run uses the IGBH dataset consisting of 547,306,935 nodes and 5,812,005,639 edges.
### Get Full Dataset
```
cm run script --tags=get,dataset,igbh,_full -j
```
=== "Debug Dataset"
R-GAT debug run uses the IGBH debug dataset (tiny).
### Get Debug Dataset
```
cm run script --tags=get,dataset,igbh,_debug -j
```
## Model
The benchmark implementation run command will automatically download the required model and do the necessary conversions. In case you want to only download the official model, you can use the below commands.
The benchmark implementation run command will automatically download the validation and calibration datasets and do the necessary preprocessing. In case you want to download only the datasets, you can use the below commands.
=== "Validation"
ResNet50 validation run uses the Imagenet 2012 validation dataset consisting of 50,000 images.
### Get Validation Dataset
```
cm run script --tags=get,dataset,imagenet,validation -j
```
=== "Calibration"
The ResNet50 calibration dataset consists of 500 images selected from the Imagenet 2012 validation dataset. There are two alternative options for the calibration dataset.
### Get Calibration Dataset Using Option 1
```
cm run script --tags=get,dataset,imagenet,calibration,_mlperf.option1 -j
```
### Get Calibration Dataset Using Option 2
```
cm run script --tags=get,dataset,imagenet,calibration,_mlperf.option2 -j
```
## Model
The benchmark implementation run command will automatically download the required model and do the necessary conversions. In case you want to only download the official model, you can use the below commands.
Get the Official MLPerf ResNet50 Model
=== "Tensorflow"
### Tensorflow
```
cm run script --tags=get,ml-model,resnet50,_tensorflow -j
```
=== "Onnx"
### Onnx
```
cm run script --tags=get,ml-model,resnet50,_onnx -j
```
Install CM following the [installation page](site:install).
Mobilenet models are not official MLPerf models, so they cannot be used for a Closed division MLPerf inference submission. However, since they can be run with the Imagenet dataset, they are allowed for Open division submissions. Only CPU runs are supported at present.
## TFLite Backend
=== "Mobilenet-V1"
### Mobilenet V1
```bash
cm run script --tags=run,mobilenet-models,_tflite,_mobilenet-v1 \
--adr.compiler.tags=gcc
```
=== "Mobilenet-V2"
### Mobilenet V2
```bash
cm run script --tags=run,mobilenet-models,_tflite,_mobilenet-v2 \
--adr.compiler.tags=gcc
```
=== "Mobilenet-V2"
### Mobilenet V2
```bash
cm run script --tags=run,mobilenet-models,_tflite,_mobilenet-v2 \
--adr.compiler.tags=gcc
```
=== "Mobilenets"
### Mobilenet V1,V2,V3
```bash
cm run script --tags=run,mobilenet-models,_tflite,_mobilenet \
--adr.compiler.tags=gcc
```
=== "Efficientnet"
### Efficientnet
```bash
cm run script --tags=run,mobilenet-models,_tflite,_efficientnet \
--adr.compiler.tags=gcc
```
## ARMNN Backend
=== "Mobilenet-V1"
### Mobilenet V1
```bash
cm run script --tags=run,mobilenet-models,_tflite,_armnn,_mobilenet-v1 \
--adr.compiler.tags=gcc
```
=== "Mobilenet-V2"
### Mobilenet V2
```bash
cm run script --tags=run,mobilenet-models,_tflite,_armnn,_mobilenet-v2 \
--adr.compiler.tags=gcc
```
=== "Mobilenet-V2"
### Mobilenet V2
```bash
cm run script --tags=run,mobilenet-models,_tflite,_armnn,_mobilenet-v2 \
--adr.compiler.tags=gcc
```
=== "Mobilenets"
### Mobilenet V1,V2,V3
```bash
cm run script --tags=run,mobilenet-models,_tflite,_armnn,_mobilenet \
--adr.compiler.tags=gcc
```
=== "Efficientnet"
### Efficientnet
```bash
cm run script --tags=run,mobilenet-models,_tflite,_armnn,_efficientnet \
--adr.compiler.tags=gcc
```
The benchmark implementation run command will automatically download the validation and calibration datasets and do the necessary preprocessing. In case you want to download only the datasets, you can use the below commands.
=== "Validation"
BERT validation run uses the SQuAD v1.1 dataset.
### Get Validation Dataset
```
cm run script --tags=get,dataset,squad,validation -j
```
## Model
The benchmark implementation run command will automatically download the required model and do the necessary conversions. In case you want to only download the official model, you can use the below commands.
Get the Official MLPerf Bert-Large Model
=== "Pytorch"
### Pytorch
```
cm run script --tags=get,ml-model,bert-large,_pytorch -j
```
=== "Onnx"
### Onnx
```
cm run script --tags=get,ml-model,bert-large,_onnx -j
```
=== "Tensorflow"
### Tensorflow
```
cm run script --tags=get,ml-model,bert-large,_tensorflow -j
```
The benchmark implementation run command will automatically download the validation and calibration datasets and do the necessary preprocessing. In case you want to download only the datasets, you can use the below commands.
=== "Validation"
GPT-J validation run uses the CNNDM dataset.
### Get Validation Dataset
```
cm run script --tags=get,dataset,cnndm,validation -j
```
## Model
The benchmark implementation run command will automatically download the required model and do the necessary conversions. In case you want to only download the official model, you can use the below commands.
Get the Official MLPerf GPT-J Model
=== "Pytorch"
### Pytorch
```
cm run script --tags=get,ml-model,gptj,_pytorch -j
```
The benchmark implementation run command will automatically download the validation and calibration datasets and do the necessary preprocessing. In case you want to download only the datasets, you can use the below commands.
=== "Validation"
LLAMA2-70b validation run uses the Open ORCA dataset.
### Get Validation Dataset
```
cm run script --tags=get,dataset,openorca,validation -j
```
## Model
The benchmark implementation run command will automatically download the required model and do the necessary conversions. In case you want to only download the official model, you can use the below commands.
Get the Official MLPerf LLAMA2-70b Model
=== "Pytorch"
### Pytorch
```
cm run script --tags=get,ml-model,llama2-70b,_pytorch -j
```
!!! tip
    Downloading the llama2-70B model from Hugging Face will prompt you to enter your Hugging Face username and password. Please note that the password required is the [**access token**](https://huggingface.co/settings/tokens) generated for your account. Additionally, ensure that your account has access to the [llama2-70B](https://huggingface.co/meta-llama/Llama-2-70b-chat-hf) model.
The benchmark implementation run command will automatically download the validation and calibration datasets and do the necessary preprocessing. In case you want to download only the datasets, you can use the below commands.
=== "Validation"
### Get Validation Dataset
```
cm run script --tags=get,dataset,mlperf,inference,llama3,_validation --outdirname=<path to download> -j
```
=== "Calibration"
### Get Calibration Dataset
```
cm run script --tags=get,dataset,mlperf,inference,llama3,_calibration --outdirname=<path to download> -j
```
## Model
The benchmark implementation run command will automatically download the required model and do the necessary conversions. In case you want to only download the official model, you can use the below commands.
Get the Official MLPerf LLAMA3.1-405b Model
=== "Pytorch"
### Pytorch
```
cm run script --tags=get,ml-model,llama3 --outdirname=<path to download> --hf_token=<huggingface access token> -j
```
!!! tip
    Downloading the llama3.1-405B model from Hugging Face requires an [**access token**](https://huggingface.co/settings/tokens), which can be generated for your account. Additionally, ensure that your account has access to the [llama3.1-405B](https://huggingface.co/meta-llama/Llama-3.1-405B-Instruct) model.
The benchmark implementation run command will automatically download the preprocessed validation and calibration datasets. In case you want to download only the datasets, you can use the below commands.
=== "Validation"
The mixtral-8x7b validation run uses a combined dataset of Open ORCA, GSM8K and MBXP.
### Get Validation Dataset
```
cm run script --tags=get,dataset-mixtral,openorca-mbxp-gsm8k-combined -j
```
## Model
The benchmark implementation run command will automatically download the required model and do the necessary conversions. In case you want to only download the official model, you can use the below commands.