Unverified Commit dfbd70b1 authored by Yifan Xiong's avatar Yifan Xiong Committed by GitHub
Browse files

Release - SuperBench v0.3.0 (#212)



**Description**

Cherry-pick bug fixes from v0.3.0 to main.

**Major Revisions**
* Docs - Upgrade version and release note (#209)
* Benchmarks: Build Pipeline - Update rccl-test git submodule to dc1ad48 (#210)
* Benchmarks: Update - Update benchmarks in configuration file (#208)
* CI/CD - Update GitHub Action VM (#211)
* Benchmarks: Fix Bug - Fix wrong parameters for gpu-sm-copy-bw in configuration examples (#203)
* CI/CD - Fix bug in build image for push event (#205)
* Benchmark: Fix Bug - fix error message of communication-computation-overlap (#204)
* Tool: Fix bug - Fix function naming issue in system info (#200)
* CI/CD - Push images in GitHub Action (#202)
* Bug - Fix torch.distributed command for single node (#201)
* CLI - Integrate system info for node (#199)
* Benchmarks: Code Revision - Revise CMake files for microbenchmarks. (#196)
* CI/CD - Add ROCm image build in GitHub Actions (#194)
* Bug: Fix bug - fix bug of hipBusBandwidth build (#193)
* Benchmarks: Build Pipeline - Restore rocblas build logic (#197)
* Bug: Fix Bug - Add barrier before 'destroy_process_group' in model benchmarks (#198)
* Bug - Revise 'docker run' in sb deploy (#195)
* Bug - Fix Bug : fix bug of error param operations to operation in rccl-bw of hpe config (#190)
Co-authored-by: default avatarYuting Jiang <v-yujiang@microsoft.com>
Co-authored-by: default avatarGuoshuai Zhao <guzhao@microsoft.com>
Co-authored-by: default avatarZiyue Yang <ziyyang@microsoft.com>
parent 37b15db9
......@@ -3,7 +3,7 @@
# Server:
# - Product: HPE Apollo 6500
version: v0.2
version: v0.3
superbench:
enable: null
var:
......@@ -40,24 +40,20 @@ superbench:
rccl-bw:
enable: true
modes:
- name: mpi
proc_num: 8
env:
NCCL_SOCKET_IFNAME: ens17f0
NCCL_IB_GDR_LEVEL: 1
- name: local
proc_num: 1
parallel: no
parameters:
maxbytes: 128M
minbytes: 32M
iters: 50
ngpus: 1
operations: allreduce
maxbytes: 8G
ngpus: 8
operation: allreduce
mem-bw:
<<: *default_local_mode
gemm-flops:
<<: *default_local_mode
parameters:
m: 7680
n: 8192
m: 7680
n: 8192
k: 8192
ib-loopback:
enable: true
......@@ -75,15 +71,16 @@ superbench:
parameters:
block_devices: []
gpu-sm-copy-bw:
enable: false
enable: true
modes:
- name: local
proc_num: 32
prefix: CUDA_VISIBLE_DEVICES=$(({proc_rank}%8)) numactl -N $(({proc_rank}%4)) -m $(({proc_rank}%4))
prefix: HIP_VISIBLE_DEVICES=$(({proc_rank}%8)) numactl -N $(({proc_rank}%4)) -m $(({proc_rank}%4))
parallel: no
parameters:
dtoh: true
htod: true
mem_type:
- dtoh
- htod
gpt_models:
<<: *default_pytorch_mode
models:
......
......@@ -4,7 +4,7 @@
# - Product: G482-Z53
# - Link: https://www.gigabyte.cn/FileUpload/Global/MicroSite/553/G482-Z53.html
version: v0.2
version: v0.3
superbench:
enable: null
var:
......@@ -13,7 +13,7 @@ superbench:
modes:
- name: local
proc_num: 8
prefix: CUDA_VISIBLE_DEVICES={proc_rank}
prefix: HIP_VISIBLE_DEVICES={proc_rank}
parallel: yes
default_pytorch_mode: &default_pytorch_mode
enable: true
......@@ -36,6 +36,52 @@ superbench:
- train
pin_memory: yes
benchmarks:
kernel-launch:
<<: *default_local_mode
rccl-bw:
enable: true
modes:
- name: local
proc_num: 1
parallel: no
parameters:
maxbytes: 8G
ngpus: 8
operation: allreduce
mem-bw:
<<: *default_local_mode
gemm-flops:
<<: *default_local_mode
parameters:
m: 7680
n: 8192
k: 8192
ib-loopback:
enable: true
modes:
- name: local
proc_num: 2
prefix: PROC_RANK={proc_rank} IB_DEVICES=0,1
parallel: no
disk-benchmark:
enable: false
modes:
- name: local
proc_num: 1
parallel: no
parameters:
block_devices: []
gpu-sm-copy-bw:
enable: true
modes:
- name: local
proc_num: 32
prefix: HIP_VISIBLE_DEVICES=$(({proc_rank}%8)) numactl -N $(({proc_rank}%4)) -m $(({proc_rank}%4))
parallel: no
parameters:
mem_type:
- dtoh
- htod
gpt_models:
<<: *default_pytorch_mode
models:
......
# SuperBench Config
version: v0.2
version: v0.3
superbench:
enable: null
var:
......@@ -35,6 +35,51 @@ superbench:
<<: *default_local_mode
gemm-flops:
<<: *default_local_mode
nccl-bw:
enable: true
modes:
- name: local
proc_num: 1
parallel: no
parameters:
ngpus: 8
ib-loopback:
enable: true
modes:
- name: local
proc_num: 4
prefix: PROC_RANK={proc_rank} IB_DEVICES=0,2,4,6 NUMA_NODES=1,0,3,2
parallel: yes
- name: local
proc_num: 4
prefix: PROC_RANK={proc_rank} IB_DEVICES=1,3,5,7 NUMA_NODES=1,0,3,2
parallel: yes
mem-bw:
enable: true
modes:
- name: local
proc_num: 8
prefix: CUDA_VISIBLE_DEVICES={proc_rank} numactl -c $(({proc_rank}/2))
parallel: yes
disk-benchmark:
enable: false
modes:
- name: local
proc_num: 1
parallel: no
parameters:
block_devices: []
gpu-sm-copy-bw:
enable: true
modes:
- name: local
proc_num: 32
prefix: CUDA_VISIBLE_DEVICES=$(({proc_rank}%8)) numactl -N $(({proc_rank}%4)) -m $(({proc_rank}%4))
parallel: no
parameters:
mem_type:
- dtoh
- htod
cudnn-function:
<<: *default_local_mode
cublas-function:
......
# SuperBench Config
version: v0.2
version: v0.3
superbench:
enable: null
var:
......@@ -32,7 +32,10 @@ superbench:
enable: true
modes:
- name: local
prefix: NCCL_DEBUG=INFO NCCL_IB_DISABLE=1
proc_num: 1
parallel: no
parameters:
ngpus: 8
ib-loopback:
enable: true
modes:
......@@ -61,15 +64,16 @@ superbench:
prefix: CUDA_VISIBLE_DEVICES={proc_rank} numactl -c $(({proc_rank}/2))
parallel: yes
gpu-sm-copy-bw:
enable: false
enable: true
modes:
- name: local
proc_num: 32
prefix: CUDA_VISIBLE_DEVICES=$(({proc_rank}%8)) numactl -N $(({proc_rank}%4)) -m $(({proc_rank}%4))
parallel: no
parameters:
dtoh: true
htod: true
mem_type:
- dtoh
- htod
kernel-launch:
<<: *default_local_mode
gemm-flops:
......
......@@ -101,7 +101,7 @@
{{ '--security-opt seccomp=unconfined --group-add video' if amd_gpu_exist else '' }} \
-w /root -v {{ workspace }}:/root -v /mnt:/mnt \
-v /var/run/docker.sock:/var/run/docker.sock \
{{ docker_image }} bash && \
--entrypoint /bin/bash {{ docker_image }} && \
docker exec {{ container }} bash -c \
"chown -R root:root ~ && \
sed -i 's/[# ]*Port.*/Port {{ ssh_port }}/g' /etc/ssh/sshd_config && \
......
......@@ -123,20 +123,13 @@ def __get_mode_command(self, benchmark_name, mode):
elif mode.name == 'torch.distributed':
# TODO: replace with torch.distributed.run in v1.9
# TODO: only supports node_num=1 and node_num=all currently
torch_dist_params = '' if mode.node_num == 1 else \
'--nnodes=$NNODES --node_rank=$NODE_RANK --master_addr=$MASTER_ADDR --master_port=$MASTER_PORT '
mode_command = (
'python3 -m torch.distributed.launch '
'--use_env --no_python --nproc_per_node={proc_num} '
'--nnodes={node_num} --node_rank=$NODE_RANK '
'--master_addr=$MASTER_ADDR --master_port=$MASTER_PORT '
'{command} {torch_distributed_suffix}'
).format(
proc_num=mode.proc_num,
node_num=1 if mode.node_num == 1 else '$NNODES',
command=exec_command,
torch_distributed_suffix=(
'superbench.benchmarks.{name}.parameters.distributed_impl=ddp '
'superbench.benchmarks.{name}.parameters.distributed_backend=nccl'
).format(name=benchmark_name),
f'python3 -m torch.distributed.launch'
f' --use_env --no_python --nproc_per_node={mode.proc_num} {torch_dist_params}{exec_command}'
f' superbench.benchmarks.{benchmark_name}.parameters.distributed_impl=ddp'
f' superbench.benchmarks.{benchmark_name}.parameters.distributed_backend=nccl'
)
elif mode.name == 'mpi':
mode_command = (
......
This diff is collapsed.
......@@ -81,3 +81,7 @@ def test_sb_run_nonexist_host_file(self):
"""Test sb run, --host-file does not exist, should fail."""
result = self.cmd('sb run --host-file ./nonexist.yaml', expect_failure=True)
self.assertEqual(result.exit_code, 1)
def test_sb_node_info(self):
    """Test sb node info, should succeed."""
    self.cmd('sb node info', expect_failure=False)
......@@ -116,8 +116,6 @@ def test_get_mode_command(self):
'expected_command': (
'python3 -m torch.distributed.launch '
'--use_env --no_python --nproc_per_node=8 '
'--nnodes=1 --node_rank=$NODE_RANK '
'--master_addr=$MASTER_ADDR --master_port=$MASTER_PORT '
f'sb exec --output-dir {self.sb_output_dir} -c sb.config.yaml -C superbench.enable=foo '
'superbench.benchmarks.foo.parameters.distributed_impl=ddp '
'superbench.benchmarks.foo.parameters.distributed_backend=nccl'
......
......@@ -8,7 +8,6 @@ MPI_HOME ?= /usr/local/mpi
HIP_HOME ?= /opt/rocm/hip
RCCL_HOME ?= /opt/rocm/rccl
ROCM_VERSION ?= rocm-$(shell dpkg -l | grep 'rocm-dev ' | awk '{print $$3}' | cut -d '.' -f1-3)
ROCM_ARCH ?= $(shell rocminfo | grep " gfx" | uniq | awk '{print $$2}')
.PHONY: all cuda rocm common cuda_cutlass cuda_bandwidthTest cuda_nccl_tests cuda_perftest rocm_perftest fio rocm_rccl_tests rocm_rocblas rocm_bandwidthTest
......@@ -66,7 +65,7 @@ ifneq (,$(wildcard fio/Makefile))
cd ./fio && ./configure --prefix=$(SB_MICRO_PATH) && make -j && make install
endif
# Build rccl-tests from commit cc34c5 of develop branch (default branch).
# Build rccl-tests from commit dc1ad48 of develop branch (default branch).
rocm_rccl_tests: sb_micro_path
ifneq (, $(wildcard rccl-tests/Makefile))
cd ./rccl-tests && make MPI=1 MPI_HOME=$(MPI_HOME) HIP_HOME=$(HIP_HOME) RCCL_HOME=$(RCCL_HOME) -j
......@@ -81,21 +80,14 @@ rocm_rocblas: sb_micro_path
ifeq (, $(wildcard $(SB_MICRO_PATH)/bin/rocblas-bench))
if [ -d rocBLAS ]; then rm -rf rocBLAS; fi
git clone -b ${ROCM_VERSION} https://github.com/ROCmSoftwarePlatform/rocBLAS.git ./rocBLAS
ifeq (${ROCM_VERSION}, rocm-4.0.0)
sed -i '/CMAKE_MATCH_1/a\ get_filename_component(HIP_CLANG_ROOT "$${HIP_CLANG_ROOT}" DIRECTORY)' /opt/rocm/hip/lib/cmake/hip/hip-config.cmake
cd ./rocBLAS && HIPCC_COMPILE_FLAGS_APPEND="-D_OPENMP=201811 -O3 -Wno-format-nonliteral -DCMAKE_HAVE_LIBC_PTHREAD -parallel-jobs=2" HIPCC_LINK_FLAGS_APPEND="-lpthread -O3 -parallel-jobs=2" ./install.sh -idc -a ${ROCM_ARCH}
else
cd ./rocBLAS && ./install.sh -idc
endif
cd ./rocBLAS && ./install.sh --dependencies --clients-only
cp -v ./rocBLAS/build/release/clients/staging/rocblas-bench $(SB_MICRO_PATH)/bin/
endif
# Build hipBusBandwidth.
# HIP is released with rocm, like rocm-4.2.0 and so on.
# The version we use is the released tag which is consistent with the rocm version in the environment or docker.
rocm_bandwidthTest:
cp -r -v $(shell hipconfig -p) ./
ifneq (, $(wildcard hip/samples/1_Utils/hipBusBandwidth/CMakeLists.txt))
cd ./hip/samples/1_Utils/hipBusBandwidth/ && mkdir -p build && cd build && cmake .. && make
cp -v ./hip/samples/1_Utils/hipBusBandwidth/build/hipBusBandwidth $(SB_MICRO_PATH)/bin/
endif
rocm_bandwidthTest: sb_micro_path
cp -r -v $(shell hipconfig -p)/samples/1_Utils/hipBusBandwidth ./
cd ./hipBusBandwidth/ && mkdir -p build && cd build && cmake .. && make
cp -v ./hipBusBandwidth/build/hipBusBandwidth $(SB_MICRO_PATH)/bin/
Subproject commit cc34c545098145bc148e5035e4c8e767b4d71ece
Subproject commit dc1ad4853d7ec738387d42a75a58a98d7af00c7b
---
slug: release-sb-v0.3
title: Releasing SuperBench v0.3
author: Peng Cheng
author_title: SuperBench Team
author_url: https://github.com/cp5555
author_image_url: https://github.com/cp5555.png
tags: [superbench, announcement, release]
---
We are very happy to announce that **SuperBench 0.3.0 version** is officially released today!
You can install and try superbench by following [Getting Started Tutorial](https://microsoft.github.io/superbenchmark/docs/getting-started/installation).
## SuperBench 0.3.0 Release Notes
### SuperBench Framework
#### Runner
- Implement MPI mode.
#### Benchmarks
- Support Docker benchmark.
### Single-node Validation
#### Micro Benchmarks
1. Memory (Tool: NVIDIA/AMD Bandwidth Test Tool)
| Metrics | Unit | Description |
|----------------|------|-------------------------------------|
| H2D_Mem_BW_GPU | GB/s | host-to-GPU bandwidth for each GPU |
| D2H_Mem_BW_GPU | GB/s | GPU-to-host bandwidth for each GPU |
2. IBLoopback (Tool: PerfTest – Standard RDMA Test Tool)
| Metrics | Unit | Description |
|----------|------|---------------------------------------------------------------|
| IB_Write | MB/s | The IB write loopback throughput with different message sizes |
| IB_Read | MB/s | The IB read loopback throughput with different message sizes |
| IB_Send | MB/s | The IB send loopback throughput with different message sizes |
3. NCCL/RCCL (Tool: NCCL/RCCL Tests)
| Metrics | Unit | Description |
|---------------------|------|-----------------------------------------------------------------|
| NCCL_AllReduce | GB/s | The NCCL AllReduce performance with different message sizes |
| NCCL_AllGather | GB/s | The NCCL AllGather performance with different message sizes |
| NCCL_broadcast | GB/s | The NCCL Broadcast performance with different message sizes |
| NCCL_reduce | GB/s | The NCCL Reduce performance with different message sizes |
| NCCL_reduce_scatter | GB/s | The NCCL ReduceScatter performance with different message sizes |
4. Disk (Tool: FIO – Standard Disk Performance Tool)
| Metrics | Unit | Description |
|----------------|------|---------------------------------------------------------------------------------|
| Seq_Read | MB/s | Sequential read performance |
| Seq_Write | MB/s | Sequential write performance |
| Rand_Read | MB/s | Random read performance |
| Rand_Write | MB/s | Random write performance |
| Seq_R/W_Read | MB/s | Read performance in sequential read/write, fixed measurement (read:write = 4:1) |
| Seq_R/W_Write | MB/s | Write performance in sequential read/write (read:write = 4:1) |
| Rand_R/W_Read | MB/s | Read performance in random read/write (read:write = 4:1) |
| Rand_R/W_Write | MB/s | Write performance in random read/write (read:write = 4:1) |
5. H2D/D2H SM Transmission Bandwidth (Tool: MSR-A build)
| Metrics | Unit | Description |
|---------------|------|-----------------------------------------------------|
| H2D_SM_BW_GPU | GB/s | host-to-GPU bandwidth using GPU kernel for each GPU |
| D2H_SM_BW_GPU | GB/s | GPU-to-host bandwidth using GPU kernel for each GPU |
### AMD GPU Support
#### Docker Image Support
- ROCm 4.2 PyTorch 1.7.0
- ROCm 4.0 PyTorch 1.7.0
#### Micro Benchmarks
1. Kernel Launch (Tool: MSR-A build)
| Metrics | Unit | Description |
|--------------------------|-----------|--------------------------------------------------------------|
| Kernel_Launch_Event_Time | Time (ms) | Dispatch latency measured in GPU time using hipEventRecord() |
| Kernel_Launch_Wall_Time | Time (ms) | Dispatch latency measured in CPU time |
2. GEMM FLOPS (Tool: AMD rocblas-bench Tool)
| Metrics | Unit | Description |
|----------|--------|-------------------------------|
| FP64 | GFLOPS | FP64 FLOPS without MatrixCore |
| FP32(MC) | GFLOPS | TF32 FLOPS with MatrixCore |
| FP16(MC) | GFLOPS | FP16 FLOPS with MatrixCore |
| BF16(MC) | GFLOPS | BF16 FLOPS with MatrixCore |
| INT8(MC) | GOPS   | INT8 OPS with MatrixCore      |
#### E2E Benchmarks
1. CNN models -- Use PyTorch torchvision models
- ResNet: ResNet-50, ResNet-101, ResNet-152
- DenseNet: DenseNet-169, DenseNet-201
  - VGG: VGG-11, VGG-13, VGG-16, VGG-19
2. BERT -- Use huggingface Transformers
- BERT
- BERT Large
3. LSTM -- Use PyTorch
4. GPT-2 -- Use huggingface Transformers
### Bug Fix
- VGG models failed on A100 GPU with batch_size=128
### Other Improvement
1. Contribution related
- Contribute rule
- System information collection
2. Document
- Add release process doc
- Add design documents
- Add developer guide doc for coding style
- Add contribution rules
- Add docker image list
- Add initial validation results
......@@ -101,7 +101,7 @@ module.exports = {
announcementBar: {
id: 'supportus',
content:
'📢 <a href="https://microsoft.github.io/superbenchmark/blog/release-sb-v0.2">v0.2.1</a> has been released! ' +
'📢 <a href="https://microsoft.github.io/superbenchmark/blog/release-sb-v0.3">v0.3.0</a> has been released! ' +
'⭐️ If you like SuperBench, give it a star on <a target="_blank" rel="noopener noreferrer" href="https://github.com/microsoft/superbenchmark">GitHub</a>! ⭐️',
},
algolia: {
......
{
"name": "superbench-website",
"version": "0.2.1",
"version": "0.3.0",
"lockfileVersion": 1,
"requires": true,
"dependencies": {
......
{
"name": "superbench-website",
"version": "0.2.1",
"version": "0.3.0",
"private": true,
"scripts": {
"docusaurus": "docusaurus",
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment