Unverified Commit 54da021b authored by user4543's avatar user4543 Committed by GitHub
Browse files

Analyzer - Fix bugs in data diagnosis (#355)

**Description**
Fix bugs in data diagnosis.

**Major Revision**
- add support for getting the baseline of a metric whose benchmark uses custom naming containing ':', e.g. 'nccl-bw:default/allreduce_8_bw:0'
- save the raw data of all metrics, rather than only the metrics defined in diagnosis_rules.yaml, when output_all is True
- fix the wrong column index used when applying formats (red color and percent) in the Excel output
parent 3f135e46
......@@ -63,8 +63,8 @@ def _get_baseline_of_metric(self, baseline, metric):
if metric in baseline:
return baseline[metric]
else:
# exclude rank info
short = metric.split(':')[0]
# exclude rank info, for example, '.*:\d+'->'.*'
short = metric.strip(metric.split(':')[-1]).strip(':')
if short in baseline:
return baseline[short]
# baseline not defined
......@@ -221,7 +221,7 @@ def output_all_nodes_results(self, raw_data_df, data_not_accept_df):
DataFrame: all nodes' detailed information including ['Accept','#Issues','Category','Issue_Details']
"""
append_columns = ['Accept', '#Issues', 'Category', 'Issue_Details']
all_data_df = (raw_data_df[self._enable_metrics]).astype('float64')
all_data_df = (raw_data_df).astype('float64')
if data_not_accept_df.shape[0] == 0:
all_data_df['Accept'] = [True for i in range(len(all_data_df))]
......
......@@ -120,7 +120,8 @@ def output_excel_data_not_accept(writer, data_not_accept_df, rules):
for rule in rules:
for metric in rules[rule]['metrics']:
col_index = columns.index(metric)
# The column index of the metrics should start from 1
col_index = columns.index(metric) + 1
# Apply percent format for the columns whose rules are variance type.
if rules[rule]['function'] == 'variance':
worksheet.conditional_format(
......
[
{
"bert_models/pytorch-bert-base/steptime_train_float32": 114.5916701062,
"bert_models/pytorch-bert-base/throughput_train_float32": 279.8794623591,
"bert_models/pytorch-bert-base/steptime_train_float16": 83.8895108318,
"bert_models/pytorch-bert-base/throughput_train_float16": 382.0672582742,
"bert_models/pytorch-bert-large/steptime_train_float32": 307.9359371914,
"bert_models/pytorch-bert-large/throughput_train_float32": 103.9487609742,
"bert_models/pytorch-bert-large/steptime_train_float16": 206.8114168942,
"bert_models/pytorch-bert-large/throughput_train_float16": 154.8408911711,
"pytorch-computation-communication-overlap/mul_cost:0": 44.1822062144,
"pytorch-computation-communication-overlap/mul_cost:1": 44.1822139389,
"pytorch-computation-communication-overlap/mul_cost:2": 43.9701470781,
"pytorch-computation-communication-overlap/mul_cost:3": 43.9701478756,
"pytorch-computation-communication-overlap/mul_cost:4": 43.9701779317,
"pytorch-computation-communication-overlap/mul_cost:5": 43.9701571606,
"pytorch-computation-communication-overlap/mul_cost:6": 43.9701651983,
"pytorch-computation-communication-overlap/mul_cost:7": 44.1795444785,
"pytorch-computation-communication-overlap/matmul_cost:0": 137.0477370556,
"pytorch-computation-communication-overlap/matmul_cost:1": 137.0478344693,
"pytorch-computation-communication-overlap/matmul_cost:2": 137.047772209,
"pytorch-computation-communication-overlap/matmul_cost:3": 137.0477969726,
"pytorch-computation-communication-overlap/matmul_cost:4": 137.0481367431,
"pytorch-computation-communication-overlap/matmul_cost:5": 137.0482198877,
"pytorch-computation-communication-overlap/matmul_cost:6": 137.0477532237,
"pytorch-computation-communication-overlap/matmul_cost:7": 137.0478081607,
"densenet_models/pytorch-densenet169/steptime_train_float32": 150.6415554322,
"densenet_models/pytorch-densenet169/throughput_train_float32": 212.4701719243,
"densenet_models/pytorch-densenet169/steptime_train_float16": 145.3081957297,
"densenet_models/pytorch-densenet169/throughput_train_float16": 220.2932337305,
"densenet_models/pytorch-densenet201/steptime_train_float32": 182.9111778643,
"densenet_models/pytorch-densenet201/throughput_train_float32": 174.9742278232,
"densenet_models/pytorch-densenet201/steptime_train_float16": 176.3489063596,
"densenet_models/pytorch-densenet201/throughput_train_float16": 181.5446573603,
"gemm-flops/FP64:0": 9031.23,
"gemm-flops/FP64:1": 9040.85,
"gemm-flops/FP64:2": 9010.56,
"gemm-flops/FP64:3": 9041.26,
"gemm-flops/FP64:4": 9039.19,
"gemm-flops/FP64:5": 9015.69,
"gemm-flops/FP64:6": 9022.19,
"gemm-flops/FP64:7": 9030.2,
"gemm-flops/FP32:0": 18362.1,
"gemm-flops/FP32:1": 18375.6,
"gemm-flops/FP32:2": 18314.9,
"gemm-flops/FP32:3": 18375.6,
"gemm-flops/FP32:4": 18368.6,
"gemm-flops/FP32:5": 18347.1,
"gemm-flops/FP32:6": 18247.4,
"gemm-flops/FP32:7": 18318.4,
"gemm-flops/FP16:0": 33878.0,
"gemm-flops/FP16:1": 33911.1,
"gemm-flops/FP16:2": 33769.3,
"gemm-flops/FP16:3": 33909.9,
"gemm-flops/FP16:4": 33896.5,
"gemm-flops/FP16:5": 33798.1,
"gemm-flops/FP16:6": 33647.3,
"gemm-flops/FP16:7": 33764.8,
"gemm-flops/FP64_TC:0": 18963.6,
"gemm-flops/FP64_TC:1": 18924.2,
"gemm-flops/FP64_TC:2": 18930.3,
"gemm-flops/FP64_TC:3": 18971.9,
"gemm-flops/FP64_TC:4": 18946.0,
"gemm-flops/FP64_TC:5": 18945.0,
"gemm-flops/FP64_TC:6": 18822.9,
"gemm-flops/FP64_TC:7": 18911.1,
"gemm-flops/TF32_TC:0": 127900.0,
"gemm-flops/TF32_TC:1": 129094.0,
"gemm-flops/TF32_TC:2": 127831.0,
"gemm-flops/TF32_TC:3": 128709.0,
"gemm-flops/TF32_TC:4": 127388.0,
"gemm-flops/TF32_TC:5": 127861.0,
"gemm-flops/TF32_TC:6": 128492.0,
"gemm-flops/TF32_TC:7": 127720.0,
"gemm-flops/BF16_TC:0": 264965.0,
"gemm-flops/BF16_TC:1": 266638.0,
"gemm-flops/BF16_TC:2": 263151.0,
"gemm-flops/BF16_TC:3": 264752.0,
"gemm-flops/BF16_TC:4": 263049.0,
"gemm-flops/BF16_TC:5": 266605.0,
"gemm-flops/BF16_TC:6": 267501.0,
"gemm-flops/BF16_TC:7": 263880.0,
"gemm-flops/FP16_TC:0": 279474.0,
"gemm-flops/FP16_TC:1": 281256.0,
"gemm-flops/FP16_TC:2": 277403.0,
"gemm-flops/FP16_TC:3": 279147.0,
"gemm-flops/FP16_TC:4": 277587.0,
"gemm-flops/FP16_TC:5": 281537.0,
"gemm-flops/FP16_TC:6": 282132.0,
"gemm-flops/FP16_TC:7": 277788.0,
"gemm-flops/INT8_TC:0": 475160.0,
"gemm-flops/INT8_TC:1": 477725.0,
"gemm-flops/INT8_TC:2": 471621.0,
"gemm-flops/INT8_TC:3": 473716.0,
"gemm-flops/INT8_TC:4": 472124.0,
"gemm-flops/INT8_TC:5": 479972.0,
"gemm-flops/INT8_TC:6": 481327.0,
"gemm-flops/INT8_TC:7": 474710.0,
"gemm-flops/INT4_TC:0": 970330.0,
"gemm-flops/INT4_TC:1": 976837.0,
"gemm-flops/INT4_TC:2": 966003.0,
"gemm-flops/INT4_TC:3": 971315.0,
"gemm-flops/INT4_TC:4": 964441.0,
"gemm-flops/INT4_TC:5": 982461.0,
"gemm-flops/INT4_TC:6": 979610.0,
"gemm-flops/INT4_TC:7": 968359.0,
"gpt_models/pytorch-gpt2-large/steptime_train_float32": 295.0526971836,
"gpt_models/pytorch-gpt2-large/throughput_train_float32": 27.1154543969,
"gpt_models/pytorch-gpt2-large/steptime_train_float16": 194.4957742235,
"gpt_models/pytorch-gpt2-large/throughput_train_float16": 41.1394499411,
"gpu-sm-copy-bw/dtoh:0": 3.91755,
"gpu-sm-copy-bw/dtoh:1": 4.45414,
"gpu-sm-copy-bw/dtoh:2": 1.26483,
"gpu-sm-copy-bw/dtoh:3": 1.30041,
"gpu-sm-copy-bw/dtoh:4": 1.31577,
"gpu-sm-copy-bw/dtoh:5": 1.27968,
"gpu-sm-copy-bw/dtoh:6": 4.47849,
"gpu-sm-copy-bw/dtoh:7": 3.96231,
"gpu-sm-copy-bw/dtoh:8": 3.91705,
"gpu-sm-copy-bw/dtoh:9": 4.45487,
"gpu-sm-copy-bw/dtoh:10": 1.26352,
"gpu-sm-copy-bw/dtoh:11": 1.2999,
"gpu-sm-copy-bw/dtoh:12": 1.31677,
"gpu-sm-copy-bw/dtoh:13": 1.27885,
"gpu-sm-copy-bw/dtoh:14": 4.47913,
"gpu-sm-copy-bw/dtoh:15": 3.95893,
"gpu-sm-copy-bw/dtoh:16": 3.91729,
"gpu-sm-copy-bw/dtoh:17": 4.45627,
"gpu-sm-copy-bw/dtoh:18": 1.26437,
"gpu-sm-copy-bw/dtoh:19": 1.30144,
"gpu-sm-copy-bw/dtoh:20": 1.31704,
"gpu-sm-copy-bw/dtoh:21": 1.27857,
"gpu-sm-copy-bw/dtoh:22": 4.47889,
"gpu-sm-copy-bw/dtoh:23": 3.95984,
"gpu-sm-copy-bw/dtoh:24": 3.92025,
"gpu-sm-copy-bw/dtoh:25": 4.45423,
"gpu-sm-copy-bw/dtoh:26": 1.26449,
"gpu-sm-copy-bw/dtoh:27": 1.29954,
"gpu-sm-copy-bw/dtoh:28": 1.31731,
"gpu-sm-copy-bw/dtoh:29": 1.27916,
"gpu-sm-copy-bw/dtoh:30": 4.4797,
"gpu-sm-copy-bw/dtoh:31": 3.96124,
"gpu-sm-copy-bw/htod:0": 23.9685,
"gpu-sm-copy-bw/htod:1": 23.967,
"gpu-sm-copy-bw/htod:2": 19.9898,
"gpu-sm-copy-bw/htod:3": 20.2848,
"gpu-sm-copy-bw/htod:4": 20.3931,
"gpu-sm-copy-bw/htod:5": 20.0888,
"gpu-sm-copy-bw/htod:6": 23.9766,
"gpu-sm-copy-bw/htod:7": 23.9792,
"gpu-sm-copy-bw/htod:8": 23.9497,
"gpu-sm-copy-bw/htod:9": 23.9438,
"gpu-sm-copy-bw/htod:10": 20.0039,
"gpu-sm-copy-bw/htod:11": 20.2469,
"gpu-sm-copy-bw/htod:12": 20.4344,
"gpu-sm-copy-bw/htod:13": 20.1005,
"gpu-sm-copy-bw/htod:14": 23.9491,
"gpu-sm-copy-bw/htod:15": 23.9898,
"gpu-sm-copy-bw/htod:16": 23.962,
"gpu-sm-copy-bw/htod:17": 23.97,
"gpu-sm-copy-bw/htod:18": 19.987,
"gpu-sm-copy-bw/htod:19": 20.1549,
"gpu-sm-copy-bw/htod:20": 20.3931,
"gpu-sm-copy-bw/htod:21": 20.1013,
"gpu-sm-copy-bw/htod:22": 23.9865,
"gpu-sm-copy-bw/htod:23": 23.972,
"gpu-sm-copy-bw/htod:24": 23.9569,
"gpu-sm-copy-bw/htod:25": 23.9762,
"gpu-sm-copy-bw/htod:26": 19.92,
"gpu-sm-copy-bw/htod:27": 20.2638,
"gpu-sm-copy-bw/htod:28": 20.4419,
"gpu-sm-copy-bw/htod:29": 20.1054,
"gpu-sm-copy-bw/htod:30": 23.9752,
"gpu-sm-copy-bw/htod:31": 23.9631,
"ib-loopback/IB_write_512_Avg_0:0": 1492.89,
"ib-loopback/IB_write_1024_Avg_0:0": 3224.92,
"ib-loopback/IB_write_2048_Avg_0:0": 6714.5,
"ib-loopback/IB_write_4096_Avg_0:0": 12871.93,
"ib-loopback/IB_write_8192_Avg_0:0": 19990.78,
"ib-loopback/IB_write_16384_Avg_0:0": 22172.25,
"ib-loopback/IB_write_32768_Avg_0:0": 23073.19,
"ib-loopback/IB_write_65536_Avg_0:0": 23527.09,
"ib-loopback/IB_write_131072_Avg_0:0": 23805.92,
"ib-loopback/IB_write_262144_Avg_0:0": 23380.51,
"ib-loopback/IB_write_524288_Avg_0:0": 23856.63,
"ib-loopback/IB_write_1048576_Avg_0:0": 23869.58,
"ib-loopback/IB_write_2097152_Avg_0:0": 23885.7,
"ib-loopback/IB_write_4194304_Avg_0:0": 23914.56,
"ib-loopback/IB_write_8388608_Avg_0:0": 23935.21,
"ib-loopback/IB_write_512_Avg_1:0": 1479.1,
"ib-loopback/IB_write_1024_Avg_1:0": 3246.48,
"ib-loopback/IB_write_2048_Avg_1:0": 6754.94,
"ib-loopback/IB_write_4096_Avg_1:0": 13101.09,
"ib-loopback/IB_write_8192_Avg_1:0": 19945.81,
"ib-loopback/IB_write_16384_Avg_1:0": 22631.62,
"ib-loopback/IB_write_32768_Avg_1:0": 23381.58,
"ib-loopback/IB_write_65536_Avg_1:0": 23515.29,
"ib-loopback/IB_write_131072_Avg_1:0": 23756.01,
"ib-loopback/IB_write_262144_Avg_1:0": 23875.91,
"ib-loopback/IB_write_524288_Avg_1:0": 23911.13,
"ib-loopback/IB_write_1048576_Avg_1:0": 23935.42,
"ib-loopback/IB_write_2097152_Avg_1:0": 23941.25,
"ib-loopback/IB_write_4194304_Avg_1:0": 23922.06,
"ib-loopback/IB_write_8388608_Avg_1:0": 23928.59,
"ib-loopback/IB_write_512_Avg_2:0": 1505.29,
"ib-loopback/IB_write_1024_Avg_2:0": 3215.97,
"ib-loopback/IB_write_2048_Avg_2:0": 6745.49,
"ib-loopback/IB_write_4096_Avg_2:0": 11548.88,
"ib-loopback/IB_write_8192_Avg_2:0": 19432.15,
"ib-loopback/IB_write_16384_Avg_2:0": 22765.51,
"ib-loopback/IB_write_32768_Avg_2:0": 23235.07,
"ib-loopback/IB_write_65536_Avg_2:0": 23620.08,
"ib-loopback/IB_write_131072_Avg_2:0": 23759.08,
"ib-loopback/IB_write_262144_Avg_2:0": 23859.82,
"ib-loopback/IB_write_524288_Avg_2:0": 23775.01,
"ib-loopback/IB_write_1048576_Avg_2:0": 23885.96,
"ib-loopback/IB_write_2097152_Avg_2:0": 23894.73,
"ib-loopback/IB_write_4194304_Avg_2:0": 23909.98,
"ib-loopback/IB_write_8388608_Avg_2:0": 23927.21,
"ib-loopback/IB_write_512_Avg_3:0": 1474.47,
"ib-loopback/IB_write_1024_Avg_3:0": 3344.68,
"ib-loopback/IB_write_2048_Avg_3:0": 6606.35,
"ib-loopback/IB_write_4096_Avg_3:0": 12071.4,
"ib-loopback/IB_write_8192_Avg_3:0": 18408.33,
"ib-loopback/IB_write_16384_Avg_3:0": 20789.27,
"ib-loopback/IB_write_32768_Avg_3:0": 22469.43,
"ib-loopback/IB_write_65536_Avg_3:0": 22777.1,
"ib-loopback/IB_write_131072_Avg_3:0": 23461.03,
"ib-loopback/IB_write_262144_Avg_3:0": 23397.19,
"ib-loopback/IB_write_524288_Avg_3:0": 23526.55,
"ib-loopback/IB_write_1048576_Avg_3:0": 23854.76,
"ib-loopback/IB_write_2097152_Avg_3:0": 23862.23,
"ib-loopback/IB_write_4194304_Avg_3:0": 23931.15,
"ib-loopback/IB_write_8388608_Avg_3:0": 23924.44,
"ib-loopback/IB_write_512_Avg_4:0": 1523.33,
"ib-loopback/IB_write_1024_Avg_4:0": 3233.23,
"ib-loopback/IB_write_2048_Avg_4:0": 6792.88,
"ib-loopback/IB_write_4096_Avg_4:0": 12616.05,
"ib-loopback/IB_write_8192_Avg_4:0": 19324.05,
"ib-loopback/IB_write_16384_Avg_4:0": 22082.51,
"ib-loopback/IB_write_32768_Avg_4:0": 23294.23,
"ib-loopback/IB_write_65536_Avg_4:0": 23546.22,
"ib-loopback/IB_write_131072_Avg_4:0": 23727.91,
"ib-loopback/IB_write_262144_Avg_4:0": 23843.93,
"ib-loopback/IB_write_524288_Avg_4:0": 23905.96,
"ib-loopback/IB_write_1048576_Avg_4:0": 23902.37,
"ib-loopback/IB_write_2097152_Avg_4:0": 23921.03,
"ib-loopback/IB_write_4194304_Avg_4:0": 23921.06,
"ib-loopback/IB_write_8388608_Avg_4:0": 23922.4,
"ib-loopback/IB_write_512_Avg_5:0": 1506.63,
"ib-loopback/IB_write_1024_Avg_5:0": 3261.71,
"ib-loopback/IB_write_2048_Avg_5:0": 6752.38,
"ib-loopback/IB_write_4096_Avg_5:0": 13592.41,
"ib-loopback/IB_write_8192_Avg_5:0": 19989.31,
"ib-loopback/IB_write_16384_Avg_5:0": 22560.57,
"ib-loopback/IB_write_32768_Avg_5:0": 23389.29,
"ib-loopback/IB_write_65536_Avg_5:0": 23503.32,
"ib-loopback/IB_write_131072_Avg_5:0": 23741.51,
"ib-loopback/IB_write_262144_Avg_5:0": 23866.43,
"ib-loopback/IB_write_524288_Avg_5:0": 23898.95,
"ib-loopback/IB_write_1048576_Avg_5:0": 23876.36,
"ib-loopback/IB_write_2097152_Avg_5:0": 23919.63,
"ib-loopback/IB_write_4194304_Avg_5:0": 23924.68,
"ib-loopback/IB_write_8388608_Avg_5:0": 23930.37,
"ib-loopback/IB_write_512_Avg_6:0": 1467.69,
"ib-loopback/IB_write_1024_Avg_6:0": 3157.04,
"ib-loopback/IB_write_2048_Avg_6:0": 6494.61,
"ib-loopback/IB_write_4096_Avg_6:0": 12883.51,
"ib-loopback/IB_write_8192_Avg_6:0": 19207.67,
"ib-loopback/IB_write_16384_Avg_6:0": 22519.39,
"ib-loopback/IB_write_32768_Avg_6:0": 23323.46,
"ib-loopback/IB_write_65536_Avg_6:0": 23523.6,
"ib-loopback/IB_write_131072_Avg_6:0": 23626.67,
"ib-loopback/IB_write_262144_Avg_6:0": 23836.99,
"ib-loopback/IB_write_524288_Avg_6:0": 23904.51,
"ib-loopback/IB_write_1048576_Avg_6:0": 23919.07,
"ib-loopback/IB_write_2097152_Avg_6:0": 23943.82,
"ib-loopback/IB_write_4194304_Avg_6:0": 23936.77,
"ib-loopback/IB_write_8388608_Avg_6:0": 23941.57,
"ib-loopback/IB_write_512_Avg_7:0": 1505.63,
"ib-loopback/IB_write_1024_Avg_7:0": 3259.93,
"ib-loopback/IB_write_2048_Avg_7:0": 6738.6,
"ib-loopback/IB_write_4096_Avg_7:0": 13352.06,
"ib-loopback/IB_write_8192_Avg_7:0": 19941.35,
"ib-loopback/IB_write_16384_Avg_7:0": 22566.09,
"ib-loopback/IB_write_32768_Avg_7:0": 23244.77,
"ib-loopback/IB_write_65536_Avg_7:0": 23377.67,
"ib-loopback/IB_write_131072_Avg_7:0": 23736.17,
"ib-loopback/IB_write_262144_Avg_7:0": 23829.25,
"ib-loopback/IB_write_524288_Avg_7:0": 23879.6,
"ib-loopback/IB_write_1048576_Avg_7:0": 23895.1,
"ib-loopback/IB_write_2097152_Avg_7:0": 23930.64,
"ib-loopback/IB_write_4194304_Avg_7:0": 23845.63,
"ib-loopback/IB_write_8388608_Avg_7:0": 23896.94,
"kernel-launch/return_code": 0.0,
"kernel-launch/event_overhead:0": 0.1,
"kernel-launch/event_overhead:1": 0.00595,
"kernel-launch/event_overhead:2": 0.00557,
......@@ -8,7 +301,6 @@
"kernel-launch/event_overhead:5": 0.00589,
"kernel-launch/event_overhead:6": 0.00572,
"kernel-launch/event_overhead:7": 0.0059,
"kernel-launch/return_code": 0.0,
"kernel-launch/wall_overhead:0": 0.01026,
"kernel-launch/wall_overhead:1": 0.01026,
"kernel-launch/wall_overhead:2": 0.01046,
......@@ -17,14 +309,12 @@
"kernel-launch/wall_overhead:5": 0.01006,
"kernel-launch/wall_overhead:6": 0.01045,
"kernel-launch/wall_overhead:7": 0.01071,
"mem-bw/D2H_Mem_BW:0": 24.3,
"mem-bw/D2H_Mem_BW:1": 24.6,
"mem-bw/D2H_Mem_BW:2": 24.5,
"mem-bw/D2H_Mem_BW:3": 24.6,
"mem-bw/D2H_Mem_BW:4": 24.3,
"mem-bw/D2H_Mem_BW:5": 24.3,
"mem-bw/D2H_Mem_BW:6": 23.9,
"mem-bw/D2H_Mem_BW:7": 24.6,
"lstm_models/pytorch-lstm/steptime_train_float32": 48.0702451896,
"lstm_models/pytorch-lstm/throughput_train_float32": 4806.4724411328,
"lstm_models/pytorch-lstm/steptime_train_float16": 25.9531298652,
"lstm_models/pytorch-lstm/throughput_train_float16": 9069.9080925588,
"pytorch-matmul/nosharding": 34.6449975967,
"mem-bw/return_code": 0.0,
"mem-bw/H2D_Mem_BW:0": 25.6,
"mem-bw/H2D_Mem_BW:1": 25.8,
"mem-bw/H2D_Mem_BW:2": 26.0,
......@@ -33,7 +323,145 @@
"mem-bw/H2D_Mem_BW:5": 25.8,
"mem-bw/H2D_Mem_BW:6": 25.3,
"mem-bw/H2D_Mem_BW:7": 26.1,
"mem-bw/return_code": 0.0,
"mem-bw/D2H_Mem_BW:0": 24.3,
"mem-bw/D2H_Mem_BW:1": 24.6,
"mem-bw/D2H_Mem_BW:2": 24.5,
"mem-bw/D2H_Mem_BW:3": 24.6,
"mem-bw/D2H_Mem_BW:4": 24.3,
"mem-bw/D2H_Mem_BW:5": 24.3,
"mem-bw/D2H_Mem_BW:6": 23.9,
"mem-bw/D2H_Mem_BW:7": 24.6,
"mem-bw/D2D_Mem_BW:0": 1118.0,
"mem-bw/D2D_Mem_BW:1": 1114.6,
"mem-bw/D2D_Mem_BW:2": 1119.7,
"mem-bw/D2D_Mem_BW:3": 1121.9,
"mem-bw/D2D_Mem_BW:4": 1109.7,
"mem-bw/D2D_Mem_BW:5": 1110.1,
"mem-bw/D2D_Mem_BW:6": 1123.3,
"mem-bw/D2D_Mem_BW:7": 1117.6,
"nccl-bw/allreduce_8_busbw:0": 0.0,
"nccl-bw/allreduce_8_algbw:0": 0.0,
"nccl-bw/allreduce_8_time:0": 37.84,
"nccl-bw/allreduce_16_busbw:0": 0.0,
"nccl-bw/allreduce_16_algbw:0": 0.0,
"nccl-bw/allreduce_16_time:0": 36.42,
"nccl-bw/allreduce_32_busbw:0": 0.0,
"nccl-bw/allreduce_32_algbw:0": 0.0,
"nccl-bw/allreduce_32_time:0": 36.87,
"nccl-bw/allreduce_64_busbw:0": 0.0,
"nccl-bw/allreduce_64_algbw:0": 0.0,
"nccl-bw/allreduce_64_time:0": 35.83,
"nccl-bw/allreduce_128_busbw:0": 0.01,
"nccl-bw/allreduce_128_algbw:0": 0.0,
"nccl-bw/allreduce_128_time:0": 36.91,
"nccl-bw/allreduce_256_busbw:0": 0.01,
"nccl-bw/allreduce_256_algbw:0": 0.01,
"nccl-bw/allreduce_256_time:0": 37.58,
"nccl-bw/allreduce_512_busbw:0": 0.02,
"nccl-bw/allreduce_512_algbw:0": 0.01,
"nccl-bw/allreduce_512_time:0": 36.98,
"nccl-bw/allreduce_1024_busbw:0": 0.05,
"nccl-bw/allreduce_1024_algbw:0": 0.03,
"nccl-bw/allreduce_1024_time:0": 36.93,
"nccl-bw/allreduce_2048_busbw:0": 0.1,
"nccl-bw/allreduce_2048_algbw:0": 0.06,
"nccl-bw/allreduce_2048_time:0": 36.06,
"nccl-bw/allreduce_4096_busbw:0": 0.19,
"nccl-bw/allreduce_4096_algbw:0": 0.11,
"nccl-bw/allreduce_4096_time:0": 37.2,
"nccl-bw/allreduce_8192_busbw:0": 0.39,
"nccl-bw/allreduce_8192_algbw:0": 0.22,
"nccl-bw/allreduce_8192_time:0": 37.04,
"nccl-bw/allreduce_16384_busbw:0": 0.77,
"nccl-bw/allreduce_16384_algbw:0": 0.44,
"nccl-bw/allreduce_16384_time:0": 37.46,
"nccl-bw/allreduce_32768_busbw:0": 1.52,
"nccl-bw/allreduce_32768_algbw:0": 0.87,
"nccl-bw/allreduce_32768_time:0": 37.64,
"nccl-bw/allreduce_65536_busbw:0": 3.0,
"nccl-bw/allreduce_65536_algbw:0": 1.71,
"nccl-bw/allreduce_65536_time:0": 38.22,
"nccl-bw/allreduce_131072_busbw:0": 5.31,
"nccl-bw/allreduce_131072_algbw:0": 3.04,
"nccl-bw/allreduce_131072_time:0": 43.17,
"nccl-bw/allreduce_262144_busbw:0": 9.5,
"nccl-bw/allreduce_262144_algbw:0": 5.43,
"nccl-bw/allreduce_262144_time:0": 48.29,
"nccl-bw/allreduce_524288_busbw:0": 15.11,
"nccl-bw/allreduce_524288_algbw:0": 8.64,
"nccl-bw/allreduce_524288_time:0": 60.71,
"nccl-bw/allreduce_1048576_busbw:0": 24.1,
"nccl-bw/allreduce_1048576_algbw:0": 13.77,
"nccl-bw/allreduce_1048576_time:0": 76.13,
"nccl-bw/allreduce_2097152_busbw:0": 38.12,
"nccl-bw/allreduce_2097152_algbw:0": 21.78,
"nccl-bw/allreduce_2097152_time:0": 96.28,
"nccl-bw/allreduce_4194304_busbw:0": 65.75,
"nccl-bw/allreduce_4194304_algbw:0": 37.57,
"nccl-bw/allreduce_4194304_time:0": 111.6,
"nccl-bw/allreduce_8388608_busbw:0": 89.51,
"nccl-bw/allreduce_8388608_algbw:0": 51.15,
"nccl-bw/allreduce_8388608_time:0": 164.0,
"nccl-bw/allreduce_16777216_busbw:0": 114.38,
"nccl-bw/allreduce_16777216_algbw:0": 65.36,
"nccl-bw/allreduce_16777216_time:0": 256.7,
"nccl-bw/allreduce_33554432_busbw:0": 154.89,
"nccl-bw/allreduce_33554432_algbw:0": 88.51,
"nccl-bw/allreduce_33554432_time:0": 379.1,
"nccl-bw/allreduce_67108864_busbw:0": 200.01,
"nccl-bw/allreduce_67108864_algbw:0": 114.29,
"nccl-bw/allreduce_67108864_time:0": 587.2,
"nccl-bw/allreduce_134217728_busbw:0": 202.97,
"nccl-bw/allreduce_134217728_algbw:0": 115.98,
"nccl-bw/allreduce_134217728_time:0": 1157.2,
"nccl-bw/allreduce_268435456_busbw:0": 221.82,
"nccl-bw/allreduce_268435456_algbw:0": 126.75,
"nccl-bw/allreduce_268435456_time:0": 2117.8,
"nccl-bw/allreduce_536870912_busbw:0": 224.54,
"nccl-bw/allreduce_536870912_algbw:0": 128.31,
"nccl-bw/allreduce_536870912_time:0": 4184.2,
"nccl-bw/allreduce_1073741824_busbw:0": 230.15,
"nccl-bw/allreduce_1073741824_algbw:0": 131.51,
"nccl-bw/allreduce_1073741824_time:0": 8164.5,
"nccl-bw/allreduce_2147483648_busbw:0": 231.89,
"nccl-bw/allreduce_2147483648_algbw:0": 132.51,
"nccl-bw/allreduce_2147483648_time:0": 16207.0,
"nccl-bw/allreduce_4294967296_busbw:0": 234.45,
"nccl-bw/allreduce_4294967296_algbw:0": 133.97,
"nccl-bw/allreduce_4294967296_time:0": 32059.0,
"nccl-bw/allreduce_8589934592_busbw:0": 235.36,
"nccl-bw/allreduce_8589934592_algbw:0": 134.49,
"nccl-bw/allreduce_8589934592_time:0": 63870.0,
"resnet_models/pytorch-resnet50/steptime_train_float32": 253.9552273229,
"resnet_models/pytorch-resnet50/throughput_train_float32": 760.334809913,
"resnet_models/pytorch-resnet50/steptime_train_float16": 200.0860618427,
"resnet_models/pytorch-resnet50/throughput_train_float16": 971.0651430923,
"resnet_models/pytorch-resnet101/steptime_train_float32": 389.0860509127,
"resnet_models/pytorch-resnet101/throughput_train_float32": 496.117474093,
"resnet_models/pytorch-resnet101/steptime_train_float16": 308.6274107918,
"resnet_models/pytorch-resnet101/throughput_train_float16": 627.2056272195,
"resnet_models/pytorch-resnet152/steptime_train_float32": 547.6558278315,
"resnet_models/pytorch-resnet152/throughput_train_float32": 352.0709954335,
"resnet_models/pytorch-resnet152/steptime_train_float16": 424.5809856802,
"resnet_models/pytorch-resnet152/throughput_train_float16": 454.8335998154,
"pytorch-sharding-matmul/allreduce": 10.574411869,
"pytorch-sharding-matmul/allgather": 10.0846967697,
"vgg_models/pytorch-vgg11/steptime_train_float32": 40.3528367169,
"vgg_models/pytorch-vgg11/throughput_train_float32": 796.361593695,
"vgg_models/pytorch-vgg11/steptime_train_float16": 24.1335148457,
"vgg_models/pytorch-vgg11/throughput_train_float16": 1330.4113614585,
"vgg_models/pytorch-vgg13/steptime_train_float32": 55.466310936,
"vgg_models/pytorch-vgg13/throughput_train_float32": 580.2341074444,
"vgg_models/pytorch-vgg13/steptime_train_float16": 33.3522899309,
"vgg_models/pytorch-vgg13/throughput_train_float16": 962.5332023902,
"vgg_models/pytorch-vgg16/steptime_train_float32": 65.225199447,
"vgg_models/pytorch-vgg16/throughput_train_float32": 493.4268638876,
"vgg_models/pytorch-vgg16/steptime_train_float16": 39.2528773518,
"vgg_models/pytorch-vgg16/throughput_train_float16": 817.2008546148,
"vgg_models/pytorch-vgg19/steptime_train_float32": 74.9348710524,
"vgg_models/pytorch-vgg19/throughput_train_float32": 429.8092158311,
"vgg_models/pytorch-vgg19/steptime_train_float16": 45.2033062465,
"vgg_models/pytorch-vgg19/throughput_train_float16": 709.1127328377,
"Accept": false,
"#Issues": 1.0,
"Category": "KernelLaunch",
......@@ -41,6 +469,299 @@
"Index": "sb-validation-01"
},
{
"bert_models/pytorch-bert-base/steptime_train_float32": 114.5916701062,
"bert_models/pytorch-bert-base/throughput_train_float32": 279.8794623591,
"bert_models/pytorch-bert-base/steptime_train_float16": 83.8895108318,
"bert_models/pytorch-bert-base/throughput_train_float16": 382.0672582742,
"bert_models/pytorch-bert-large/steptime_train_float32": 307.9359371914,
"bert_models/pytorch-bert-large/throughput_train_float32": 103.9487609742,
"bert_models/pytorch-bert-large/steptime_train_float16": 206.8114168942,
"bert_models/pytorch-bert-large/throughput_train_float16": 154.8408911711,
"pytorch-computation-communication-overlap/mul_cost:0": 44.1822062144,
"pytorch-computation-communication-overlap/mul_cost:1": 44.1822139389,
"pytorch-computation-communication-overlap/mul_cost:2": 43.9701470781,
"pytorch-computation-communication-overlap/mul_cost:3": 43.9701478756,
"pytorch-computation-communication-overlap/mul_cost:4": 43.9701779317,
"pytorch-computation-communication-overlap/mul_cost:5": 43.9701571606,
"pytorch-computation-communication-overlap/mul_cost:6": 43.9701651983,
"pytorch-computation-communication-overlap/mul_cost:7": 44.1795444785,
"pytorch-computation-communication-overlap/matmul_cost:0": 137.0477370556,
"pytorch-computation-communication-overlap/matmul_cost:1": 137.0478344693,
"pytorch-computation-communication-overlap/matmul_cost:2": 137.047772209,
"pytorch-computation-communication-overlap/matmul_cost:3": 137.0477969726,
"pytorch-computation-communication-overlap/matmul_cost:4": 137.0481367431,
"pytorch-computation-communication-overlap/matmul_cost:5": 137.0482198877,
"pytorch-computation-communication-overlap/matmul_cost:6": 137.0477532237,
"pytorch-computation-communication-overlap/matmul_cost:7": 137.0478081607,
"densenet_models/pytorch-densenet169/steptime_train_float32": 150.6415554322,
"densenet_models/pytorch-densenet169/throughput_train_float32": 212.4701719243,
"densenet_models/pytorch-densenet169/steptime_train_float16": 145.3081957297,
"densenet_models/pytorch-densenet169/throughput_train_float16": 220.2932337305,
"densenet_models/pytorch-densenet201/steptime_train_float32": 182.9111778643,
"densenet_models/pytorch-densenet201/throughput_train_float32": 174.9742278232,
"densenet_models/pytorch-densenet201/steptime_train_float16": 176.3489063596,
"densenet_models/pytorch-densenet201/throughput_train_float16": 181.5446573603,
"gemm-flops/FP64:0": 9031.23,
"gemm-flops/FP64:1": 9040.85,
"gemm-flops/FP64:2": 9010.56,
"gemm-flops/FP64:3": 9041.26,
"gemm-flops/FP64:4": 9039.19,
"gemm-flops/FP64:5": 9015.69,
"gemm-flops/FP64:6": 9022.19,
"gemm-flops/FP64:7": 9030.2,
"gemm-flops/FP32:0": 18362.1,
"gemm-flops/FP32:1": 18375.6,
"gemm-flops/FP32:2": 18314.9,
"gemm-flops/FP32:3": 18375.6,
"gemm-flops/FP32:4": 18368.6,
"gemm-flops/FP32:5": 18347.1,
"gemm-flops/FP32:6": 18247.4,
"gemm-flops/FP32:7": 18318.4,
"gemm-flops/FP16:0": 33878.0,
"gemm-flops/FP16:1": 33911.1,
"gemm-flops/FP16:2": 33769.3,
"gemm-flops/FP16:3": 33909.9,
"gemm-flops/FP16:4": 33896.5,
"gemm-flops/FP16:5": 33798.1,
"gemm-flops/FP16:6": 33647.3,
"gemm-flops/FP16:7": 33764.8,
"gemm-flops/FP64_TC:0": 18963.6,
"gemm-flops/FP64_TC:1": 18924.2,
"gemm-flops/FP64_TC:2": 18930.3,
"gemm-flops/FP64_TC:3": 18971.9,
"gemm-flops/FP64_TC:4": 18946.0,
"gemm-flops/FP64_TC:5": 18945.0,
"gemm-flops/FP64_TC:6": 18822.9,
"gemm-flops/FP64_TC:7": 18911.1,
"gemm-flops/TF32_TC:0": 127900.0,
"gemm-flops/TF32_TC:1": 129094.0,
"gemm-flops/TF32_TC:2": 127831.0,
"gemm-flops/TF32_TC:3": 128709.0,
"gemm-flops/TF32_TC:4": 127388.0,
"gemm-flops/TF32_TC:5": 127861.0,
"gemm-flops/TF32_TC:6": 128492.0,
"gemm-flops/TF32_TC:7": 127720.0,
"gemm-flops/BF16_TC:0": 264965.0,
"gemm-flops/BF16_TC:1": 266638.0,
"gemm-flops/BF16_TC:2": 263151.0,
"gemm-flops/BF16_TC:3": 264752.0,
"gemm-flops/BF16_TC:4": 263049.0,
"gemm-flops/BF16_TC:5": 266605.0,
"gemm-flops/BF16_TC:6": 267501.0,
"gemm-flops/BF16_TC:7": 263880.0,
"gemm-flops/FP16_TC:0": 279474.0,
"gemm-flops/FP16_TC:1": 281256.0,
"gemm-flops/FP16_TC:2": 277403.0,
"gemm-flops/FP16_TC:3": 279147.0,
"gemm-flops/FP16_TC:4": 277587.0,
"gemm-flops/FP16_TC:5": 281537.0,
"gemm-flops/FP16_TC:6": 282132.0,
"gemm-flops/FP16_TC:7": 277788.0,
"gemm-flops/INT8_TC:0": 475160.0,
"gemm-flops/INT8_TC:1": 477725.0,
"gemm-flops/INT8_TC:2": 471621.0,
"gemm-flops/INT8_TC:3": 473716.0,
"gemm-flops/INT8_TC:4": 472124.0,
"gemm-flops/INT8_TC:5": 479972.0,
"gemm-flops/INT8_TC:6": 481327.0,
"gemm-flops/INT8_TC:7": 474710.0,
"gemm-flops/INT4_TC:0": 970330.0,
"gemm-flops/INT4_TC:1": 976837.0,
"gemm-flops/INT4_TC:2": 966003.0,
"gemm-flops/INT4_TC:3": 971315.0,
"gemm-flops/INT4_TC:4": 964441.0,
"gemm-flops/INT4_TC:5": 982461.0,
"gemm-flops/INT4_TC:6": 979610.0,
"gemm-flops/INT4_TC:7": 968359.0,
"gpt_models/pytorch-gpt2-large/steptime_train_float32": 295.0526971836,
"gpt_models/pytorch-gpt2-large/throughput_train_float32": 27.1154543969,
"gpt_models/pytorch-gpt2-large/steptime_train_float16": 194.4957742235,
"gpt_models/pytorch-gpt2-large/throughput_train_float16": 41.1394499411,
"gpu-sm-copy-bw/dtoh:0": 3.91755,
"gpu-sm-copy-bw/dtoh:1": 4.45414,
"gpu-sm-copy-bw/dtoh:2": 1.26483,
"gpu-sm-copy-bw/dtoh:3": 1.30041,
"gpu-sm-copy-bw/dtoh:4": 1.31577,
"gpu-sm-copy-bw/dtoh:5": 1.27968,
"gpu-sm-copy-bw/dtoh:6": 4.47849,
"gpu-sm-copy-bw/dtoh:7": 3.96231,
"gpu-sm-copy-bw/dtoh:8": 3.91705,
"gpu-sm-copy-bw/dtoh:9": 4.45487,
"gpu-sm-copy-bw/dtoh:10": 1.26352,
"gpu-sm-copy-bw/dtoh:11": 1.2999,
"gpu-sm-copy-bw/dtoh:12": 1.31677,
"gpu-sm-copy-bw/dtoh:13": 1.27885,
"gpu-sm-copy-bw/dtoh:14": 4.47913,
"gpu-sm-copy-bw/dtoh:15": 3.95893,
"gpu-sm-copy-bw/dtoh:16": 3.91729,
"gpu-sm-copy-bw/dtoh:17": 4.45627,
"gpu-sm-copy-bw/dtoh:18": 1.26437,
"gpu-sm-copy-bw/dtoh:19": 1.30144,
"gpu-sm-copy-bw/dtoh:20": 1.31704,
"gpu-sm-copy-bw/dtoh:21": 1.27857,
"gpu-sm-copy-bw/dtoh:22": 4.47889,
"gpu-sm-copy-bw/dtoh:23": 3.95984,
"gpu-sm-copy-bw/dtoh:24": 3.92025,
"gpu-sm-copy-bw/dtoh:25": 4.45423,
"gpu-sm-copy-bw/dtoh:26": 1.26449,
"gpu-sm-copy-bw/dtoh:27": 1.29954,
"gpu-sm-copy-bw/dtoh:28": 1.31731,
"gpu-sm-copy-bw/dtoh:29": 1.27916,
"gpu-sm-copy-bw/dtoh:30": 4.4797,
"gpu-sm-copy-bw/dtoh:31": 3.96124,
"gpu-sm-copy-bw/htod:0": 23.9685,
"gpu-sm-copy-bw/htod:1": 23.967,
"gpu-sm-copy-bw/htod:2": 19.9898,
"gpu-sm-copy-bw/htod:3": 20.2848,
"gpu-sm-copy-bw/htod:4": 20.3931,
"gpu-sm-copy-bw/htod:5": 20.0888,
"gpu-sm-copy-bw/htod:6": 23.9766,
"gpu-sm-copy-bw/htod:7": 23.9792,
"gpu-sm-copy-bw/htod:8": 23.9497,
"gpu-sm-copy-bw/htod:9": 23.9438,
"gpu-sm-copy-bw/htod:10": 20.0039,
"gpu-sm-copy-bw/htod:11": 20.2469,
"gpu-sm-copy-bw/htod:12": 20.4344,
"gpu-sm-copy-bw/htod:13": 20.1005,
"gpu-sm-copy-bw/htod:14": 23.9491,
"gpu-sm-copy-bw/htod:15": 23.9898,
"gpu-sm-copy-bw/htod:16": 23.962,
"gpu-sm-copy-bw/htod:17": 23.97,
"gpu-sm-copy-bw/htod:18": 19.987,
"gpu-sm-copy-bw/htod:19": 20.1549,
"gpu-sm-copy-bw/htod:20": 20.3931,
"gpu-sm-copy-bw/htod:21": 20.1013,
"gpu-sm-copy-bw/htod:22": 23.9865,
"gpu-sm-copy-bw/htod:23": 23.972,
"gpu-sm-copy-bw/htod:24": 23.9569,
"gpu-sm-copy-bw/htod:25": 23.9762,
"gpu-sm-copy-bw/htod:26": 19.92,
"gpu-sm-copy-bw/htod:27": 20.2638,
"gpu-sm-copy-bw/htod:28": 20.4419,
"gpu-sm-copy-bw/htod:29": 20.1054,
"gpu-sm-copy-bw/htod:30": 23.9752,
"gpu-sm-copy-bw/htod:31": 23.9631,
"ib-loopback/IB_write_512_Avg_0:0": 1492.89,
"ib-loopback/IB_write_1024_Avg_0:0": 3224.92,
"ib-loopback/IB_write_2048_Avg_0:0": 6714.5,
"ib-loopback/IB_write_4096_Avg_0:0": 12871.93,
"ib-loopback/IB_write_8192_Avg_0:0": 19990.78,
"ib-loopback/IB_write_16384_Avg_0:0": 22172.25,
"ib-loopback/IB_write_32768_Avg_0:0": 23073.19,
"ib-loopback/IB_write_65536_Avg_0:0": 23527.09,
"ib-loopback/IB_write_131072_Avg_0:0": 23805.92,
"ib-loopback/IB_write_262144_Avg_0:0": 23380.51,
"ib-loopback/IB_write_524288_Avg_0:0": 23856.63,
"ib-loopback/IB_write_1048576_Avg_0:0": 23869.58,
"ib-loopback/IB_write_2097152_Avg_0:0": 23885.7,
"ib-loopback/IB_write_4194304_Avg_0:0": 23914.56,
"ib-loopback/IB_write_8388608_Avg_0:0": 23935.21,
"ib-loopback/IB_write_512_Avg_1:0": 1479.1,
"ib-loopback/IB_write_1024_Avg_1:0": 3246.48,
"ib-loopback/IB_write_2048_Avg_1:0": 6754.94,
"ib-loopback/IB_write_4096_Avg_1:0": 13101.09,
"ib-loopback/IB_write_8192_Avg_1:0": 19945.81,
"ib-loopback/IB_write_16384_Avg_1:0": 22631.62,
"ib-loopback/IB_write_32768_Avg_1:0": 23381.58,
"ib-loopback/IB_write_65536_Avg_1:0": 23515.29,
"ib-loopback/IB_write_131072_Avg_1:0": 23756.01,
"ib-loopback/IB_write_262144_Avg_1:0": 23875.91,
"ib-loopback/IB_write_524288_Avg_1:0": 23911.13,
"ib-loopback/IB_write_1048576_Avg_1:0": 23935.42,
"ib-loopback/IB_write_2097152_Avg_1:0": 23941.25,
"ib-loopback/IB_write_4194304_Avg_1:0": 23922.06,
"ib-loopback/IB_write_8388608_Avg_1:0": 23928.59,
"ib-loopback/IB_write_512_Avg_2:0": 1505.29,
"ib-loopback/IB_write_1024_Avg_2:0": 3215.97,
"ib-loopback/IB_write_2048_Avg_2:0": 6745.49,
"ib-loopback/IB_write_4096_Avg_2:0": 11548.88,
"ib-loopback/IB_write_8192_Avg_2:0": 19432.15,
"ib-loopback/IB_write_16384_Avg_2:0": 22765.51,
"ib-loopback/IB_write_32768_Avg_2:0": 23235.07,
"ib-loopback/IB_write_65536_Avg_2:0": 23620.08,
"ib-loopback/IB_write_131072_Avg_2:0": 23759.08,
"ib-loopback/IB_write_262144_Avg_2:0": 23859.82,
"ib-loopback/IB_write_524288_Avg_2:0": 23775.01,
"ib-loopback/IB_write_1048576_Avg_2:0": 23885.96,
"ib-loopback/IB_write_2097152_Avg_2:0": 23894.73,
"ib-loopback/IB_write_4194304_Avg_2:0": 23909.98,
"ib-loopback/IB_write_8388608_Avg_2:0": 23927.21,
"ib-loopback/IB_write_512_Avg_3:0": 1474.47,
"ib-loopback/IB_write_1024_Avg_3:0": 3344.68,
"ib-loopback/IB_write_2048_Avg_3:0": 6606.35,
"ib-loopback/IB_write_4096_Avg_3:0": 12071.4,
"ib-loopback/IB_write_8192_Avg_3:0": 18408.33,
"ib-loopback/IB_write_16384_Avg_3:0": 20789.27,
"ib-loopback/IB_write_32768_Avg_3:0": 22469.43,
"ib-loopback/IB_write_65536_Avg_3:0": 22777.1,
"ib-loopback/IB_write_131072_Avg_3:0": 23461.03,
"ib-loopback/IB_write_262144_Avg_3:0": 23397.19,
"ib-loopback/IB_write_524288_Avg_3:0": 23526.55,
"ib-loopback/IB_write_1048576_Avg_3:0": 23854.76,
"ib-loopback/IB_write_2097152_Avg_3:0": 23862.23,
"ib-loopback/IB_write_4194304_Avg_3:0": 23931.15,
"ib-loopback/IB_write_8388608_Avg_3:0": 23924.44,
"ib-loopback/IB_write_512_Avg_4:0": 1523.33,
"ib-loopback/IB_write_1024_Avg_4:0": 3233.23,
"ib-loopback/IB_write_2048_Avg_4:0": 6792.88,
"ib-loopback/IB_write_4096_Avg_4:0": 12616.05,
"ib-loopback/IB_write_8192_Avg_4:0": 19324.05,
"ib-loopback/IB_write_16384_Avg_4:0": 22082.51,
"ib-loopback/IB_write_32768_Avg_4:0": 23294.23,
"ib-loopback/IB_write_65536_Avg_4:0": 23546.22,
"ib-loopback/IB_write_131072_Avg_4:0": 23727.91,
"ib-loopback/IB_write_262144_Avg_4:0": 23843.93,
"ib-loopback/IB_write_524288_Avg_4:0": 23905.96,
"ib-loopback/IB_write_1048576_Avg_4:0": 23902.37,
"ib-loopback/IB_write_2097152_Avg_4:0": 23921.03,
"ib-loopback/IB_write_4194304_Avg_4:0": 23921.06,
"ib-loopback/IB_write_8388608_Avg_4:0": 23922.4,
"ib-loopback/IB_write_512_Avg_5:0": 1506.63,
"ib-loopback/IB_write_1024_Avg_5:0": 3261.71,
"ib-loopback/IB_write_2048_Avg_5:0": 6752.38,
"ib-loopback/IB_write_4096_Avg_5:0": 13592.41,
"ib-loopback/IB_write_8192_Avg_5:0": 19989.31,
"ib-loopback/IB_write_16384_Avg_5:0": 22560.57,
"ib-loopback/IB_write_32768_Avg_5:0": 23389.29,
"ib-loopback/IB_write_65536_Avg_5:0": 23503.32,
"ib-loopback/IB_write_131072_Avg_5:0": 23741.51,
"ib-loopback/IB_write_262144_Avg_5:0": 23866.43,
"ib-loopback/IB_write_524288_Avg_5:0": 23898.95,
"ib-loopback/IB_write_1048576_Avg_5:0": 23876.36,
"ib-loopback/IB_write_2097152_Avg_5:0": 23919.63,
"ib-loopback/IB_write_4194304_Avg_5:0": 23924.68,
"ib-loopback/IB_write_8388608_Avg_5:0": 23930.37,
"ib-loopback/IB_write_512_Avg_6:0": 1467.69,
"ib-loopback/IB_write_1024_Avg_6:0": 3157.04,
"ib-loopback/IB_write_2048_Avg_6:0": 6494.61,
"ib-loopback/IB_write_4096_Avg_6:0": 12883.51,
"ib-loopback/IB_write_8192_Avg_6:0": 19207.67,
"ib-loopback/IB_write_16384_Avg_6:0": 22519.39,
"ib-loopback/IB_write_32768_Avg_6:0": 23323.46,
"ib-loopback/IB_write_65536_Avg_6:0": 23523.6,
"ib-loopback/IB_write_131072_Avg_6:0": 23626.67,
"ib-loopback/IB_write_262144_Avg_6:0": 23836.99,
"ib-loopback/IB_write_524288_Avg_6:0": 23904.51,
"ib-loopback/IB_write_1048576_Avg_6:0": 23919.07,
"ib-loopback/IB_write_2097152_Avg_6:0": 23943.82,
"ib-loopback/IB_write_4194304_Avg_6:0": 23936.77,
"ib-loopback/IB_write_8388608_Avg_6:0": 23941.57,
"ib-loopback/IB_write_512_Avg_7:0": 1505.63,
"ib-loopback/IB_write_1024_Avg_7:0": 3259.93,
"ib-loopback/IB_write_2048_Avg_7:0": 6738.6,
"ib-loopback/IB_write_4096_Avg_7:0": 13352.06,
"ib-loopback/IB_write_8192_Avg_7:0": 19941.35,
"ib-loopback/IB_write_16384_Avg_7:0": 22566.09,
"ib-loopback/IB_write_32768_Avg_7:0": 23244.77,
"ib-loopback/IB_write_65536_Avg_7:0": 23377.67,
"ib-loopback/IB_write_131072_Avg_7:0": 23736.17,
"ib-loopback/IB_write_262144_Avg_7:0": 23829.25,
"ib-loopback/IB_write_524288_Avg_7:0": 23879.6,
"ib-loopback/IB_write_1048576_Avg_7:0": 23895.1,
"ib-loopback/IB_write_2097152_Avg_7:0": 23930.64,
"ib-loopback/IB_write_4194304_Avg_7:0": 23845.63,
"ib-loopback/IB_write_8388608_Avg_7:0": 23896.94,
"kernel-launch/return_code": 0.0,
"kernel-launch/event_overhead:0": 0.00595,
"kernel-launch/event_overhead:1": 0.00595,
"kernel-launch/event_overhead:2": 0.00557,
......@@ -49,7 +770,6 @@
"kernel-launch/event_overhead:5": 0.00589,
"kernel-launch/event_overhead:6": 0.00572,
"kernel-launch/event_overhead:7": 0.0059,
"kernel-launch/return_code": 0.0,
"kernel-launch/wall_overhead:0": 0.01026,
"kernel-launch/wall_overhead:1": 0.01026,
"kernel-launch/wall_overhead:2": 0.01046,
......@@ -58,14 +778,12 @@
"kernel-launch/wall_overhead:5": 0.01006,
"kernel-launch/wall_overhead:6": 0.01045,
"kernel-launch/wall_overhead:7": 0.01071,
"mem-bw/D2H_Mem_BW:0": 24.3,
"mem-bw/D2H_Mem_BW:1": 24.6,
"mem-bw/D2H_Mem_BW:2": 24.5,
"mem-bw/D2H_Mem_BW:3": 24.6,
"mem-bw/D2H_Mem_BW:4": 24.3,
"mem-bw/D2H_Mem_BW:5": 24.3,
"mem-bw/D2H_Mem_BW:6": 23.9,
"mem-bw/D2H_Mem_BW:7": 24.6,
"lstm_models/pytorch-lstm/steptime_train_float32": 48.0702451896,
"lstm_models/pytorch-lstm/throughput_train_float32": 4806.4724411328,
"lstm_models/pytorch-lstm/steptime_train_float16": 25.9531298652,
"lstm_models/pytorch-lstm/throughput_train_float16": 9069.9080925588,
"pytorch-matmul/nosharding": 34.6449975967,
"mem-bw/return_code": 0.0,
"mem-bw/H2D_Mem_BW:0": 25.6,
"mem-bw/H2D_Mem_BW:1": 25.8,
"mem-bw/H2D_Mem_BW:2": 26.0,
......@@ -74,7 +792,145 @@
"mem-bw/H2D_Mem_BW:5": 25.8,
"mem-bw/H2D_Mem_BW:6": 25.3,
"mem-bw/H2D_Mem_BW:7": 26.1,
"mem-bw/return_code": 0.0,
"mem-bw/D2H_Mem_BW:0": 24.3,
"mem-bw/D2H_Mem_BW:1": 24.6,
"mem-bw/D2H_Mem_BW:2": 24.5,
"mem-bw/D2H_Mem_BW:3": 24.6,
"mem-bw/D2H_Mem_BW:4": 24.3,
"mem-bw/D2H_Mem_BW:5": 24.3,
"mem-bw/D2H_Mem_BW:6": 23.9,
"mem-bw/D2H_Mem_BW:7": 24.6,
"mem-bw/D2D_Mem_BW:0": 1118.0,
"mem-bw/D2D_Mem_BW:1": 1114.6,
"mem-bw/D2D_Mem_BW:2": 1119.7,
"mem-bw/D2D_Mem_BW:3": 1121.9,
"mem-bw/D2D_Mem_BW:4": 1109.7,
"mem-bw/D2D_Mem_BW:5": 1110.1,
"mem-bw/D2D_Mem_BW:6": 1123.3,
"mem-bw/D2D_Mem_BW:7": 1117.6,
"nccl-bw/allreduce_8_busbw:0": 0.0,
"nccl-bw/allreduce_8_algbw:0": 0.0,
"nccl-bw/allreduce_8_time:0": 37.84,
"nccl-bw/allreduce_16_busbw:0": 0.0,
"nccl-bw/allreduce_16_algbw:0": 0.0,
"nccl-bw/allreduce_16_time:0": 36.42,
"nccl-bw/allreduce_32_busbw:0": 0.0,
"nccl-bw/allreduce_32_algbw:0": 0.0,
"nccl-bw/allreduce_32_time:0": 36.87,
"nccl-bw/allreduce_64_busbw:0": 0.0,
"nccl-bw/allreduce_64_algbw:0": 0.0,
"nccl-bw/allreduce_64_time:0": 35.83,
"nccl-bw/allreduce_128_busbw:0": 0.01,
"nccl-bw/allreduce_128_algbw:0": 0.0,
"nccl-bw/allreduce_128_time:0": 36.91,
"nccl-bw/allreduce_256_busbw:0": 0.01,
"nccl-bw/allreduce_256_algbw:0": 0.01,
"nccl-bw/allreduce_256_time:0": 37.58,
"nccl-bw/allreduce_512_busbw:0": 0.02,
"nccl-bw/allreduce_512_algbw:0": 0.01,
"nccl-bw/allreduce_512_time:0": 36.98,
"nccl-bw/allreduce_1024_busbw:0": 0.05,
"nccl-bw/allreduce_1024_algbw:0": 0.03,
"nccl-bw/allreduce_1024_time:0": 36.93,
"nccl-bw/allreduce_2048_busbw:0": 0.1,
"nccl-bw/allreduce_2048_algbw:0": 0.06,
"nccl-bw/allreduce_2048_time:0": 36.06,
"nccl-bw/allreduce_4096_busbw:0": 0.19,
"nccl-bw/allreduce_4096_algbw:0": 0.11,
"nccl-bw/allreduce_4096_time:0": 37.2,
"nccl-bw/allreduce_8192_busbw:0": 0.39,
"nccl-bw/allreduce_8192_algbw:0": 0.22,
"nccl-bw/allreduce_8192_time:0": 37.04,
"nccl-bw/allreduce_16384_busbw:0": 0.77,
"nccl-bw/allreduce_16384_algbw:0": 0.44,
"nccl-bw/allreduce_16384_time:0": 37.46,
"nccl-bw/allreduce_32768_busbw:0": 1.52,
"nccl-bw/allreduce_32768_algbw:0": 0.87,
"nccl-bw/allreduce_32768_time:0": 37.64,
"nccl-bw/allreduce_65536_busbw:0": 3.0,
"nccl-bw/allreduce_65536_algbw:0": 1.71,
"nccl-bw/allreduce_65536_time:0": 38.22,
"nccl-bw/allreduce_131072_busbw:0": 5.31,
"nccl-bw/allreduce_131072_algbw:0": 3.04,
"nccl-bw/allreduce_131072_time:0": 43.17,
"nccl-bw/allreduce_262144_busbw:0": 9.5,
"nccl-bw/allreduce_262144_algbw:0": 5.43,
"nccl-bw/allreduce_262144_time:0": 48.29,
"nccl-bw/allreduce_524288_busbw:0": 15.11,
"nccl-bw/allreduce_524288_algbw:0": 8.64,
"nccl-bw/allreduce_524288_time:0": 60.71,
"nccl-bw/allreduce_1048576_busbw:0": 24.1,
"nccl-bw/allreduce_1048576_algbw:0": 13.77,
"nccl-bw/allreduce_1048576_time:0": 76.13,
"nccl-bw/allreduce_2097152_busbw:0": 38.12,
"nccl-bw/allreduce_2097152_algbw:0": 21.78,
"nccl-bw/allreduce_2097152_time:0": 96.28,
"nccl-bw/allreduce_4194304_busbw:0": 65.75,
"nccl-bw/allreduce_4194304_algbw:0": 37.57,
"nccl-bw/allreduce_4194304_time:0": 111.6,
"nccl-bw/allreduce_8388608_busbw:0": 89.51,
"nccl-bw/allreduce_8388608_algbw:0": 51.15,
"nccl-bw/allreduce_8388608_time:0": 164.0,
"nccl-bw/allreduce_16777216_busbw:0": 114.38,
"nccl-bw/allreduce_16777216_algbw:0": 65.36,
"nccl-bw/allreduce_16777216_time:0": 256.7,
"nccl-bw/allreduce_33554432_busbw:0": 154.89,
"nccl-bw/allreduce_33554432_algbw:0": 88.51,
"nccl-bw/allreduce_33554432_time:0": 379.1,
"nccl-bw/allreduce_67108864_busbw:0": 200.01,
"nccl-bw/allreduce_67108864_algbw:0": 114.29,
"nccl-bw/allreduce_67108864_time:0": 587.2,
"nccl-bw/allreduce_134217728_busbw:0": 202.97,
"nccl-bw/allreduce_134217728_algbw:0": 115.98,
"nccl-bw/allreduce_134217728_time:0": 1157.2,
"nccl-bw/allreduce_268435456_busbw:0": 221.82,
"nccl-bw/allreduce_268435456_algbw:0": 126.75,
"nccl-bw/allreduce_268435456_time:0": 2117.8,
"nccl-bw/allreduce_536870912_busbw:0": 224.54,
"nccl-bw/allreduce_536870912_algbw:0": 128.31,
"nccl-bw/allreduce_536870912_time:0": 4184.2,
"nccl-bw/allreduce_1073741824_busbw:0": 230.15,
"nccl-bw/allreduce_1073741824_algbw:0": 131.51,
"nccl-bw/allreduce_1073741824_time:0": 8164.5,
"nccl-bw/allreduce_2147483648_busbw:0": 231.89,
"nccl-bw/allreduce_2147483648_algbw:0": 132.51,
"nccl-bw/allreduce_2147483648_time:0": 16207.0,
"nccl-bw/allreduce_4294967296_busbw:0": 234.45,
"nccl-bw/allreduce_4294967296_algbw:0": 133.97,
"nccl-bw/allreduce_4294967296_time:0": 32059.0,
"nccl-bw/allreduce_8589934592_busbw:0": 235.36,
"nccl-bw/allreduce_8589934592_algbw:0": 134.49,
"nccl-bw/allreduce_8589934592_time:0": 63870.0,
"resnet_models/pytorch-resnet50/steptime_train_float32": 253.9552273229,
"resnet_models/pytorch-resnet50/throughput_train_float32": 760.334809913,
"resnet_models/pytorch-resnet50/steptime_train_float16": 200.0860618427,
"resnet_models/pytorch-resnet50/throughput_train_float16": 971.0651430923,
"resnet_models/pytorch-resnet101/steptime_train_float32": 389.0860509127,
"resnet_models/pytorch-resnet101/throughput_train_float32": 496.117474093,
"resnet_models/pytorch-resnet101/steptime_train_float16": 308.6274107918,
"resnet_models/pytorch-resnet101/throughput_train_float16": 627.2056272195,
"resnet_models/pytorch-resnet152/steptime_train_float32": 547.6558278315,
"resnet_models/pytorch-resnet152/throughput_train_float32": 352.0709954335,
"resnet_models/pytorch-resnet152/steptime_train_float16": 424.5809856802,
"resnet_models/pytorch-resnet152/throughput_train_float16": 454.8335998154,
"pytorch-sharding-matmul/allreduce": 10.574411869,
"pytorch-sharding-matmul/allgather": 10.0846967697,
"vgg_models/pytorch-vgg11/steptime_train_float32": 40.3528367169,
"vgg_models/pytorch-vgg11/throughput_train_float32": 796.361593695,
"vgg_models/pytorch-vgg11/steptime_train_float16": 24.1335148457,
"vgg_models/pytorch-vgg11/throughput_train_float16": 1330.4113614585,
"vgg_models/pytorch-vgg13/steptime_train_float32": 55.466310936,
"vgg_models/pytorch-vgg13/throughput_train_float32": 580.2341074444,
"vgg_models/pytorch-vgg13/steptime_train_float16": 33.3522899309,
"vgg_models/pytorch-vgg13/throughput_train_float16": 962.5332023902,
"vgg_models/pytorch-vgg16/steptime_train_float32": 65.225199447,
"vgg_models/pytorch-vgg16/throughput_train_float32": 493.4268638876,
"vgg_models/pytorch-vgg16/steptime_train_float16": 39.2528773518,
"vgg_models/pytorch-vgg16/throughput_train_float16": 817.2008546148,
"vgg_models/pytorch-vgg19/steptime_train_float32": 74.9348710524,
"vgg_models/pytorch-vgg19/throughput_train_float32": 429.8092158311,
"vgg_models/pytorch-vgg19/steptime_train_float16": 45.2033062465,
"vgg_models/pytorch-vgg19/throughput_train_float16": 709.1127328377,
"Accept": true,
"#Issues": 0.0,
"Category": "",
......@@ -82,6 +938,299 @@
"Index": "sb-validation-02"
},
{
"bert_models/pytorch-bert-base/steptime_train_float32": 114.5916701062,
"bert_models/pytorch-bert-base/throughput_train_float32": 279.8794623591,
"bert_models/pytorch-bert-base/steptime_train_float16": 83.8895108318,
"bert_models/pytorch-bert-base/throughput_train_float16": 382.0672582742,
"bert_models/pytorch-bert-large/steptime_train_float32": 307.9359371914,
"bert_models/pytorch-bert-large/throughput_train_float32": 103.9487609742,
"bert_models/pytorch-bert-large/steptime_train_float16": 206.8114168942,
"bert_models/pytorch-bert-large/throughput_train_float16": 154.8408911711,
"pytorch-computation-communication-overlap/mul_cost:0": 44.1822062144,
"pytorch-computation-communication-overlap/mul_cost:1": 44.1822139389,
"pytorch-computation-communication-overlap/mul_cost:2": 43.9701470781,
"pytorch-computation-communication-overlap/mul_cost:3": 43.9701478756,
"pytorch-computation-communication-overlap/mul_cost:4": 43.9701779317,
"pytorch-computation-communication-overlap/mul_cost:5": 43.9701571606,
"pytorch-computation-communication-overlap/mul_cost:6": 43.9701651983,
"pytorch-computation-communication-overlap/mul_cost:7": 44.1795444785,
"pytorch-computation-communication-overlap/matmul_cost:0": 137.0477370556,
"pytorch-computation-communication-overlap/matmul_cost:1": 137.0478344693,
"pytorch-computation-communication-overlap/matmul_cost:2": 137.047772209,
"pytorch-computation-communication-overlap/matmul_cost:3": 137.0477969726,
"pytorch-computation-communication-overlap/matmul_cost:4": 137.0481367431,
"pytorch-computation-communication-overlap/matmul_cost:5": 137.0482198877,
"pytorch-computation-communication-overlap/matmul_cost:6": 137.0477532237,
"pytorch-computation-communication-overlap/matmul_cost:7": 137.0478081607,
"densenet_models/pytorch-densenet169/steptime_train_float32": 150.6415554322,
"densenet_models/pytorch-densenet169/throughput_train_float32": 212.4701719243,
"densenet_models/pytorch-densenet169/steptime_train_float16": 145.3081957297,
"densenet_models/pytorch-densenet169/throughput_train_float16": 220.2932337305,
"densenet_models/pytorch-densenet201/steptime_train_float32": 182.9111778643,
"densenet_models/pytorch-densenet201/throughput_train_float32": 174.9742278232,
"densenet_models/pytorch-densenet201/steptime_train_float16": 176.3489063596,
"densenet_models/pytorch-densenet201/throughput_train_float16": 181.5446573603,
"gemm-flops/FP64:0": 9031.23,
"gemm-flops/FP64:1": 9040.85,
"gemm-flops/FP64:2": 9010.56,
"gemm-flops/FP64:3": 9041.26,
"gemm-flops/FP64:4": 9039.19,
"gemm-flops/FP64:5": 9015.69,
"gemm-flops/FP64:6": 9022.19,
"gemm-flops/FP64:7": 9030.2,
"gemm-flops/FP32:0": 18362.1,
"gemm-flops/FP32:1": 18375.6,
"gemm-flops/FP32:2": 18314.9,
"gemm-flops/FP32:3": 18375.6,
"gemm-flops/FP32:4": 18368.6,
"gemm-flops/FP32:5": 18347.1,
"gemm-flops/FP32:6": 18247.4,
"gemm-flops/FP32:7": 18318.4,
"gemm-flops/FP16:0": 33878.0,
"gemm-flops/FP16:1": 33911.1,
"gemm-flops/FP16:2": 33769.3,
"gemm-flops/FP16:3": 33909.9,
"gemm-flops/FP16:4": 33896.5,
"gemm-flops/FP16:5": 33798.1,
"gemm-flops/FP16:6": 33647.3,
"gemm-flops/FP16:7": 33764.8,
"gemm-flops/FP64_TC:0": 18963.6,
"gemm-flops/FP64_TC:1": 18924.2,
"gemm-flops/FP64_TC:2": 18930.3,
"gemm-flops/FP64_TC:3": 18971.9,
"gemm-flops/FP64_TC:4": 18946.0,
"gemm-flops/FP64_TC:5": 18945.0,
"gemm-flops/FP64_TC:6": 18822.9,
"gemm-flops/FP64_TC:7": 18911.1,
"gemm-flops/TF32_TC:0": 127900.0,
"gemm-flops/TF32_TC:1": 129094.0,
"gemm-flops/TF32_TC:2": 127831.0,
"gemm-flops/TF32_TC:3": 128709.0,
"gemm-flops/TF32_TC:4": 127388.0,
"gemm-flops/TF32_TC:5": 127861.0,
"gemm-flops/TF32_TC:6": 128492.0,
"gemm-flops/TF32_TC:7": 127720.0,
"gemm-flops/BF16_TC:0": 264965.0,
"gemm-flops/BF16_TC:1": 266638.0,
"gemm-flops/BF16_TC:2": 263151.0,
"gemm-flops/BF16_TC:3": 264752.0,
"gemm-flops/BF16_TC:4": 263049.0,
"gemm-flops/BF16_TC:5": 266605.0,
"gemm-flops/BF16_TC:6": 267501.0,
"gemm-flops/BF16_TC:7": 263880.0,
"gemm-flops/FP16_TC:0": 279474.0,
"gemm-flops/FP16_TC:1": 281256.0,
"gemm-flops/FP16_TC:2": 277403.0,
"gemm-flops/FP16_TC:3": 279147.0,
"gemm-flops/FP16_TC:4": 277587.0,
"gemm-flops/FP16_TC:5": 281537.0,
"gemm-flops/FP16_TC:6": 282132.0,
"gemm-flops/FP16_TC:7": 277788.0,
"gemm-flops/INT8_TC:0": 475160.0,
"gemm-flops/INT8_TC:1": 477725.0,
"gemm-flops/INT8_TC:2": 471621.0,
"gemm-flops/INT8_TC:3": 473716.0,
"gemm-flops/INT8_TC:4": 472124.0,
"gemm-flops/INT8_TC:5": 479972.0,
"gemm-flops/INT8_TC:6": 481327.0,
"gemm-flops/INT8_TC:7": 474710.0,
"gemm-flops/INT4_TC:0": 970330.0,
"gemm-flops/INT4_TC:1": 976837.0,
"gemm-flops/INT4_TC:2": 966003.0,
"gemm-flops/INT4_TC:3": 971315.0,
"gemm-flops/INT4_TC:4": 964441.0,
"gemm-flops/INT4_TC:5": 982461.0,
"gemm-flops/INT4_TC:6": 979610.0,
"gemm-flops/INT4_TC:7": 968359.0,
"gpt_models/pytorch-gpt2-large/steptime_train_float32": 295.0526971836,
"gpt_models/pytorch-gpt2-large/throughput_train_float32": 27.1154543969,
"gpt_models/pytorch-gpt2-large/steptime_train_float16": 194.4957742235,
"gpt_models/pytorch-gpt2-large/throughput_train_float16": 41.1394499411,
"gpu-sm-copy-bw/dtoh:0": 3.91755,
"gpu-sm-copy-bw/dtoh:1": 4.45414,
"gpu-sm-copy-bw/dtoh:2": 1.26483,
"gpu-sm-copy-bw/dtoh:3": 1.30041,
"gpu-sm-copy-bw/dtoh:4": 1.31577,
"gpu-sm-copy-bw/dtoh:5": 1.27968,
"gpu-sm-copy-bw/dtoh:6": 4.47849,
"gpu-sm-copy-bw/dtoh:7": 3.96231,
"gpu-sm-copy-bw/dtoh:8": 3.91705,
"gpu-sm-copy-bw/dtoh:9": 4.45487,
"gpu-sm-copy-bw/dtoh:10": 1.26352,
"gpu-sm-copy-bw/dtoh:11": 1.2999,
"gpu-sm-copy-bw/dtoh:12": 1.31677,
"gpu-sm-copy-bw/dtoh:13": 1.27885,
"gpu-sm-copy-bw/dtoh:14": 4.47913,
"gpu-sm-copy-bw/dtoh:15": 3.95893,
"gpu-sm-copy-bw/dtoh:16": 3.91729,
"gpu-sm-copy-bw/dtoh:17": 4.45627,
"gpu-sm-copy-bw/dtoh:18": 1.26437,
"gpu-sm-copy-bw/dtoh:19": 1.30144,
"gpu-sm-copy-bw/dtoh:20": 1.31704,
"gpu-sm-copy-bw/dtoh:21": 1.27857,
"gpu-sm-copy-bw/dtoh:22": 4.47889,
"gpu-sm-copy-bw/dtoh:23": 3.95984,
"gpu-sm-copy-bw/dtoh:24": 3.92025,
"gpu-sm-copy-bw/dtoh:25": 4.45423,
"gpu-sm-copy-bw/dtoh:26": 1.26449,
"gpu-sm-copy-bw/dtoh:27": 1.29954,
"gpu-sm-copy-bw/dtoh:28": 1.31731,
"gpu-sm-copy-bw/dtoh:29": 1.27916,
"gpu-sm-copy-bw/dtoh:30": 4.4797,
"gpu-sm-copy-bw/dtoh:31": 3.96124,
"gpu-sm-copy-bw/htod:0": 23.9685,
"gpu-sm-copy-bw/htod:1": 23.967,
"gpu-sm-copy-bw/htod:2": 19.9898,
"gpu-sm-copy-bw/htod:3": 20.2848,
"gpu-sm-copy-bw/htod:4": 20.3931,
"gpu-sm-copy-bw/htod:5": 20.0888,
"gpu-sm-copy-bw/htod:6": 23.9766,
"gpu-sm-copy-bw/htod:7": 23.9792,
"gpu-sm-copy-bw/htod:8": 23.9497,
"gpu-sm-copy-bw/htod:9": 23.9438,
"gpu-sm-copy-bw/htod:10": 20.0039,
"gpu-sm-copy-bw/htod:11": 20.2469,
"gpu-sm-copy-bw/htod:12": 20.4344,
"gpu-sm-copy-bw/htod:13": 20.1005,
"gpu-sm-copy-bw/htod:14": 23.9491,
"gpu-sm-copy-bw/htod:15": 23.9898,
"gpu-sm-copy-bw/htod:16": 23.962,
"gpu-sm-copy-bw/htod:17": 23.97,
"gpu-sm-copy-bw/htod:18": 19.987,
"gpu-sm-copy-bw/htod:19": 20.1549,
"gpu-sm-copy-bw/htod:20": 20.3931,
"gpu-sm-copy-bw/htod:21": 20.1013,
"gpu-sm-copy-bw/htod:22": 23.9865,
"gpu-sm-copy-bw/htod:23": 23.972,
"gpu-sm-copy-bw/htod:24": 23.9569,
"gpu-sm-copy-bw/htod:25": 23.9762,
"gpu-sm-copy-bw/htod:26": 19.92,
"gpu-sm-copy-bw/htod:27": 20.2638,
"gpu-sm-copy-bw/htod:28": 20.4419,
"gpu-sm-copy-bw/htod:29": 20.1054,
"gpu-sm-copy-bw/htod:30": 23.9752,
"gpu-sm-copy-bw/htod:31": 23.9631,
"ib-loopback/IB_write_512_Avg_0:0": 1492.89,
"ib-loopback/IB_write_1024_Avg_0:0": 3224.92,
"ib-loopback/IB_write_2048_Avg_0:0": 6714.5,
"ib-loopback/IB_write_4096_Avg_0:0": 12871.93,
"ib-loopback/IB_write_8192_Avg_0:0": 19990.78,
"ib-loopback/IB_write_16384_Avg_0:0": 22172.25,
"ib-loopback/IB_write_32768_Avg_0:0": 23073.19,
"ib-loopback/IB_write_65536_Avg_0:0": 23527.09,
"ib-loopback/IB_write_131072_Avg_0:0": 23805.92,
"ib-loopback/IB_write_262144_Avg_0:0": 23380.51,
"ib-loopback/IB_write_524288_Avg_0:0": 23856.63,
"ib-loopback/IB_write_1048576_Avg_0:0": 23869.58,
"ib-loopback/IB_write_2097152_Avg_0:0": 23885.7,
"ib-loopback/IB_write_4194304_Avg_0:0": 23914.56,
"ib-loopback/IB_write_8388608_Avg_0:0": 23935.21,
"ib-loopback/IB_write_512_Avg_1:0": 1479.1,
"ib-loopback/IB_write_1024_Avg_1:0": 3246.48,
"ib-loopback/IB_write_2048_Avg_1:0": 6754.94,
"ib-loopback/IB_write_4096_Avg_1:0": 13101.09,
"ib-loopback/IB_write_8192_Avg_1:0": 19945.81,
"ib-loopback/IB_write_16384_Avg_1:0": 22631.62,
"ib-loopback/IB_write_32768_Avg_1:0": 23381.58,
"ib-loopback/IB_write_65536_Avg_1:0": 23515.29,
"ib-loopback/IB_write_131072_Avg_1:0": 23756.01,
"ib-loopback/IB_write_262144_Avg_1:0": 23875.91,
"ib-loopback/IB_write_524288_Avg_1:0": 23911.13,
"ib-loopback/IB_write_1048576_Avg_1:0": 23935.42,
"ib-loopback/IB_write_2097152_Avg_1:0": 23941.25,
"ib-loopback/IB_write_4194304_Avg_1:0": 23922.06,
"ib-loopback/IB_write_8388608_Avg_1:0": 23928.59,
"ib-loopback/IB_write_512_Avg_2:0": 1505.29,
"ib-loopback/IB_write_1024_Avg_2:0": 3215.97,
"ib-loopback/IB_write_2048_Avg_2:0": 6745.49,
"ib-loopback/IB_write_4096_Avg_2:0": 11548.88,
"ib-loopback/IB_write_8192_Avg_2:0": 19432.15,
"ib-loopback/IB_write_16384_Avg_2:0": 22765.51,
"ib-loopback/IB_write_32768_Avg_2:0": 23235.07,
"ib-loopback/IB_write_65536_Avg_2:0": 23620.08,
"ib-loopback/IB_write_131072_Avg_2:0": 23759.08,
"ib-loopback/IB_write_262144_Avg_2:0": 23859.82,
"ib-loopback/IB_write_524288_Avg_2:0": 23775.01,
"ib-loopback/IB_write_1048576_Avg_2:0": 23885.96,
"ib-loopback/IB_write_2097152_Avg_2:0": 23894.73,
"ib-loopback/IB_write_4194304_Avg_2:0": 23909.98,
"ib-loopback/IB_write_8388608_Avg_2:0": 23927.21,
"ib-loopback/IB_write_512_Avg_3:0": 1474.47,
"ib-loopback/IB_write_1024_Avg_3:0": 3344.68,
"ib-loopback/IB_write_2048_Avg_3:0": 6606.35,
"ib-loopback/IB_write_4096_Avg_3:0": 12071.4,
"ib-loopback/IB_write_8192_Avg_3:0": 18408.33,
"ib-loopback/IB_write_16384_Avg_3:0": 20789.27,
"ib-loopback/IB_write_32768_Avg_3:0": 22469.43,
"ib-loopback/IB_write_65536_Avg_3:0": 22777.1,
"ib-loopback/IB_write_131072_Avg_3:0": 23461.03,
"ib-loopback/IB_write_262144_Avg_3:0": 23397.19,
"ib-loopback/IB_write_524288_Avg_3:0": 23526.55,
"ib-loopback/IB_write_1048576_Avg_3:0": 23854.76,
"ib-loopback/IB_write_2097152_Avg_3:0": 23862.23,
"ib-loopback/IB_write_4194304_Avg_3:0": 23931.15,
"ib-loopback/IB_write_8388608_Avg_3:0": 23924.44,
"ib-loopback/IB_write_512_Avg_4:0": 1523.33,
"ib-loopback/IB_write_1024_Avg_4:0": 3233.23,
"ib-loopback/IB_write_2048_Avg_4:0": 6792.88,
"ib-loopback/IB_write_4096_Avg_4:0": 12616.05,
"ib-loopback/IB_write_8192_Avg_4:0": 19324.05,
"ib-loopback/IB_write_16384_Avg_4:0": 22082.51,
"ib-loopback/IB_write_32768_Avg_4:0": 23294.23,
"ib-loopback/IB_write_65536_Avg_4:0": 23546.22,
"ib-loopback/IB_write_131072_Avg_4:0": 23727.91,
"ib-loopback/IB_write_262144_Avg_4:0": 23843.93,
"ib-loopback/IB_write_524288_Avg_4:0": 23905.96,
"ib-loopback/IB_write_1048576_Avg_4:0": 23902.37,
"ib-loopback/IB_write_2097152_Avg_4:0": 23921.03,
"ib-loopback/IB_write_4194304_Avg_4:0": 23921.06,
"ib-loopback/IB_write_8388608_Avg_4:0": 23922.4,
"ib-loopback/IB_write_512_Avg_5:0": 1506.63,
"ib-loopback/IB_write_1024_Avg_5:0": 3261.71,
"ib-loopback/IB_write_2048_Avg_5:0": 6752.38,
"ib-loopback/IB_write_4096_Avg_5:0": 13592.41,
"ib-loopback/IB_write_8192_Avg_5:0": 19989.31,
"ib-loopback/IB_write_16384_Avg_5:0": 22560.57,
"ib-loopback/IB_write_32768_Avg_5:0": 23389.29,
"ib-loopback/IB_write_65536_Avg_5:0": 23503.32,
"ib-loopback/IB_write_131072_Avg_5:0": 23741.51,
"ib-loopback/IB_write_262144_Avg_5:0": 23866.43,
"ib-loopback/IB_write_524288_Avg_5:0": 23898.95,
"ib-loopback/IB_write_1048576_Avg_5:0": 23876.36,
"ib-loopback/IB_write_2097152_Avg_5:0": 23919.63,
"ib-loopback/IB_write_4194304_Avg_5:0": 23924.68,
"ib-loopback/IB_write_8388608_Avg_5:0": 23930.37,
"ib-loopback/IB_write_512_Avg_6:0": 1467.69,
"ib-loopback/IB_write_1024_Avg_6:0": 3157.04,
"ib-loopback/IB_write_2048_Avg_6:0": 6494.61,
"ib-loopback/IB_write_4096_Avg_6:0": 12883.51,
"ib-loopback/IB_write_8192_Avg_6:0": 19207.67,
"ib-loopback/IB_write_16384_Avg_6:0": 22519.39,
"ib-loopback/IB_write_32768_Avg_6:0": 23323.46,
"ib-loopback/IB_write_65536_Avg_6:0": 23523.6,
"ib-loopback/IB_write_131072_Avg_6:0": 23626.67,
"ib-loopback/IB_write_262144_Avg_6:0": 23836.99,
"ib-loopback/IB_write_524288_Avg_6:0": 23904.51,
"ib-loopback/IB_write_1048576_Avg_6:0": 23919.07,
"ib-loopback/IB_write_2097152_Avg_6:0": 23943.82,
"ib-loopback/IB_write_4194304_Avg_6:0": 23936.77,
"ib-loopback/IB_write_8388608_Avg_6:0": 23941.57,
"ib-loopback/IB_write_512_Avg_7:0": 1505.63,
"ib-loopback/IB_write_1024_Avg_7:0": 3259.93,
"ib-loopback/IB_write_2048_Avg_7:0": 6738.6,
"ib-loopback/IB_write_4096_Avg_7:0": 13352.06,
"ib-loopback/IB_write_8192_Avg_7:0": 19941.35,
"ib-loopback/IB_write_16384_Avg_7:0": 22566.09,
"ib-loopback/IB_write_32768_Avg_7:0": 23244.77,
"ib-loopback/IB_write_65536_Avg_7:0": 23377.67,
"ib-loopback/IB_write_131072_Avg_7:0": 23736.17,
"ib-loopback/IB_write_262144_Avg_7:0": 23829.25,
"ib-loopback/IB_write_524288_Avg_7:0": 23879.6,
"ib-loopback/IB_write_1048576_Avg_7:0": 23895.1,
"ib-loopback/IB_write_2097152_Avg_7:0": 23930.64,
"ib-loopback/IB_write_4194304_Avg_7:0": 23845.63,
"ib-loopback/IB_write_8388608_Avg_7:0": 23896.94,
"kernel-launch/return_code": 0.0,
"kernel-launch/event_overhead:0": 0.00596,
"kernel-launch/event_overhead:1": 0.00595,
"kernel-launch/event_overhead:2": 0.00557,
......@@ -90,7 +1239,6 @@
"kernel-launch/event_overhead:5": 0.00589,
"kernel-launch/event_overhead:6": 0.00572,
"kernel-launch/event_overhead:7": 0.0059,
"kernel-launch/return_code": 0.0,
"kernel-launch/wall_overhead:0": 0.01026,
"kernel-launch/wall_overhead:1": 0.01026,
"kernel-launch/wall_overhead:2": 0.01046,
......@@ -99,14 +1247,12 @@
"kernel-launch/wall_overhead:5": 0.01006,
"kernel-launch/wall_overhead:6": 0.01045,
"kernel-launch/wall_overhead:7": 0.01071,
"mem-bw/D2H_Mem_BW:0": "",
"mem-bw/D2H_Mem_BW:1": "",
"mem-bw/D2H_Mem_BW:2": "",
"mem-bw/D2H_Mem_BW:3": "",
"mem-bw/D2H_Mem_BW:4": "",
"mem-bw/D2H_Mem_BW:5": "",
"mem-bw/D2H_Mem_BW:6": "",
"mem-bw/D2H_Mem_BW:7": "",
"lstm_models/pytorch-lstm/steptime_train_float32": 48.0702451896,
"lstm_models/pytorch-lstm/throughput_train_float32": 4806.4724411328,
"lstm_models/pytorch-lstm/steptime_train_float16": 25.9531298652,
"lstm_models/pytorch-lstm/throughput_train_float16": 9069.9080925588,
"pytorch-matmul/nosharding": 34.6449975967,
"mem-bw/return_code": 1.0,
"mem-bw/H2D_Mem_BW:0": "",
"mem-bw/H2D_Mem_BW:1": "",
"mem-bw/H2D_Mem_BW:2": "",
......@@ -115,7 +1261,145 @@
"mem-bw/H2D_Mem_BW:5": "",
"mem-bw/H2D_Mem_BW:6": "",
"mem-bw/H2D_Mem_BW:7": "",
"mem-bw/return_code": 1.0,
"mem-bw/D2H_Mem_BW:0": "",
"mem-bw/D2H_Mem_BW:1": "",
"mem-bw/D2H_Mem_BW:2": "",
"mem-bw/D2H_Mem_BW:3": "",
"mem-bw/D2H_Mem_BW:4": "",
"mem-bw/D2H_Mem_BW:5": "",
"mem-bw/D2H_Mem_BW:6": "",
"mem-bw/D2H_Mem_BW:7": "",
"mem-bw/D2D_Mem_BW:0": "",
"mem-bw/D2D_Mem_BW:1": "",
"mem-bw/D2D_Mem_BW:2": "",
"mem-bw/D2D_Mem_BW:3": "",
"mem-bw/D2D_Mem_BW:4": "",
"mem-bw/D2D_Mem_BW:5": "",
"mem-bw/D2D_Mem_BW:6": "",
"mem-bw/D2D_Mem_BW:7": "",
"nccl-bw/allreduce_8_busbw:0": 0.0,
"nccl-bw/allreduce_8_algbw:0": 0.0,
"nccl-bw/allreduce_8_time:0": 37.84,
"nccl-bw/allreduce_16_busbw:0": 0.0,
"nccl-bw/allreduce_16_algbw:0": 0.0,
"nccl-bw/allreduce_16_time:0": 36.42,
"nccl-bw/allreduce_32_busbw:0": 0.0,
"nccl-bw/allreduce_32_algbw:0": 0.0,
"nccl-bw/allreduce_32_time:0": 36.87,
"nccl-bw/allreduce_64_busbw:0": 0.0,
"nccl-bw/allreduce_64_algbw:0": 0.0,
"nccl-bw/allreduce_64_time:0": 35.83,
"nccl-bw/allreduce_128_busbw:0": 0.01,
"nccl-bw/allreduce_128_algbw:0": 0.0,
"nccl-bw/allreduce_128_time:0": 36.91,
"nccl-bw/allreduce_256_busbw:0": 0.01,
"nccl-bw/allreduce_256_algbw:0": 0.01,
"nccl-bw/allreduce_256_time:0": 37.58,
"nccl-bw/allreduce_512_busbw:0": 0.02,
"nccl-bw/allreduce_512_algbw:0": 0.01,
"nccl-bw/allreduce_512_time:0": 36.98,
"nccl-bw/allreduce_1024_busbw:0": 0.05,
"nccl-bw/allreduce_1024_algbw:0": 0.03,
"nccl-bw/allreduce_1024_time:0": 36.93,
"nccl-bw/allreduce_2048_busbw:0": 0.1,
"nccl-bw/allreduce_2048_algbw:0": 0.06,
"nccl-bw/allreduce_2048_time:0": 36.06,
"nccl-bw/allreduce_4096_busbw:0": 0.19,
"nccl-bw/allreduce_4096_algbw:0": 0.11,
"nccl-bw/allreduce_4096_time:0": 37.2,
"nccl-bw/allreduce_8192_busbw:0": 0.39,
"nccl-bw/allreduce_8192_algbw:0": 0.22,
"nccl-bw/allreduce_8192_time:0": 37.04,
"nccl-bw/allreduce_16384_busbw:0": 0.77,
"nccl-bw/allreduce_16384_algbw:0": 0.44,
"nccl-bw/allreduce_16384_time:0": 37.46,
"nccl-bw/allreduce_32768_busbw:0": 1.52,
"nccl-bw/allreduce_32768_algbw:0": 0.87,
"nccl-bw/allreduce_32768_time:0": 37.64,
"nccl-bw/allreduce_65536_busbw:0": 3.0,
"nccl-bw/allreduce_65536_algbw:0": 1.71,
"nccl-bw/allreduce_65536_time:0": 38.22,
"nccl-bw/allreduce_131072_busbw:0": 5.31,
"nccl-bw/allreduce_131072_algbw:0": 3.04,
"nccl-bw/allreduce_131072_time:0": 43.17,
"nccl-bw/allreduce_262144_busbw:0": 9.5,
"nccl-bw/allreduce_262144_algbw:0": 5.43,
"nccl-bw/allreduce_262144_time:0": 48.29,
"nccl-bw/allreduce_524288_busbw:0": 15.11,
"nccl-bw/allreduce_524288_algbw:0": 8.64,
"nccl-bw/allreduce_524288_time:0": 60.71,
"nccl-bw/allreduce_1048576_busbw:0": 24.1,
"nccl-bw/allreduce_1048576_algbw:0": 13.77,
"nccl-bw/allreduce_1048576_time:0": 76.13,
"nccl-bw/allreduce_2097152_busbw:0": 38.12,
"nccl-bw/allreduce_2097152_algbw:0": 21.78,
"nccl-bw/allreduce_2097152_time:0": 96.28,
"nccl-bw/allreduce_4194304_busbw:0": 65.75,
"nccl-bw/allreduce_4194304_algbw:0": 37.57,
"nccl-bw/allreduce_4194304_time:0": 111.6,
"nccl-bw/allreduce_8388608_busbw:0": 89.51,
"nccl-bw/allreduce_8388608_algbw:0": 51.15,
"nccl-bw/allreduce_8388608_time:0": 164.0,
"nccl-bw/allreduce_16777216_busbw:0": 114.38,
"nccl-bw/allreduce_16777216_algbw:0": 65.36,
"nccl-bw/allreduce_16777216_time:0": 256.7,
"nccl-bw/allreduce_33554432_busbw:0": 154.89,
"nccl-bw/allreduce_33554432_algbw:0": 88.51,
"nccl-bw/allreduce_33554432_time:0": 379.1,
"nccl-bw/allreduce_67108864_busbw:0": 200.01,
"nccl-bw/allreduce_67108864_algbw:0": 114.29,
"nccl-bw/allreduce_67108864_time:0": 587.2,
"nccl-bw/allreduce_134217728_busbw:0": 202.97,
"nccl-bw/allreduce_134217728_algbw:0": 115.98,
"nccl-bw/allreduce_134217728_time:0": 1157.2,
"nccl-bw/allreduce_268435456_busbw:0": 221.82,
"nccl-bw/allreduce_268435456_algbw:0": 126.75,
"nccl-bw/allreduce_268435456_time:0": 2117.8,
"nccl-bw/allreduce_536870912_busbw:0": 224.54,
"nccl-bw/allreduce_536870912_algbw:0": 128.31,
"nccl-bw/allreduce_536870912_time:0": 4184.2,
"nccl-bw/allreduce_1073741824_busbw:0": 230.15,
"nccl-bw/allreduce_1073741824_algbw:0": 131.51,
"nccl-bw/allreduce_1073741824_time:0": 8164.5,
"nccl-bw/allreduce_2147483648_busbw:0": 231.89,
"nccl-bw/allreduce_2147483648_algbw:0": 132.51,
"nccl-bw/allreduce_2147483648_time:0": 16207.0,
"nccl-bw/allreduce_4294967296_busbw:0": 234.45,
"nccl-bw/allreduce_4294967296_algbw:0": 133.97,
"nccl-bw/allreduce_4294967296_time:0": 32059.0,
"nccl-bw/allreduce_8589934592_busbw:0": 235.36,
"nccl-bw/allreduce_8589934592_algbw:0": 134.49,
"nccl-bw/allreduce_8589934592_time:0": 63870.0,
"resnet_models/pytorch-resnet50/steptime_train_float32": 253.9552273229,
"resnet_models/pytorch-resnet50/throughput_train_float32": 760.334809913,
"resnet_models/pytorch-resnet50/steptime_train_float16": 200.0860618427,
"resnet_models/pytorch-resnet50/throughput_train_float16": 971.0651430923,
"resnet_models/pytorch-resnet101/steptime_train_float32": 389.0860509127,
"resnet_models/pytorch-resnet101/throughput_train_float32": 496.117474093,
"resnet_models/pytorch-resnet101/steptime_train_float16": 308.6274107918,
"resnet_models/pytorch-resnet101/throughput_train_float16": 627.2056272195,
"resnet_models/pytorch-resnet152/steptime_train_float32": 547.6558278315,
"resnet_models/pytorch-resnet152/throughput_train_float32": 352.0709954335,
"resnet_models/pytorch-resnet152/steptime_train_float16": 424.5809856802,
"resnet_models/pytorch-resnet152/throughput_train_float16": 454.8335998154,
"pytorch-sharding-matmul/allreduce": 10.574411869,
"pytorch-sharding-matmul/allgather": 10.0846967697,
"vgg_models/pytorch-vgg11/steptime_train_float32": 40.3528367169,
"vgg_models/pytorch-vgg11/throughput_train_float32": 796.361593695,
"vgg_models/pytorch-vgg11/steptime_train_float16": 24.1335148457,
"vgg_models/pytorch-vgg11/throughput_train_float16": 1330.4113614585,
"vgg_models/pytorch-vgg13/steptime_train_float32": 55.466310936,
"vgg_models/pytorch-vgg13/throughput_train_float32": 580.2341074444,
"vgg_models/pytorch-vgg13/steptime_train_float16": 33.3522899309,
"vgg_models/pytorch-vgg13/throughput_train_float16": 962.5332023902,
"vgg_models/pytorch-vgg16/steptime_train_float32": 65.225199447,
"vgg_models/pytorch-vgg16/throughput_train_float32": 493.4268638876,
"vgg_models/pytorch-vgg16/steptime_train_float16": 39.2528773518,
"vgg_models/pytorch-vgg16/throughput_train_float16": 817.2008546148,
"vgg_models/pytorch-vgg19/steptime_train_float32": 74.9348710524,
"vgg_models/pytorch-vgg19/throughput_train_float32": 429.8092158311,
"vgg_models/pytorch-vgg19/steptime_train_float16": 45.2033062465,
"vgg_models/pytorch-vgg19/throughput_train_float16": 709.1127328377,
"Accept": false,
"#Issues": 17.0,
"Category": "FailedTest,Mem",
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment