"vscode:/vscode.git/clone" did not exist on "79892d376c5faeaa55f6ccb656b1a09c0781815a"
make_plot_with_jsonl.py 5.12 KB
Newer Older
Mitchell Wortsman's avatar
Mitchell Wortsman committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import os

import matplotlib.gridspec as gridspec

# Colormap sampled once per batch size to colour the curves of the second (speedup) panel.
cmap=plt.get_cmap('cool')

def format_count(n):
    """Return ``n`` as e.g. ``'32k'`` when it is a whole multiple of 1024, else as plain digits."""
    if n and n % 1024 == 0:
        return f'{n // 1024}k'
    return str(n)


def mean_layer_time(df, embed_dim, keys):
    """Return the summed time of ``keys`` averaged over the two linear-layer shapes.

    The two shapes are ``embed_dim -> 4 * embed_dim`` and ``4 * embed_dim -> embed_dim``.

    Args:
        df: Benchmark rows (already filtered to one batch size) with ``dim_in`` and
            ``dim_out`` columns plus one column per operation named in ``keys``.
        embed_dim: Base dimension of the layer pair.
        keys: ``'+'``-separated column names whose values are added together.

    Returns:
        Half of the total of all requested columns over both shapes.

    Raises:
        IndexError: If ``df`` has no row for one of the two shapes.
    """
    total = 0
    for dim_in, dim_out in ((embed_dim, embed_dim * 4), (embed_dim * 4, embed_dim)):
        rows = df[(df.dim_in == dim_in) & (df.dim_out == dim_out)]
        if rows.empty:
            # Same exception type the bare ``.values[0]`` lookup raised, but with a usable message.
            raise IndexError(f'no benchmark row with dim_in={dim_in}, dim_out={dim_out}')
        # Accumulate column by column so the floating-point summation order is stable.
        for key in keys.split('+'):
            total += rows[key].values[0]
    return total * 0.5


def percent_speedup(baseline, candidate):
    """Return, element-wise, how many percent faster ``candidate`` is than ``baseline``.

    Positive values mean the candidate time is smaller than the baseline time.
    """
    return [-((cand - base) / base) * 100 for base, cand in zip(baseline, candidate)]


if __name__ == '__main__':

    fig = plt.figure(tight_layout=True, figsize=(12,3.5))
    gs = gridspec.GridSpec(1, 2)

    # Plot configuration.
    dims_to_consider = [1024, 1280, 1408, 1664, 2048, 4096]
    batch_size_for_plot1 = 32768
    batch_sizes_for_plot2 = [2**14, 2**15, 2**16, 2**17]
    dims_to_xtick = [1024, 2048, 4096]
    logscale_plot1 = True

    # '+'-joined benchmark columns whose sum is the total time of each variant.
    standard_total = 'standard_gx+standard_gw+standard_fwd'
    switchback_total = (
        'x_quantize_rowwise+g_quantize_rowwise+w_quantize_global'
        '+w_quantize_global_transpose+standard_gw+global_fwd+global_bwd'
    )

    ax = fig.add_subplot(gs[0, 0])

    # TODO: change this to what you want.
    rdf = pd.read_json('speed_benchmark/info_a100_py2.jsonl', lines=True)
    df = rdf[rdf.batch_size == batch_size_for_plot1]

    # first plot the time occupied by different operations
    for k, marker, ls, color, name in [
        (standard_total, 's', '-', 'C2', 'Standard fp16 (sum of parts)'),
        (switchback_total, 'o', '-', 'C4', 'SwitchBack int8 (sum of parts)'),

        ('standard_fwd', '^', '--', 'C2', 'Matmul XW (standard)'),
        ('standard_gw', '^', '-.', 'C2', 'Matmul GW (standard)'),
        ('standard_gx', '^', ':', 'gray', 'Matmul GX (both)'),

        ('global_fwd', '^', '--', 'C4', 'Int8 Matmul XW (switchback)'),
        ('global_bwd', '^', '-.', 'C4', 'Int8 Matmul GW (switchback)'),

        ('x_quantize_rowwise', 'P', '--', 'C4', 'Quantize rowwise X (switchback)'),
        ('g_quantize_rowwise', 'P', '-.', 'C4', 'Quantize rowwise G (switchback)'),
        ('w_quantize_global', '.', '--', 'C4', 'Quantize global W (switchback)'),
        ('w_quantize_global_transpose', '.', '-.', 'C4', 'Quantize global and\ntranspose W (switchback)'),
    ]:
        # average over dim -> 4*dim and 4*dim -> dim
        ys = [mean_layer_time(df, embed_dim, k) for embed_dim in dims_to_consider]

        # The two "sum of parts" curves (keys containing '+') are drawn thicker.
        ax.plot(dims_to_consider, ys, color=color, label=name, marker=marker, markersize=5, linestyle=ls, linewidth=2 if '+' in k else 1.)

    ax.set_xlabel('dim', fontsize=13)
    ax.set_ylabel('time (ms)', fontsize=13)

    ax.grid()

    ax.set_xscale('log')
    if logscale_plot1:
        ax.set_yscale('log')

    ax.tick_params(axis='x', labelsize=11)
    ax.tick_params(axis='y', labelsize=11)

    ax.set_xticks(dims_to_xtick)
    ax.set_xticklabels(dims_to_xtick)
    ax.set_xticks([], minor=True)

    leg = ax.legend(loc='upper center', bbox_to_anchor=(-0.64,  1.), ncol=1, fontsize=10)
    # The first two legend entries are the "sum of parts" totals; emphasise them.
    leg.get_texts()[0].set_fontweight('bold')
    leg.get_texts()[1].set_fontweight('bold')
    plt.subplots_adjust(left=0.1)
    # Derive the batch size shown in the title from the configuration above.
    ax.set_title(f'  Linear layer, batch * sequence length = {format_count(batch_size_for_plot1)}', fontsize=10, loc='left', y=1.05, pad=-20)

    ax = fig.add_subplot(gs[0, 1])

    # now plot the % speedup for different batch sizes
    markers = ['^', 'v', 'P', 'o']
    for j, batch_size in enumerate(batch_sizes_for_plot2):
        df = rdf[rdf.batch_size == batch_size]
        standard_ys = [mean_layer_time(df, embed_dim, standard_total) for embed_dim in dims_to_consider]
        switchback_ys = [mean_layer_time(df, embed_dim, switchback_total) for embed_dim in dims_to_consider]

        # Spread colours over the colormap and cycle markers so any number of batch sizes works.
        color = cmap(j / len(batch_sizes_for_plot2))
        ax.plot(dims_to_consider, percent_speedup(standard_ys, switchback_ys), color=color, label=f'batch * sequence length = {batch_size}', marker=markers[j % len(markers)], markersize=5)

    ax.legend()
    ax.set_xlabel('dim', fontsize=13)
    ax.set_xscale('log')
    ax.grid()
    ax.set_ylabel(r'% speedup', fontsize=13)

    ax.tick_params(axis='x', labelsize=11)
    ax.tick_params(axis='y', labelsize=11)

    ax.set_xticks(dims_to_xtick)
    ax.set_xticklabels(dims_to_xtick)
    ax.set_xticks([], minor=True)

    ax.set_title('  Linear layer summary, varying dimensions', fontsize=10, loc='left', y=1.05, pad=-20)

    plt.savefig('speed_benchmark/plot_with_info.pdf', bbox_inches='tight')
Mitchell Wortsman's avatar
Mitchell Wortsman committed
138