# make_plot_with_jsonl.py

import matplotlib.gridspec as gridspec
Aarni Koskela's avatar
Aarni Koskela committed
3
4
import matplotlib.pyplot as plt
import pandas as pd
Mitchell Wortsman's avatar
Mitchell Wortsman committed
5
6
7
8
9
10
11
12

# Colormap sampled once per batch size to color the curves of the
# "% speedup" panel (second subplot).
cmap=plt.get_cmap('cool')

# Benchmark columns (joined with '+') whose sum is plotted as the total time
# of each linear-layer variant.
STANDARD_TOTAL_KEY = 'standard_gx+standard_gw+standard_fwd'
SWITCHBACK_TOTAL_KEY = (
    'x_quantize_rowwise+g_quantize_rowwise+w_quantize_global'
    '+w_quantize_global_transpose+standard_gw+global_fwd+global_bwd'
)

# One entry per curve of the first panel:
# (column key, marker, linestyle, color, legend label).
# The first two entries are the aggregate curves; their legend labels are
# rendered in bold.
PLOT1_SERIES = [
    (STANDARD_TOTAL_KEY, 's', '-', 'C2', 'Standard fp16 (sum of parts)'),
    (SWITCHBACK_TOTAL_KEY, 'o', '-', 'C4', 'SwitchBack int8 (sum of parts)'),

    ('standard_fwd', '^', '--', 'C2', 'Matmul XW (standard)'),
    ('standard_gw', '^', '-.', 'C2', 'Matmul GW (standard)'),
    ('standard_gx', '^', ':', 'gray', 'Matmul GX (both)'),

    ('global_fwd', '^', '--', 'C4', 'Int8 Matmul XW (switchback)'),
    ('global_bwd', '^', '-.', 'C4', 'Int8 Matmul GW (switchback)'),

    ('x_quantize_rowwise', 'P', '--', 'C4', 'Quantize rowwise X (switchback)'),
    ('g_quantize_rowwise', 'P', '-.', 'C4', 'Quantize rowwise G (switchback)'),

    ('w_quantize_global', '.', '--', 'C4', 'Quantize global W (switchback)'),
    ('w_quantize_global_transpose', '.', '-.', 'C4', 'Quantize global and\ntranspose W (switchback)'),
]


def average_op_time(df, key, embed_dim):
    """Return the mean of the summed ``key`` columns over both layer shapes.

    The two shapes are ``embed_dim -> 4 * embed_dim`` and
    ``4 * embed_dim -> embed_dim``.

    Args:
        df: Benchmark rows (already restricted to a single batch size) with
            ``dim_in`` and ``dim_out`` columns plus one column per operation.
        key: One column name, or several joined with ``'+'``; the named
            columns are summed.
        embed_dim: Base dimension of the layer pair.

    Returns:
        Half of the sum of the selected columns over the two shapes. When
        several rows match a shape, the first one is used.

    Raises:
        ValueError: If ``df`` has no row for one of the two shapes.
        KeyError: If a column named in ``key`` is missing from ``df``.
    """
    columns = key.split('+')
    total = 0
    for dim_in, dim_out in ((embed_dim, embed_dim * 4), (embed_dim * 4, embed_dim)):
        rows = df[(df.dim_in == dim_in) & (df.dim_out == dim_out)]
        if rows.empty:
            # Fail with the offending shape instead of an opaque IndexError.
            raise ValueError(
                f'no benchmark row with dim_in={dim_in}, dim_out={dim_out}'
            )
        for column in columns:
            total += rows[column].values[0]
    return total * 0.5


def percent_speedup(baseline_times, candidate_times):
    """Return the per-element % time saved by the candidate vs. the baseline.

    Positive values mean the candidate is faster than the baseline.
    """
    return [
        -((candidate - baseline) / baseline) * 100
        for baseline, candidate in zip(baseline_times, candidate_times)
    ]


def format_dim_axis(ax, dims_to_xtick):
    """Apply the tick styling shared by both panels to the 'dim' x-axis."""
    ax.tick_params(axis='x', labelsize=11)
    ax.tick_params(axis='y', labelsize=11)

    ax.set_xticks(dims_to_xtick)
    ax.set_xticklabels(dims_to_xtick)
    # Drop the minor ticks that the log scale would otherwise add.
    ax.set_xticks([], minor=True)


def main():
    """Read the benchmark JSONL and write the two-panel speed plot as a PDF."""
    fig = plt.figure(tight_layout=True, figsize=(12, 3.5))
    gs = gridspec.GridSpec(1, 2)

    dims_to_consider = [1024, 1280, 1408, 1664, 2048, 4096]
    batch_size_for_plot1 = 32768
    batch_sizes_for_plot2 = [2**14, 2**15, 2**16, 2**17]
    dims_to_xtick = [1024, 2048, 4096]
    logscale_plot1 = True

    # TODO: change this to what you want.
    rdf = pd.read_json('speed_benchmark/info_a100_py2.jsonl', lines=True)

    # First panel: time occupied by the different operations at one batch size.
    ax = fig.add_subplot(gs[0, 0])
    df = rdf[rdf.batch_size == batch_size_for_plot1]

    for key, marker, ls, color, name in PLOT1_SERIES:
        # average over dim -> 4*dim and 4*dim -> dim
        ys = [average_op_time(df, key, dim) for dim in dims_to_consider]
        ax.plot(
            dims_to_consider, ys,
            color=color, label=name, marker=marker, markersize=5,
            linestyle=ls,
            # Aggregate ("sum of parts") curves are drawn thicker.
            linewidth=2 if '+' in key else 1.,
        )

    ax.set_xlabel('dim', fontsize=13)
    ax.set_ylabel('time (ms)', fontsize=13)

    ax.grid()

    ax.set_xscale('log')
    if logscale_plot1:
        ax.set_yscale('log')

    format_dim_axis(ax, dims_to_xtick)

    leg = ax.legend(loc='upper center', bbox_to_anchor=(-0.64,  1.), ncol=1, fontsize=10)
    # Emphasize the two aggregate curves, which come first in PLOT1_SERIES.
    for text in leg.get_texts()[:2]:
        text.set_fontweight('bold')
    plt.subplots_adjust(left=0.1)
    # NOTE: the "32k" in the title is hard-coded; keep it in sync with
    # batch_size_for_plot1.
    ax.set_title('  Linear layer, batch * sequence length = 32k', fontsize=10, loc='left', y=1.05, pad=-20)

    # Second panel: % speedup of the SwitchBack total over the standard total
    # for different batch sizes.
    ax = fig.add_subplot(gs[0, 1])

    markers = ['^', 'v', 'P', 'o']
    for j, batch_size in enumerate(batch_sizes_for_plot2):
        batch_df = rdf[rdf.batch_size == batch_size]
        standard = [
            average_op_time(batch_df, STANDARD_TOTAL_KEY, dim)
            for dim in dims_to_consider
        ]
        switchback = [
            average_op_time(batch_df, SWITCHBACK_TOTAL_KEY, dim)
            for dim in dims_to_consider
        ]
        ax.plot(
            dims_to_consider,
            percent_speedup(standard, switchback),
            # Spread the colors evenly over the colormap; the marker list
            # wraps around if more batch sizes than markers are configured.
            color=cmap(j / len(batch_sizes_for_plot2)),
            label=f'batch * sequence length = {batch_size}',
            marker=markers[j % len(markers)],
            markersize=5,
        )

    ax.legend()
    ax.set_xlabel('dim', fontsize=13)
    ax.set_xscale('log')
    ax.grid()
    ax.set_ylabel(r'% speedup', fontsize=13)

    format_dim_axis(ax, dims_to_xtick)

    ax.set_title('  Linear layer summary, varying dimensions', fontsize=10, loc='left', y=1.05, pad=-20)

    plt.savefig('speed_benchmark/plot_with_info.pdf', bbox_inches='tight')


if __name__ == '__main__':
    main()