eval_perf.sh 1.19 KB
Newer Older
Zhekai Zhang's avatar
Zhekai Zhang committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
#!/bin/bash

# Per-invocation output directory: run-<short hostname>-<YYYYMMDD>-<HHMMSS>.
# The hostname is kept out of the date format string so it can never be
# interpreted as a strftime conversion.
rundir="run-$(hostname -s)-$(date +%Y%m%d-%H%M%S)"

# Every log and report is written below $rundir, so stop right away if it
# cannot be created instead of running the benchmarks with nowhere to log.
mkdir -p -- "$rundir" || exit 1

#######################################
# Benchmark run_flux.py for one configuration, then profile it with nsys.
#
# Runs run_flux.py with --steps 4, 25 and 50. Each run's stdout and stderr
# are passed through to the caller's stdout/stderr and copied to
# $rundir/stdout-s<steps>-$config.log and $rundir/stderr-s<steps>-$config.log.
# The nsys profile (a 4-step run) is collected only when all three benchmark
# runs exited with status 0.
#
# Globals:   config (read) - label embedded in the log/report file names
#            rundir (read) - directory that receives logs and the report
# Arguments: "$@" - options forwarded verbatim to run_flux.py
# Returns:   the status of nsys when profiling ran; otherwise the status of
#            the last benchmark run that failed
#######################################
function run() {
    local steps status=0

    printf 'config=%s\n' "$config"
    printf 'args=%s\n' "$*"

    for steps in 4 25 50; do
        # The stderr tee needs an explicit >&2: tee copies its input to its
        # own stdout, which would otherwise fold the stderr stream into
        # stdout (where it can also be captured by the stdout log).
        python3 run_flux.py --steps "$steps" "$@" \
            > >(tee "$rundir/stdout-s${steps}-${config}.log") \
            2> >(tee "$rundir/stderr-s${steps}-${config}.log" >&2) \
            || status=$?
    done

    # Remember failures from every run, not just the last one, so a broken
    # configuration is never profiled.
    if (( status != 0 )); then
        printf 'run: skipping nsys profile for %s (a benchmark run exited with status %d)\n' \
            "$config" "$status" >&2
        return "$status"
    fi

    nsys profile --cuda-memory-usage true -o "$rundir/report-${config}.nsys-rep" \
        python3 run_flux.py --steps 4 "$@"
}

# Set the global label that run() reads as $config, then benchmark with the
# remaining arguments forwarded unchanged.
run_as() {
    config=$1
    shift
    run "$@"
}

# bf16 baselines with --compile.
run_as bf16-compile    --config bf16    --compile
run_as bf16-t5-compile --config bf16-t5 --compile

# --torchao runs (labelled int8dq), with and without --compile.
run_as int8dq-compile      --config bf16    --torchao --compile
run_as int8dq-t5-compile   --config bf16-t5 --torchao --compile
run_as int8dq-nocompile    --config bf16    --torchao
run_as int8dq-t5-nocompile --config bf16-t5 --torchao

# Remaining configurations: first without offloading, then with
# --offload 1 and --offload 2 (labelled <cfg>-ol1 / <cfg>-ol2).
for cfg in svdq svdq-t5 w4a4 w4a4-t5 bf16 bf16-t5 nf4 nf4-t5; do
    run_as "$cfg" --config "$cfg"

    for level in 1 2; do
        run_as "$cfg-ol$level" --config "$cfg" --offload "$level"
    done
done