test_aiter_sigmoid.py 2.71 KB
Newer Older
Xiaowei.zhang's avatar
Xiaowei.zhang committed
1
2
3
# SPDX-License-Identifier: MIT
import torch
import aiter
4
import pandas as pd
Xiaowei.zhang's avatar
Xiaowei.zhang committed
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
from aiter import dtypes

# from ater.test_common import checkAllclose, perftest
from torch.profiler import profile, ProfilerActivity

# input shape: torch.Size([4096, 64, 160]) (20480, 1, 128)
# other shape: torch.Size([4096, 64, 160]) (10240, 160, 1)

# input shape: torch.Size([4096, 64, 160]) (47360, 1, 296)
# other shape: torch.Size([4096, 64, 160]) (10240, 160, 1)

# Benchmark input geometry: a 2-D tensor whose strides (880, 1) match a
# contiguous row-major layout for shape (4096, 880).
shape0 = (4096, 880)
stride0 = (880, 1)

# Smaller alternative configuration, handy for quick debugging runs:
# shape0 = (16,16)
# stride0 = (16, 1)

# Allocate the input on the GPU with the explicit strides above, then fill it
# from a tensor created with torch.rand's defaults; copy_ performs the
# dtype/device conversion into tensor0.
# NOTE(review): dtypes.fp16 is presumably torch.float16 -- confirm in aiter.dtypes.
tensor0 = torch.empty_strided(shape0, stride0, dtype=dtypes.fp16, device="cuda")
random_data0 = torch.rand(shape0)
tensor0.copy_(random_data0)
# tensor0.fill_(1)

# Echo the configuration so profiler output can be matched to its input layout.
print("shape0", shape0)
print("stride0:", stride0)

30
31
32
33
34
35
36
37
38
39
def get_profiler_totals(prof):
    """Pull the overall CPU/CUDA time totals out of a profiler's text table.

    The table is rendered with ``prof.key_averages().table(...)`` (sorted by
    ``cuda_time_total``, limited to 10 rows) and scanned line by line for the
    summary lines that begin with ``Self CPU time total:`` and
    ``Self CUDA time total:``.

    Args:
        prof: Object exposing ``key_averages().table(sort_by=..., row_limit=...)``
            that returns the rendered table as a string.

    Returns:
        dict: Maps ``"CPU total time"`` and/or ``"CUDA total time"`` to the
        whitespace-stripped text following the matching label. A key is absent
        when its summary line does not occur in the table.
    """
    # Summary-line prefix -> key used in the returned dict.
    prefix_to_key = {
        "Self CPU time total:": "CPU total time",
        "Self CUDA time total:": "CUDA total time",
    }
    rendered = prof.key_averages().table(sort_by="cuda_time_total", row_limit=10)

    collected = {}
    for row in rendered.splitlines():
        for prefix, key in prefix_to_key.items():
            if row.startswith(prefix):
                # Everything after the label (which ends in the colon) is the value.
                collected[key] = row[len(prefix):].strip()
                break
    return collected

Xiaowei.zhang's avatar
Xiaowei.zhang committed
40
41
42
43
44
45
46
47
48
49
50
# Profile 100 consecutive torch.sigmoid calls on tensor0, recording both CPU
# and CUDA activity. The output of the last call stays in `result` so it can
# be compared against the aiter output further down.
torch_profiler_options = {
    "activities": [ProfilerActivity.CPU, ProfilerActivity.CUDA],
    "record_shapes": True,
    "profile_memory": True,
    "with_stack": True,
    "with_modules": True,
}
with profile(**torch_profiler_options) as prof:
    for _ in range(100):
        # Optional cache flush between iterations (disabled):
        # cache_flush1 = torch.randn(10000, 10000, requires_grad=True, device="cuda", dtype=dtypes.fp32).to(dtypes.i32)
        result = torch.sigmoid(tensor0)

# Show the ten most expensive entries, ordered by total CUDA time.
torch_table = prof.key_averages().table(sort_by="cuda_time_total", row_limit=10)
print(torch_table)
51
# Capture the CPU/CUDA totals of the torch.sigmoid run now, before `prof` is
# rebound by the aiter.sigmoid profiling run below.
torch_totals = get_profiler_totals(prof)
Xiaowei.zhang's avatar
Xiaowei.zhang committed
52
53
54
55
56
57
58
59
60
61
62
63

# Profile 100 consecutive aiter.sigmoid calls on the same tensor0 input, with
# the same profiler settings as the torch.sigmoid run. The output of the last
# call stays in `output` for the equality check further down.
aiter_profiler_options = {
    "activities": [ProfilerActivity.CPU, ProfilerActivity.CUDA],
    "record_shapes": True,
    "profile_memory": True,
    "with_stack": True,
    "with_modules": True,
}
with profile(**aiter_profiler_options) as prof:
    for _ in range(100):
        # Optional cache flush between iterations (disabled):
        # cache_flush1 = torch.randn(10000, 10000, requires_grad=True, device="cuda", dtype=dtypes.fp32).to(dtypes.i32)
        output = aiter.sigmoid(tensor0)

# Show the ten most expensive entries, ordered by total CUDA time.
aiter_table = prof.key_averages().table(sort_by="cuda_time_total", row_limit=10)
print(aiter_table)
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
# Totals of the aiter.sigmoid run (the profiler most recently bound to `prof`).
aiter_totals = get_profiler_totals(prof)

# Exact comparison of the two outputs via torch.equal (no tolerance applied).
result_equal = torch.equal(result, output)

# One row per metric, with torch and aiter totals side by side. A total that
# was not found in the profiler table shows up as None.
summary = pd.DataFrame(
    [
        {
            "metric": metric,
            "torch.sigmoid": torch_totals.get(metric),
            "aiter.sigmoid": aiter_totals.get(metric),
        }
        for metric in ("CPU total time", "CUDA total time")
    ]
)
Xiaowei.zhang's avatar
Xiaowei.zhang committed
79

80
81
82
83
# Write the timing summary as CSV, then append a free-text line stating
# whether the torch and aiter outputs compared equal.
csv_path = "test_aiter_sigmoid.csv"
equal_msg = f"Whether the two outputs are equal: {result_equal}"
summary.to_csv(csv_path, index=False)
with open(csv_path, mode="a", encoding="utf-8") as csv_file:
    csv_file.write(f"{equal_msg}\n")