"vscode:/vscode.git/clone" did not exist on "eb38c7d1cae1c616de3c1c0ce40353c720f7e3c7"
compare.py 1.25 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
"""
used for debug using tensor comparison
dump {name: tensor} into "log_hf.jsonl" and "log_srt.jsonl"
use the same name for two tensors that supposed to be close
recommend name like: "layer 2 after mlp"
"""

import json
import sys

import torch

# Select which pair of dump files to compare. With no command-line argument
# the default logs are used; the only accepted argument is "base", which
# switches to the "base_"-prefixed logs.
if sys.argv[1:]:
    assert sys.argv[1] == "base"
    hf_log, srt_log = "base_log_hf.jsonl", "base_log_srt.jsonl"
else:
    hf_log, srt_log = "log_hf.jsonl", "log_srt.jsonl"


def load_data(filepath):
    """Load named tensors from a JSON Lines file.

    Each non-blank line must hold a JSON object whose values are numbers or
    (nested) lists of numbers; every value is converted with ``torch.tensor``.

    Args:
        filepath: Path of the ``.jsonl`` file to read.

    Returns:
        A dict mapping each name to its tensor. When a name occurs on
        several lines, the last occurrence wins.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    tensors = {}
    # Read as UTF-8 explicitly so the result does not depend on the locale.
    with open(filepath, "r", encoding="utf-8") as f:
        # Stream the file line by line instead of loading every line first.
        for line in f:
            # Tolerate blank lines (e.g. a trailing empty line in the dump),
            # which json.loads would otherwise reject.
            if not line.strip():
                continue
            for name, value in json.loads(line).items():
                tensors[name] = torch.tensor(value)
    return tensors


# Parse both dumps up front: the file named by hf_log first, then srt_log.
hf_tensors, srt_tensors = (load_data(path) for path in (hf_log, srt_log))


def get_diff(t1, t2):
    """Measure how far apart two tensors are.

    Args:
        t1: First tensor. It is reshaped to ``t2.shape`` before comparing,
            so it only needs the same number of elements as ``t2``.
        t2: Second tensor; its shape is the reference shape.

    Returns:
        A tuple ``(l2_dis, max_diff)`` of 0-dim tensors: the Euclidean (L2)
        distance between the tensors and the largest absolute element-wise
        difference.

    Raises:
        RuntimeError: If ``t1`` cannot be reshaped to ``t2.shape``.
    """
    # Values parsed from JSON integers become integer tensors, which
    # torch.dist rejects; promote non-float inputs to the default float dtype.
    # Floating-point (and complex) inputs are left untouched.
    float_dtype = torch.get_default_dtype()
    if not (t1.is_floating_point() or t1.is_complex()):
        t1 = t1.to(float_dtype)
    if not (t2.is_floating_point() or t2.is_complex()):
        t2 = t2.to(float_dtype)

    # Reshape once and reuse the result for both metrics.
    t1 = t1.reshape(t2.shape)
    max_diff = torch.max(torch.abs(t1 - t2))
    l2_dis = torch.dist(t1, t2, p=2)
    return l2_dis, max_diff


# Report the distance between every tensor in the srt log and the tensor
# logged under the same name in the hf log.
for k in srt_tensors:
    if k not in hf_tensors:
        # A name logged on only one side cannot be compared; report it and
        # keep going instead of aborting the whole run with a KeyError.
        print(f"{k} missing from {hf_log}, skipped")
        continue
    l2_dis, max_diff = get_diff(hf_tensors[k], srt_tensors[k])
    print(f"{k} {l2_dis=} {max_diff=}")
    # Extra dumps for two specific entries: full values for "layer 1 attn",
    # shapes for "layer 0 prefill k".
    if k == "layer 1 attn":
        print(hf_tensors[k])
        print(srt_tensors[k])
    if k == "layer 0 prefill k":
        print(srt_tensors[k].shape)
        print(hf_tensors[k].shape)