"benchmarks/benchmark_latency.py" did not exist on "0f40557af6141ced118b81f2a04e651a0c6c9dbd"
chatglm_export.py 541 Bytes
Newer Older
1
2
import sys
from transformers import AutoTokenizer, AutoModel
zhouxiang's avatar
zhouxiang committed
3
from fastllm_pytools import torch2flm
4
5

# Usage: python chatglm_export.py [export_path] [dtype]
#   export_path  optional output file; defaults to "chatglm-6b-<dtype>.flm"
#   dtype        optional dtype string forwarded to torch2flm; defaults to "float16"
if __name__ == "__main__":
    # Load the tokenizer and model for the THUDM/chatglm2-6b checkpoint.
    # trust_remote_code=True is passed through to transformers for both loads.
    tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True)
    model = AutoModel.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True)
    # Put the model in evaluation mode before handing it to the exporter.
    model = model.eval()

    # The dtype is the optional second CLI argument; it must be resolved
    # before the export path because the default file name embeds it.
    dtype = sys.argv[2] if len(sys.argv) >= 3 else "float16"

    # The export path is the optional first CLI argument. The default is built
    # by real string concatenation so the chosen dtype appears in the file
    # name (previously the quotes were mismatched and the literal text
    # "' + dtype + '" ended up in the name).
    exportPath = sys.argv[1] if len(sys.argv) >= 2 else "chatglm-6b-" + dtype + ".flm"

    # Hand the model and tokenizer to fastllm's converter, targeting exportPath.
    torch2flm.tofile(exportPath, model, tokenizer, dtype=dtype)