"examples/offline_inference/lora_inference/README.md" did not exist on "356077823ea8569ff15218e51228c1b3d50792a9"
run.sh 524 Bytes
Newer Older
zk's avatar
zk committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
# Run onnxruntime's transformer optimizer on ../weights/ground.onnx and write
# the result to ./ground.onnx.
# Both paths are relative to the current working directory, so invoke this
# script from the directory where they resolve correctly.
#
# Alternative invocation, kept for reference (disabled). It writes ./mha.onnx
# instead and passes additional optimizer flags:
#
#   python3 -m onnxruntime.transformers.optimizer \
#       --input ../weights/ground.onnx \
#       --output ./mha.onnx \
#       --use_multi_head_attention \
#       --model_type bert \
#       --disable_skip_layer_norm \
#       --disable_gelu \
#       --use_gpu \
#       --disable_embed_layer_norm \
#       --use_mask_index \
#       --use_raw_attention_mask
#
# Flags that were switched off inside that alternative invocation:
#       --num_heads 12
#       --hidden_size 256
# NOTE: when re-enabling the alternative, do not leave those two as "# ..."
# lines between the continued lines -- a comment line ends the command there,
# and the remaining flags would be run as a separate (failing) command.

# Active invocation: one flag per line for readability; the argument list is
# the same as the original single-line form.
python3 -m onnxruntime.transformers.optimizer \
    --input ../weights/ground.onnx \
    --output ./ground.onnx \
    --model_type bert \
    --use_gpu