Commit aec77d6a authored by Bruce MacDonald, committed by Michael Yang
Browse files

support new "longrope" attention factor

parent 6ffb5cb0
...@@ -58,7 +58,7 @@ func (p *phi3) KV(t *Tokenizer) llm.KV { ...@@ -58,7 +58,7 @@ func (p *phi3) KV(t *Tokenizer) llm.KV {
switch p.RopeScaling.Type { switch p.RopeScaling.Type {
case "": case "":
// no scaling // no scaling
case "su": case "su", "longrope":
kv["phi3.rope.scaling.attn_factor"] = float32(max(math.Sqrt(1+math.Log(scale)/math.Log(float64(p.OriginalMaxPositionEmbeddings))), 1.0)) kv["phi3.rope.scaling.attn_factor"] = float32(max(math.Sqrt(1+math.Log(scale)/math.Log(float64(p.OriginalMaxPositionEmbeddings))), 1.0))
case "yarn": case "yarn":
kv["phi3.rope.scaling.attn_factor"] = float32(max(0.1*math.Log(scale)+1.0, 1.0)) kv["phi3.rope.scaling.attn_factor"] = float32(max(0.1*math.Log(scale)+1.0, 1.0))
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment