OpenDAS / ollama · Commits

Commit a5ec9cfc (unverified)
Authored Apr 05, 2024 by Michael Yang; committed by GitHub, Apr 05, 2024

Merge pull request #3508 from ollama/mxyng/rope

Parents: fc8e1086, be517e49
Showing 4 changed files with 0 additions and 14 deletions (+0 -14):

	api/types.go        +0 -4
	convert/convert.go  +0 -1
	convert/mistral.go  +0 -1
	llm/server.go       +0 -8
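Taken together, the four diffs below remove the user-tunable RoPE frequency settings end to end: from the API options struct, from the loader defaults, from the model converter, and from the flags handed to the llama.cpp server. As a hedged illustration of client code that stops compiling after this commit (the usage is hypothetical; only the field names and DefaultOptions come from the diffs, and the import path assumes ollama's usual module layout):

	package main

	import "github.com/ollama/ollama/api"

	func main() {
		opts := api.DefaultOptions()
		opts.RopeFrequencyBase = 10000.0 // field removed from api.Runner below
		opts.RopeFrequencyScale = 1.0    // field removed from api.Runner below
		_ = opts
	}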
api/types.go

@@ -121,8 +121,6 @@ type Runner struct {
 	VocabOnly          bool    `json:"vocab_only,omitempty"`
 	UseMMap            bool    `json:"use_mmap,omitempty"`
 	UseMLock           bool    `json:"use_mlock,omitempty"`
-	RopeFrequencyBase  float32 `json:"rope_frequency_base,omitempty"`
-	RopeFrequencyScale float32 `json:"rope_frequency_scale,omitempty"`
 	NumThread          int     `json:"num_thread,omitempty"`
 }

@@ -383,8 +381,6 @@ func DefaultOptions() Options {
 		Runner: Runner{
 			// options set when the model is loaded
 			NumCtx:             2048,
-			RopeFrequencyBase:  10000.0,
-			RopeFrequencyScale: 1.0,
 			NumBatch:           512,
 			NumGPU:             -1, // -1 here indicates that NumGPU should be set dynamically
 			NumGQA:             1,
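With both hunks applied, the loader defaults reduce to the following (a fragment reconstructed from the context lines above; fields outside the hunk are unchanged and omitted):

	Runner: Runner{
		// options set when the model is loaded
		NumCtx:   2048,
		NumBatch: 512,
		NumGPU:   -1, // -1 here indicates that NumGPU should be set dynamically
		NumGQA:   1,
		// ...remaining fields as in the original DefaultOptions
	},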
convert/convert.go

@@ -32,7 +32,6 @@ type Params struct {
 	AttentionHeads int     `json:"num_attention_heads"` // n_head
 	KeyValHeads    int     `json:"num_key_value_heads"`
 	NormEPS        float64 `json:"rms_norm_eps"`
-	RopeFreqBase   float64 `json:"rope_theta"`
 	BoSTokenID     int     `json:"bos_token_id"`
 	EoSTokenID     int     `json:"eos_token_id"`
 	HeadDimension  int     `json:"head_dim"`
convert/mistral.go

@@ -144,7 +144,6 @@ func (m *MistralModel) WriteGGUF() (string, error) {
 		"llama.attention.head_count":             uint32(m.Params.AttentionHeads),
 		"llama.attention.head_count_kv":          uint32(m.Params.KeyValHeads),
 		"llama.attention.layer_norm_rms_epsilon": float32(m.Params.NormEPS),
-		"llama.rope.freq_base":                   float32(m.Params.RopeFreqBase),
 		"general.file_type":                      uint32(1),
 		"tokenizer.ggml.model":                   "llama",
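The two converter hunks pair up: Params.RopeFreqBase was parsed out of the model's config.json via the `json:"rope_theta"` struct tag, then echoed into the GGUF metadata as "llama.rope.freq_base". A hedged sketch of the input field the converter now ignores (the JSON values are illustrative, not taken from this diff):

	// config.json from an upstream checkpoint (illustrative values):
	//   { "rope_theta": 10000.0, "rms_norm_eps": 1e-05, ... }
	// Before this commit, rope_theta landed in the GGUF as llama.rope.freq_base;
	// after it, neither the struct field nor the KV entry exists.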
llm/server.go

@@ -172,14 +172,6 @@ func NewLlamaServer(model string, adapters, projectors []string, opts api.Option
 		params = append(params, "--main-gpu", fmt.Sprintf("%d", opts.MainGPU))
 	}
-
-	if opts.RopeFrequencyBase > 0 {
-		params = append(params, "--rope-freq-base", fmt.Sprintf("%f", opts.RopeFrequencyBase))
-	}
-
-	if opts.RopeFrequencyScale > 0 {
-		params = append(params, "--rope-freq-scale", fmt.Sprintf("%f", opts.RopeFrequencyScale))
-	}
 
 	if len(adapters) > 0 {
 		// TODO: applying multiple adapters is not supported by the llama.cpp server yet
 		params = append(params, "--lora", adapters[0])
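At the process boundary, the change looks roughly like this (a hedged illustration: the flag values are placeholders, formatted the way fmt.Sprintf("%f", ...) in the removed lines would print them; other flags are elided):

	// before: ... --main-gpu 1 --rope-freq-base 10000.000000 --rope-freq-scale 1.000000 ...
	// after:  ... --main-gpu 1 ...
	// RoPE frequency is presumably left for the llama.cpp server to resolve on
	// its own (e.g. from GGUF metadata) rather than via ollama-supplied flags.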