fprintf(stderr," -h, --help show this help message and exit\n");
fprintf(stderr," --model-base FNAME model path from which to load base model (default '%s')\n",params->fn_model_base);
fprintf(stderr," --lora-out FNAME path to save llama lora (default '%s')\n",params->fn_lora_out);
fprintf(stderr," --only-write-lora only save llama lora, don't do any training. use this if you only want to convert a checkpoint to a lora adapter.\n");
fprintf(stderr," --norm-rms-eps F RMS-Norm epsilon value (default %f)\n",params->f_norm_rms_eps);
fprintf(stderr," --rope-freq-base F Frequency base for ROPE (default %f)\n",params->rope_freq_base);
fprintf(stderr," --rope-freq-scale F Frequency scale for ROPE (default %f)\n",params->rope_freq_scale);
fprintf(stderr," --lora-alpha N LORA alpha : resulting LORA scaling is alpha/r. (default %d)\n",params->lora_alpha);
fprintf(stderr," --lora-r N LORA r: default rank. Also specifies resulting scaling together with lora-alpha. (default %d)\n",params->lora_r);
fprintf(stderr," --rank-att-norm N LORA rank for attention norm tensor, overrides default rank. Norm tensors should generally have rank 1.\n");
fprintf(stderr," --rank-ffn-norm N LORA rank for feed-forward norm tensor, overrides default rank. Norm tensors should generally have rank 1.\n");
fprintf(stderr," --rank-out-norm N LORA rank for output norm tensor, overrides default rank. Norm tensors should generally have rank 1.\n");
fprintf(stderr," --rank-tok-embd N LORA rank for token embeddings tensor, overrides default rank.\n");
fprintf(stderr," --rank-out N LORA rank for output tensor, overrides default rank.\n");
fprintf(stderr," --rank-wq N LORA rank for wq tensor, overrides default rank.\n");
fprintf(stderr," --rank-wk N LORA rank for wk tensor, overrides default rank.\n");
fprintf(stderr," --rank-wv N LORA rank for wv tensor, overrides default rank.\n");
fprintf(stderr," --rank-wo N LORA rank for wo tensor, overrides default rank.\n");
fprintf(stderr," --rank-ffn_gate N LORA rank for ffn_gate tensor, overrides default rank.\n");
fprintf(stderr," --rank-ffn_down N LORA rank for ffn_down tensor, overrides default rank.\n");
fprintf(stderr," --rank-ffn_up N LORA rank for ffn_up tensor, overrides default rank.\n");
die("Provided rank differs from checkpoint file. To use different rank start finetune from scratch with empty input checkpoint, e.g --checkpoint-in ''. Aborting.");
// need to discard previous optimizer gradient statistics and opt_init with new shapes
// TODO
}
if (opt_past_changed) {
die("Optimizer parameter '--opt-past N' differs from checkpoint file. To use a different value, finetune from scratch with an empty input checkpoint, e.g. --checkpoint-in ''. Aborting.");
// need to discard previous optimizer past function value statistics and opt_init with new shapes
# MODEL="$LLAMA_MODEL_DIR/openllama-3b-v2-q8_0.gguf" # This is the model the readme uses.
MODEL="$LLAMA_MODEL_DIR/openllama-3b-v2.gguf"# An f16 model. Note in this case with "-g", you get an f32-format .BIN file that isn't yet supported if you use it with "main --lora" with GPU inferencing.
# checkpoint_paths = [path for path in model_files if (path.endswith('.bin') and path.startswith('pytorch')) or (path.endswith('.safetensors') and path.startswith('model'))]
from enum import Enum

from pydantic import BaseModel, Field


# A function for the agent to send a message to the user.
class SendMessageToUser(BaseModel):
    """
    Send a message to the User.
    """
    chain_of_thought: str = Field(..., description="Your chain of thought while sending the message.")
    message: str = Field(..., description="Message you want to send to the user.")

    def run(self):
        print(self.message)
# Enum for the calculator tool.
class MathOperation(Enum):
    ADD = "add"
    SUBTRACT = "subtract"
    MULTIPLY = "multiply"
    DIVIDE = "divide"
# Simple pydantic calculator tool for the agent that can add, subtract, multiply, and divide. Docstring and description of fields will be used in system prompt.
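# A minimal sketch of such a calculator tool, built on the MathOperation enum above.
# The field names and float types here are illustrative assumptions, not necessarily
# the example's exact definition.
class Calculator(BaseModel):
    """
    Perform a math operation on two numbers.
    """
    number_one: float = Field(..., description="First number for the operation.")
    operation: MathOperation = Field(..., description="Math operation to perform.")
    number_two: float = Field(..., description="Second number for the operation.")

    def run(self):
        if self.operation == MathOperation.ADD:
            return self.number_one + self.number_two
        if self.operation == MathOperation.SUBTRACT:
            return self.number_one - self.number_two
        if self.operation == MathOperation.MULTIPLY:
            return self.number_one * self.number_two
        if self.operation == MathOperation.DIVIDE:
            return self.number_one / self.number_two
        raise ValueError("Unknown operation.")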
# Here the grammar gets generated by passing the available function models to the generate_gbnf_grammar_and_documentation function. This also generates documentation usable by the LLM.
# pydantic_model_list is the list of pydantic models.
# outer_object_name is an optional name for an outer object around the actual model object, like a "function" object with "function_parameters" that contains the actual model object. If None, no outer object will be generated.
# outer_object_content is the name of the outer object's content field.
# model_prefix is the optional prefix for models in the documentation. (Default="Output Model")
# fields_prefix is the prefix for the model fields in the documentation. (Default="Output Fields")
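# A sketch of that call based on the parameter descriptions above. The helper is
# assumed to be imported from the example's pydantic_models_to_grammar module, and the
# "function"/"function_parameters" names follow the outer-object comment above.
gbnf_grammar, documentation = generate_gbnf_grammar_and_documentation(
    pydantic_model_list=[SendMessageToUser, Calculator],
    outer_object_name="function",
    outer_object_content="function_parameters",
    model_prefix="Function",
    fields_prefix="Parameters")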
system_message="You are an advanced AI, tasked to assist the user by calling functions in JSON format. The following are the available functions and their parameters and types:\n\n"+documentation
system_message="You are an advanced AI, tasked to create a dataset entry in JSON for a Book. The following is the expected output model:\n\n"+documentation
text="""The Feynman Lectures on Physics is a physics textbook based on some lectures by Richard Feynman, a Nobel laureate who has sometimes been called "The Great Explainer". The lectures were presented before undergraduate students at the California Institute of Technology (Caltech), during 1961–1963. The book's co-authors are Feynman, Robert B. Leighton, and Matthew Sands."""
system_message="You are an advanced AI assistant. You are interacting with the user and with your environment by calling functions. You call functions by writing JSON objects, which represent specific function calls.\nBelow is a list of your available function calls:\n\n"+documentation
text="""Get the date and time, get the current weather in celsius in London and solve the following calculation: 42 * 42"""
die("Optimizer parameter '--opt-past N' differs from checkpoint file. To use different value train from scratch with empty input checkpoint, e.g --checkpoint-in ''. Aborting");
// need to discard previous optimizer past function value statistics and opt_init with new shapes
size_t (*GGML_CALL get_max_size)    (ggml_backend_buffer_type_t buft); // allocation max size
size_t (*GGML_CALL get_alloc_size)  (ggml_backend_buffer_type_t buft, const struct ggml_tensor * tensor); // data size needed to allocate the tensor, including padding
bool   (*GGML_CALL supports_backend)(ggml_backend_buffer_type_t buft, ggml_backend_t backend); // check if the buffer type is usable by the backend
// check if tensor data is in host memory
// should be equivalent to supports_backend(buft, ggml_backend_cpu_init())
bool (*GGML_CALL cpy_tensor)(ggml_backend_buffer_t buffer, const struct ggml_tensor * src, struct ggml_tensor * dst); // dst is in the buffer, src may be in any buffer
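// A minimal sketch (not the library's actual code) of how a host-memory buffer could
// satisfy the cpy_tensor contract above, assuming <string.h> and the ggml-backend
// headers are included: copy raw bytes when the source tensor already lives in host
// memory, otherwise report the copy as unhandled so the caller can fall back to a
// generic copy path.
static bool GGML_CALL example_host_buffer_cpy_tensor(ggml_backend_buffer_t buffer,
        const struct ggml_tensor * src, struct ggml_tensor * dst) {
    (void) buffer; // the destination buffer itself is not needed for a plain memcpy
    if (ggml_backend_buffer_is_host(src->buffer)) {
        memcpy(dst->data, src->data, ggml_nbytes(src));
        return true;  // copy handled here
    }
    return false;     // src is not host-accessible; let the caller handle it
}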