diff --git a/.dockerignore b/.dockerignore
old mode 100755
new mode 100644
diff --git a/.gitattributes b/.gitattributes
old mode 100755
new mode 100644
diff --git a/.github/ISSUE_TEMPLATE/10_bug_report.yml b/.github/ISSUE_TEMPLATE/10_bug_report.yml
old mode 100755
new mode 100644
diff --git a/.github/ISSUE_TEMPLATE/20_feature_request.md b/.github/ISSUE_TEMPLATE/20_feature_request.md
old mode 100755
new mode 100644
diff --git a/.github/ISSUE_TEMPLATE/30_model_request.md b/.github/ISSUE_TEMPLATE/30_model_request.md
old mode 100755
new mode 100644
diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml
old mode 100755
new mode 100644
diff --git a/.github/workflows/latest.yaml b/.github/workflows/latest.yaml
old mode 100755
new mode 100644
diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
old mode 100755
new mode 100644
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
old mode 100755
new mode 100644
diff --git a/.gitignore b/.gitignore
old mode 100755
new mode 100644
diff --git a/.gitmodules b/.gitmodules
old mode 100755
new mode 100644
diff --git a/.golangci.yaml b/.golangci.yaml
old mode 100755
new mode 100644
diff --git a/.prettierrc.json b/.prettierrc.json
old mode 100755
new mode 100644
diff --git a/Dockerfile b/Dockerfile
old mode 100755
new mode 100644
diff --git a/LICENSE b/LICENSE
old mode 100755
new mode 100644
diff --git a/README copy.md b/README copy.md
new file mode 100644
index 0000000000000000000000000000000000000000..235de75a09a7f4cfa6ae2a79651f00a409ef45da
--- /dev/null
+++ b/README copy.md	
@@ -0,0 +1,71 @@
+# <div align="center"><strong>Ollama</strong></div>
+
+## 简介
+
+Ollama是以llama.cpp为后端的前端大模型推理框架，可快速部署主流模型。
+
+## 安装
+
+### 1、使用dockerfile方式安装
+
+直接下载本仓库中的`v0.3.5`分支，然后执行`docker build xxxxx`相关命令（具体需自行查阅相关资料）。
+
+如遇到卡数检测错误，请参考 https://developer.hpccube.com/codes/OpenDAS/ollama/-/issues/1 ，也可以提前进行修复。
+
+### 2、使用源码编译方式安装（推荐）
+
+#### 环境准备
+
+##### Docker
+
+    docker pull image.sourcefind.cn:5000/dcu/admin/base/pytorch:2.1.0-py3.10-dtk24.04.3-ubuntu20.04
+    
+    docker run -i -t -d  --device=/dev/kfd --privileged --network=host --device=/dev/dri --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -v 项目地址(绝对路径):/home  -v /opt/hyhal:/opt/hyhal:ro --group-add video --shm-size 16G --name {容器名} {镜像ID}
+
+
+1、下载源码
+
+    git clone -b v0.3.5 http://developer.sourcefind.cn/codes/OpenDAS/ollama.git --depth=1
+    cd ollama
+
+2、安装依赖包
+
+    cd llm/llama.cpp
+    pip install -r requirements.txt
+
+#### 编译
+
+##### 环境设置
+    export AMDGPU_TARGETS=当前设备型号（如：gfx906，gfx928等）
+    export HSA_OVERRIDE_GFX_VERSION=设备型号（如: gfx906对应9.0.6；gfx928对应9.2.8）
+    export LIBRARY_PATH=/opt/dtk/llvm/lib/clang/15.0.0/lib/linux/:$LIBRARY_PATH
+    export HIP_VISIBLE_DEVICES=所有设备号（0,1,2,3,4,5,6,...）/选择设备号
+
+安装go
+
+    cd ../..
+    tar -C /usr/local -xzf go1.22.3.linux-amd64.tar.gz
+    export PATH=$PATH:/usr/local/go/bin
+    
+    # 修改go下载源，提升速度（按需设置）
+    go env -w GO111MODULE=on
+    go env -w GOPROXY=https://goproxy.cn,direct
+
+##### 运行编译
+
+    cd llm/generate && bash gen_linux.sh
+    cd ../.. && go build
+
+## 验证
+
+    ./ollama serve  # 选择可用设备，可通过上条命令输出结果查看
+    ./ollama run llama3.1
+
+更多使用方式请参考[原项目](https://github.com/ollama/ollama)。
+
+注意：每次运行前请检查环境变量`HSA_OVERRIDE_GFX_VERSION`是否正确设置。
+
+## 参考资料
+
+* https://github.com/ollama/ollama
+* https://github.com/ggerganov/llama.cpp
diff --git a/README.md b/README.md
index 235de75a09a7f4cfa6ae2a79651f00a409ef45da..aae92e6c2a841b26eb08f83d808f828f22a9944c 100644
--- a/README.md
+++ b/README.md
@@ -1,71 +1,409 @@
-# <div align="center"><strong>Ollama</strong></div>
+<div align="center">
+ <img alt="ollama" height="200px" src="https://github.com/ollama/ollama/assets/3325447/0d0b44e2-8f4a-4e99-9b52-a5c1c741c8f7">
+</div>
 
-## 简介
+# Ollama
 
-1 Ollama是以llama.cpp为后端的前端大模型推理框架，可快速部署主流模型。
+[![Discord](https://dcbadge.vercel.app/api/server/ollama?style=flat&compact=true)](https://discord.gg/ollama)
 
-## 安装
+Get up and running with large language models.
 
-### 1、使用dockerfile方式安装
+### macOS
 
-直接下载本仓库中的`v0.3.5`分支，然后执行`docker build xxxxx`相关命令（具体需自行查阅相关资料）。
+[Download](https://ollama.com/download/Ollama-darwin.zip)
 
-如遇到卡数检测错误，请参考 https://developer.hpccube.com/codes/OpenDAS/ollama/-/issues/1 ，也可以提前进行修复。
+### Windows preview
 
-### 2、使用源码编译方式安装（推荐）
+[Download](https://ollama.com/download/OllamaSetup.exe)
 
-#### 环境准备
+### Linux
 
-##### Docker
+```
+curl -fsSL https://ollama.com/install.sh | sh
+```
 
-    docker pull docker pull image.sourcefind.cn:5000/dcu/admin/base/pytorch:2.1.0-py3.10-dtk24.04.3-ubuntu20.04
-    
-    docker run -i -t -d  --device=/dev/kfd --privileged --network=host --device=/dev/dri --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -v 项目地址(绝对路径):/home  -v /opt/hyhal:/opt/hyhal:ro -v --group-add video --shm-size 16G --name {容器名} {镜像ID}
+[Manual install instructions](https://github.com/ollama/ollama/blob/main/docs/linux.md)
 
+### Docker
 
-1、下载源码
+The official [Ollama Docker image](https://hub.docker.com/r/ollama/ollama) `ollama/ollama` is available on Docker Hub.
 
-    git clone -b v0.3.5 http://developer.sourcefind.cn/codes/OpenDAS/ollama.git --depth=1
-    cd ollama
+### Libraries
 
-2、安装依赖包
+- [ollama-python](https://github.com/ollama/ollama-python)
+- [ollama-js](https://github.com/ollama/ollama-js)
 
-    cd llm/llama.cpp
-    pip install -r requirements.txt
+## Quickstart
 
-#### 编译
+To run and chat with [Llama 3.1](https://ollama.com/library/llama3.1):
 
-##### 环境设置
-    export AMDGPU_TARGETS=当前设备型号（如：gfx906，gfx928等）
-    export HSA_OVERRIDE_GFX_VERSION=设备型号（如: gfx906对应9.0.6；gfx928对应9.2.8）
-    export LIBRARY_PATH=/opt/dtk/llvm/lib/clang/15.0.0/lib/linux/:$LIBRARY_PATH
-    export HIP_VISIBLE_DEVICES=所有设备号（0,1,2,3,4,5,6,...）/选择设备号
+```
+ollama run llama3.1
+```
 
-安装go
+## Model library
 
-    cd ../..
-    tar -C /usr/local -xzf go1.22.3.linux-amd64.tar.gz
-    export PATH=$PATH:/usr/local/go/bin
-    
-    # 修改go下载源，提升速度（按需设置）
-    go env -w GO111MODULE=on
-    go env -w GOPROXY=https://goproxy.cn,direct
+Ollama supports a list of models available on [ollama.com/library](https://ollama.com/library 'ollama model library')
 
-##### 运行编译
+Here are some example models that can be downloaded:
 
-    cd llm/generate && bash gen_linux.sh
-    cd ../.. && go build
+| Model              | Parameters | Size  | Download                       |
+| ------------------ | ---------- | ----- | ------------------------------ |
+| Llama 3.1          | 8B         | 4.7GB | `ollama run llama3.1`          |
+| Llama 3.1          | 70B        | 40GB  | `ollama run llama3.1:70b`      |
+| Llama 3.1          | 405B       | 231GB | `ollama run llama3.1:405b`     |
+| Phi 3 Mini         | 3.8B       | 2.3GB | `ollama run phi3`              |
+| Phi 3 Medium       | 14B        | 7.9GB | `ollama run phi3:medium`       |
+| Gemma 2            | 2B         | 1.6GB | `ollama run gemma2:2b`         |
+| Gemma 2            | 9B         | 5.5GB | `ollama run gemma2`            |
+| Gemma 2            | 27B        | 16GB  | `ollama run gemma2:27b`        |
+| Mistral            | 7B         | 4.1GB | `ollama run mistral`           |
+| Moondream 2        | 1.4B       | 829MB | `ollama run moondream`         |
+| Neural Chat        | 7B         | 4.1GB | `ollama run neural-chat`       |
+| Starling           | 7B         | 4.1GB | `ollama run starling-lm`       |
+| Code Llama         | 7B         | 3.8GB | `ollama run codellama`         |
+| Llama 2 Uncensored | 7B         | 3.8GB | `ollama run llama2-uncensored` |
+| LLaVA              | 7B         | 4.5GB | `ollama run llava`             |
+| Solar              | 10.7B      | 6.1GB | `ollama run solar`             |
 
-## 验证
+> [!NOTE]
+> You should have at least 8 GB of RAM available to run the 7B models, 16 GB to run the 13B models, and 32 GB to run the 33B models.
 
-    ./ollama serve  （选择可用设备，可通过上条命令输出结果查看）
-    ./ollama run llama3.1
+## Customize a model
 
-更多使用方式请参考[原项目](https://github.com/ollama/ollama)。
+### Import from GGUF
 
-注意：每次运行前请检查环境变量`HSA_OVERRIDE_GFX_VERSION`是否正确设置。
+Ollama supports importing GGUF models in the Modelfile:
 
-## 参考资料
+1. Create a file named `Modelfile`, with a `FROM` instruction with the local filepath to the model you want to import.
+
+   ```
+   FROM ./vicuna-33b.Q4_0.gguf
+   ```
+
+2. Create the model in Ollama
+
+   ```
+   ollama create example -f Modelfile
+   ```
+
+3. Run the model
+
+   ```
+   ollama run example
+   ```
+
+### Import from PyTorch or Safetensors
+
+See the [guide](docs/import.md) on importing models for more information.
+
+### Customize a prompt
+
+Models from the Ollama library can be customized with a prompt. For example, to customize the `llama3.1` model:
+
+```
+ollama pull llama3.1
+```
+
+Create a `Modelfile`:
+
+```
+FROM llama3.1
+
+# set the temperature to 1 [higher is more creative, lower is more coherent]
+PARAMETER temperature 1
+
+# set the system message
+SYSTEM """
+You are Mario from Super Mario Bros. Answer as Mario, the assistant, only.
+"""
+```
+
+Next, create and run the model:
+
+```
+ollama create mario -f ./Modelfile
+ollama run mario
+>>> hi
+Hello! It's your friend Mario.
+```
+
+For more examples, see the [examples](examples) directory. For more information on working with a Modelfile, see the [Modelfile](docs/modelfile.md) documentation.
+
+## CLI Reference
+
+### Create a model
+
+`ollama create` is used to create a model from a Modelfile.
+
+```
+ollama create mymodel -f ./Modelfile
+```
+
+### Pull a model
+
+```
+ollama pull llama3.1
+```
+
+> This command can also be used to update a local model. Only the diff will be pulled.
+
+### Remove a model
+
+```
+ollama rm llama3.1
+```
+
+### Copy a model
+
+```
+ollama cp llama3.1 my-model
+```
+
+### Multiline input
+
+For multiline input, you can wrap text with `"""`:
+
+```
+>>> """Hello,
+... world!
+... """
+I'm a basic program that prints the famous "Hello, world!" message to the console.
+```
+
+### Multimodal models
+
+```
+ollama run llava "What's in this image? /Users/jmorgan/Desktop/smile.png"
+The image features a yellow smiley face, which is likely the central focus of the picture.
+```
+
+### Pass the prompt as an argument
+
+```
+$ ollama run llama3.1 "Summarize this file: $(cat README.md)"
+ Ollama is a lightweight, extensible framework for building and running language models on the local machine. It provides a simple API for creating, running, and managing models, as well as a library of pre-built models that can be easily used in a variety of applications.
+```
+
+### Show model information
+
+```
+ollama show llama3.1
+```
+
+### List models on your computer
+
+```
+ollama list
+```
+
+### Start Ollama
+
+`ollama serve` is used when you want to start ollama without running the desktop application.
+
+## Building
+
+See the [developer guide](https://github.com/ollama/ollama/blob/main/docs/development.md)
+
+### Running local builds
+
+Once the build has finished, start the server:
+
+```
+./ollama serve
+```
+
+Finally, in a separate shell, run a model:
+
+```
+./ollama run llama3.1
+```
+
+## REST API
+
+Ollama has a REST API for running and managing models.
+
+### Generate a response
+
+```
+curl http://localhost:11434/api/generate -d '{
+  "model": "llama3.1",
+  "prompt":"Why is the sky blue?"
+}'
+```
+
+### Chat with a model
+
+```
+curl http://localhost:11434/api/chat -d '{
+  "model": "llama3.1",
+  "messages": [
+    { "role": "user", "content": "why is the sky blue?" }
+  ]
+}'
+```
+
+See the [API documentation](./docs/api.md) for all endpoints.
+
+## Community Integrations
+
+### Web & Desktop
+
+- [Open WebUI](https://github.com/open-webui/open-webui)
+- [Enchanted (macOS native)](https://github.com/AugustDev/enchanted)
+- [Hollama](https://github.com/fmaclen/hollama)
+- [Lollms-Webui](https://github.com/ParisNeo/lollms-webui)
+- [LibreChat](https://github.com/danny-avila/LibreChat)
+- [Bionic GPT](https://github.com/bionic-gpt/bionic-gpt)
+- [HTML UI](https://github.com/rtcfirefly/ollama-ui)
+- [Saddle](https://github.com/jikkuatwork/saddle)
+- [Chatbot UI](https://github.com/ivanfioravanti/chatbot-ollama)
+- [Chatbot UI v2](https://github.com/mckaywrigley/chatbot-ui)
+- [Typescript UI](https://github.com/ollama-interface/Ollama-Gui?tab=readme-ov-file)
+- [Minimalistic React UI for Ollama Models](https://github.com/richawo/minimal-llm-ui)
+- [Ollamac](https://github.com/kevinhermawan/Ollamac)
+- [big-AGI](https://github.com/enricoros/big-AGI/blob/main/docs/config-local-ollama.md)
+- [Cheshire Cat assistant framework](https://github.com/cheshire-cat-ai/core)
+- [Amica](https://github.com/semperai/amica)
+- [chatd](https://github.com/BruceMacD/chatd)
+- [Ollama-SwiftUI](https://github.com/kghandour/Ollama-SwiftUI)
+- [Dify.AI](https://github.com/langgenius/dify)
+- [MindMac](https://mindmac.app)
+- [NextJS Web Interface for Ollama](https://github.com/jakobhoeg/nextjs-ollama-llm-ui)
+- [Msty](https://msty.app)
+- [Chatbox](https://github.com/Bin-Huang/Chatbox)
+- [WinForm Ollama Copilot](https://github.com/tgraupmann/WinForm_Ollama_Copilot)
+- [NextChat](https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web) with [Get Started Doc](https://docs.nextchat.dev/models/ollama)
+- [Alpaca WebUI](https://github.com/mmo80/alpaca-webui)
+- [OllamaGUI](https://github.com/enoch1118/ollamaGUI)
+- [OpenAOE](https://github.com/InternLM/OpenAOE)
+- [Odin Runes](https://github.com/leonid20000/OdinRunes)
+- [LLM-X](https://github.com/mrdjohnson/llm-x) (Progressive Web App)
+- [AnythingLLM (Docker + macOS/Windows/Linux native app)](https://github.com/Mintplex-Labs/anything-llm)
+- [Ollama Basic Chat: Uses HyperDiv Reactive UI](https://github.com/rapidarchitect/ollama_basic_chat)
+- [Ollama-chats RPG](https://github.com/drazdra/ollama-chats)
+- [QA-Pilot](https://github.com/reid41/QA-Pilot) (Chat with Code Repository)
+- [ChatOllama](https://github.com/sugarforever/chat-ollama) (Open Source Chatbot based on Ollama with Knowledge Bases)
+- [CRAG Ollama Chat](https://github.com/Nagi-ovo/CRAG-Ollama-Chat) (Simple Web Search with Corrective RAG)
+- [RAGFlow](https://github.com/infiniflow/ragflow) (Open-source Retrieval-Augmented Generation engine based on deep document understanding)
+- [StreamDeploy](https://github.com/StreamDeploy-DevRel/streamdeploy-llm-app-scaffold) (LLM Application Scaffold)
+- [chat](https://github.com/swuecho/chat) (chat web app for teams)
+- [Lobe Chat](https://github.com/lobehub/lobe-chat) with [Integrating Doc](https://lobehub.com/docs/self-hosting/examples/ollama)
+- [Ollama RAG Chatbot](https://github.com/datvodinh/rag-chatbot.git) (Local Chat with multiple PDFs using Ollama and RAG)
+- [BrainSoup](https://www.nurgo-software.com/products/brainsoup) (Flexible native client with RAG & multi-agent automation)
+- [macai](https://github.com/Renset/macai) (macOS client for Ollama, ChatGPT, and other compatible API back-ends)
+- [Olpaka](https://github.com/Otacon/olpaka) (User-friendly Flutter Web App for Ollama)
+- [OllamaSpring](https://github.com/CrazyNeil/OllamaSpring) (Ollama Client for macOS)
+- [LLocal.in](https://github.com/kartikm7/llocal) (Easy to use Electron Desktop Client for Ollama)
+- [Ollama with Google Mesop](https://github.com/rapidarchitect/ollama_mesop/) (Mesop Chat Client implementation with Ollama)
+- [Kerlig AI](https://www.kerlig.com/) (AI writing assistant for macOS)
+- [AI Studio](https://github.com/MindWorkAI/AI-Studio)
+- [Sidellama](https://github.com/gyopak/sidellama) (browser-based LLM client)
+- [LLMStack](https://github.com/trypromptly/LLMStack) (No-code multi-agent framework to build LLM agents and workflows)
+- [BoltAI for Mac](https://boltai.com) (AI Chat Client for Mac)
+- [Harbor](https://github.com/av/harbor) (Containerized LLM Toolkit with Ollama as default backend)
+
+### Terminal
+
+- [oterm](https://github.com/ggozad/oterm)
+- [Ellama Emacs client](https://github.com/s-kostyaev/ellama)
+- [Emacs client](https://github.com/zweifisch/ollama)
+- [gen.nvim](https://github.com/David-Kunz/gen.nvim)
+- [ollama.nvim](https://github.com/nomnivore/ollama.nvim)
+- [ollero.nvim](https://github.com/marco-souza/ollero.nvim)
+- [ollama-chat.nvim](https://github.com/gerazov/ollama-chat.nvim)
+- [ogpt.nvim](https://github.com/huynle/ogpt.nvim)
+- [gptel Emacs client](https://github.com/karthink/gptel)
+- [Oatmeal](https://github.com/dustinblackman/oatmeal)
+- [cmdh](https://github.com/pgibler/cmdh)
+- [ooo](https://github.com/npahlfer/ooo)
+- [shell-pilot](https://github.com/reid41/shell-pilot)
+- [tenere](https://github.com/pythops/tenere)
+- [llm-ollama](https://github.com/taketwo/llm-ollama) for [Datasette's LLM CLI](https://llm.datasette.io/en/stable/).
+- [typechat-cli](https://github.com/anaisbetts/typechat-cli)
+- [ShellOracle](https://github.com/djcopley/ShellOracle)
+- [tlm](https://github.com/yusufcanb/tlm)
+- [podman-ollama](https://github.com/ericcurtin/podman-ollama)
+- [gollama](https://github.com/sammcj/gollama)
+- [Ollama eBook Summary](https://github.com/cognitivetech/ollama-ebook-summary/)
+
+### Database
+
+- [MindsDB](https://github.com/mindsdb/mindsdb/blob/staging/mindsdb/integrations/handlers/ollama_handler/README.md) (Connects Ollama models with nearly 200 data platforms and apps)
+- [chromem-go](https://github.com/philippgille/chromem-go/blob/v0.5.0/embed_ollama.go) with [example](https://github.com/philippgille/chromem-go/tree/v0.5.0/examples/rag-wikipedia-ollama)
+
+### Package managers
+
+- [Pacman](https://archlinux.org/packages/extra/x86_64/ollama/)
+- [Helm Chart](https://artifacthub.io/packages/helm/ollama-helm/ollama)
+- [Guix channel](https://codeberg.org/tusharhero/ollama-guix)
+
+### Libraries
+
+- [LangChain](https://python.langchain.com/docs/integrations/llms/ollama) and [LangChain.js](https://js.langchain.com/docs/modules/model_io/models/llms/integrations/ollama) with [example](https://js.langchain.com/docs/use_cases/question_answering/local_retrieval_qa)
+- [Firebase Genkit](https://firebase.google.com/docs/genkit/plugins/ollama)
+- [LangChainGo](https://github.com/tmc/langchaingo/) with [example](https://github.com/tmc/langchaingo/tree/main/examples/ollama-completion-example)
+- [LangChain4j](https://github.com/langchain4j/langchain4j) with [example](https://github.com/langchain4j/langchain4j-examples/tree/main/ollama-examples/src/main/java)
+- [LangChainRust](https://github.com/Abraxas-365/langchain-rust) with [example](https://github.com/Abraxas-365/langchain-rust/blob/main/examples/llm_ollama.rs)
+- [LlamaIndex](https://gpt-index.readthedocs.io/en/stable/examples/llm/ollama.html)
+- [LiteLLM](https://github.com/BerriAI/litellm)
+- [OllamaSharp for .NET](https://github.com/awaescher/OllamaSharp)
+- [Ollama for Ruby](https://github.com/gbaptista/ollama-ai)
+- [Ollama-rs for Rust](https://github.com/pepperoni21/ollama-rs)
+- [Ollama-hpp for C++](https://github.com/jmont-dev/ollama-hpp)
+- [Ollama4j for Java](https://github.com/amithkoujalgi/ollama4j)
+- [ModelFusion Typescript Library](https://modelfusion.dev/integration/model-provider/ollama)
+- [OllamaKit for Swift](https://github.com/kevinhermawan/OllamaKit)
+- [Ollama for Dart](https://github.com/breitburg/dart-ollama)
+- [Ollama for Laravel](https://github.com/cloudstudio/ollama-laravel)
+- [LangChainDart](https://github.com/davidmigloz/langchain_dart)
+- [Semantic Kernel - Python](https://github.com/microsoft/semantic-kernel/tree/main/python/semantic_kernel/connectors/ai/ollama)
+- [Haystack](https://github.com/deepset-ai/haystack-integrations/blob/main/integrations/ollama.md)
+- [Elixir LangChain](https://github.com/brainlid/langchain)
+- [Ollama for R - rollama](https://github.com/JBGruber/rollama)
+- [Ollama for R - ollama-r](https://github.com/hauselin/ollama-r)
+- [Ollama-ex for Elixir](https://github.com/lebrunel/ollama-ex)
+- [Ollama Connector for SAP ABAP](https://github.com/b-tocs/abap_btocs_ollama)
+- [Testcontainers](https://testcontainers.com/modules/ollama/)
+- [Portkey](https://portkey.ai/docs/welcome/integration-guides/ollama)
+- [PromptingTools.jl](https://github.com/svilupp/PromptingTools.jl) with an [example](https://svilupp.github.io/PromptingTools.jl/dev/examples/working_with_ollama)
+- [LlamaScript](https://github.com/Project-Llama/llamascript)
+
+### Mobile
+
+- [Enchanted](https://github.com/AugustDev/enchanted)
+- [Maid](https://github.com/Mobile-Artificial-Intelligence/maid)
+
+### Extensions & Plugins
+
+- [Raycast extension](https://github.com/MassimilianoPasquini97/raycast_ollama)
+- [Discollama](https://github.com/mxyng/discollama) (Discord bot inside the Ollama discord channel)
+- [Continue](https://github.com/continuedev/continue)
+- [Obsidian Ollama plugin](https://github.com/hinterdupfinger/obsidian-ollama)
+- [Logseq Ollama plugin](https://github.com/omagdy7/ollama-logseq)
+- [NotesOllama](https://github.com/andersrex/notesollama) (Apple Notes Ollama plugin)
+- [Dagger Chatbot](https://github.com/samalba/dagger-chatbot)
+- [Discord AI Bot](https://github.com/mekb-turtle/discord-ai-bot)
+- [Ollama Telegram Bot](https://github.com/ruecat/ollama-telegram)
+- [Hass Ollama Conversation](https://github.com/ej52/hass-ollama-conversation)
+- [Rivet plugin](https://github.com/abrenneke/rivet-plugin-ollama)
+- [Obsidian BMO Chatbot plugin](https://github.com/longy2k/obsidian-bmo-chatbot)
+- [Cliobot](https://github.com/herval/cliobot) (Telegram bot with Ollama support)
+- [Copilot for Obsidian plugin](https://github.com/logancyang/obsidian-copilot)
+- [Obsidian Local GPT plugin](https://github.com/pfrankov/obsidian-local-gpt)
+- [Open Interpreter](https://docs.openinterpreter.com/language-model-setup/local-models/ollama)
+- [Llama Coder](https://github.com/ex3ndr/llama-coder) (Copilot alternative using Ollama)
+- [Ollama Copilot](https://github.com/bernardo-bruning/ollama-copilot) (Proxy that allows you to use ollama as a copilot like GitHub Copilot)
+- [twinny](https://github.com/rjmacarthy/twinny) (Copilot and Copilot chat alternative using Ollama)
+- [Wingman-AI](https://github.com/RussellCanfield/wingman-ai) (Copilot code and chat alternative using Ollama and Hugging Face)
+- [Page Assist](https://github.com/n4ze3m/page-assist) (Chrome Extension)
+- [AI Telegram Bot](https://github.com/tusharhero/aitelegrambot) (Telegram bot using Ollama in backend)
+- [AI ST Completion](https://github.com/yaroslavyaroslav/OpenAI-sublime-text) (Sublime Text 4 AI assistant plugin with Ollama support)
+- [Discord-Ollama Chat Bot](https://github.com/kevinthedang/discord-ollama) (Generalized TypeScript Discord Bot w/ Tuning Documentation)
+- [Discord AI chat/moderation bot](https://github.com/rapmd73/Companion) (Chat/moderation bot written in Python. Uses Ollama to create personalities.)
+- [Headless Ollama](https://github.com/nischalj10/headless-ollama) (Scripts to automatically install ollama client & models on any OS for apps that depend on ollama server)
+
+### Supported backends
+
+- [llama.cpp](https://github.com/ggerganov/llama.cpp) project founded by Georgi Gerganov.
 
-* https://github.com/ollama/ollama
-* https://github.com/ggerganov/llama.cpp
diff --git a/SECURITY.md b/SECURITY.md
old mode 100755
new mode 100644
diff --git a/api/client.go b/api/client.go
old mode 100755
new mode 100644
diff --git a/api/client_test.go b/api/client_test.go
old mode 100755
new mode 100644
diff --git a/api/types.go b/api/types.go
old mode 100755
new mode 100644
diff --git a/api/types_test.go b/api/types_test.go
old mode 100755
new mode 100644
diff --git a/app/.gitignore b/app/.gitignore
old mode 100755
new mode 100644
diff --git a/app/README.md b/app/README.md
old mode 100755
new mode 100644
diff --git a/app/assets/app.ico b/app/assets/app.ico
old mode 100755
new mode 100644
diff --git a/app/assets/assets.go b/app/assets/assets.go
old mode 100755
new mode 100644
diff --git a/app/assets/setup.bmp b/app/assets/setup.bmp
old mode 100755
new mode 100644
diff --git a/app/assets/tray.ico b/app/assets/tray.ico
old mode 100755
new mode 100644
diff --git a/app/assets/tray_upgrade.ico b/app/assets/tray_upgrade.ico
old mode 100755
new mode 100644
diff --git a/app/lifecycle/getstarted_nonwindows.go b/app/lifecycle/getstarted_nonwindows.go
old mode 100755
new mode 100644
diff --git a/app/lifecycle/getstarted_windows.go b/app/lifecycle/getstarted_windows.go
old mode 100755
new mode 100644
diff --git a/app/lifecycle/lifecycle.go b/app/lifecycle/lifecycle.go
old mode 100755
new mode 100644
diff --git a/app/lifecycle/logging.go b/app/lifecycle/logging.go
old mode 100755
new mode 100644
diff --git a/app/lifecycle/logging_nonwindows.go b/app/lifecycle/logging_nonwindows.go
old mode 100755
new mode 100644
diff --git a/app/lifecycle/logging_test.go b/app/lifecycle/logging_test.go
old mode 100755
new mode 100644
diff --git a/app/lifecycle/logging_windows.go b/app/lifecycle/logging_windows.go
old mode 100755
new mode 100644
diff --git a/app/lifecycle/paths.go b/app/lifecycle/paths.go
old mode 100755
new mode 100644
diff --git a/app/lifecycle/server.go b/app/lifecycle/server.go
old mode 100755
new mode 100644
diff --git a/app/lifecycle/server_unix.go b/app/lifecycle/server_unix.go
old mode 100755
new mode 100644
diff --git a/app/lifecycle/server_windows.go b/app/lifecycle/server_windows.go
old mode 100755
new mode 100644
diff --git a/app/lifecycle/updater.go b/app/lifecycle/updater.go
old mode 100755
new mode 100644
diff --git a/app/lifecycle/updater_nonwindows.go b/app/lifecycle/updater_nonwindows.go
old mode 100755
new mode 100644
diff --git a/app/lifecycle/updater_windows.go b/app/lifecycle/updater_windows.go
old mode 100755
new mode 100644
diff --git a/app/main.go b/app/main.go
old mode 100755
new mode 100644
diff --git a/app/ollama.iss b/app/ollama.iss
old mode 100755
new mode 100644
diff --git a/app/ollama.rc b/app/ollama.rc
old mode 100755
new mode 100644
diff --git a/app/ollama_welcome.ps1 b/app/ollama_welcome.ps1
old mode 100755
new mode 100644
diff --git a/app/store/store.go b/app/store/store.go
old mode 100755
new mode 100644
diff --git a/app/store/store_darwin.go b/app/store/store_darwin.go
old mode 100755
new mode 100644
diff --git a/app/store/store_linux.go b/app/store/store_linux.go
old mode 100755
new mode 100644
diff --git a/app/store/store_windows.go b/app/store/store_windows.go
old mode 100755
new mode 100644
diff --git a/app/tray/commontray/types.go b/app/tray/commontray/types.go
old mode 100755
new mode 100644
diff --git a/app/tray/tray.go b/app/tray/tray.go
old mode 100755
new mode 100644
diff --git a/app/tray/tray_nonwindows.go b/app/tray/tray_nonwindows.go
old mode 100755
new mode 100644
diff --git a/app/tray/tray_windows.go b/app/tray/tray_windows.go
old mode 100755
new mode 100644
diff --git a/app/tray/wintray/eventloop.go b/app/tray/wintray/eventloop.go
old mode 100755
new mode 100644
diff --git a/app/tray/wintray/menus.go b/app/tray/wintray/menus.go
old mode 100755
new mode 100644
diff --git a/app/tray/wintray/messages.go b/app/tray/wintray/messages.go
old mode 100755
new mode 100644
diff --git a/app/tray/wintray/notifyicon.go b/app/tray/wintray/notifyicon.go
old mode 100755
new mode 100644
diff --git a/app/tray/wintray/tray.go b/app/tray/wintray/tray.go
old mode 100755
new mode 100644
diff --git a/app/tray/wintray/w32api.go b/app/tray/wintray/w32api.go
old mode 100755
new mode 100644
diff --git a/app/tray/wintray/winclass.go b/app/tray/wintray/winclass.go
old mode 100755
new mode 100644
diff --git a/auth/auth.go b/auth/auth.go
old mode 100755
new mode 100644
diff --git a/cmd/cmd.go b/cmd/cmd.go
old mode 100755
new mode 100644
diff --git a/cmd/interactive.go b/cmd/interactive.go
old mode 100755
new mode 100644
diff --git a/cmd/interactive_test.go b/cmd/interactive_test.go
old mode 100755
new mode 100644
diff --git a/cmd/start.go b/cmd/start.go
old mode 100755
new mode 100644
diff --git a/cmd/start_darwin.go b/cmd/start_darwin.go
old mode 100755
new mode 100644
diff --git a/cmd/start_default.go b/cmd/start_default.go
old mode 100755
new mode 100644
diff --git a/cmd/start_windows.go b/cmd/start_windows.go
old mode 100755
new mode 100644
diff --git a/convert/convert.go b/convert/convert.go
old mode 100755
new mode 100644
diff --git a/convert/convert_gemma.go b/convert/convert_gemma.go
old mode 100755
new mode 100644
diff --git a/convert/convert_llama.go b/convert/convert_llama.go
old mode 100755
new mode 100644
diff --git a/convert/convert_mixtral.go b/convert/convert_mixtral.go
old mode 100755
new mode 100644
diff --git a/convert/convert_test.go b/convert/convert_test.go
old mode 100755
new mode 100644
diff --git a/convert/fs.go b/convert/fs.go
old mode 100755
new mode 100644
diff --git a/convert/reader.go b/convert/reader.go
old mode 100755
new mode 100644
diff --git a/convert/reader_safetensors.go b/convert/reader_safetensors.go
old mode 100755
new mode 100644
diff --git a/convert/reader_torch.go b/convert/reader_torch.go
old mode 100755
new mode 100644
diff --git a/convert/sentencepiece/sentencepiece_model.pb.go b/convert/sentencepiece/sentencepiece_model.pb.go
old mode 100755
new mode 100644
diff --git a/convert/sentencepiece_model.proto b/convert/sentencepiece_model.proto
old mode 100755
new mode 100644
diff --git a/convert/testdata/Meta-Llama-3-8B-Instruct.json b/convert/testdata/Meta-Llama-3-8B-Instruct.json
old mode 100755
new mode 100644
diff --git a/convert/testdata/Mistral-7B-Instruct-v0.2.json b/convert/testdata/Mistral-7B-Instruct-v0.2.json
old mode 100755
new mode 100644
diff --git a/convert/testdata/Mixtral-8x7B-Instruct-v0.1.json b/convert/testdata/Mixtral-8x7B-Instruct-v0.1.json
old mode 100755
new mode 100644
diff --git a/convert/testdata/gemma-2b-it.json b/convert/testdata/gemma-2b-it.json
old mode 100755
new mode 100644
diff --git a/convert/tokenizer.go b/convert/tokenizer.go
old mode 100755
new mode 100644
diff --git a/convert/tokenizer_spm.go b/convert/tokenizer_spm.go
old mode 100755
new mode 100644
diff --git a/docs/README.md b/docs/README.md
old mode 100755
new mode 100644
diff --git a/docs/api.md b/docs/api.md
old mode 100755
new mode 100644
diff --git a/docs/development.md b/docs/development.md
old mode 100755
new mode 100644
diff --git a/docs/docker.md b/docs/docker.md
old mode 100755
new mode 100644
diff --git a/docs/faq.md b/docs/faq.md
old mode 100755
new mode 100644
diff --git a/docs/gpu.md b/docs/gpu.md
old mode 100755
new mode 100644
diff --git a/docs/import.md b/docs/import.md
old mode 100755
new mode 100644
diff --git a/docs/linux.md b/docs/linux.md
old mode 100755
new mode 100644
diff --git a/docs/modelfile.md b/docs/modelfile.md
old mode 100755
new mode 100644
diff --git a/docs/openai.md b/docs/openai.md
old mode 100755
new mode 100644
diff --git a/docs/template.md b/docs/template.md
old mode 100755
new mode 100644
diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md
old mode 100755
new mode 100644
diff --git a/docs/tutorials.md b/docs/tutorials.md
old mode 100755
new mode 100644
diff --git a/docs/tutorials/fly-gpu.md b/docs/tutorials/fly-gpu.md
old mode 100755
new mode 100644
diff --git a/docs/tutorials/langchainjs.md b/docs/tutorials/langchainjs.md
old mode 100755
new mode 100644
diff --git a/docs/tutorials/langchainpy.md b/docs/tutorials/langchainpy.md
old mode 100755
new mode 100644
diff --git a/docs/tutorials/nvidia-jetson.md b/docs/tutorials/nvidia-jetson.md
old mode 100755
new mode 100644
diff --git a/docs/windows.md b/docs/windows.md
old mode 100755
new mode 100644
diff --git a/envconfig/config.go b/envconfig/config.go
old mode 100755
new mode 100644
diff --git a/envconfig/config_test.go b/envconfig/config_test.go
old mode 100755
new mode 100644
diff --git a/examples/.gitignore b/examples/.gitignore
old mode 100755
new mode 100644
diff --git a/examples/README.md b/examples/README.md
old mode 100755
new mode 100644
diff --git a/examples/flyio/.gitignore b/examples/flyio/.gitignore
old mode 100755
new mode 100644
diff --git a/examples/flyio/README.md b/examples/flyio/README.md
old mode 100755
new mode 100644
diff --git a/examples/go-chat/main.go b/examples/go-chat/main.go
old mode 100755
new mode 100644
diff --git a/examples/go-generate-streaming/main.go b/examples/go-generate-streaming/main.go
old mode 100755
new mode 100644
diff --git a/examples/go-generate/main.go b/examples/go-generate/main.go
old mode 100755
new mode 100644
diff --git a/examples/go-http-generate/main.go b/examples/go-http-generate/main.go
old mode 100755
new mode 100644
diff --git a/examples/go-multimodal/main.go b/examples/go-multimodal/main.go
old mode 100755
new mode 100644
diff --git a/examples/go-pull-progress/main.go b/examples/go-pull-progress/main.go
old mode 100755
new mode 100644
diff --git a/examples/jupyter-notebook/README.md b/examples/jupyter-notebook/README.md
old mode 100755
new mode 100644
diff --git a/examples/jupyter-notebook/ollama.ipynb b/examples/jupyter-notebook/ollama.ipynb
old mode 100755
new mode 100644
diff --git a/examples/kubernetes/README.md b/examples/kubernetes/README.md
old mode 100755
new mode 100644
diff --git a/examples/kubernetes/cpu.yaml b/examples/kubernetes/cpu.yaml
old mode 100755
new mode 100644
diff --git a/examples/kubernetes/gpu.yaml b/examples/kubernetes/gpu.yaml
old mode 100755
new mode 100644
diff --git a/examples/langchain-python-rag-document/README.md b/examples/langchain-python-rag-document/README.md
old mode 100755
new mode 100644
diff --git a/examples/langchain-python-rag-document/main.py b/examples/langchain-python-rag-document/main.py
old mode 100755
new mode 100644
diff --git a/examples/langchain-python-rag-document/requirements.txt b/examples/langchain-python-rag-document/requirements.txt
old mode 100755
new mode 100644
diff --git a/examples/langchain-python-rag-privategpt/.gitignore b/examples/langchain-python-rag-privategpt/.gitignore
old mode 100755
new mode 100644
diff --git a/examples/langchain-python-rag-privategpt/LICENSE b/examples/langchain-python-rag-privategpt/LICENSE
old mode 100755
new mode 100644
diff --git a/examples/langchain-python-rag-privategpt/README.md b/examples/langchain-python-rag-privategpt/README.md
old mode 100755
new mode 100644
diff --git a/examples/langchain-python-rag-privategpt/constants.py b/examples/langchain-python-rag-privategpt/constants.py
old mode 100755
new mode 100644
diff --git a/examples/langchain-python-rag-privategpt/ingest.py b/examples/langchain-python-rag-privategpt/ingest.py
old mode 100755
new mode 100644
diff --git a/examples/langchain-python-rag-privategpt/poetry.lock b/examples/langchain-python-rag-privategpt/poetry.lock
old mode 100755
new mode 100644
diff --git a/examples/langchain-python-rag-privategpt/privateGPT.py b/examples/langchain-python-rag-privategpt/privateGPT.py
old mode 100755
new mode 100644
diff --git a/examples/langchain-python-rag-privategpt/pyproject.toml b/examples/langchain-python-rag-privategpt/pyproject.toml
old mode 100755
new mode 100644
diff --git a/examples/langchain-python-rag-privategpt/requirements.txt b/examples/langchain-python-rag-privategpt/requirements.txt
old mode 100755
new mode 100644
diff --git a/examples/langchain-python-rag-websummary/README.md b/examples/langchain-python-rag-websummary/README.md
old mode 100755
new mode 100644
diff --git a/examples/langchain-python-rag-websummary/main.py b/examples/langchain-python-rag-websummary/main.py
old mode 100755
new mode 100644
diff --git a/examples/langchain-python-rag-websummary/requirements.txt b/examples/langchain-python-rag-websummary/requirements.txt
old mode 100755
new mode 100644
diff --git a/examples/langchain-python-simple/README.md b/examples/langchain-python-simple/README.md
old mode 100755
new mode 100644
diff --git a/examples/langchain-python-simple/main.py b/examples/langchain-python-simple/main.py
old mode 100755
new mode 100644
diff --git a/examples/langchain-python-simple/requirements.txt b/examples/langchain-python-simple/requirements.txt
old mode 100755
new mode 100644
diff --git a/examples/langchain-typescript-simple/README.md b/examples/langchain-typescript-simple/README.md
old mode 100755
new mode 100644
diff --git a/examples/langchain-typescript-simple/main.ts b/examples/langchain-typescript-simple/main.ts
old mode 100755
new mode 100644
diff --git a/examples/langchain-typescript-simple/package-lock.json b/examples/langchain-typescript-simple/package-lock.json
old mode 100755
new mode 100644
diff --git a/examples/langchain-typescript-simple/package.json b/examples/langchain-typescript-simple/package.json
old mode 100755
new mode 100644
diff --git a/examples/modelfile-mario/Modelfile b/examples/modelfile-mario/Modelfile
old mode 100755
new mode 100644
diff --git a/examples/modelfile-mario/logo.png b/examples/modelfile-mario/logo.png
old mode 100755
new mode 100644
diff --git a/examples/modelfile-mario/readme.md b/examples/modelfile-mario/readme.md
old mode 100755
new mode 100644
diff --git a/examples/python-dockerit/Modelfile b/examples/python-dockerit/Modelfile
old mode 100755
new mode 100644
diff --git a/examples/python-dockerit/README.md b/examples/python-dockerit/README.md
old mode 100755
new mode 100644
diff --git a/examples/python-dockerit/dockerit.py b/examples/python-dockerit/dockerit.py
old mode 100755
new mode 100644
diff --git a/examples/python-dockerit/requirements.txt b/examples/python-dockerit/requirements.txt
old mode 100755
new mode 100644
diff --git a/examples/python-json-datagenerator/predefinedschema.py b/examples/python-json-datagenerator/predefinedschema.py
old mode 100755
new mode 100644
diff --git a/examples/python-json-datagenerator/randomaddresses.py b/examples/python-json-datagenerator/randomaddresses.py
old mode 100755
new mode 100644
diff --git a/examples/python-json-datagenerator/readme.md b/examples/python-json-datagenerator/readme.md
old mode 100755
new mode 100644
diff --git a/examples/python-json-datagenerator/requirements.txt b/examples/python-json-datagenerator/requirements.txt
old mode 100755
new mode 100644
diff --git a/examples/python-loganalysis/Modelfile b/examples/python-loganalysis/Modelfile
old mode 100755
new mode 100644
diff --git a/examples/python-loganalysis/loganalysis.py b/examples/python-loganalysis/loganalysis.py
old mode 100755
new mode 100644
diff --git a/examples/python-loganalysis/logtest.logfile b/examples/python-loganalysis/logtest.logfile
old mode 100755
new mode 100644
diff --git a/examples/python-loganalysis/readme.md b/examples/python-loganalysis/readme.md
old mode 100755
new mode 100644
diff --git a/examples/python-loganalysis/requirements.txt b/examples/python-loganalysis/requirements.txt
old mode 100755
new mode 100644
diff --git a/examples/python-rag-newssummary/README.md b/examples/python-rag-newssummary/README.md
old mode 100755
new mode 100644
diff --git a/examples/python-rag-newssummary/requirements.txt b/examples/python-rag-newssummary/requirements.txt
old mode 100755
new mode 100644
diff --git a/examples/python-rag-newssummary/summ.py b/examples/python-rag-newssummary/summ.py
old mode 100755
new mode 100644
diff --git a/examples/python-rag-newssummary/utils.py b/examples/python-rag-newssummary/utils.py
old mode 100755
new mode 100644
diff --git a/examples/python-simplechat/client.py b/examples/python-simplechat/client.py
old mode 100755
new mode 100644
diff --git a/examples/python-simplechat/readme.md b/examples/python-simplechat/readme.md
old mode 100755
new mode 100644
diff --git a/examples/python-simplechat/requirements.txt b/examples/python-simplechat/requirements.txt
old mode 100755
new mode 100644
diff --git a/examples/python-simplegenerate/README.md b/examples/python-simplegenerate/README.md
old mode 100755
new mode 100644
diff --git a/examples/python-simplegenerate/client.py b/examples/python-simplegenerate/client.py
old mode 100755
new mode 100644
diff --git a/examples/python-simplegenerate/requirements.txt b/examples/python-simplegenerate/requirements.txt
old mode 100755
new mode 100644
diff --git a/examples/typescript-functioncalling/extractemail.ts b/examples/typescript-functioncalling/extractemail.ts
old mode 100755
new mode 100644
diff --git a/examples/typescript-functioncalling/extractwp.ts b/examples/typescript-functioncalling/extractwp.ts
old mode 100755
new mode 100644
diff --git a/examples/typescript-functioncalling/info.txt b/examples/typescript-functioncalling/info.txt
old mode 100755
new mode 100644
diff --git a/examples/typescript-functioncalling/package-lock.json b/examples/typescript-functioncalling/package-lock.json
old mode 100755
new mode 100644
diff --git a/examples/typescript-functioncalling/package.json b/examples/typescript-functioncalling/package.json
old mode 100755
new mode 100644
diff --git a/examples/typescript-functioncalling/readme.md b/examples/typescript-functioncalling/readme.md
old mode 100755
new mode 100644
diff --git a/examples/typescript-functioncalling/wp.txt b/examples/typescript-functioncalling/wp.txt
old mode 100755
new mode 100644
diff --git a/examples/typescript-mentors/.gitignore b/examples/typescript-mentors/.gitignore
old mode 100755
new mode 100644
diff --git a/examples/typescript-mentors/README.md b/examples/typescript-mentors/README.md
old mode 100755
new mode 100644
diff --git a/examples/typescript-mentors/character-generator.ts b/examples/typescript-mentors/character-generator.ts
old mode 100755
new mode 100644
diff --git a/examples/typescript-mentors/mentors.ts b/examples/typescript-mentors/mentors.ts
old mode 100755
new mode 100644
diff --git a/examples/typescript-mentors/package.json b/examples/typescript-mentors/package.json
old mode 100755
new mode 100644
diff --git a/examples/typescript-simplechat/client.ts b/examples/typescript-simplechat/client.ts
old mode 100755
new mode 100644
diff --git a/examples/typescript-simplechat/package.json b/examples/typescript-simplechat/package.json
old mode 100755
new mode 100644
diff --git a/examples/typescript-simplechat/readme.md b/examples/typescript-simplechat/readme.md
old mode 100755
new mode 100644
diff --git a/format/bytes.go b/format/bytes.go
old mode 100755
new mode 100644
diff --git a/format/format.go b/format/format.go
old mode 100755
new mode 100644
diff --git a/format/format_test.go b/format/format_test.go
old mode 100755
new mode 100644
diff --git a/format/time.go b/format/time.go
old mode 100755
new mode 100644
diff --git a/format/time_test.go b/format/time_test.go
old mode 100755
new mode 100644
diff --git a/go.mod b/go.mod
old mode 100755
new mode 100644
diff --git a/go.sum b/go.sum
old mode 100755
new mode 100644
diff --git a/gpu/amd_common.go b/gpu/amd_common.go
old mode 100755
new mode 100644
diff --git a/gpu/amd_hip_windows.go b/gpu/amd_hip_windows.go
old mode 100755
new mode 100644
diff --git a/gpu/amd_linux.go b/gpu/amd_linux.go
old mode 100755
new mode 100644
diff --git a/gpu/amd_windows.go b/gpu/amd_windows.go
old mode 100755
new mode 100644
diff --git a/gpu/assets.go b/gpu/assets.go
old mode 100755
new mode 100644
diff --git a/gpu/cpu_common.go b/gpu/cpu_common.go
old mode 100755
new mode 100644
diff --git a/gpu/cuda_common.go b/gpu/cuda_common.go
old mode 100755
new mode 100644
diff --git a/gpu/gpu.go b/gpu/gpu.go
old mode 100755
new mode 100644
diff --git a/gpu/gpu_darwin.go b/gpu/gpu_darwin.go
old mode 100755
new mode 100644
diff --git a/gpu/gpu_info.h b/gpu/gpu_info.h
old mode 100755
new mode 100644
diff --git a/gpu/gpu_info_cudart.c b/gpu/gpu_info_cudart.c
old mode 100755
new mode 100644
diff --git a/gpu/gpu_info_cudart.h b/gpu/gpu_info_cudart.h
old mode 100755
new mode 100644
diff --git a/gpu/gpu_info_darwin.h b/gpu/gpu_info_darwin.h
old mode 100755
new mode 100644
diff --git a/gpu/gpu_info_darwin.m b/gpu/gpu_info_darwin.m
old mode 100755
new mode 100644
diff --git a/gpu/gpu_info_nvcuda.c b/gpu/gpu_info_nvcuda.c
old mode 100755
new mode 100644
diff --git a/gpu/gpu_info_nvcuda.h b/gpu/gpu_info_nvcuda.h
old mode 100755
new mode 100644
diff --git a/gpu/gpu_info_nvml.c b/gpu/gpu_info_nvml.c
old mode 100755
new mode 100644
diff --git a/gpu/gpu_info_nvml.h b/gpu/gpu_info_nvml.h
old mode 100755
new mode 100644
diff --git a/gpu/gpu_info_oneapi.c b/gpu/gpu_info_oneapi.c
old mode 100755
new mode 100644
diff --git a/gpu/gpu_info_oneapi.h b/gpu/gpu_info_oneapi.h
old mode 100755
new mode 100644
diff --git a/gpu/gpu_linux.go b/gpu/gpu_linux.go
old mode 100755
new mode 100644
diff --git a/gpu/gpu_oneapi.go b/gpu/gpu_oneapi.go
old mode 100755
new mode 100644
diff --git a/gpu/gpu_test.go b/gpu/gpu_test.go
old mode 100755
new mode 100644
diff --git a/gpu/gpu_windows.go b/gpu/gpu_windows.go
old mode 100755
new mode 100644
diff --git a/gpu/types.go b/gpu/types.go
old mode 100755
new mode 100644
diff --git a/integration/README.md b/integration/README.md
old mode 100755
new mode 100644
diff --git a/integration/basic_test.go b/integration/basic_test.go
old mode 100755
new mode 100644
diff --git a/integration/concurrency_test.go b/integration/concurrency_test.go
old mode 100755
new mode 100644
diff --git a/integration/context_test.go b/integration/context_test.go
old mode 100755
new mode 100644
diff --git a/integration/embed_test.go b/integration/embed_test.go
old mode 100755
new mode 100644
diff --git a/integration/llm_image_test.go b/integration/llm_image_test.go
old mode 100755
new mode 100644
diff --git a/integration/llm_test.go b/integration/llm_test.go
old mode 100755
new mode 100644
diff --git a/integration/max_queue_test.go b/integration/max_queue_test.go
old mode 100755
new mode 100644
diff --git a/integration/utils_test.go b/integration/utils_test.go
old mode 100755
new mode 100644
diff --git a/llm/ext_server/CMakeLists.txt b/llm/ext_server/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/llm/ext_server/httplib.h b/llm/ext_server/httplib.h
old mode 100755
new mode 100644
diff --git a/llm/ext_server/json.hpp b/llm/ext_server/json.hpp
old mode 100755
new mode 100644
diff --git a/llm/ext_server/server.cpp b/llm/ext_server/server.cpp
old mode 100755
new mode 100644
diff --git a/llm/ext_server/utils.hpp b/llm/ext_server/utils.hpp
old mode 100755
new mode 100644
diff --git a/llm/filetype.go b/llm/filetype.go
old mode 100755
new mode 100644
diff --git a/llm/generate/gen_common.sh b/llm/generate/gen_common.sh
old mode 100755
new mode 100644
diff --git a/llm/generate/gen_darwin.sh b/llm/generate/gen_darwin.sh
old mode 100755
new mode 100644
diff --git a/llm/generate/gen_linux.sh b/llm/generate/gen_linux.sh
old mode 100755
new mode 100644
diff --git a/llm/generate/gen_windows.ps1 b/llm/generate/gen_windows.ps1
old mode 100755
new mode 100644
diff --git a/llm/generate/generate_darwin.go b/llm/generate/generate_darwin.go
old mode 100755
new mode 100644
diff --git a/llm/generate/generate_linux.go b/llm/generate/generate_linux.go
old mode 100755
new mode 100644
diff --git a/llm/generate/generate_windows.go b/llm/generate/generate_windows.go
old mode 100755
new mode 100644
diff --git a/llm/ggla.go b/llm/ggla.go
old mode 100755
new mode 100644
diff --git a/llm/ggml.go b/llm/ggml.go
old mode 100755
new mode 100644
diff --git a/llm/ggml_test.go b/llm/ggml_test.go
old mode 100755
new mode 100644
diff --git a/llm/gguf.go b/llm/gguf.go
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/.clang-tidy b/llm/llama.cpp/.clang-tidy
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/.devops/cloud-v-pipeline b/llm/llama.cpp/.devops/cloud-v-pipeline
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/.devops/full-cuda.Dockerfile b/llm/llama.cpp/.devops/full-cuda.Dockerfile
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/.devops/full-rocm.Dockerfile b/llm/llama.cpp/.devops/full-rocm.Dockerfile
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/.devops/full.Dockerfile b/llm/llama.cpp/.devops/full.Dockerfile
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/.devops/llama-cli-cuda.Dockerfile b/llm/llama.cpp/.devops/llama-cli-cuda.Dockerfile
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/.devops/llama-cli-intel.Dockerfile b/llm/llama.cpp/.devops/llama-cli-intel.Dockerfile
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/.devops/llama-cli-rocm.Dockerfile b/llm/llama.cpp/.devops/llama-cli-rocm.Dockerfile
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/.devops/llama-cli-vulkan.Dockerfile b/llm/llama.cpp/.devops/llama-cli-vulkan.Dockerfile
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/.devops/llama-cli.Dockerfile b/llm/llama.cpp/.devops/llama-cli.Dockerfile
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/.devops/llama-cpp-cuda.srpm.spec b/llm/llama.cpp/.devops/llama-cpp-cuda.srpm.spec
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/.devops/llama-cpp.srpm.spec b/llm/llama.cpp/.devops/llama-cpp.srpm.spec
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/.devops/llama-server-cuda.Dockerfile b/llm/llama.cpp/.devops/llama-server-cuda.Dockerfile
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/.devops/llama-server-intel.Dockerfile b/llm/llama.cpp/.devops/llama-server-intel.Dockerfile
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/.devops/llama-server-rocm.Dockerfile b/llm/llama.cpp/.devops/llama-server-rocm.Dockerfile
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/.devops/llama-server-vulkan.Dockerfile b/llm/llama.cpp/.devops/llama-server-vulkan.Dockerfile
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/.devops/llama-server.Dockerfile b/llm/llama.cpp/.devops/llama-server.Dockerfile
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/.devops/nix/apps.nix b/llm/llama.cpp/.devops/nix/apps.nix
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/.devops/nix/devshells.nix b/llm/llama.cpp/.devops/nix/devshells.nix
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/.devops/nix/docker.nix b/llm/llama.cpp/.devops/nix/docker.nix
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/.devops/nix/jetson-support.nix b/llm/llama.cpp/.devops/nix/jetson-support.nix
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/.devops/nix/nixpkgs-instances.nix b/llm/llama.cpp/.devops/nix/nixpkgs-instances.nix
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/.devops/nix/package.nix b/llm/llama.cpp/.devops/nix/package.nix
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/.devops/nix/scope.nix b/llm/llama.cpp/.devops/nix/scope.nix
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/.devops/nix/sif.nix b/llm/llama.cpp/.devops/nix/sif.nix
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/.devops/tools.sh b/llm/llama.cpp/.devops/tools.sh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/.dockerignore b/llm/llama.cpp/.dockerignore
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/.ecrc b/llm/llama.cpp/.ecrc
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/.editorconfig b/llm/llama.cpp/.editorconfig
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/.flake8 b/llm/llama.cpp/.flake8
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/.github/ISSUE_TEMPLATE/01-bug-low.yml b/llm/llama.cpp/.github/ISSUE_TEMPLATE/01-bug-low.yml
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/.github/ISSUE_TEMPLATE/02-bug-medium.yml b/llm/llama.cpp/.github/ISSUE_TEMPLATE/02-bug-medium.yml
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/.github/ISSUE_TEMPLATE/03-bug-high.yml b/llm/llama.cpp/.github/ISSUE_TEMPLATE/03-bug-high.yml
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/.github/ISSUE_TEMPLATE/04-bug-critical.yml b/llm/llama.cpp/.github/ISSUE_TEMPLATE/04-bug-critical.yml
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/.github/ISSUE_TEMPLATE/05-enhancement.yml b/llm/llama.cpp/.github/ISSUE_TEMPLATE/05-enhancement.yml
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/.github/ISSUE_TEMPLATE/06-research.yml b/llm/llama.cpp/.github/ISSUE_TEMPLATE/06-research.yml
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/.github/ISSUE_TEMPLATE/07-refactor.yml b/llm/llama.cpp/.github/ISSUE_TEMPLATE/07-refactor.yml
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/.github/ISSUE_TEMPLATE/config.yml b/llm/llama.cpp/.github/ISSUE_TEMPLATE/config.yml
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/.github/labeler.yml b/llm/llama.cpp/.github/labeler.yml
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/.github/pull_request_template.md b/llm/llama.cpp/.github/pull_request_template.md
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/.github/workflows/bench.yml b/llm/llama.cpp/.github/workflows/bench.yml
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/.github/workflows/build.yml b/llm/llama.cpp/.github/workflows/build.yml
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/.github/workflows/close-issue.yml b/llm/llama.cpp/.github/workflows/close-issue.yml
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/.github/workflows/docker.yml b/llm/llama.cpp/.github/workflows/docker.yml
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/.github/workflows/editorconfig.yml b/llm/llama.cpp/.github/workflows/editorconfig.yml
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/.github/workflows/gguf-publish.yml b/llm/llama.cpp/.github/workflows/gguf-publish.yml
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/.github/workflows/labeler.yml b/llm/llama.cpp/.github/workflows/labeler.yml
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/.github/workflows/nix-ci-aarch64.yml b/llm/llama.cpp/.github/workflows/nix-ci-aarch64.yml
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/.github/workflows/nix-ci.yml b/llm/llama.cpp/.github/workflows/nix-ci.yml
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/.github/workflows/nix-flake-update.yml b/llm/llama.cpp/.github/workflows/nix-flake-update.yml
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/.github/workflows/nix-publish-flake.yml b/llm/llama.cpp/.github/workflows/nix-publish-flake.yml
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/.github/workflows/python-check-requirements.yml b/llm/llama.cpp/.github/workflows/python-check-requirements.yml
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/.github/workflows/python-lint.yml b/llm/llama.cpp/.github/workflows/python-lint.yml
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/.github/workflows/python-type-check.yml b/llm/llama.cpp/.github/workflows/python-type-check.yml
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/.github/workflows/server.yml b/llm/llama.cpp/.github/workflows/server.yml
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/.gitignore b/llm/llama.cpp/.gitignore
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/.gitmodules b/llm/llama.cpp/.gitmodules
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/.pre-commit-config.yaml b/llm/llama.cpp/.pre-commit-config.yaml
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/AUTHORS b/llm/llama.cpp/AUTHORS
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/CMakeLists.txt b/llm/llama.cpp/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/CMakePresets.json b/llm/llama.cpp/CMakePresets.json
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/CONTRIBUTING.md b/llm/llama.cpp/CONTRIBUTING.md
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/LICENSE b/llm/llama.cpp/LICENSE
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/Makefile b/llm/llama.cpp/Makefile
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/Package.swift b/llm/llama.cpp/Package.swift
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/README.md b/llm/llama.cpp/README.md
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/SECURITY.md b/llm/llama.cpp/SECURITY.md
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ci/README.md b/llm/llama.cpp/ci/README.md
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ci/run.sh b/llm/llama.cpp/ci/run.sh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/cmake/arm64-windows-llvm.cmake b/llm/llama.cpp/cmake/arm64-windows-llvm.cmake
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/cmake/arm64-windows-msvc.cmake b/llm/llama.cpp/cmake/arm64-windows-msvc.cmake
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/cmake/build-info.cmake b/llm/llama.cpp/cmake/build-info.cmake
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/cmake/git-vars.cmake b/llm/llama.cpp/cmake/git-vars.cmake
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/cmake/llama-config.cmake.in b/llm/llama.cpp/cmake/llama-config.cmake.in
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/cmake/llama.pc.in b/llm/llama.cpp/cmake/llama.pc.in
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/common/CMakeLists.txt b/llm/llama.cpp/common/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/common/base64.hpp b/llm/llama.cpp/common/base64.hpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/common/build-info.cpp.in b/llm/llama.cpp/common/build-info.cpp.in
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/common/cmake/build-info-gen-cpp.cmake b/llm/llama.cpp/common/cmake/build-info-gen-cpp.cmake
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/common/common.cpp b/llm/llama.cpp/common/common.cpp
old mode 100755
new mode 100644
index 2e8374d50cafad99cb9f59eccb4ac5c22ed6b32b..56cad1af975f066fcb77af652a04e896eb3361a9
--- a/llm/llama.cpp/common/common.cpp
+++ b/llm/llama.cpp/common/common.cpp
@@ -2110,9 +2110,21 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
         loaded_la.adapter = llama_lora_adapter_init(model, la.path.c_str());
         if (loaded_la.adapter == nullptr) {
             fprintf(stderr, "%s: error: failed to apply lora adapter '%s'\n", __func__, la.path.c_str());
-            llama_free(lctx);
-            llama_free_model(model);
-            return iparams;
+
+            // if that fails, try loading as ggla for compatibility
+            int err = llama_model_apply_lora_from_file(model,
+                                                    la.path.c_str(),
+                                                    la.scale,
+                                                    nullptr,
+                                                    params.n_threads);
+            if (err != 0) {
+                fprintf(stderr, "%s: error: failed to apply lora adapter\n", __func__);
+                llama_free(lctx);
+                llama_free_model(model);
+                return iparams;
+            } else {
+                break;
+            }
         }
         iparams.lora_adapters.push_back(loaded_la); // copy to list of loaded adapters
     }
@@ -2178,6 +2190,8 @@ struct llama_model_params llama_model_params_from_gpt_params(const gpt_params &
     mparams.use_mmap        = params.use_mmap;
     mparams.use_mlock       = params.use_mlock;
     mparams.check_tensors   = params.check_tensors;
+    mparams.progress_callback = params.progress_callback;
+    mparams.progress_callback_user_data = params.progress_callback_user_data;
     if (params.kv_overrides.empty()) {
         mparams.kv_overrides = NULL;
     } else {
diff --git a/llm/llama.cpp/common/common.h b/llm/llama.cpp/common/common.h
old mode 100755
new mode 100644
index d88966ece20aa73da262d047e6c0137e7a0e972f..c238cd7417ddd28b965af154ee86a9d88ec0294b
--- a/llm/llama.cpp/common/common.h
+++ b/llm/llama.cpp/common/common.h
@@ -194,6 +194,13 @@ struct gpt_params {
     std::string mmproj = "";        // path to multimodal projector
     std::vector<std::string> image; // path to image file(s)
 
+    // Called with a progress value between 0.0 and 1.0. Pass NULL to disable.
+    // If the provided progress_callback returns true, model loading continues.
+    // If it returns false, model loading is immediately aborted.
+    llama_progress_callback progress_callback = NULL;
+    // context pointer passed to the progress callback (NULL unless the caller sets it)
+    void * progress_callback_user_data = NULL;
+
     // embedding
     bool embedding         = false; // get only sentence embedding
     int32_t embd_normalize = 2;     // normalisation for embendings (-1=none, 0=max absolute int16, 1=taxicab, 2=euclidean, >2=p-norm)
diff --git a/llm/llama.cpp/common/console.cpp b/llm/llama.cpp/common/console.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/common/console.h b/llm/llama.cpp/common/console.h
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/common/grammar-parser.cpp b/llm/llama.cpp/common/grammar-parser.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/common/grammar-parser.h b/llm/llama.cpp/common/grammar-parser.h
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/common/json-schema-to-grammar.cpp b/llm/llama.cpp/common/json-schema-to-grammar.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/common/json-schema-to-grammar.h b/llm/llama.cpp/common/json-schema-to-grammar.h
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/common/json.hpp b/llm/llama.cpp/common/json.hpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/common/log.h b/llm/llama.cpp/common/log.h
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/common/ngram-cache.cpp b/llm/llama.cpp/common/ngram-cache.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/common/ngram-cache.h b/llm/llama.cpp/common/ngram-cache.h
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/common/sampling.cpp b/llm/llama.cpp/common/sampling.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/common/sampling.h b/llm/llama.cpp/common/sampling.h
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/common/stb_image.h b/llm/llama.cpp/common/stb_image.h
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/common/train.cpp b/llm/llama.cpp/common/train.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/common/train.h b/llm/llama.cpp/common/train.h
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/convert_hf_to_gguf.py b/llm/llama.cpp/convert_hf_to_gguf.py
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/convert_hf_to_gguf_update.py b/llm/llama.cpp/convert_hf_to_gguf_update.py
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/convert_llama_ggml_to_gguf.py b/llm/llama.cpp/convert_llama_ggml_to_gguf.py
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/convert_lora_to_gguf.py b/llm/llama.cpp/convert_lora_to_gguf.py
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/docs/android.md b/llm/llama.cpp/docs/android.md
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/docs/backend/BLIS.md b/llm/llama.cpp/docs/backend/BLIS.md
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/docs/backend/SYCL.md b/llm/llama.cpp/docs/backend/SYCL.md
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/docs/build.md b/llm/llama.cpp/docs/build.md
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/docs/development/HOWTO-add-model.md b/llm/llama.cpp/docs/development/HOWTO-add-model.md
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/docs/development/debugging-tests.md b/llm/llama.cpp/docs/development/debugging-tests.md
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/docs/development/llama-star/idea-arch.key b/llm/llama.cpp/docs/development/llama-star/idea-arch.key
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/docs/development/llama-star/idea-arch.pdf b/llm/llama.cpp/docs/development/llama-star/idea-arch.pdf
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/docs/development/token_generation_performance_tips.md b/llm/llama.cpp/docs/development/token_generation_performance_tips.md
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/docs/docker.md b/llm/llama.cpp/docs/docker.md
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/docs/install.md b/llm/llama.cpp/docs/install.md
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/CMakeLists.txt b/llm/llama.cpp/examples/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/Miku.sh b/llm/llama.cpp/examples/Miku.sh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/baby-llama/CMakeLists.txt b/llm/llama.cpp/examples/baby-llama/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/baby-llama/baby-llama.cpp b/llm/llama.cpp/examples/baby-llama/baby-llama.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/base-translate.sh b/llm/llama.cpp/examples/base-translate.sh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/batched-bench/CMakeLists.txt b/llm/llama.cpp/examples/batched-bench/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/batched-bench/README.md b/llm/llama.cpp/examples/batched-bench/README.md
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/batched-bench/batched-bench.cpp b/llm/llama.cpp/examples/batched-bench/batched-bench.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/batched.swift/.gitignore b/llm/llama.cpp/examples/batched.swift/.gitignore
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/batched.swift/Makefile b/llm/llama.cpp/examples/batched.swift/Makefile
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/batched.swift/Package.swift b/llm/llama.cpp/examples/batched.swift/Package.swift
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/batched.swift/README.md b/llm/llama.cpp/examples/batched.swift/README.md
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/batched.swift/Sources/main.swift b/llm/llama.cpp/examples/batched.swift/Sources/main.swift
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/batched/CMakeLists.txt b/llm/llama.cpp/examples/batched/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/batched/README.md b/llm/llama.cpp/examples/batched/README.md
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/batched/batched.cpp b/llm/llama.cpp/examples/batched/batched.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/benchmark/CMakeLists.txt b/llm/llama.cpp/examples/benchmark/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/benchmark/benchmark-matmult.cpp b/llm/llama.cpp/examples/benchmark/benchmark-matmult.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/chat-13B.sh b/llm/llama.cpp/examples/chat-13B.sh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/chat-persistent.sh b/llm/llama.cpp/examples/chat-persistent.sh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/chat-vicuna.sh b/llm/llama.cpp/examples/chat-vicuna.sh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/chat.sh b/llm/llama.cpp/examples/chat.sh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt b/llm/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/convert-llama2c-to-ggml/README.md b/llm/llama.cpp/examples/convert-llama2c-to-ggml/README.md
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp b/llm/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/convert_legacy_llama.py b/llm/llama.cpp/examples/convert_legacy_llama.py
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/cvector-generator/CMakeLists.txt b/llm/llama.cpp/examples/cvector-generator/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/cvector-generator/README.md b/llm/llama.cpp/examples/cvector-generator/README.md
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/cvector-generator/completions.txt b/llm/llama.cpp/examples/cvector-generator/completions.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/cvector-generator/cvector-generator.cpp b/llm/llama.cpp/examples/cvector-generator/cvector-generator.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/cvector-generator/mean.hpp b/llm/llama.cpp/examples/cvector-generator/mean.hpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/cvector-generator/negative.txt b/llm/llama.cpp/examples/cvector-generator/negative.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/cvector-generator/pca.hpp b/llm/llama.cpp/examples/cvector-generator/pca.hpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/cvector-generator/positive.txt b/llm/llama.cpp/examples/cvector-generator/positive.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/deprecation-warning/README.md b/llm/llama.cpp/examples/deprecation-warning/README.md
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/deprecation-warning/deprecation-warning.cpp b/llm/llama.cpp/examples/deprecation-warning/deprecation-warning.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/embedding/CMakeLists.txt b/llm/llama.cpp/examples/embedding/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/embedding/README.md b/llm/llama.cpp/examples/embedding/README.md
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/embedding/embedding.cpp b/llm/llama.cpp/examples/embedding/embedding.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/eval-callback/CMakeLists.txt b/llm/llama.cpp/examples/eval-callback/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/eval-callback/README.md b/llm/llama.cpp/examples/eval-callback/README.md
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/eval-callback/eval-callback.cpp b/llm/llama.cpp/examples/eval-callback/eval-callback.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/export-lora/CMakeLists.txt b/llm/llama.cpp/examples/export-lora/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/export-lora/README.md b/llm/llama.cpp/examples/export-lora/README.md
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/export-lora/export-lora.cpp b/llm/llama.cpp/examples/export-lora/export-lora.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/gbnf-validator/CMakeLists.txt b/llm/llama.cpp/examples/gbnf-validator/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp b/llm/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/gguf-hash/CMakeLists.txt b/llm/llama.cpp/examples/gguf-hash/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/gguf-hash/README.md b/llm/llama.cpp/examples/gguf-hash/README.md
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/gguf-hash/deps/rotate-bits/package.json b/llm/llama.cpp/examples/gguf-hash/deps/rotate-bits/package.json
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/gguf-hash/deps/rotate-bits/rotate-bits.h b/llm/llama.cpp/examples/gguf-hash/deps/rotate-bits/rotate-bits.h
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/gguf-hash/deps/sha1/package.json b/llm/llama.cpp/examples/gguf-hash/deps/sha1/package.json
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/gguf-hash/deps/sha1/sha1.c b/llm/llama.cpp/examples/gguf-hash/deps/sha1/sha1.c
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/gguf-hash/deps/sha1/sha1.h b/llm/llama.cpp/examples/gguf-hash/deps/sha1/sha1.h
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/gguf-hash/deps/sha256/package.json b/llm/llama.cpp/examples/gguf-hash/deps/sha256/package.json
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/gguf-hash/deps/sha256/sha256.c b/llm/llama.cpp/examples/gguf-hash/deps/sha256/sha256.c
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/gguf-hash/deps/sha256/sha256.h b/llm/llama.cpp/examples/gguf-hash/deps/sha256/sha256.h
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/gguf-hash/deps/xxhash/clib.json b/llm/llama.cpp/examples/gguf-hash/deps/xxhash/clib.json
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.c b/llm/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.c
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.h b/llm/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.h
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/gguf-hash/gguf-hash.cpp b/llm/llama.cpp/examples/gguf-hash/gguf-hash.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/gguf-split/CMakeLists.txt b/llm/llama.cpp/examples/gguf-split/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/gguf-split/README.md b/llm/llama.cpp/examples/gguf-split/README.md
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/gguf-split/gguf-split.cpp b/llm/llama.cpp/examples/gguf-split/gguf-split.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/gguf-split/tests.sh b/llm/llama.cpp/examples/gguf-split/tests.sh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/gguf/CMakeLists.txt b/llm/llama.cpp/examples/gguf/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/gguf/gguf.cpp b/llm/llama.cpp/examples/gguf/gguf.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/gritlm/CMakeLists.txt b/llm/llama.cpp/examples/gritlm/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/gritlm/README.md b/llm/llama.cpp/examples/gritlm/README.md
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/gritlm/gritlm.cpp b/llm/llama.cpp/examples/gritlm/gritlm.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/imatrix/CMakeLists.txt b/llm/llama.cpp/examples/imatrix/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/imatrix/README.md b/llm/llama.cpp/examples/imatrix/README.md
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/imatrix/imatrix.cpp b/llm/llama.cpp/examples/imatrix/imatrix.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/infill/CMakeLists.txt b/llm/llama.cpp/examples/infill/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/infill/README.md b/llm/llama.cpp/examples/infill/README.md
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/infill/infill.cpp b/llm/llama.cpp/examples/infill/infill.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/jeopardy/README.md b/llm/llama.cpp/examples/jeopardy/README.md
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/jeopardy/graph.py b/llm/llama.cpp/examples/jeopardy/graph.py
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/jeopardy/jeopardy.sh b/llm/llama.cpp/examples/jeopardy/jeopardy.sh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/jeopardy/qasheet.csv b/llm/llama.cpp/examples/jeopardy/qasheet.csv
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/jeopardy/questions.txt b/llm/llama.cpp/examples/jeopardy/questions.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/json_schema_pydantic_example.py b/llm/llama.cpp/examples/json_schema_pydantic_example.py
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/json_schema_to_grammar.py b/llm/llama.cpp/examples/json_schema_to_grammar.py
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llama-bench/CMakeLists.txt b/llm/llama.cpp/examples/llama-bench/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llama-bench/README.md b/llm/llama.cpp/examples/llama-bench/README.md
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llama-bench/llama-bench.cpp b/llm/llama.cpp/examples/llama-bench/llama-bench.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llama.android/.gitignore b/llm/llama.cpp/examples/llama.android/.gitignore
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llama.android/README.md b/llm/llama.cpp/examples/llama.android/README.md
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llama.android/app/.gitignore b/llm/llama.cpp/examples/llama.android/app/.gitignore
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llama.android/app/build.gradle.kts b/llm/llama.cpp/examples/llama.android/app/build.gradle.kts
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llama.android/app/proguard-rules.pro b/llm/llama.cpp/examples/llama.android/app/proguard-rules.pro
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llama.android/app/src/main/AndroidManifest.xml b/llm/llama.cpp/examples/llama.android/app/src/main/AndroidManifest.xml
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llama.android/app/src/main/java/com/example/llama/Downloadable.kt b/llm/llama.cpp/examples/llama.android/app/src/main/java/com/example/llama/Downloadable.kt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llama.android/app/src/main/java/com/example/llama/MainActivity.kt b/llm/llama.cpp/examples/llama.android/app/src/main/java/com/example/llama/MainActivity.kt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llama.android/app/src/main/java/com/example/llama/MainViewModel.kt b/llm/llama.cpp/examples/llama.android/app/src/main/java/com/example/llama/MainViewModel.kt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llama.android/app/src/main/java/com/example/llama/ui/theme/Color.kt b/llm/llama.cpp/examples/llama.android/app/src/main/java/com/example/llama/ui/theme/Color.kt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llama.android/app/src/main/java/com/example/llama/ui/theme/Theme.kt b/llm/llama.cpp/examples/llama.android/app/src/main/java/com/example/llama/ui/theme/Theme.kt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llama.android/app/src/main/java/com/example/llama/ui/theme/Type.kt b/llm/llama.cpp/examples/llama.android/app/src/main/java/com/example/llama/ui/theme/Type.kt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llama.android/app/src/main/res/drawable/ic_launcher_background.xml b/llm/llama.cpp/examples/llama.android/app/src/main/res/drawable/ic_launcher_background.xml
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llama.android/app/src/main/res/drawable/ic_launcher_foreground.xml b/llm/llama.cpp/examples/llama.android/app/src/main/res/drawable/ic_launcher_foreground.xml
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llama.android/app/src/main/res/mipmap-anydpi/ic_launcher.xml b/llm/llama.cpp/examples/llama.android/app/src/main/res/mipmap-anydpi/ic_launcher.xml
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llama.android/app/src/main/res/mipmap-anydpi/ic_launcher_round.xml b/llm/llama.cpp/examples/llama.android/app/src/main/res/mipmap-anydpi/ic_launcher_round.xml
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llama.android/app/src/main/res/mipmap-hdpi/ic_launcher.webp b/llm/llama.cpp/examples/llama.android/app/src/main/res/mipmap-hdpi/ic_launcher.webp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llama.android/app/src/main/res/mipmap-hdpi/ic_launcher_round.webp b/llm/llama.cpp/examples/llama.android/app/src/main/res/mipmap-hdpi/ic_launcher_round.webp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llama.android/app/src/main/res/mipmap-mdpi/ic_launcher.webp b/llm/llama.cpp/examples/llama.android/app/src/main/res/mipmap-mdpi/ic_launcher.webp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llama.android/app/src/main/res/mipmap-mdpi/ic_launcher_round.webp b/llm/llama.cpp/examples/llama.android/app/src/main/res/mipmap-mdpi/ic_launcher_round.webp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llama.android/app/src/main/res/mipmap-xhdpi/ic_launcher.webp b/llm/llama.cpp/examples/llama.android/app/src/main/res/mipmap-xhdpi/ic_launcher.webp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llama.android/app/src/main/res/mipmap-xhdpi/ic_launcher_round.webp b/llm/llama.cpp/examples/llama.android/app/src/main/res/mipmap-xhdpi/ic_launcher_round.webp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llama.android/app/src/main/res/mipmap-xxhdpi/ic_launcher.webp b/llm/llama.cpp/examples/llama.android/app/src/main/res/mipmap-xxhdpi/ic_launcher.webp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llama.android/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.webp b/llm/llama.cpp/examples/llama.android/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.webp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llama.android/app/src/main/res/mipmap-xxxhdpi/ic_launcher.webp b/llm/llama.cpp/examples/llama.android/app/src/main/res/mipmap-xxxhdpi/ic_launcher.webp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llama.android/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.webp b/llm/llama.cpp/examples/llama.android/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.webp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llama.android/app/src/main/res/values/colors.xml b/llm/llama.cpp/examples/llama.android/app/src/main/res/values/colors.xml
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llama.android/app/src/main/res/values/strings.xml b/llm/llama.cpp/examples/llama.android/app/src/main/res/values/strings.xml
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llama.android/app/src/main/res/values/themes.xml b/llm/llama.cpp/examples/llama.android/app/src/main/res/values/themes.xml
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llama.android/app/src/main/res/xml/backup_rules.xml b/llm/llama.cpp/examples/llama.android/app/src/main/res/xml/backup_rules.xml
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llama.android/app/src/main/res/xml/data_extraction_rules.xml b/llm/llama.cpp/examples/llama.android/app/src/main/res/xml/data_extraction_rules.xml
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llama.android/build.gradle.kts b/llm/llama.cpp/examples/llama.android/build.gradle.kts
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llama.android/gradle.properties b/llm/llama.cpp/examples/llama.android/gradle.properties
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llama.android/gradle/wrapper/gradle-wrapper.jar b/llm/llama.cpp/examples/llama.android/gradle/wrapper/gradle-wrapper.jar
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llama.android/gradle/wrapper/gradle-wrapper.properties b/llm/llama.cpp/examples/llama.android/gradle/wrapper/gradle-wrapper.properties
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llama.android/gradlew b/llm/llama.cpp/examples/llama.android/gradlew
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llama.android/llama/.gitignore b/llm/llama.cpp/examples/llama.android/llama/.gitignore
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llama.android/llama/build.gradle.kts b/llm/llama.cpp/examples/llama.android/llama/build.gradle.kts
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llama.android/llama/consumer-rules.pro b/llm/llama.cpp/examples/llama.android/llama/consumer-rules.pro
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llama.android/llama/proguard-rules.pro b/llm/llama.cpp/examples/llama.android/llama/proguard-rules.pro
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llama.android/llama/src/androidTest/java/android/llama/cpp/ExampleInstrumentedTest.kt b/llm/llama.cpp/examples/llama.android/llama/src/androidTest/java/android/llama/cpp/ExampleInstrumentedTest.kt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llama.android/llama/src/main/AndroidManifest.xml b/llm/llama.cpp/examples/llama.android/llama/src/main/AndroidManifest.xml
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llama.android/llama/src/main/cpp/CMakeLists.txt b/llm/llama.cpp/examples/llama.android/llama/src/main/cpp/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp b/llm/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llama.android/llama/src/main/java/android/llama/cpp/LLamaAndroid.kt b/llm/llama.cpp/examples/llama.android/llama/src/main/java/android/llama/cpp/LLamaAndroid.kt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llama.android/llama/src/test/java/android/llama/cpp/ExampleUnitTest.kt b/llm/llama.cpp/examples/llama.android/llama/src/test/java/android/llama/cpp/ExampleUnitTest.kt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llama.android/settings.gradle.kts b/llm/llama.cpp/examples/llama.android/settings.gradle.kts
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llama.swiftui/.gitignore b/llm/llama.cpp/examples/llama.swiftui/.gitignore
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llama.swiftui/README.md b/llm/llama.cpp/examples/llama.swiftui/README.md
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llama.swiftui/llama.cpp.swift/LibLlama.swift b/llm/llama.cpp/examples/llama.swiftui/llama.cpp.swift/LibLlama.swift
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llama.swiftui/llama.swiftui.xcodeproj/project.pbxproj b/llm/llama.cpp/examples/llama.swiftui/llama.swiftui.xcodeproj/project.pbxproj
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llama.swiftui/llama.swiftui.xcodeproj/project.xcworkspace/contents.xcworkspacedata b/llm/llama.cpp/examples/llama.swiftui/llama.swiftui.xcodeproj/project.xcworkspace/contents.xcworkspacedata
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llama.swiftui/llama.swiftui/Assets.xcassets/AppIcon.appiconset/Contents.json b/llm/llama.cpp/examples/llama.swiftui/llama.swiftui/Assets.xcassets/AppIcon.appiconset/Contents.json
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llama.swiftui/llama.swiftui/Assets.xcassets/Contents.json b/llm/llama.cpp/examples/llama.swiftui/llama.swiftui/Assets.xcassets/Contents.json
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llama.swiftui/llama.swiftui/Models/LlamaState.swift b/llm/llama.cpp/examples/llama.swiftui/llama.swiftui/Models/LlamaState.swift
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llama.swiftui/llama.swiftui/Resources/models/.gitignore b/llm/llama.cpp/examples/llama.swiftui/llama.swiftui/Resources/models/.gitignore
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llama.swiftui/llama.swiftui/UI/ContentView.swift b/llm/llama.cpp/examples/llama.swiftui/llama.swiftui/UI/ContentView.swift
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llama.swiftui/llama.swiftui/UI/DownloadButton.swift b/llm/llama.cpp/examples/llama.swiftui/llama.swiftui/UI/DownloadButton.swift
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llama.swiftui/llama.swiftui/UI/InputButton.swift b/llm/llama.cpp/examples/llama.swiftui/llama.swiftui/UI/InputButton.swift
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llama.swiftui/llama.swiftui/UI/LoadCustomButton.swift b/llm/llama.cpp/examples/llama.swiftui/llama.swiftui/UI/LoadCustomButton.swift
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llama.swiftui/llama.swiftui/llama_swiftuiApp.swift b/llm/llama.cpp/examples/llama.swiftui/llama.swiftui/llama_swiftuiApp.swift
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llama.vim b/llm/llama.cpp/examples/llama.vim
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llava/CMakeLists.txt b/llm/llama.cpp/examples/llava/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llava/MobileVLM-README.md b/llm/llama.cpp/examples/llava/MobileVLM-README.md
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llava/README.md b/llm/llama.cpp/examples/llava/README.md
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llava/android/adb_run.sh b/llm/llama.cpp/examples/llava/android/adb_run.sh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llava/android/build_64.sh b/llm/llama.cpp/examples/llava/android/build_64.sh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llava/clip.cpp b/llm/llama.cpp/examples/llava/clip.cpp
old mode 100755
new mode 100644
index 7cda5f10c4b05dce52334ec6636c0d0fa7bcc1b6..b86c7a58ea6f766418c1694669d62b5c7b3a1b52
--- a/llm/llama.cpp/examples/llava/clip.cpp
+++ b/llm/llama.cpp/examples/llava/clip.cpp
@@ -3,6 +3,7 @@
 // I'll gradually clean and extend it
 // Note: Even when using identical normalized image inputs (see normalize_image_u8_to_f32()) we have a significant difference in resulting embeddings compared to pytorch
 #include "clip.h"
+#include "common.h"
 #include "log.h"
 #include "ggml.h"
 #include "ggml-alloc.h"
@@ -36,6 +37,14 @@
 #include <cinttypes>
 #include <limits>
 
+#if defined(_WIN32)
+#define WIN32_LEAN_AND_MEAN
+#ifndef NOMINMAX
+    #define NOMINMAX
+#endif
+#include <windows.h>
+#endif
+
 //#define CLIP_DEBUG_FUNCTIONS
 
 // RGB uint8 image
@@ -1064,7 +1073,22 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
             return nullptr;
         }
 
+#ifdef _WIN32
+        int wlen = MultiByteToWideChar(CP_UTF8, 0, fname, -1, NULL, 0);
+        if (!wlen) {
+            return NULL;
+        }
+        wchar_t * wbuf = (wchar_t *) malloc(wlen * sizeof(wchar_t));
+        wlen = MultiByteToWideChar(CP_UTF8, 0, fname, -1, wbuf, wlen);
+        if (!wlen) {
+            free(wbuf);
+            return NULL;
+        }
+        auto fin = std::ifstream(wbuf, std::ios::binary);
+        free(wbuf);
+#else
         auto fin = std::ifstream(fname, std::ios::binary);
+#endif
         if (!fin) {
             LOG_TEE("cannot open model file for loading tensors\n");
             clip_free(new_clip);
diff --git a/llm/llama.cpp/examples/llava/clip.h b/llm/llama.cpp/examples/llava/clip.h
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llava/convert_image_encoder_to_gguf.py b/llm/llama.cpp/examples/llava/convert_image_encoder_to_gguf.py
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llava/llava-cli.cpp b/llm/llama.cpp/examples/llava/llava-cli.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llava/llava.cpp b/llm/llama.cpp/examples/llava/llava.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llava/llava.h b/llm/llama.cpp/examples/llava/llava.h
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llava/llava_surgery.py b/llm/llama.cpp/examples/llava/llava_surgery.py
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llava/llava_surgery_v2.py b/llm/llama.cpp/examples/llava/llava_surgery_v2.py
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llava/requirements.txt b/llm/llama.cpp/examples/llava/requirements.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/llm.vim b/llm/llama.cpp/examples/llm.vim
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/lookahead/CMakeLists.txt b/llm/llama.cpp/examples/lookahead/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/lookahead/README.md b/llm/llama.cpp/examples/lookahead/README.md
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/lookahead/lookahead.cpp b/llm/llama.cpp/examples/lookahead/lookahead.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/lookup/CMakeLists.txt b/llm/llama.cpp/examples/lookup/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/lookup/README.md b/llm/llama.cpp/examples/lookup/README.md
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/lookup/lookup-create.cpp b/llm/llama.cpp/examples/lookup/lookup-create.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/lookup/lookup-merge.cpp b/llm/llama.cpp/examples/lookup/lookup-merge.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/lookup/lookup-stats.cpp b/llm/llama.cpp/examples/lookup/lookup-stats.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/lookup/lookup.cpp b/llm/llama.cpp/examples/lookup/lookup.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/main-cmake-pkg/.gitignore b/llm/llama.cpp/examples/main-cmake-pkg/.gitignore
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt b/llm/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/main-cmake-pkg/README.md b/llm/llama.cpp/examples/main-cmake-pkg/README.md
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/main/CMakeLists.txt b/llm/llama.cpp/examples/main/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/main/README.md b/llm/llama.cpp/examples/main/README.md
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/main/main.cpp b/llm/llama.cpp/examples/main/main.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/parallel/CMakeLists.txt b/llm/llama.cpp/examples/parallel/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/parallel/README.md b/llm/llama.cpp/examples/parallel/README.md
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/parallel/parallel.cpp b/llm/llama.cpp/examples/parallel/parallel.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/passkey/CMakeLists.txt b/llm/llama.cpp/examples/passkey/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/passkey/README.md b/llm/llama.cpp/examples/passkey/README.md
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/passkey/passkey.cpp b/llm/llama.cpp/examples/passkey/passkey.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/perplexity/CMakeLists.txt b/llm/llama.cpp/examples/perplexity/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/perplexity/README.md b/llm/llama.cpp/examples/perplexity/README.md
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/perplexity/perplexity.cpp b/llm/llama.cpp/examples/perplexity/perplexity.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/pydantic_models_to_grammar.py b/llm/llama.cpp/examples/pydantic_models_to_grammar.py
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/pydantic_models_to_grammar_examples.py b/llm/llama.cpp/examples/pydantic_models_to_grammar_examples.py
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/quantize-stats/CMakeLists.txt b/llm/llama.cpp/examples/quantize-stats/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/quantize-stats/quantize-stats.cpp b/llm/llama.cpp/examples/quantize-stats/quantize-stats.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/quantize/CMakeLists.txt b/llm/llama.cpp/examples/quantize/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/quantize/README.md b/llm/llama.cpp/examples/quantize/README.md
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/quantize/quantize.cpp b/llm/llama.cpp/examples/quantize/quantize.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/quantize/tests.sh b/llm/llama.cpp/examples/quantize/tests.sh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/reason-act.sh b/llm/llama.cpp/examples/reason-act.sh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/regex_to_grammar.py b/llm/llama.cpp/examples/regex_to_grammar.py
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/retrieval/CMakeLists.txt b/llm/llama.cpp/examples/retrieval/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/retrieval/README.md b/llm/llama.cpp/examples/retrieval/README.md
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/retrieval/retrieval.cpp b/llm/llama.cpp/examples/retrieval/retrieval.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/rpc/CMakeLists.txt b/llm/llama.cpp/examples/rpc/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/rpc/README.md b/llm/llama.cpp/examples/rpc/README.md
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/rpc/rpc-server.cpp b/llm/llama.cpp/examples/rpc/rpc-server.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/save-load-state/CMakeLists.txt b/llm/llama.cpp/examples/save-load-state/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/save-load-state/save-load-state.cpp b/llm/llama.cpp/examples/save-load-state/save-load-state.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/server-llama2-13B.sh b/llm/llama.cpp/examples/server-llama2-13B.sh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/server/CMakeLists.txt b/llm/llama.cpp/examples/server/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/server/README.md b/llm/llama.cpp/examples/server/README.md
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/server/bench/README.md b/llm/llama.cpp/examples/server/bench/README.md
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/server/bench/bench.py b/llm/llama.cpp/examples/server/bench/bench.py
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/server/bench/prometheus.yml b/llm/llama.cpp/examples/server/bench/prometheus.yml
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/server/bench/requirements.txt b/llm/llama.cpp/examples/server/bench/requirements.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/server/bench/script.js b/llm/llama.cpp/examples/server/bench/script.js
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/server/chat-llama2.sh b/llm/llama.cpp/examples/server/chat-llama2.sh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/server/chat.mjs b/llm/llama.cpp/examples/server/chat.mjs
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/server/chat.sh b/llm/llama.cpp/examples/server/chat.sh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/server/deps.sh b/llm/llama.cpp/examples/server/deps.sh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/server/httplib.h b/llm/llama.cpp/examples/server/httplib.h
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/server/public/colorthemes.css b/llm/llama.cpp/examples/server/public/colorthemes.css
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/server/public/completion.js b/llm/llama.cpp/examples/server/public/completion.js
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/server/public/favicon.ico b/llm/llama.cpp/examples/server/public/favicon.ico
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/server/public/index-new.html b/llm/llama.cpp/examples/server/public/index-new.html
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/server/public/index.html b/llm/llama.cpp/examples/server/public/index.html
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/server/public/index.js b/llm/llama.cpp/examples/server/public/index.js
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/server/public/json-schema-to-grammar.mjs b/llm/llama.cpp/examples/server/public/json-schema-to-grammar.mjs
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/server/public/prompt-formats.js b/llm/llama.cpp/examples/server/public/prompt-formats.js
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/server/public/style.css b/llm/llama.cpp/examples/server/public/style.css
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/server/public/system-prompts.js b/llm/llama.cpp/examples/server/public/system-prompts.js
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/server/public/theme-beeninorder.css b/llm/llama.cpp/examples/server/public/theme-beeninorder.css
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/server/public/theme-ketivah.css b/llm/llama.cpp/examples/server/public/theme-ketivah.css
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/server/public/theme-mangotango.css b/llm/llama.cpp/examples/server/public/theme-mangotango.css
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/server/public/theme-playground.css b/llm/llama.cpp/examples/server/public/theme-playground.css
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/server/public/theme-polarnight.css b/llm/llama.cpp/examples/server/public/theme-polarnight.css
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/server/public/theme-snowstorm.css b/llm/llama.cpp/examples/server/public/theme-snowstorm.css
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/server/public_simplechat/datautils.mjs b/llm/llama.cpp/examples/server/public_simplechat/datautils.mjs
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/server/public_simplechat/index.html b/llm/llama.cpp/examples/server/public_simplechat/index.html
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/server/public_simplechat/readme.md b/llm/llama.cpp/examples/server/public_simplechat/readme.md
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/server/public_simplechat/simplechat.css b/llm/llama.cpp/examples/server/public_simplechat/simplechat.css
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/server/public_simplechat/simplechat.js b/llm/llama.cpp/examples/server/public_simplechat/simplechat.js
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/server/public_simplechat/simplechat_screens.webp b/llm/llama.cpp/examples/server/public_simplechat/simplechat_screens.webp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/server/public_simplechat/ui.mjs b/llm/llama.cpp/examples/server/public_simplechat/ui.mjs
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/server/server.cpp b/llm/llama.cpp/examples/server/server.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/server/tests/README.md b/llm/llama.cpp/examples/server/tests/README.md
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/server/tests/features/embeddings.feature b/llm/llama.cpp/examples/server/tests/features/embeddings.feature
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/server/tests/features/environment.py b/llm/llama.cpp/examples/server/tests/features/environment.py
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/server/tests/features/issues.feature b/llm/llama.cpp/examples/server/tests/features/issues.feature
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/server/tests/features/lora.feature b/llm/llama.cpp/examples/server/tests/features/lora.feature
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/server/tests/features/parallel.feature b/llm/llama.cpp/examples/server/tests/features/parallel.feature
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/server/tests/features/passkey.feature b/llm/llama.cpp/examples/server/tests/features/passkey.feature
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/server/tests/features/results.feature b/llm/llama.cpp/examples/server/tests/features/results.feature
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/server/tests/features/security.feature b/llm/llama.cpp/examples/server/tests/features/security.feature
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/server/tests/features/server.feature b/llm/llama.cpp/examples/server/tests/features/server.feature
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/server/tests/features/slotsave.feature b/llm/llama.cpp/examples/server/tests/features/slotsave.feature
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/server/tests/features/steps/steps.py b/llm/llama.cpp/examples/server/tests/features/steps/steps.py
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/server/tests/features/wrong_usages.feature b/llm/llama.cpp/examples/server/tests/features/wrong_usages.feature
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/server/tests/requirements.txt b/llm/llama.cpp/examples/server/tests/requirements.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/server/tests/tests.sh b/llm/llama.cpp/examples/server/tests/tests.sh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/server/themes/README.md b/llm/llama.cpp/examples/server/themes/README.md
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/server/themes/buttons-top/README.md b/llm/llama.cpp/examples/server/themes/buttons-top/README.md
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/server/themes/buttons-top/buttons_top.png b/llm/llama.cpp/examples/server/themes/buttons-top/buttons_top.png
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/server/themes/buttons-top/favicon.ico b/llm/llama.cpp/examples/server/themes/buttons-top/favicon.ico
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/server/themes/buttons-top/index.html b/llm/llama.cpp/examples/server/themes/buttons-top/index.html
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/server/themes/wild/README.md b/llm/llama.cpp/examples/server/themes/wild/README.md
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/server/themes/wild/favicon.ico b/llm/llama.cpp/examples/server/themes/wild/favicon.ico
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/server/themes/wild/index.html b/llm/llama.cpp/examples/server/themes/wild/index.html
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/server/themes/wild/llama_cpp.png b/llm/llama.cpp/examples/server/themes/wild/llama_cpp.png
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/server/themes/wild/llamapattern.png b/llm/llama.cpp/examples/server/themes/wild/llamapattern.png
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/server/themes/wild/wild.png b/llm/llama.cpp/examples/server/themes/wild/wild.png
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/server/utils.hpp b/llm/llama.cpp/examples/server/utils.hpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/server_embd.py b/llm/llama.cpp/examples/server_embd.py
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/simple/CMakeLists.txt b/llm/llama.cpp/examples/simple/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/simple/README.md b/llm/llama.cpp/examples/simple/README.md
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/simple/simple.cpp b/llm/llama.cpp/examples/simple/simple.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/speculative/CMakeLists.txt b/llm/llama.cpp/examples/speculative/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/speculative/README.md b/llm/llama.cpp/examples/speculative/README.md
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/speculative/speculative.cpp b/llm/llama.cpp/examples/speculative/speculative.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/sycl/CMakeLists.txt b/llm/llama.cpp/examples/sycl/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/sycl/README.md b/llm/llama.cpp/examples/sycl/README.md
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/sycl/build.sh b/llm/llama.cpp/examples/sycl/build.sh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/sycl/ls-sycl-device.cpp b/llm/llama.cpp/examples/sycl/ls-sycl-device.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/sycl/run-llama2.sh b/llm/llama.cpp/examples/sycl/run-llama2.sh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/tokenize/CMakeLists.txt b/llm/llama.cpp/examples/tokenize/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/tokenize/tokenize.cpp b/llm/llama.cpp/examples/tokenize/tokenize.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/examples/ts-type-to-grammar.sh b/llm/llama.cpp/examples/ts-type-to-grammar.sh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/flake.lock b/llm/llama.cpp/flake.lock
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/flake.nix b/llm/llama.cpp/flake.nix
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/.gitignore b/llm/llama.cpp/ggml/.gitignore
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/CMakeLists.txt b/llm/llama.cpp/ggml/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/cmake/FindSIMD.cmake b/llm/llama.cpp/ggml/cmake/FindSIMD.cmake
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/include/ggml-alloc.h b/llm/llama.cpp/ggml/include/ggml-alloc.h
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/include/ggml-backend.h b/llm/llama.cpp/ggml/include/ggml-backend.h
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/include/ggml-blas.h b/llm/llama.cpp/ggml/include/ggml-blas.h
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/include/ggml-cann.h b/llm/llama.cpp/ggml/include/ggml-cann.h
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/include/ggml-cuda.h b/llm/llama.cpp/ggml/include/ggml-cuda.h
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/include/ggml-kompute.h b/llm/llama.cpp/ggml/include/ggml-kompute.h
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/include/ggml-metal.h b/llm/llama.cpp/ggml/include/ggml-metal.h
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/include/ggml-rpc.h b/llm/llama.cpp/ggml/include/ggml-rpc.h
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/include/ggml-sycl.h b/llm/llama.cpp/ggml/include/ggml-sycl.h
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/include/ggml-vulkan.h b/llm/llama.cpp/ggml/include/ggml-vulkan.h
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/include/ggml.h b/llm/llama.cpp/ggml/include/ggml.h
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/CMakeLists.txt b/llm/llama.cpp/ggml/src/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-aarch64.c b/llm/llama.cpp/ggml/src/ggml-aarch64.c
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-aarch64.h b/llm/llama.cpp/ggml/src/ggml-aarch64.h
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-alloc.c b/llm/llama.cpp/ggml/src/ggml-alloc.c
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-backend-impl.h b/llm/llama.cpp/ggml/src/ggml-backend-impl.h
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-backend.c b/llm/llama.cpp/ggml/src/ggml-backend.c
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-blas.cpp b/llm/llama.cpp/ggml/src/ggml-blas.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cann.cpp b/llm/llama.cpp/ggml/src/ggml-cann.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cann/.clang-format b/llm/llama.cpp/ggml/src/ggml-cann/.clang-format
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cann/Doxyfile b/llm/llama.cpp/ggml/src/ggml-cann/Doxyfile
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp b/llm/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cann/acl_tensor.h b/llm/llama.cpp/ggml/src/ggml-cann/acl_tensor.h
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp b/llm/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h b/llm/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cann/common.h b/llm/llama.cpp/ggml/src/ggml-cann/common.h
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt b/llm/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h b/llm/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp b/llm/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp b/llm/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp b/llm/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp b/llm/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp b/llm/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp b/llm/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp b/llm/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp b/llm/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-common.h b/llm/llama.cpp/ggml/src/ggml-common.h
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda.cu b/llm/llama.cpp/ggml/src/ggml-cuda.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/acc.cu b/llm/llama.cpp/ggml/src/ggml-cuda/acc.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/acc.cuh b/llm/llama.cpp/ggml/src/ggml-cuda/acc.cuh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/arange.cu b/llm/llama.cpp/ggml/src/ggml-cuda/arange.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/arange.cuh b/llm/llama.cpp/ggml/src/ggml-cuda/arange.cuh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/argsort.cu b/llm/llama.cpp/ggml/src/ggml-cuda/argsort.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/argsort.cuh b/llm/llama.cpp/ggml/src/ggml-cuda/argsort.cuh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/binbcast.cu b/llm/llama.cpp/ggml/src/ggml-cuda/binbcast.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/binbcast.cuh b/llm/llama.cpp/ggml/src/ggml-cuda/binbcast.cuh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/clamp.cu b/llm/llama.cpp/ggml/src/ggml-cuda/clamp.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/clamp.cuh b/llm/llama.cpp/ggml/src/ggml-cuda/clamp.cuh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/common.cuh b/llm/llama.cpp/ggml/src/ggml-cuda/common.cuh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/concat.cu b/llm/llama.cpp/ggml/src/ggml-cuda/concat.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/concat.cuh b/llm/llama.cpp/ggml/src/ggml-cuda/concat.cuh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/conv-transpose-1d.cu b/llm/llama.cpp/ggml/src/ggml-cuda/conv-transpose-1d.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/conv-transpose-1d.cuh b/llm/llama.cpp/ggml/src/ggml-cuda/conv-transpose-1d.cuh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/convert.cu b/llm/llama.cpp/ggml/src/ggml-cuda/convert.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/convert.cuh b/llm/llama.cpp/ggml/src/ggml-cuda/convert.cuh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/cpy.cu b/llm/llama.cpp/ggml/src/ggml-cuda/cpy.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/cpy.cuh b/llm/llama.cpp/ggml/src/ggml-cuda/cpy.cuh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/dequantize.cuh b/llm/llama.cpp/ggml/src/ggml-cuda/dequantize.cuh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/diagmask.cu b/llm/llama.cpp/ggml/src/ggml-cuda/diagmask.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/diagmask.cuh b/llm/llama.cpp/ggml/src/ggml-cuda/diagmask.cuh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/dmmv.cu b/llm/llama.cpp/ggml/src/ggml-cuda/dmmv.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/dmmv.cuh b/llm/llama.cpp/ggml/src/ggml-cuda/dmmv.cuh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/fattn-common.cuh b/llm/llama.cpp/ggml/src/ggml-cuda/fattn-common.cuh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f16.cu b/llm/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f16.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f16.cuh b/llm/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f16.cuh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f32.cu b/llm/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f32.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f32.cuh b/llm/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f32.cuh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f16.cuh b/llm/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f16.cuh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f32.cuh b/llm/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f32.cuh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cuh b/llm/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cuh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/fattn.cu b/llm/llama.cpp/ggml/src/ggml-cuda/fattn.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/fattn.cuh b/llm/llama.cpp/ggml/src/ggml-cuda/fattn.cuh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/getrows.cu b/llm/llama.cpp/ggml/src/ggml-cuda/getrows.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/getrows.cuh b/llm/llama.cpp/ggml/src/ggml-cuda/getrows.cuh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/im2col.cu b/llm/llama.cpp/ggml/src/ggml-cuda/im2col.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/im2col.cuh b/llm/llama.cpp/ggml/src/ggml-cuda/im2col.cuh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/mma.cuh b/llm/llama.cpp/ggml/src/ggml-cuda/mma.cuh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/mmq.cu b/llm/llama.cpp/ggml/src/ggml-cuda/mmq.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/mmq.cuh b/llm/llama.cpp/ggml/src/ggml-cuda/mmq.cuh
old mode 100755
new mode 100644
index 703c620c70f21636e67a434896346c0017258f97..2306db89f2f72884a54d16bf03bd96c4ceee25e7
--- a/llm/llama.cpp/ggml/src/ggml-cuda/mmq.cuh
+++ b/llm/llama.cpp/ggml/src/ggml-cuda/mmq.cuh
@@ -2580,7 +2580,7 @@ template <ggml_type type, int mmq_x, int nwarps, bool need_check>
     __launch_bounds__(WARP_SIZE*nwarps, 2)
 #endif // __CUDA_ARCH__ >= CC_VOLTA
 #endif // defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)
-static __global__ __launch_bounds__(1024) void mul_mat_q(
+static __global__ __launch_bounds__(256) void mul_mat_q(
     const char * __restrict__ x, const char * __restrict__ yc, float * __restrict__ dst, float * __restrict__ tmp_fixup,
     const int ne00, const int ne01, const int stride01, const int ne10, const int ne11, const int stride11, const int ne0) {
 
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/mmvq.cu b/llm/llama.cpp/ggml/src/ggml-cuda/mmvq.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/mmvq.cuh b/llm/llama.cpp/ggml/src/ggml-cuda/mmvq.cuh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/norm.cu b/llm/llama.cpp/ggml/src/ggml-cuda/norm.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/norm.cuh b/llm/llama.cpp/ggml/src/ggml-cuda/norm.cuh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/pad.cu b/llm/llama.cpp/ggml/src/ggml-cuda/pad.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/pad.cuh b/llm/llama.cpp/ggml/src/ggml-cuda/pad.cuh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/pool2d.cu b/llm/llama.cpp/ggml/src/ggml-cuda/pool2d.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/pool2d.cuh b/llm/llama.cpp/ggml/src/ggml-cuda/pool2d.cuh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/quantize.cu b/llm/llama.cpp/ggml/src/ggml-cuda/quantize.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/quantize.cuh b/llm/llama.cpp/ggml/src/ggml-cuda/quantize.cuh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/rope.cu b/llm/llama.cpp/ggml/src/ggml-cuda/rope.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/rope.cuh b/llm/llama.cpp/ggml/src/ggml-cuda/rope.cuh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/scale.cu b/llm/llama.cpp/ggml/src/ggml-cuda/scale.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/scale.cuh b/llm/llama.cpp/ggml/src/ggml-cuda/scale.cuh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/softmax.cu b/llm/llama.cpp/ggml/src/ggml-cuda/softmax.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/softmax.cuh b/llm/llama.cpp/ggml/src/ggml-cuda/softmax.cuh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/sumrows.cu b/llm/llama.cpp/ggml/src/ggml-cuda/sumrows.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/sumrows.cuh b/llm/llama.cpp/ggml/src/ggml-cuda/sumrows.cuh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/generate_cu_files.py b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/generate_cu_files.py
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq1_s.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq1_s.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_s.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_s.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xs.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xs.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xxs.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xxs.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_s.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_s.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_xxs.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_xxs.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_nl.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_nl.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_xs.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_xs.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q2_k.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q2_k.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q3_k.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q3_k.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_0.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_0.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_1.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_1.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_k.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_k.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_0.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_0.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_1.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_1.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_k.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_k.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q6_k.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q6_k.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q8_0.cu b/llm/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q8_0.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/tsembd.cu b/llm/llama.cpp/ggml/src/ggml-cuda/tsembd.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/tsembd.cuh b/llm/llama.cpp/ggml/src/ggml-cuda/tsembd.cuh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/unary.cu b/llm/llama.cpp/ggml/src/ggml-cuda/unary.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/unary.cuh b/llm/llama.cpp/ggml/src/ggml-cuda/unary.cuh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/upscale.cu b/llm/llama.cpp/ggml/src/ggml-cuda/upscale.cu
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/upscale.cuh b/llm/llama.cpp/ggml/src/ggml-cuda/upscale.cuh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/vecdotq.cuh b/llm/llama.cpp/ggml/src/ggml-cuda/vecdotq.cuh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h b/llm/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h b/llm/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h b/llm/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-impl.h b/llm/llama.cpp/ggml/src/ggml-impl.h
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-kompute.cpp b/llm/llama.cpp/ggml/src/ggml-kompute.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-metal.m b/llm/llama.cpp/ggml/src/ggml-metal.m
old mode 100755
new mode 100644
index b512eb0be132e7dfd5dac5d7e4dcee0f1fce5fe9..8740a16c733298386dbd8001604e5a30e108a1ff
--- a/llm/llama.cpp/ggml/src/ggml-metal.m
+++ b/llm/llama.cpp/ggml/src/ggml-metal.m
@@ -1545,27 +1545,23 @@ static enum ggml_status ggml_metal_graph_compute(
                         // to the matrix-vector kernel
                         int ne11_mm_min = 1;
 
-#if 0
                         // the numbers below are measured on M2 Ultra for 7B and 13B models
                         // these numbers do not translate to other devices or model sizes
                         // TODO: need to find a better approach
-                        if ([ctx->device.name isEqualToString:@"Apple M2 Ultra"]) {
-                            switch (src0t) {
-                                case GGML_TYPE_F16:  ne11_mm_min = 2;  break;
-                                case GGML_TYPE_Q8_0: ne11_mm_min = 7;  break;
-                                case GGML_TYPE_Q2_K: ne11_mm_min = 15; break;
-                                case GGML_TYPE_Q3_K: ne11_mm_min = 7;  break;
-                                case GGML_TYPE_Q4_0:
-                                case GGML_TYPE_Q4_1: ne11_mm_min = 15; break;
-                                case GGML_TYPE_Q4_K: ne11_mm_min = 11; break;
-                                case GGML_TYPE_Q5_0:                          // not tested yet
-                                case GGML_TYPE_Q5_1: ne11_mm_min = 13; break; // not tested yet
-                                case GGML_TYPE_Q5_K: ne11_mm_min = 7;  break;
-                                case GGML_TYPE_Q6_K: ne11_mm_min = 7;  break;
-                                default:             ne11_mm_min = 1;  break;
-                            }
+                        switch (src0t) {
+                            case GGML_TYPE_F16:  ne11_mm_min = 2;  break;
+                            case GGML_TYPE_Q8_0: ne11_mm_min = 7;  break;
+                            case GGML_TYPE_Q2_K: ne11_mm_min = 15; break;
+                            case GGML_TYPE_Q3_K: ne11_mm_min = 7;  break;
+                            case GGML_TYPE_Q4_0:
+                            case GGML_TYPE_Q4_1: ne11_mm_min = 15; break;
+                            case GGML_TYPE_Q4_K: ne11_mm_min = 11; break;
+                            case GGML_TYPE_Q5_0:                          // not tested yet
+                            case GGML_TYPE_Q5_1: ne11_mm_min = 13; break; // not tested yet
+                            case GGML_TYPE_Q5_K: ne11_mm_min = 7;  break;
+                            case GGML_TYPE_Q6_K: ne11_mm_min = 7;  break;
+                            default:             ne11_mm_min = 1;  break;
                         }
-#endif
 
                         // for now the matrix-matrix multiplication kernel only works on A14+/M1+ SoCs
                         // AMD GPU and older A-chips will reuse matrix-vector multiplication kernel
diff --git a/llm/llama.cpp/ggml/src/ggml-quants.c b/llm/llama.cpp/ggml/src/ggml-quants.c
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-quants.h b/llm/llama.cpp/ggml/src/ggml-quants.h
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-rpc.cpp b/llm/llama.cpp/ggml/src/ggml-rpc.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-sycl.cpp b/llm/llama.cpp/ggml/src/ggml-sycl.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-sycl/backend.hpp b/llm/llama.cpp/ggml/src/ggml-sycl/backend.hpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-sycl/common.cpp b/llm/llama.cpp/ggml/src/ggml-sycl/common.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-sycl/common.hpp b/llm/llama.cpp/ggml/src/ggml-sycl/common.hpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-sycl/concat.cpp b/llm/llama.cpp/ggml/src/ggml-sycl/concat.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-sycl/concat.hpp b/llm/llama.cpp/ggml/src/ggml-sycl/concat.hpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-sycl/conv.cpp b/llm/llama.cpp/ggml/src/ggml-sycl/conv.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-sycl/conv.hpp b/llm/llama.cpp/ggml/src/ggml-sycl/conv.hpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-sycl/convert.cpp b/llm/llama.cpp/ggml/src/ggml-sycl/convert.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-sycl/convert.hpp b/llm/llama.cpp/ggml/src/ggml-sycl/convert.hpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp b/llm/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp b/llm/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-sycl/dmmv.hpp b/llm/llama.cpp/ggml/src/ggml-sycl/dmmv.hpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp b/llm/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-sycl/mmq.cpp b/llm/llama.cpp/ggml/src/ggml-sycl/mmq.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-sycl/mmq.hpp b/llm/llama.cpp/ggml/src/ggml-sycl/mmq.hpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp b/llm/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-sycl/mmvq.hpp b/llm/llama.cpp/ggml/src/ggml-sycl/mmvq.hpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-sycl/norm.cpp b/llm/llama.cpp/ggml/src/ggml-sycl/norm.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-sycl/norm.hpp b/llm/llama.cpp/ggml/src/ggml-sycl/norm.hpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-sycl/presets.hpp b/llm/llama.cpp/ggml/src/ggml-sycl/presets.hpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-sycl/rope.cpp b/llm/llama.cpp/ggml/src/ggml-sycl/rope.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-sycl/rope.hpp b/llm/llama.cpp/ggml/src/ggml-sycl/rope.hpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-sycl/softmax.cpp b/llm/llama.cpp/ggml/src/ggml-sycl/softmax.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-sycl/softmax.hpp b/llm/llama.cpp/ggml/src/ggml-sycl/softmax.hpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp b/llm/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp b/llm/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp b/llm/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml-vulkan.cpp b/llm/llama.cpp/ggml/src/ggml-vulkan.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/ggml.c b/llm/llama.cpp/ggml/src/ggml.c
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/kompute-shaders/common.comp b/llm/llama.cpp/ggml/src/kompute-shaders/common.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/kompute-shaders/op_add.comp b/llm/llama.cpp/ggml/src/kompute-shaders/op_add.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/kompute-shaders/op_addrow.comp b/llm/llama.cpp/ggml/src/kompute-shaders/op_addrow.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/kompute-shaders/op_cpy_f16_f16.comp b/llm/llama.cpp/ggml/src/kompute-shaders/op_cpy_f16_f16.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/kompute-shaders/op_cpy_f16_f32.comp b/llm/llama.cpp/ggml/src/kompute-shaders/op_cpy_f16_f32.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/kompute-shaders/op_cpy_f32_f16.comp b/llm/llama.cpp/ggml/src/kompute-shaders/op_cpy_f32_f16.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/kompute-shaders/op_cpy_f32_f32.comp b/llm/llama.cpp/ggml/src/kompute-shaders/op_cpy_f32_f32.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/kompute-shaders/op_diagmask.comp b/llm/llama.cpp/ggml/src/kompute-shaders/op_diagmask.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/kompute-shaders/op_gelu.comp b/llm/llama.cpp/ggml/src/kompute-shaders/op_gelu.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/kompute-shaders/op_getrows.comp b/llm/llama.cpp/ggml/src/kompute-shaders/op_getrows.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/kompute-shaders/op_getrows_f16.comp b/llm/llama.cpp/ggml/src/kompute-shaders/op_getrows_f16.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/kompute-shaders/op_getrows_f32.comp b/llm/llama.cpp/ggml/src/kompute-shaders/op_getrows_f32.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/kompute-shaders/op_getrows_q4_0.comp b/llm/llama.cpp/ggml/src/kompute-shaders/op_getrows_q4_0.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/kompute-shaders/op_getrows_q4_1.comp b/llm/llama.cpp/ggml/src/kompute-shaders/op_getrows_q4_1.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/kompute-shaders/op_getrows_q6_k.comp b/llm/llama.cpp/ggml/src/kompute-shaders/op_getrows_q6_k.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/kompute-shaders/op_mul.comp b/llm/llama.cpp/ggml/src/kompute-shaders/op_mul.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/kompute-shaders/op_mul_mat_f16.comp b/llm/llama.cpp/ggml/src/kompute-shaders/op_mul_mat_f16.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/kompute-shaders/op_mul_mat_mat_f32.comp b/llm/llama.cpp/ggml/src/kompute-shaders/op_mul_mat_mat_f32.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/kompute-shaders/op_mul_mat_q4_0.comp b/llm/llama.cpp/ggml/src/kompute-shaders/op_mul_mat_q4_0.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/kompute-shaders/op_mul_mat_q4_1.comp b/llm/llama.cpp/ggml/src/kompute-shaders/op_mul_mat_q4_1.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/kompute-shaders/op_mul_mat_q6_k.comp b/llm/llama.cpp/ggml/src/kompute-shaders/op_mul_mat_q6_k.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/kompute-shaders/op_mul_mat_q8_0.comp b/llm/llama.cpp/ggml/src/kompute-shaders/op_mul_mat_q8_0.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/kompute-shaders/op_mul_mv_q_n.comp b/llm/llama.cpp/ggml/src/kompute-shaders/op_mul_mv_q_n.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/kompute-shaders/op_mul_mv_q_n_pre.comp b/llm/llama.cpp/ggml/src/kompute-shaders/op_mul_mv_q_n_pre.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/kompute-shaders/op_norm.comp b/llm/llama.cpp/ggml/src/kompute-shaders/op_norm.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/kompute-shaders/op_relu.comp b/llm/llama.cpp/ggml/src/kompute-shaders/op_relu.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/kompute-shaders/op_rmsnorm.comp b/llm/llama.cpp/ggml/src/kompute-shaders/op_rmsnorm.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/kompute-shaders/op_rope_f16.comp b/llm/llama.cpp/ggml/src/kompute-shaders/op_rope_f16.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/kompute-shaders/op_rope_f32.comp b/llm/llama.cpp/ggml/src/kompute-shaders/op_rope_f32.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/kompute-shaders/op_scale.comp b/llm/llama.cpp/ggml/src/kompute-shaders/op_scale.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/kompute-shaders/op_scale_8.comp b/llm/llama.cpp/ggml/src/kompute-shaders/op_scale_8.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/kompute-shaders/op_silu.comp b/llm/llama.cpp/ggml/src/kompute-shaders/op_silu.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/kompute-shaders/op_softmax.comp b/llm/llama.cpp/ggml/src/kompute-shaders/op_softmax.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/kompute-shaders/rope_common.comp b/llm/llama.cpp/ggml/src/kompute-shaders/rope_common.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/llamafile/sgemm.cpp b/llm/llama.cpp/ggml/src/llamafile/sgemm.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/llamafile/sgemm.h b/llm/llama.cpp/ggml/src/llamafile/sgemm.h
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/vulkan-shaders/CMakeLists.txt b/llm/llama.cpp/ggml/src/vulkan-shaders/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/vulkan-shaders/add.comp b/llm/llama.cpp/ggml/src/vulkan-shaders/add.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/vulkan-shaders/argsort.comp b/llm/llama.cpp/ggml/src/vulkan-shaders/argsort.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/vulkan-shaders/clamp.comp b/llm/llama.cpp/ggml/src/vulkan-shaders/clamp.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/vulkan-shaders/concat.comp b/llm/llama.cpp/ggml/src/vulkan-shaders/concat.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/vulkan-shaders/copy.comp b/llm/llama.cpp/ggml/src/vulkan-shaders/copy.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/vulkan-shaders/dequant_f32.comp b/llm/llama.cpp/ggml/src/vulkan-shaders/dequant_f32.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/vulkan-shaders/dequant_funcs.comp b/llm/llama.cpp/ggml/src/vulkan-shaders/dequant_funcs.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/vulkan-shaders/dequant_head.comp b/llm/llama.cpp/ggml/src/vulkan-shaders/dequant_head.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/vulkan-shaders/dequant_iq4_nl.comp b/llm/llama.cpp/ggml/src/vulkan-shaders/dequant_iq4_nl.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/vulkan-shaders/dequant_q2_k.comp b/llm/llama.cpp/ggml/src/vulkan-shaders/dequant_q2_k.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/vulkan-shaders/dequant_q3_k.comp b/llm/llama.cpp/ggml/src/vulkan-shaders/dequant_q3_k.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/vulkan-shaders/dequant_q4_0.comp b/llm/llama.cpp/ggml/src/vulkan-shaders/dequant_q4_0.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/vulkan-shaders/dequant_q4_1.comp b/llm/llama.cpp/ggml/src/vulkan-shaders/dequant_q4_1.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/vulkan-shaders/dequant_q4_k.comp b/llm/llama.cpp/ggml/src/vulkan-shaders/dequant_q4_k.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/vulkan-shaders/dequant_q5_0.comp b/llm/llama.cpp/ggml/src/vulkan-shaders/dequant_q5_0.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/vulkan-shaders/dequant_q5_1.comp b/llm/llama.cpp/ggml/src/vulkan-shaders/dequant_q5_1.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/vulkan-shaders/dequant_q5_k.comp b/llm/llama.cpp/ggml/src/vulkan-shaders/dequant_q5_k.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/vulkan-shaders/dequant_q6_k.comp b/llm/llama.cpp/ggml/src/vulkan-shaders/dequant_q6_k.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/vulkan-shaders/dequant_q8_0.comp b/llm/llama.cpp/ggml/src/vulkan-shaders/dequant_q8_0.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/vulkan-shaders/diag_mask_inf.comp b/llm/llama.cpp/ggml/src/vulkan-shaders/diag_mask_inf.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/vulkan-shaders/div.comp b/llm/llama.cpp/ggml/src/vulkan-shaders/div.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/vulkan-shaders/gelu.comp b/llm/llama.cpp/ggml/src/vulkan-shaders/gelu.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/vulkan-shaders/gelu_quick.comp b/llm/llama.cpp/ggml/src/vulkan-shaders/gelu_quick.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/vulkan-shaders/generic_binary_head.comp b/llm/llama.cpp/ggml/src/vulkan-shaders/generic_binary_head.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/vulkan-shaders/generic_head.comp b/llm/llama.cpp/ggml/src/vulkan-shaders/generic_head.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/vulkan-shaders/generic_unary_head.comp b/llm/llama.cpp/ggml/src/vulkan-shaders/generic_unary_head.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/vulkan-shaders/get_rows.comp b/llm/llama.cpp/ggml/src/vulkan-shaders/get_rows.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/vulkan-shaders/get_rows_quant.comp b/llm/llama.cpp/ggml/src/vulkan-shaders/get_rows_quant.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/vulkan-shaders/group_norm.comp b/llm/llama.cpp/ggml/src/vulkan-shaders/group_norm.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/vulkan-shaders/im2col.comp b/llm/llama.cpp/ggml/src/vulkan-shaders/im2col.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/vulkan-shaders/leaky_relu.comp b/llm/llama.cpp/ggml/src/vulkan-shaders/leaky_relu.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/vulkan-shaders/mul.comp b/llm/llama.cpp/ggml/src/vulkan-shaders/mul.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/vulkan-shaders/mul_mat_split_k_reduce.comp b/llm/llama.cpp/ggml/src/vulkan-shaders/mul_mat_split_k_reduce.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/vulkan-shaders/mul_mat_vec.comp b/llm/llama.cpp/ggml/src/vulkan-shaders/mul_mat_vec.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/vulkan-shaders/mul_mat_vec_base.comp b/llm/llama.cpp/ggml/src/vulkan-shaders/mul_mat_vec_base.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/vulkan-shaders/mul_mat_vec_nc.comp b/llm/llama.cpp/ggml/src/vulkan-shaders/mul_mat_vec_nc.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/vulkan-shaders/mul_mat_vec_p021.comp b/llm/llama.cpp/ggml/src/vulkan-shaders/mul_mat_vec_p021.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/vulkan-shaders/mul_mat_vec_q2_k.comp b/llm/llama.cpp/ggml/src/vulkan-shaders/mul_mat_vec_q2_k.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/vulkan-shaders/mul_mat_vec_q3_k.comp b/llm/llama.cpp/ggml/src/vulkan-shaders/mul_mat_vec_q3_k.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/vulkan-shaders/mul_mat_vec_q4_k.comp b/llm/llama.cpp/ggml/src/vulkan-shaders/mul_mat_vec_q4_k.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/vulkan-shaders/mul_mat_vec_q5_k.comp b/llm/llama.cpp/ggml/src/vulkan-shaders/mul_mat_vec_q5_k.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/vulkan-shaders/mul_mat_vec_q6_k.comp b/llm/llama.cpp/ggml/src/vulkan-shaders/mul_mat_vec_q6_k.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/vulkan-shaders/mul_mm.comp b/llm/llama.cpp/ggml/src/vulkan-shaders/mul_mm.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/vulkan-shaders/norm.comp b/llm/llama.cpp/ggml/src/vulkan-shaders/norm.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/vulkan-shaders/pad.comp b/llm/llama.cpp/ggml/src/vulkan-shaders/pad.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/vulkan-shaders/relu.comp b/llm/llama.cpp/ggml/src/vulkan-shaders/relu.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/vulkan-shaders/rms_norm.comp b/llm/llama.cpp/ggml/src/vulkan-shaders/rms_norm.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/vulkan-shaders/rope_head.comp b/llm/llama.cpp/ggml/src/vulkan-shaders/rope_head.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/vulkan-shaders/rope_neox.comp b/llm/llama.cpp/ggml/src/vulkan-shaders/rope_neox.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/vulkan-shaders/rope_norm.comp b/llm/llama.cpp/ggml/src/vulkan-shaders/rope_norm.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/vulkan-shaders/scale.comp b/llm/llama.cpp/ggml/src/vulkan-shaders/scale.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/vulkan-shaders/silu.comp b/llm/llama.cpp/ggml/src/vulkan-shaders/silu.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/vulkan-shaders/soft_max.comp b/llm/llama.cpp/ggml/src/vulkan-shaders/soft_max.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/vulkan-shaders/square.comp b/llm/llama.cpp/ggml/src/vulkan-shaders/square.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/vulkan-shaders/sum_rows.comp b/llm/llama.cpp/ggml/src/vulkan-shaders/sum_rows.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/vulkan-shaders/tanh.comp b/llm/llama.cpp/ggml/src/vulkan-shaders/tanh.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/vulkan-shaders/timestep_embedding.comp b/llm/llama.cpp/ggml/src/vulkan-shaders/timestep_embedding.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/vulkan-shaders/types.comp b/llm/llama.cpp/ggml/src/vulkan-shaders/types.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/vulkan-shaders/upscale.comp b/llm/llama.cpp/ggml/src/vulkan-shaders/upscale.comp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp b/llm/llama.cpp/ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/gguf-py/LICENSE b/llm/llama.cpp/gguf-py/LICENSE
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/gguf-py/README.md b/llm/llama.cpp/gguf-py/README.md
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/gguf-py/examples/reader.py b/llm/llama.cpp/gguf-py/examples/reader.py
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/gguf-py/examples/writer.py b/llm/llama.cpp/gguf-py/examples/writer.py
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/gguf-py/gguf/__init__.py b/llm/llama.cpp/gguf-py/gguf/__init__.py
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/gguf-py/gguf/constants.py b/llm/llama.cpp/gguf-py/gguf/constants.py
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/gguf-py/gguf/gguf.py b/llm/llama.cpp/gguf-py/gguf/gguf.py
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/gguf-py/gguf/gguf_reader.py b/llm/llama.cpp/gguf-py/gguf/gguf_reader.py
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/gguf-py/gguf/gguf_writer.py b/llm/llama.cpp/gguf-py/gguf/gguf_writer.py
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/gguf-py/gguf/lazy.py b/llm/llama.cpp/gguf-py/gguf/lazy.py
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/gguf-py/gguf/metadata.py b/llm/llama.cpp/gguf-py/gguf/metadata.py
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/gguf-py/gguf/py.typed b/llm/llama.cpp/gguf-py/gguf/py.typed
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/gguf-py/gguf/quants.py b/llm/llama.cpp/gguf-py/gguf/quants.py
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/gguf-py/gguf/tensor_mapping.py b/llm/llama.cpp/gguf-py/gguf/tensor_mapping.py
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/gguf-py/gguf/utility.py b/llm/llama.cpp/gguf-py/gguf/utility.py
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/gguf-py/gguf/vocab.py b/llm/llama.cpp/gguf-py/gguf/vocab.py
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/gguf-py/pyproject.toml b/llm/llama.cpp/gguf-py/pyproject.toml
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/gguf-py/scripts/__init__.py b/llm/llama.cpp/gguf-py/scripts/__init__.py
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/gguf-py/scripts/gguf_convert_endian.py b/llm/llama.cpp/gguf-py/scripts/gguf_convert_endian.py
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/gguf-py/scripts/gguf_dump.py b/llm/llama.cpp/gguf-py/scripts/gguf_dump.py
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/gguf-py/scripts/gguf_hash.py b/llm/llama.cpp/gguf-py/scripts/gguf_hash.py
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/gguf-py/scripts/gguf_new_metadata.py b/llm/llama.cpp/gguf-py/scripts/gguf_new_metadata.py
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/gguf-py/scripts/gguf_set_metadata.py b/llm/llama.cpp/gguf-py/scripts/gguf_set_metadata.py
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/gguf-py/tests/__init__.py b/llm/llama.cpp/gguf-py/tests/__init__.py
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/gguf-py/tests/test_metadata.py b/llm/llama.cpp/gguf-py/tests/test_metadata.py
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/grammars/README.md b/llm/llama.cpp/grammars/README.md
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/grammars/arithmetic.gbnf b/llm/llama.cpp/grammars/arithmetic.gbnf
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/grammars/c.gbnf b/llm/llama.cpp/grammars/c.gbnf
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/grammars/chess.gbnf b/llm/llama.cpp/grammars/chess.gbnf
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/grammars/japanese.gbnf b/llm/llama.cpp/grammars/japanese.gbnf
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/grammars/json.gbnf b/llm/llama.cpp/grammars/json.gbnf
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/grammars/json_arr.gbnf b/llm/llama.cpp/grammars/json_arr.gbnf
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/grammars/list.gbnf b/llm/llama.cpp/grammars/list.gbnf
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/include/llama.h b/llm/llama.cpp/include/llama.h
old mode 100755
new mode 100644
index f23355a6bc9593e53d5978ff81b381182dcccd61..6072e76ebdcb7a707939573bb238904d564a13dd
--- a/llm/llama.cpp/include/llama.h
+++ b/llm/llama.cpp/include/llama.h
@@ -1178,6 +1178,20 @@ extern "C" {
 
     LLAMA_API void llama_dump_timing_info_yaml(FILE * stream, const struct llama_context * ctx);
 
+    // Apply a LoRA adapter to a loaded model
+    // path_base_model is the path to a higher quality model to use as a base for
+    // the layers modified by the adapter. Can be NULL to use the current loaded model.
+    // The model needs to be reloaded before applying a new adapter, otherwise the adapter
+    // will be applied on top of the previous one
+    // Returns 0 on success
+    LLAMA_API int32_t llama_model_apply_lora_from_file(
+            const struct llama_model * model,
+                            const char * path_lora,
+                                float   scale,
+                            const char * path_base_model,
+                                int32_t   n_threads);
+
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/llm/llama.cpp/media/llama-leader.jpeg b/llm/llama.cpp/media/llama-leader.jpeg
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/media/llama0-banner.png b/llm/llama.cpp/media/llama0-banner.png
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/media/llama0-logo.png b/llm/llama.cpp/media/llama0-logo.png
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/media/llama1-banner.png b/llm/llama.cpp/media/llama1-banner.png
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/media/llama1-logo.png b/llm/llama.cpp/media/llama1-logo.png
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/media/matmul.png b/llm/llama.cpp/media/matmul.png
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/media/matmul.svg b/llm/llama.cpp/media/matmul.svg
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/models/.editorconfig b/llm/llama.cpp/models/.editorconfig
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/models/ggml-vocab-aquila.gguf b/llm/llama.cpp/models/ggml-vocab-aquila.gguf
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/models/ggml-vocab-baichuan.gguf b/llm/llama.cpp/models/ggml-vocab-baichuan.gguf
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/models/ggml-vocab-bert-bge.gguf b/llm/llama.cpp/models/ggml-vocab-bert-bge.gguf
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/models/ggml-vocab-bert-bge.gguf.inp b/llm/llama.cpp/models/ggml-vocab-bert-bge.gguf.inp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/models/ggml-vocab-bert-bge.gguf.out b/llm/llama.cpp/models/ggml-vocab-bert-bge.gguf.out
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/models/ggml-vocab-command-r.gguf b/llm/llama.cpp/models/ggml-vocab-command-r.gguf
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/models/ggml-vocab-command-r.gguf.inp b/llm/llama.cpp/models/ggml-vocab-command-r.gguf.inp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/models/ggml-vocab-command-r.gguf.out b/llm/llama.cpp/models/ggml-vocab-command-r.gguf.out
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/models/ggml-vocab-deepseek-coder.gguf b/llm/llama.cpp/models/ggml-vocab-deepseek-coder.gguf
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.inp b/llm/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.inp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.out b/llm/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.out
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/models/ggml-vocab-deepseek-llm.gguf b/llm/llama.cpp/models/ggml-vocab-deepseek-llm.gguf
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.inp b/llm/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.inp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.out b/llm/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.out
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/models/ggml-vocab-falcon.gguf b/llm/llama.cpp/models/ggml-vocab-falcon.gguf
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/models/ggml-vocab-falcon.gguf.inp b/llm/llama.cpp/models/ggml-vocab-falcon.gguf.inp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/models/ggml-vocab-falcon.gguf.out b/llm/llama.cpp/models/ggml-vocab-falcon.gguf.out
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/models/ggml-vocab-gpt-2.gguf b/llm/llama.cpp/models/ggml-vocab-gpt-2.gguf
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/models/ggml-vocab-gpt-2.gguf.inp b/llm/llama.cpp/models/ggml-vocab-gpt-2.gguf.inp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/models/ggml-vocab-gpt-2.gguf.out b/llm/llama.cpp/models/ggml-vocab-gpt-2.gguf.out
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/models/ggml-vocab-gpt-neox.gguf b/llm/llama.cpp/models/ggml-vocab-gpt-neox.gguf
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/models/ggml-vocab-llama-bpe.gguf b/llm/llama.cpp/models/ggml-vocab-llama-bpe.gguf
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/models/ggml-vocab-llama-bpe.gguf.inp b/llm/llama.cpp/models/ggml-vocab-llama-bpe.gguf.inp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/models/ggml-vocab-llama-bpe.gguf.out b/llm/llama.cpp/models/ggml-vocab-llama-bpe.gguf.out
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/models/ggml-vocab-llama-spm.gguf b/llm/llama.cpp/models/ggml-vocab-llama-spm.gguf
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/models/ggml-vocab-llama-spm.gguf.inp b/llm/llama.cpp/models/ggml-vocab-llama-spm.gguf.inp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/models/ggml-vocab-llama-spm.gguf.out b/llm/llama.cpp/models/ggml-vocab-llama-spm.gguf.out
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/models/ggml-vocab-mpt.gguf b/llm/llama.cpp/models/ggml-vocab-mpt.gguf
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/models/ggml-vocab-mpt.gguf.inp b/llm/llama.cpp/models/ggml-vocab-mpt.gguf.inp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/models/ggml-vocab-mpt.gguf.out b/llm/llama.cpp/models/ggml-vocab-mpt.gguf.out
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/models/ggml-vocab-phi-3.gguf b/llm/llama.cpp/models/ggml-vocab-phi-3.gguf
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/models/ggml-vocab-phi-3.gguf.inp b/llm/llama.cpp/models/ggml-vocab-phi-3.gguf.inp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/models/ggml-vocab-phi-3.gguf.out b/llm/llama.cpp/models/ggml-vocab-phi-3.gguf.out
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/models/ggml-vocab-qwen2.gguf b/llm/llama.cpp/models/ggml-vocab-qwen2.gguf
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/models/ggml-vocab-qwen2.gguf.inp b/llm/llama.cpp/models/ggml-vocab-qwen2.gguf.inp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/models/ggml-vocab-qwen2.gguf.out b/llm/llama.cpp/models/ggml-vocab-qwen2.gguf.out
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/models/ggml-vocab-refact.gguf b/llm/llama.cpp/models/ggml-vocab-refact.gguf
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/models/ggml-vocab-refact.gguf.inp b/llm/llama.cpp/models/ggml-vocab-refact.gguf.inp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/models/ggml-vocab-refact.gguf.out b/llm/llama.cpp/models/ggml-vocab-refact.gguf.out
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/models/ggml-vocab-starcoder.gguf b/llm/llama.cpp/models/ggml-vocab-starcoder.gguf
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/models/ggml-vocab-starcoder.gguf.inp b/llm/llama.cpp/models/ggml-vocab-starcoder.gguf.inp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/models/ggml-vocab-starcoder.gguf.out b/llm/llama.cpp/models/ggml-vocab-starcoder.gguf.out
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/mypy.ini b/llm/llama.cpp/mypy.ini
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/pocs/CMakeLists.txt b/llm/llama.cpp/pocs/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/pocs/vdot/CMakeLists.txt b/llm/llama.cpp/pocs/vdot/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/pocs/vdot/q8dot.cpp b/llm/llama.cpp/pocs/vdot/q8dot.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/pocs/vdot/vdot.cpp b/llm/llama.cpp/pocs/vdot/vdot.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/poetry.lock b/llm/llama.cpp/poetry.lock
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/prompts/LLM-questions.txt b/llm/llama.cpp/prompts/LLM-questions.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/prompts/alpaca.txt b/llm/llama.cpp/prompts/alpaca.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/prompts/assistant.txt b/llm/llama.cpp/prompts/assistant.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/prompts/chat-with-baichuan.txt b/llm/llama.cpp/prompts/chat-with-baichuan.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/prompts/chat-with-bob.txt b/llm/llama.cpp/prompts/chat-with-bob.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/prompts/chat-with-qwen.txt b/llm/llama.cpp/prompts/chat-with-qwen.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/prompts/chat-with-vicuna-v0.txt b/llm/llama.cpp/prompts/chat-with-vicuna-v0.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/prompts/chat-with-vicuna-v1.txt b/llm/llama.cpp/prompts/chat-with-vicuna-v1.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/prompts/chat.txt b/llm/llama.cpp/prompts/chat.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/prompts/dan-modified.txt b/llm/llama.cpp/prompts/dan-modified.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/prompts/dan.txt b/llm/llama.cpp/prompts/dan.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/prompts/mnemonics.txt b/llm/llama.cpp/prompts/mnemonics.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/prompts/parallel-questions.txt b/llm/llama.cpp/prompts/parallel-questions.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/prompts/reason-act.txt b/llm/llama.cpp/prompts/reason-act.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/pyproject.toml b/llm/llama.cpp/pyproject.toml
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/pyrightconfig.json b/llm/llama.cpp/pyrightconfig.json
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/requirements.txt b/llm/llama.cpp/requirements.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/requirements/requirements-all.txt b/llm/llama.cpp/requirements/requirements-all.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/requirements/requirements-compare-llama-bench.txt b/llm/llama.cpp/requirements/requirements-compare-llama-bench.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt b/llm/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt b/llm/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/requirements/requirements-convert_legacy_llama.txt b/llm/llama.cpp/requirements/requirements-convert_legacy_llama.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/requirements/requirements-convert_llama_ggml_to_gguf.txt b/llm/llama.cpp/requirements/requirements-convert_llama_ggml_to_gguf.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt b/llm/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/requirements/requirements-pydantic.txt b/llm/llama.cpp/requirements/requirements-pydantic.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/requirements/requirements-test-tokenizer-random.txt b/llm/llama.cpp/requirements/requirements-test-tokenizer-random.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/scripts/build-info.sh b/llm/llama.cpp/scripts/build-info.sh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/scripts/check-requirements.sh b/llm/llama.cpp/scripts/check-requirements.sh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/scripts/ci-run.sh b/llm/llama.cpp/scripts/ci-run.sh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/scripts/compare-commits.sh b/llm/llama.cpp/scripts/compare-commits.sh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/scripts/compare-llama-bench.py b/llm/llama.cpp/scripts/compare-llama-bench.py
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/scripts/debug-test.sh b/llm/llama.cpp/scripts/debug-test.sh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/scripts/gen-authors.sh b/llm/llama.cpp/scripts/gen-authors.sh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/scripts/gen-unicode-data.py b/llm/llama.cpp/scripts/gen-unicode-data.py
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/scripts/get-flags.mk b/llm/llama.cpp/scripts/get-flags.mk
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/scripts/get-hellaswag.sh b/llm/llama.cpp/scripts/get-hellaswag.sh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/scripts/get-pg.sh b/llm/llama.cpp/scripts/get-pg.sh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/scripts/get-wikitext-103.sh b/llm/llama.cpp/scripts/get-wikitext-103.sh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/scripts/get-wikitext-2.sh b/llm/llama.cpp/scripts/get-wikitext-2.sh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/scripts/get-winogrande.sh b/llm/llama.cpp/scripts/get-winogrande.sh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/scripts/hf.sh b/llm/llama.cpp/scripts/hf.sh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/scripts/pod-llama.sh b/llm/llama.cpp/scripts/pod-llama.sh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/scripts/qnt-all.sh b/llm/llama.cpp/scripts/qnt-all.sh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/scripts/run-all-perf.sh b/llm/llama.cpp/scripts/run-all-perf.sh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/scripts/run-all-ppl.sh b/llm/llama.cpp/scripts/run-all-ppl.sh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/scripts/run-with-preset.py b/llm/llama.cpp/scripts/run-with-preset.py
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/scripts/server-llm.sh b/llm/llama.cpp/scripts/server-llm.sh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/scripts/sync-ggml-am.sh b/llm/llama.cpp/scripts/sync-ggml-am.sh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/scripts/sync-ggml.last b/llm/llama.cpp/scripts/sync-ggml.last
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/scripts/sync-ggml.sh b/llm/llama.cpp/scripts/sync-ggml.sh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/scripts/verify-checksum-models.py b/llm/llama.cpp/scripts/verify-checksum-models.py
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/scripts/xxd.cmake b/llm/llama.cpp/scripts/xxd.cmake
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/spm-headers/ggml-alloc.h b/llm/llama.cpp/spm-headers/ggml-alloc.h
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/spm-headers/ggml-backend.h b/llm/llama.cpp/spm-headers/ggml-backend.h
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/spm-headers/ggml-metal.h b/llm/llama.cpp/spm-headers/ggml-metal.h
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/spm-headers/ggml.h b/llm/llama.cpp/spm-headers/ggml.h
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/spm-headers/llama.h b/llm/llama.cpp/spm-headers/llama.h
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/src/CMakeLists.txt b/llm/llama.cpp/src/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/src/llama-grammar.cpp b/llm/llama.cpp/src/llama-grammar.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/src/llama-grammar.h b/llm/llama.cpp/src/llama-grammar.h
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/src/llama-impl.h b/llm/llama.cpp/src/llama-impl.h
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/src/llama-sampling.cpp b/llm/llama.cpp/src/llama-sampling.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/src/llama-sampling.h b/llm/llama.cpp/src/llama-sampling.h
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/src/llama-vocab.cpp b/llm/llama.cpp/src/llama-vocab.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/src/llama-vocab.h b/llm/llama.cpp/src/llama-vocab.h
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/src/llama.cpp b/llm/llama.cpp/src/llama.cpp
old mode 100755
new mode 100644
index a7b1c9ebd9e37d1e9017bdec315b86ecd51a738b..6479e121ed5648c6fa60e3f854948682dbd3f4cd
--- a/llm/llama.cpp/src/llama.cpp
+++ b/llm/llama.cpp/src/llama.cpp
@@ -4892,7 +4892,7 @@ static void llm_load_hparams(
             } break;
         case LLM_ARCH_PHI3:
             {
-                ml.get_key(LLM_KV_ATTENTION_SLIDING_WINDOW, hparams.n_swa);
+                ml.get_key(LLM_KV_ATTENTION_SLIDING_WINDOW, hparams.n_swa, false);
                 ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
 
                 switch (hparams.n_layer) {
@@ -5347,16 +5347,7 @@ static void llm_load_vocab(
         if (vocab.type == LLAMA_VOCAB_TYPE_BPE) {
             vocab.tokenizer_add_space_prefix = false;
             vocab.tokenizer_clean_spaces = true;
-            if (tokenizer_pre.empty()) {
-                LLAMA_LOG_WARN("%s: missing pre-tokenizer type, using: 'default'\n", __func__);
-                LLAMA_LOG_WARN("%s:                                             \n", __func__);
-                LLAMA_LOG_WARN("%s: ************************************        \n", __func__);
-                LLAMA_LOG_WARN("%s: GENERATION QUALITY WILL BE DEGRADED!        \n", __func__);
-                LLAMA_LOG_WARN("%s: CONSIDER REGENERATING THE MODEL             \n", __func__);
-                LLAMA_LOG_WARN("%s: ************************************        \n", __func__);
-                LLAMA_LOG_WARN("%s:                                             \n", __func__);
-                vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
-            } else if (tokenizer_pre == "default") {
+            if (tokenizer_pre == "default") {
                 vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
             } else if (
                     tokenizer_pre == "llama3"   ||
@@ -5443,7 +5434,8 @@ static void llm_load_vocab(
                 tokenizer_pre == "codeshell") {
                 vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_CODESHELL;
             } else {
-                throw std::runtime_error(format("unknown pre-tokenizer type: '%s'", tokenizer_pre.c_str()));
+                LLAMA_LOG_WARN("%s: missing or unrecognized pre-tokenizer type, using: 'default'\n", __func__);
+                vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
             }
         } else if (vocab.type == LLAMA_VOCAB_TYPE_SPM) {
             vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
@@ -7708,7 +7700,7 @@ static int llama_model_load(const std::string & fname, llama_model & model, llam
         }
     } catch (const std::exception & err) {
         LLAMA_LOG_ERROR("%s: error loading model: %s\n", __func__, err.what());
-        return -1;
+        throw; // rethrow after logging; llama_load_model_from_file catches (...), deletes the model and rethrows
     }
 
     return 0;
@@ -8592,14 +8584,14 @@ struct llm_build_context {
     }
 
     struct ggml_tensor * build_inp_mean() {
-        lctx.inp_mean = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_tokens, n_tokens);
+        lctx.inp_mean = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_tokens, cparams.n_seq_max);
         cb(lctx.inp_mean, "inp_mean", -1);
         ggml_set_input(lctx.inp_mean);
         return lctx.inp_mean;
     }
 
     struct ggml_tensor * build_inp_cls() {
-        lctx.inp_cls = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, n_tokens);
+        lctx.inp_cls = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, cparams.n_seq_max);
         cb(lctx.inp_cls, "inp_cls", -1);
         ggml_set_input(lctx.inp_cls);
         return lctx.inp_cls;
@@ -10774,7 +10766,7 @@ struct llm_build_context {
         struct ggml_tensor * inp_pos = build_inp_pos();
 
         // KQ_mask (mask for 1 head, it will be broadcasted to all heads)
-        struct ggml_tensor * KQ_mask_swa = build_inp_KQ_mask_swa();
+        struct ggml_tensor * KQ_mask = hparams.n_swa > 0 ? build_inp_KQ_mask_swa() : build_inp_KQ_mask();
 
         for (int il = 0; il < n_layer; ++il) {
             auto residual = inpL;
@@ -10832,7 +10824,7 @@ struct llm_build_context {
 
                 cur = llm_build_kv(ctx0, lctx, kv_self, gf,
                         model.layers[il].wo, model.layers[il].bo,
-                        Kcur, Vcur, Qcur, KQ_mask_swa, n_tokens, kv_head, n_kv, 1.0f, cb, il);
+                        Kcur, Vcur, Qcur, KQ_mask, n_tokens, kv_head, n_kv, 1.0f, cb, il);
             }
 
             if (il == n_layer - 1) {
@@ -14154,19 +14146,16 @@ static void llama_set_inputs(llama_context & lctx, const llama_batch & batch) {
         GGML_ASSERT(ggml_backend_buffer_is_host(lctx.inp_mean->buffer));
 
         float * data = (float *) lctx.inp_mean->data;
-        memset(lctx.inp_mean->data, 0, n_tokens * n_tokens * ggml_element_size(lctx.inp_mean));
+        memset(lctx.inp_mean->data, 0, n_tokens * cparams.n_seq_max * ggml_element_size(lctx.inp_mean));
 
-        std::vector<uint64_t> sum(n_tokens, 0);
+        std::vector<uint64_t> sum(cparams.n_seq_max, 0); // per-sequence token counts: indexed by seq_id and read for every i < n_seq_max below
         for (int i = 0; i < n_tokens; ++i) {
             const llama_seq_id seq_id = batch.seq_id[i][0];
-
-            GGML_ASSERT(seq_id < n_tokens && "seq_id cannot be larger than n_tokens with pooling_type == MEAN");
-
             sum[seq_id] += 1;
         }
 
-        std::vector<float> div(n_tokens, 0.0f);
-        for (int i = 0; i < n_tokens; ++i) {
+        std::vector<float> div(cparams.n_seq_max, 0.0f);
+        for (uint32_t i = 0; i < cparams.n_seq_max; ++i) {
             const uint64_t s = sum[i];
             if (s > 0) {
                 div[i] = 1.0f/float(s);
@@ -14186,14 +14175,11 @@ static void llama_set_inputs(llama_context & lctx, const llama_batch & batch) {
         GGML_ASSERT(ggml_backend_buffer_is_host(lctx.inp_cls->buffer));
 
         uint32_t * data = (uint32_t *) lctx.inp_cls->data;
-        memset(lctx.inp_cls->data, 0, n_tokens * ggml_element_size(lctx.inp_cls));
+        memset(lctx.inp_cls->data, 0, cparams.n_seq_max * ggml_element_size(lctx.inp_cls));
 
         for (int i = 0; i < n_tokens; ++i) {
             const llama_seq_id seq_id = batch.seq_id[i][0];
             const llama_pos    pos    = batch.pos[i];
-
-            GGML_ASSERT(seq_id < n_tokens && "seq_id cannot be larger than n_tokens with pooling_type == CLS");
-
             if (pos == 0) {
                 data[seq_id] = i;
             }
@@ -14356,7 +14342,7 @@ static size_t llama_output_reserve(llama_context & lctx, size_t n_outputs) {
     const auto n_embd  = hparams.n_embd;
 
     // TODO: use a per-batch flag for logits presence instead
-    const bool has_logits = !cparams.embeddings;
+    const bool has_logits =  cparams.causal_attn;
     const bool has_embd   =  lctx.is_encoding || (cparams.embeddings && (cparams.pooling_type == LLAMA_POOLING_TYPE_NONE));
 
     const size_t logits_size = has_logits ? n_vocab*n_outputs_max : 0;
@@ -14627,17 +14613,25 @@ static int llama_decode_internal(
             // no output
             res  = nullptr;
             embd = nullptr;
         } else if (cparams.embeddings) {
-            res = nullptr; // do not extract logits for embedding case
-            embd = gf->nodes[gf->n_nodes - 1];
-            if (strcmp(embd->name, "result_embd_pooled") != 0) {
-                embd = gf->nodes[gf->n_nodes - 2];
+            // search the whole graph from the end for the pooled embeddings
+            // node instead of probing only the last two nodes
+            for (int i = gf->n_nodes - 1; i >= 0; --i) {
+                embd = gf->nodes[i];
+                if (strcmp(embd->name, "result_embd_pooled") == 0) {
+                    break;
+                }
             }
             GGML_ASSERT(strcmp(embd->name, "result_embd_pooled") == 0 && "missing embeddings tensor");
         } else {
             embd = nullptr; // do not extract embeddings when not needed
             GGML_ASSERT(strcmp(res->name, "result_output") == 0 && "missing result_output tensor");
         }
+
+        if (!cparams.causal_attn) {
+            res = nullptr; // do not extract logits when not needed
+        }
+
         // LLAMA_LOG_INFO("graph build time: %.3f ms (%d nodes, %d leafs)\n", (ggml_time_us() - t_start_us)/1000.0, gf->n_nodes, gf->n_leafs);
 
         ggml_backend_sched_alloc_graph(lctx.sched, gf);
@@ -16429,16 +16423,23 @@ struct llama_model * llama_load_model_from_file(
         }
         model->rpc_servers.push_back(servers);
     }
-    int status = llama_model_load(path_model, *model, params);
-    GGML_ASSERT(status <= 0);
-    if (status < 0) {
-        if (status == -1) {
-            LLAMA_LOG_ERROR("%s: failed to load model\n", __func__);
-        } else if (status == -2) {
-            LLAMA_LOG_INFO("%s: cancelled model load\n", __func__);
+
+    try {
+        int status = llama_model_load(path_model, *model, params);
+        GGML_ASSERT(status <= 0);
+        if (status < 0) {
+            if (status == -1) {
+                LLAMA_LOG_ERROR("%s: failed to load model\n", __func__);
+            } else if (status == -2) {
+                LLAMA_LOG_INFO("%s: cancelled model load\n", __func__);
+            }
+            delete model;
+            return nullptr;
         }
+    } catch (...) {
+        LLAMA_LOG_ERROR("%s: exception loading model\n", __func__);
         delete model;
-        return nullptr;
+        throw;
     }
 
     return model;
@@ -19171,3 +19172,290 @@ void llama_log_callback_default(ggml_log_level level, const char * text, void *
     fputs(text, stderr);
     fflush(stderr);
 }
+
+static int llama_apply_lora_from_file_internal(
+    const struct llama_model & model, const char * path_lora, float scale, const char * path_base_model, int n_threads
+) {
+    LLAMA_LOG_INFO("%s: applying lora adapter from '%s' - please wait ...\n", __func__, path_lora);
+
+    const int64_t t_start_lora_us = ggml_time_us();
+
+    llama_file fin(path_lora, "rb");
+
+    // verify magic and version
+    {
+        uint32_t magic = fin.read_u32();
+        if (magic != LLAMA_FILE_MAGIC_GGLA) {
+            LLAMA_LOG_ERROR("%s: bad file magic\n", __func__);
+            return 1;
+        }
+
+        uint32_t format_version = fin.read_u32();
+        if (format_version != 1) {
+            LLAMA_LOG_ERROR("%s: unsupported file version\n", __func__ );
+            return 1;
+        }
+    }
+
+    int32_t lora_r = fin.read_u32();
+    int32_t lora_alpha = fin.read_u32();
+    float scaling = scale * (float)lora_alpha / (float)lora_r;
+
+    LLAMA_LOG_INFO("%s: r = %d, alpha = %d, scaling = %.2f\n", __func__, lora_r, lora_alpha, scaling);
+
+    // load base model
+    std::unique_ptr<llama_model_loader> ml;
+    if (path_base_model) {
+        LLAMA_LOG_INFO("%s: loading base model from '%s'\n", __func__, path_base_model);
+        ml.reset(new llama_model_loader(path_base_model, /*use_mmap*/ true, /*check_tensors*/ false, /*kv_overrides*/ nullptr));
+        ml->init_mappings(/*prefetch*/ false); // no prefetching
+    }
+
+    struct tensor_meta {
+        std::string name;
+        ggml_type type;
+        int32_t ne[2];
+        size_t offset;
+    };
+    std::map<std::string, tensor_meta> tensor_meta_map;
+
+    // load all tensor meta
+    while (true) {
+        if (fin.tell() == fin.size) {
+            // eof
+            break;
+        }
+
+        int32_t n_dims;
+        int32_t name_len;
+        int32_t ftype;
+
+        fin.read_raw(&n_dims, sizeof(n_dims));
+        fin.read_raw(&name_len, sizeof(name_len));
+        fin.read_raw(&ftype, sizeof(ftype));
+
+        if (n_dims != 1 && n_dims != 2) {
+            LLAMA_LOG_ERROR("%s: unsupported tensor dimension %d\n", __func__, n_dims);
+            return 1;
+        }
+
+        int32_t ne[2] = { 1, 1 };
+        for (int i = 0; i < n_dims; ++i) {
+            fin.read_raw(&ne[i], sizeof(ne[i]));
+        }
+
+        std::string name;
+        {
+            GGML_ASSERT(name_len < GGML_MAX_NAME);
+            char buf[GGML_MAX_NAME];
+            fin.read_raw(buf, name_len);
+            name = std::string(buf, name_len);
+        }
+
+        // check for lora suffix
+        std::string lora_suffix;
+        if (name.length() > 6) {
+            lora_suffix = name.substr(name.length() - 6);
+        }
+        if (lora_suffix != ".loraA" && lora_suffix != ".loraB") {
+            LLAMA_LOG_ERROR("%s: error: '%s' is not a lora tensor\n", __func__, name.c_str());
+            return 1;
+        }
+
+        // tensor type
+        ggml_type wtype;
+        switch (ftype) {
+            case 0: wtype = GGML_TYPE_F32;  break;
+            case 1: wtype = GGML_TYPE_F16;  break;
+            default:
+                    {
+                        LLAMA_LOG_ERROR("%s: invalid tensor data type '%d'\n",
+                                __func__, ftype);
+                        return 1;
+                    }
+        }
+
+        // data offset
+        size_t offset = fin.tell();
+        offset = (offset + 31) & -32;
+
+        // skip tensor data
+        fin.seek(offset + ggml_row_size(wtype, ne[0]) * ne[1], SEEK_SET);
+
+        tensor_meta_map.emplace(name, tensor_meta{ name, wtype, { ne[0], ne[1] }, offset });
+    }
+
+    bool warned = false;
+    int n_tensors = 0;
+
+    // apply
+    ggml_backend_t backend_cpu = ggml_backend_cpu_init();
+    if (backend_cpu == nullptr) {
+        LLAMA_LOG_ERROR("%s: error: failed to initialize cpu backend\n", __func__);
+        return 1;
+    }
+    ggml_backend_cpu_set_n_threads(backend_cpu, n_threads);
+
+    std::vector<no_init<uint8_t>> read_buf;
+    for (const auto & it : model.tensors_by_name) {
+        const std::string & base_name = it.first;
+        ggml_tensor * model_t = it.second;
+
+        if (tensor_meta_map.find(base_name + ".loraA") == tensor_meta_map.end() ||
+            tensor_meta_map.find(base_name + ".loraB") == tensor_meta_map.end()) {
+            continue;
+        }
+
+        tensor_meta & metaA = tensor_meta_map.at(base_name + ".loraA");
+        tensor_meta & metaB = tensor_meta_map.at(base_name + ".loraB");
+
+        ggml_init_params lora_init_params = {
+            /* .mem_size   */ ggml_tensor_overhead()*128 + ggml_graph_overhead(),
+            /* .mem_buffer */ nullptr,
+            /* .no_alloc   */ true,
+        };
+        ggml_context * lora_ctx = ggml_init(lora_init_params);
+        if (lora_ctx == nullptr) {
+            LLAMA_LOG_ERROR("%s: error: failed to initialize lora context\n", __func__);
+            ggml_backend_free(backend_cpu);
+            return 1;
+        }
+
+        // create tensors
+        ggml_tensor * loraA = ggml_new_tensor_2d(lora_ctx, metaA.type, metaA.ne[0], metaA.ne[1]);
+        ggml_tensor * loraB = ggml_new_tensor_2d(lora_ctx, metaB.type, metaB.ne[0], metaB.ne[1]);
+        ggml_set_name(loraA, metaA.name.c_str());
+        ggml_set_name(loraB, metaB.name.c_str());
+
+        ggml_tensor * base_t;
+        if (ml) {
+            if (!ml->get_tensor_meta(base_name.c_str())) {
+                LLAMA_LOG_ERROR("%s: error: tensor '%s' not found in base model\n", __func__, base_name.c_str());
+                return 1;
+            }
+            base_t = ggml_dup_tensor(lora_ctx, ml->get_tensor_meta(base_name.c_str()));
+        } else {
+            base_t = ggml_dup_tensor(lora_ctx, model_t);
+        }
+        ggml_set_name(base_t, base_name.c_str());
+
+        // allocate in backend buffer
+        ggml_backend_buffer_t lora_buf = ggml_backend_alloc_ctx_tensors_from_buft(lora_ctx, ggml_backend_cpu_buffer_type());
+        if (lora_buf == nullptr) {
+            LLAMA_LOG_ERROR("%s: error: failed to allocate lora tensors\n", __func__);
+            return 1;
+        }
+
+        // load tensor data
+        auto load_tensor = [&read_buf, &fin](const tensor_meta & tensor_meta, ggml_tensor * tensor) {
+            read_buf.resize(ggml_nbytes(tensor));
+            fin.seek(tensor_meta.offset, SEEK_SET);
+            fin.read_raw(read_buf.data(), ggml_nbytes(tensor));
+            ggml_backend_tensor_set(tensor, read_buf.data(), 0, read_buf.size());
+        };
+        load_tensor(metaA, loraA);
+        load_tensor(metaB, loraB);
+
+        // load base model tensor data
+        if (ml) {
+            ml->load_data_for(base_t);
+        } else {
+            ggml_backend_tensor_copy(model_t, base_t);
+        }
+
+        if (ggml_is_quantized(base_t->type) && !warned) {
+            LLAMA_LOG_WARN("%s: warning: using a lora adapter with a quantized model may result in poor quality, "
+                            "use a f16 or f32 base model with --lora-base\n", __func__);
+            warned = true;
+        }
+
+        if (base_t->ne[0] != loraA->ne[1] || base_t->ne[1] != loraB->ne[1]) {
+            LLAMA_LOG_ERROR("%s: incompatible tensor dimensions (%" PRId64 " and %" PRId64 ");"
+                            " are you sure that this adapter is for this model?\n", __func__, base_t->ne[0], loraA->ne[1]);
+            ggml_free(lora_ctx);
+            ggml_backend_buffer_free(lora_buf);
+            ggml_backend_free(backend_cpu);
+            return 1;
+        }
+
+        auto build_lora_graph = [&]() {
+            // w = w + BA*s
+            ggml_tensor * BA = ggml_mul_mat(lora_ctx, loraA, loraB);
+            ggml_set_name(BA, "BA");
+
+            if (scaling != 1.0f) {
+                BA = ggml_scale(lora_ctx, BA, scaling);
+                ggml_set_name(BA, "BA_scaled");
+            }
+
+            ggml_tensor * r;
+            r = ggml_add_inplace(lora_ctx, base_t, BA);
+            ggml_set_name(r, "r_add");
+
+            if (base_t->type != model_t->type) {
+                // convert the result to the model type
+                r = ggml_cast(lora_ctx, r, model_t->type);
+                ggml_set_name(r, "r_cast");
+            }
+
+            return r;
+        };
+
+        ggml_cgraph * gf = ggml_new_graph(lora_ctx);
+        ggml_tensor * r = build_lora_graph();
+        ggml_build_forward_expand(gf, r);
+
+        ggml_backend_buffer_t graph_buf = ggml_backend_alloc_ctx_tensors_from_buft(lora_ctx, ggml_backend_cpu_buffer_type());
+        if (graph_buf == nullptr) {
+            LLAMA_LOG_ERROR("%s: error: failed to allocate graph tensors\n", __func__);
+            ggml_free(lora_ctx);
+            ggml_backend_buffer_free(lora_buf);
+            ggml_backend_free(backend_cpu);
+            return 1;
+        }
+
+        ggml_backend_graph_compute(backend_cpu, gf);
+
+        ggml_backend_tensor_set(model_t, r->data, 0, ggml_nbytes(r));
+
+#if 0
+        // TODO: use scheduler with fallback to CPU for less copies between CPU and GPU
+        //ggml_backend_sched_t sched = ggml_backend_sched_new(backends.data(), backends.size(), GGML_DEFAULT_GRAPH_SIZE);
+
+        // sched compute
+        ggml_build_forward_expand(gf, build_graph());
+        ggml_backend_sched_init_measure(sched, gf);
+
+        // create the graph again, since the previous one was destroyed by the measure
+        ggml_graph_clear(gf);
+        ggml_build_forward_expand(gf, build_graph());
+        ggml_backend_sched_graph_compute(sched, gf);
+        ggml_backend_sched_free(sched);
+#endif
+
+        ggml_backend_buffer_free(lora_buf);
+        ggml_backend_buffer_free(graph_buf);
+        ggml_free(lora_ctx);
+
+        n_tensors++;
+        if (n_tensors % 4 == 0) {
+            LLAMA_LOG_INFO(".");
+        }
+    }
+
+    ggml_backend_free(backend_cpu);
+
+    const int64_t t_lora_us = ggml_time_us() - t_start_lora_us;
+    LLAMA_LOG_INFO(" done (%.2f ms)\n", t_lora_us / 1000.0);
+
+    return 0;
+}
+
+int32_t llama_model_apply_lora_from_file(const struct llama_model * model, const char * path_lora, float scale, const char * path_base_model, int32_t n_threads) { // forwards all arguments to llama_apply_lora_from_file_internal and turns a thrown std::exception into return code 1
+    try { // wraps the internal call so a std::exception is reported via the return value instead of propagating to the caller
+        return llama_apply_lora_from_file_internal(*model, path_lora, scale, path_base_model, n_threads); // model is dereferenced without a null check; the helper's result is returned unchanged
+    } catch (const std::exception & err) { // NOTE(review): exceptions not derived from std::exception are not caught here
+        LLAMA_LOG_ERROR("%s: failed to apply lora adapter: %s\n", __func__, err.what());
+        return 1; // value returned whenever the helper throws
+    }
+}
\ No newline at end of file
diff --git a/llm/llama.cpp/src/unicode-data.cpp b/llm/llama.cpp/src/unicode-data.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/src/unicode-data.h b/llm/llama.cpp/src/unicode-data.h
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/src/unicode.cpp b/llm/llama.cpp/src/unicode.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/src/unicode.h b/llm/llama.cpp/src/unicode.h
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/tests/.gitignore b/llm/llama.cpp/tests/.gitignore
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/tests/CMakeLists.txt b/llm/llama.cpp/tests/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/tests/get-model.cpp b/llm/llama.cpp/tests/get-model.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/tests/get-model.h b/llm/llama.cpp/tests/get-model.h
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/tests/run-json-schema-to-grammar.mjs b/llm/llama.cpp/tests/run-json-schema-to-grammar.mjs
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/tests/test-autorelease.cpp b/llm/llama.cpp/tests/test-autorelease.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/tests/test-backend-ops.cpp b/llm/llama.cpp/tests/test-backend-ops.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/tests/test-c.c b/llm/llama.cpp/tests/test-c.c
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/tests/test-chat-template.cpp b/llm/llama.cpp/tests/test-chat-template.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/tests/test-double-float.cpp b/llm/llama.cpp/tests/test-double-float.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/tests/test-grad0.cpp b/llm/llama.cpp/tests/test-grad0.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/tests/test-grammar-integration.cpp b/llm/llama.cpp/tests/test-grammar-integration.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/tests/test-grammar-parser.cpp b/llm/llama.cpp/tests/test-grammar-parser.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/tests/test-json-schema-to-grammar.cpp b/llm/llama.cpp/tests/test-json-schema-to-grammar.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/tests/test-llama-grammar.cpp b/llm/llama.cpp/tests/test-llama-grammar.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/tests/test-model-load-cancel.cpp b/llm/llama.cpp/tests/test-model-load-cancel.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/tests/test-opt.cpp b/llm/llama.cpp/tests/test-opt.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/tests/test-quantize-fns.cpp b/llm/llama.cpp/tests/test-quantize-fns.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/tests/test-quantize-perf.cpp b/llm/llama.cpp/tests/test-quantize-perf.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/tests/test-rope.cpp b/llm/llama.cpp/tests/test-rope.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/tests/test-sampling.cpp b/llm/llama.cpp/tests/test-sampling.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/tests/test-tokenizer-0.cpp b/llm/llama.cpp/tests/test-tokenizer-0.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/tests/test-tokenizer-0.py b/llm/llama.cpp/tests/test-tokenizer-0.py
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/tests/test-tokenizer-0.sh b/llm/llama.cpp/tests/test-tokenizer-0.sh
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/tests/test-tokenizer-1-bpe.cpp b/llm/llama.cpp/tests/test-tokenizer-1-bpe.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/tests/test-tokenizer-1-spm.cpp b/llm/llama.cpp/tests/test-tokenizer-1-spm.cpp
old mode 100755
new mode 100644
diff --git a/llm/llama.cpp/tests/test-tokenizer-random.py b/llm/llama.cpp/tests/test-tokenizer-random.py
old mode 100755
new mode 100644
diff --git a/llm/llm.go b/llm/llm.go
old mode 100755
new mode 100644
diff --git a/llm/llm_darwin_amd64.go b/llm/llm_darwin_amd64.go
old mode 100755
new mode 100644
diff --git a/llm/llm_darwin_arm64.go b/llm/llm_darwin_arm64.go
old mode 100755
new mode 100644
diff --git a/llm/llm_linux.go b/llm/llm_linux.go
old mode 100755
new mode 100644
diff --git a/llm/llm_windows.go b/llm/llm_windows.go
old mode 100755
new mode 100644
diff --git a/llm/memory.go b/llm/memory.go
old mode 100755
new mode 100644
diff --git a/llm/memory_test.go b/llm/memory_test.go
old mode 100755
new mode 100644
diff --git a/llm/patches/01-load-progress.diff b/llm/patches/01-load-progress.diff
old mode 100755
new mode 100644
diff --git a/llm/patches/02-clip-log.diff b/llm/patches/02-clip-log.diff
old mode 100755
new mode 100644
diff --git a/llm/patches/03-load_exception.diff b/llm/patches/03-load_exception.diff
old mode 100755
new mode 100644
diff --git a/llm/patches/04-metal.diff b/llm/patches/04-metal.diff
old mode 100755
new mode 100644
diff --git a/llm/patches/05-default-pretokenizer.diff b/llm/patches/05-default-pretokenizer.diff
old mode 100755
new mode 100644
diff --git a/llm/patches/06-embeddings.diff b/llm/patches/06-embeddings.diff
old mode 100755
new mode 100644
diff --git a/llm/patches/07-clip-unicode.diff b/llm/patches/07-clip-unicode.diff
old mode 100755
new mode 100644
diff --git a/llm/patches/08-pooling.diff b/llm/patches/08-pooling.diff
old mode 100755
new mode 100644
diff --git a/llm/patches/09-lora.diff b/llm/patches/09-lora.diff
old mode 100755
new mode 100644
diff --git a/llm/patches/11-phi3-sliding-window.diff b/llm/patches/11-phi3-sliding-window.diff
old mode 100755
new mode 100644
diff --git a/llm/payload.go b/llm/payload.go
old mode 100755
new mode 100644
diff --git a/llm/server.go b/llm/server.go
old mode 100755
new mode 100644
diff --git a/llm/status.go b/llm/status.go
old mode 100755
new mode 100644
diff --git a/macapp/.eslintrc.json b/macapp/.eslintrc.json
old mode 100755
new mode 100644
diff --git a/macapp/.gitignore b/macapp/.gitignore
old mode 100755
new mode 100644
diff --git a/macapp/README.md b/macapp/README.md
old mode 100755
new mode 100644
diff --git a/macapp/assets/icon.icns b/macapp/assets/icon.icns
old mode 100755
new mode 100644
diff --git a/macapp/assets/iconDarkTemplate.png b/macapp/assets/iconDarkTemplate.png
old mode 100755
new mode 100644
diff --git a/macapp/assets/iconDarkTemplate@2x.png b/macapp/assets/iconDarkTemplate@2x.png
old mode 100755
new mode 100644
diff --git a/macapp/assets/iconDarkUpdateTemplate.png b/macapp/assets/iconDarkUpdateTemplate.png
old mode 100755
new mode 100644
diff --git a/macapp/assets/iconDarkUpdateTemplate@2x.png b/macapp/assets/iconDarkUpdateTemplate@2x.png
old mode 100755
new mode 100644
diff --git a/macapp/assets/iconTemplate.png b/macapp/assets/iconTemplate.png
old mode 100755
new mode 100644
diff --git a/macapp/assets/iconTemplate@2x.png b/macapp/assets/iconTemplate@2x.png
old mode 100755
new mode 100644
diff --git a/macapp/assets/iconUpdateTemplate.png b/macapp/assets/iconUpdateTemplate.png
old mode 100755
new mode 100644
diff --git a/macapp/assets/iconUpdateTemplate@2x.png b/macapp/assets/iconUpdateTemplate@2x.png
old mode 100755
new mode 100644
diff --git a/macapp/forge.config.ts b/macapp/forge.config.ts
old mode 100755
new mode 100644
diff --git a/macapp/package-lock.json b/macapp/package-lock.json
old mode 100755
new mode 100644
diff --git a/macapp/package.json b/macapp/package.json
old mode 100755
new mode 100644
diff --git a/macapp/postcss.config.js b/macapp/postcss.config.js
old mode 100755
new mode 100644
diff --git a/macapp/src/app.css b/macapp/src/app.css
old mode 100755
new mode 100644
diff --git a/macapp/src/app.tsx b/macapp/src/app.tsx
old mode 100755
new mode 100644
diff --git a/macapp/src/declarations.d.ts b/macapp/src/declarations.d.ts
old mode 100755
new mode 100644
diff --git a/macapp/src/index.html b/macapp/src/index.html
old mode 100755
new mode 100644
diff --git a/macapp/src/index.ts b/macapp/src/index.ts
old mode 100755
new mode 100644
diff --git a/macapp/src/install.ts b/macapp/src/install.ts
old mode 100755
new mode 100644
diff --git a/macapp/src/ollama.svg b/macapp/src/ollama.svg
old mode 100755
new mode 100644
diff --git a/macapp/src/preload.ts b/macapp/src/preload.ts
old mode 100755
new mode 100644
diff --git a/macapp/src/renderer.tsx b/macapp/src/renderer.tsx
old mode 100755
new mode 100644
diff --git a/macapp/tailwind.config.js b/macapp/tailwind.config.js
old mode 100755
new mode 100644
diff --git a/macapp/tsconfig.json b/macapp/tsconfig.json
old mode 100755
new mode 100644
diff --git a/macapp/webpack.main.config.ts b/macapp/webpack.main.config.ts
old mode 100755
new mode 100644
diff --git a/macapp/webpack.plugins.ts b/macapp/webpack.plugins.ts
old mode 100755
new mode 100644
diff --git a/macapp/webpack.renderer.config.ts b/macapp/webpack.renderer.config.ts
old mode 100755
new mode 100644
diff --git a/macapp/webpack.rules.ts b/macapp/webpack.rules.ts
old mode 100755
new mode 100644
diff --git a/main.go b/main.go
old mode 100755
new mode 100644
diff --git a/openai/openai.go b/openai/openai.go
old mode 100755
new mode 100644
diff --git a/openai/openai_test.go b/openai/openai_test.go
old mode 100755
new mode 100644
diff --git a/parser/parser.go b/parser/parser.go
old mode 100755
new mode 100644
diff --git a/parser/parser_test.go b/parser/parser_test.go
old mode 100755
new mode 100644
diff --git a/progress/bar.go b/progress/bar.go
old mode 100755
new mode 100644
diff --git a/progress/progress.go b/progress/progress.go
old mode 100755
new mode 100644
diff --git a/progress/spinner.go b/progress/spinner.go
old mode 100755
new mode 100644
diff --git a/readline/buffer.go b/readline/buffer.go
old mode 100755
new mode 100644
diff --git a/readline/errors.go b/readline/errors.go
old mode 100755
new mode 100644
diff --git a/readline/history.go b/readline/history.go
old mode 100755
new mode 100644
diff --git a/readline/readline.go b/readline/readline.go
old mode 100755
new mode 100644
diff --git a/readline/readline_unix.go b/readline/readline_unix.go
old mode 100755
new mode 100644
diff --git a/readline/readline_windows.go b/readline/readline_windows.go
old mode 100755
new mode 100644
diff --git a/readline/term.go b/readline/term.go
old mode 100755
new mode 100644
diff --git a/readline/term_bsd.go b/readline/term_bsd.go
old mode 100755
new mode 100644
diff --git a/readline/term_linux.go b/readline/term_linux.go
old mode 100755
new mode 100644
diff --git a/readline/term_windows.go b/readline/term_windows.go
old mode 100755
new mode 100644
diff --git a/readline/types.go b/readline/types.go
old mode 100755
new mode 100644
diff --git a/scripts/build.sh b/scripts/build.sh
old mode 100755
new mode 100644
diff --git a/scripts/build_darwin.sh b/scripts/build_darwin.sh
old mode 100755
new mode 100644
diff --git a/scripts/build_docker.sh b/scripts/build_docker.sh
old mode 100755
new mode 100644
diff --git a/scripts/build_linux.sh b/scripts/build_linux.sh
old mode 100755
new mode 100644
diff --git a/scripts/build_remote.py b/scripts/build_remote.py
old mode 100755
new mode 100644
diff --git a/scripts/build_windows.ps1 b/scripts/build_windows.ps1
old mode 100755
new mode 100644
diff --git a/scripts/install.sh b/scripts/install.sh
old mode 100755
new mode 100644
diff --git a/scripts/publish.sh b/scripts/publish.sh
old mode 100755
new mode 100644
diff --git a/scripts/push_docker.sh b/scripts/push_docker.sh
old mode 100755
new mode 100644
diff --git a/scripts/rh_linux_deps.sh b/scripts/rh_linux_deps.sh
old mode 100755
new mode 100644
diff --git a/scripts/tag_latest.sh b/scripts/tag_latest.sh
old mode 100755
new mode 100644
diff --git a/server/auth.go b/server/auth.go
old mode 100755
new mode 100644
diff --git a/server/download.go b/server/download.go
old mode 100755
new mode 100644
diff --git a/server/fixblobs.go b/server/fixblobs.go
old mode 100755
new mode 100644
diff --git a/server/fixblobs_test.go b/server/fixblobs_test.go
old mode 100755
new mode 100644
diff --git a/server/images.go b/server/images.go
old mode 100755
new mode 100644
diff --git a/server/layer.go b/server/layer.go
old mode 100755
new mode 100644
diff --git a/server/manifest.go b/server/manifest.go
old mode 100755
new mode 100644
diff --git a/server/manifest_test.go b/server/manifest_test.go
old mode 100755
new mode 100644
diff --git a/server/model.go b/server/model.go
old mode 100755
new mode 100644
diff --git a/server/model_test.go b/server/model_test.go
old mode 100755
new mode 100644
diff --git a/server/modelpath.go b/server/modelpath.go
old mode 100755
new mode 100644
diff --git a/server/modelpath_test.go b/server/modelpath_test.go
old mode 100755
new mode 100644
diff --git a/server/prompt.go b/server/prompt.go
old mode 100755
new mode 100644
diff --git a/server/prompt_test.go b/server/prompt_test.go
old mode 100755
new mode 100644
diff --git a/server/routes.go b/server/routes.go
old mode 100755
new mode 100644
diff --git a/server/routes_create_test.go b/server/routes_create_test.go
old mode 100755
new mode 100644
diff --git a/server/routes_delete_test.go b/server/routes_delete_test.go
old mode 100755
new mode 100644
diff --git a/server/routes_generate_test.go b/server/routes_generate_test.go
old mode 100755
new mode 100644
diff --git a/server/routes_list_test.go b/server/routes_list_test.go
old mode 100755
new mode 100644
diff --git a/server/routes_test.go b/server/routes_test.go
old mode 100755
new mode 100644
diff --git a/server/sched.go b/server/sched.go
old mode 100755
new mode 100644
diff --git a/server/sched_test.go b/server/sched_test.go
old mode 100755
new mode 100644
diff --git a/server/sparse_common.go b/server/sparse_common.go
old mode 100755
new mode 100644
diff --git a/server/sparse_windows.go b/server/sparse_windows.go
old mode 100755
new mode 100644
diff --git a/server/testdata/tools/command-r-plus.gotmpl b/server/testdata/tools/command-r-plus.gotmpl
old mode 100755
new mode 100644
diff --git a/server/testdata/tools/command-r-plus.out b/server/testdata/tools/command-r-plus.out
old mode 100755
new mode 100644
diff --git a/server/testdata/tools/firefunction.gotmpl b/server/testdata/tools/firefunction.gotmpl
old mode 100755
new mode 100644
diff --git a/server/testdata/tools/firefunction.out b/server/testdata/tools/firefunction.out
old mode 100755
new mode 100644
diff --git a/server/testdata/tools/llama3-groq-tool-use.gotmpl b/server/testdata/tools/llama3-groq-tool-use.gotmpl
old mode 100755
new mode 100644
diff --git a/server/testdata/tools/llama3-groq-tool-use.out b/server/testdata/tools/llama3-groq-tool-use.out
old mode 100755
new mode 100644
diff --git a/server/testdata/tools/messages.json b/server/testdata/tools/messages.json
old mode 100755
new mode 100644
diff --git a/server/testdata/tools/mistral.gotmpl b/server/testdata/tools/mistral.gotmpl
old mode 100755
new mode 100644
diff --git a/server/testdata/tools/mistral.out b/server/testdata/tools/mistral.out
old mode 100755
new mode 100644
diff --git a/server/testdata/tools/tools.json b/server/testdata/tools/tools.json
old mode 100755
new mode 100644
diff --git a/server/testdata/tools/xlam.gotmpl b/server/testdata/tools/xlam.gotmpl
old mode 100755
new mode 100644
diff --git a/server/testdata/tools/xlam.out b/server/testdata/tools/xlam.out
old mode 100755
new mode 100644
diff --git a/server/upload.go b/server/upload.go
old mode 100755
new mode 100644
diff --git a/template/alfred.gotmpl b/template/alfred.gotmpl
old mode 100755
new mode 100644
diff --git a/template/alfred.json b/template/alfred.json
old mode 100755
new mode 100644
diff --git a/template/alpaca.gotmpl b/template/alpaca.gotmpl
old mode 100755
new mode 100644
diff --git a/template/alpaca.json b/template/alpaca.json
old mode 100755
new mode 100644
diff --git a/template/chatml.gotmpl b/template/chatml.gotmpl
old mode 100755
new mode 100644
diff --git a/template/chatml.json b/template/chatml.json
old mode 100755
new mode 100644
diff --git a/template/chatqa.gotmpl b/template/chatqa.gotmpl
old mode 100755
new mode 100644
diff --git a/template/chatqa.json b/template/chatqa.json
old mode 100755
new mode 100644
diff --git a/template/codellama-70b-instruct.gotmpl b/template/codellama-70b-instruct.gotmpl
old mode 100755
new mode 100644
diff --git a/template/codellama-70b-instruct.json b/template/codellama-70b-instruct.json
old mode 100755
new mode 100644
diff --git a/template/falcon-instruct.gotmpl b/template/falcon-instruct.gotmpl
old mode 100755
new mode 100644
diff --git a/template/falcon-instruct.json b/template/falcon-instruct.json
old mode 100755
new mode 100644
diff --git a/template/gemma-instruct.gotmpl b/template/gemma-instruct.gotmpl
old mode 100755
new mode 100644
diff --git a/template/gemma-instruct.json b/template/gemma-instruct.json
old mode 100755
new mode 100644
diff --git a/template/granite-instruct.gotmpl b/template/granite-instruct.gotmpl
old mode 100755
new mode 100644
diff --git a/template/granite-instruct.json b/template/granite-instruct.json
old mode 100755
new mode 100644
diff --git a/template/index.json b/template/index.json
old mode 100755
new mode 100644
diff --git a/template/llama2-chat.gotmpl b/template/llama2-chat.gotmpl
old mode 100755
new mode 100644
diff --git a/template/llama2-chat.json b/template/llama2-chat.json
old mode 100755
new mode 100644
diff --git a/template/llama3-instruct.gotmpl b/template/llama3-instruct.gotmpl
old mode 100755
new mode 100644
diff --git a/template/llama3-instruct.json b/template/llama3-instruct.json
old mode 100755
new mode 100644
diff --git a/template/magicoder.gotmpl b/template/magicoder.gotmpl
old mode 100755
new mode 100644
diff --git a/template/magicoder.json b/template/magicoder.json
old mode 100755
new mode 100644
diff --git a/template/mistral-instruct.gotmpl b/template/mistral-instruct.gotmpl
old mode 100755
new mode 100644
diff --git a/template/mistral-instruct.json b/template/mistral-instruct.json
old mode 100755
new mode 100644
diff --git a/template/openchat.gotmpl b/template/openchat.gotmpl
old mode 100755
new mode 100644
diff --git a/template/openchat.json b/template/openchat.json
old mode 100755
new mode 100644
diff --git a/template/phi-3.gotmpl b/template/phi-3.gotmpl
old mode 100755
new mode 100644
diff --git a/template/phi-3.json b/template/phi-3.json
old mode 100755
new mode 100644
diff --git a/template/solar-instruct.gotmpl b/template/solar-instruct.gotmpl
old mode 100755
new mode 100644
diff --git a/template/solar-instruct.json b/template/solar-instruct.json
old mode 100755
new mode 100644
diff --git a/template/starcoder2-instruct.gotmpl b/template/starcoder2-instruct.gotmpl
old mode 100755
new mode 100644
diff --git a/template/starcoder2-instruct.json b/template/starcoder2-instruct.json
old mode 100755
new mode 100644
diff --git a/template/template.go b/template/template.go
old mode 100755
new mode 100644
diff --git a/template/template_test.go b/template/template_test.go
old mode 100755
new mode 100644
diff --git a/template/testdata/alfred.gotmpl/system-user-assistant-user b/template/testdata/alfred.gotmpl/system-user-assistant-user
old mode 100755
new mode 100644
diff --git a/template/testdata/alfred.gotmpl/user b/template/testdata/alfred.gotmpl/user
old mode 100755
new mode 100644
diff --git a/template/testdata/alfred.gotmpl/user-assistant-user b/template/testdata/alfred.gotmpl/user-assistant-user
old mode 100755
new mode 100644
diff --git a/template/testdata/alpaca.gotmpl/system-user-assistant-user b/template/testdata/alpaca.gotmpl/system-user-assistant-user
old mode 100755
new mode 100644
diff --git a/template/testdata/alpaca.gotmpl/user b/template/testdata/alpaca.gotmpl/user
old mode 100755
new mode 100644
diff --git a/template/testdata/alpaca.gotmpl/user-assistant-user b/template/testdata/alpaca.gotmpl/user-assistant-user
old mode 100755
new mode 100644
diff --git a/template/testdata/chatml.gotmpl/system-user-assistant-user b/template/testdata/chatml.gotmpl/system-user-assistant-user
old mode 100755
new mode 100644
diff --git a/template/testdata/chatml.gotmpl/user b/template/testdata/chatml.gotmpl/user
old mode 100755
new mode 100644
diff --git a/template/testdata/chatml.gotmpl/user-assistant-user b/template/testdata/chatml.gotmpl/user-assistant-user
old mode 100755
new mode 100644
diff --git a/template/testdata/chatqa.gotmpl/system-user-assistant-user b/template/testdata/chatqa.gotmpl/system-user-assistant-user
old mode 100755
new mode 100644
diff --git a/template/testdata/chatqa.gotmpl/user b/template/testdata/chatqa.gotmpl/user
old mode 100755
new mode 100644
diff --git a/template/testdata/chatqa.gotmpl/user-assistant-user b/template/testdata/chatqa.gotmpl/user-assistant-user
old mode 100755
new mode 100644
diff --git a/template/testdata/codellama-70b-instruct.gotmpl/system-user-assistant-user b/template/testdata/codellama-70b-instruct.gotmpl/system-user-assistant-user
old mode 100755
new mode 100644
diff --git a/template/testdata/codellama-70b-instruct.gotmpl/user b/template/testdata/codellama-70b-instruct.gotmpl/user
old mode 100755
new mode 100644
diff --git a/template/testdata/codellama-70b-instruct.gotmpl/user-assistant-user b/template/testdata/codellama-70b-instruct.gotmpl/user-assistant-user
old mode 100755
new mode 100644
diff --git a/template/testdata/falcon-instruct.gotmpl/system-user-assistant-user b/template/testdata/falcon-instruct.gotmpl/system-user-assistant-user
old mode 100755
new mode 100644
diff --git a/template/testdata/falcon-instruct.gotmpl/user b/template/testdata/falcon-instruct.gotmpl/user
old mode 100755
new mode 100644
diff --git a/template/testdata/falcon-instruct.gotmpl/user-assistant-user b/template/testdata/falcon-instruct.gotmpl/user-assistant-user
old mode 100755
new mode 100644
diff --git a/template/testdata/gemma-instruct.gotmpl/system-user-assistant-user b/template/testdata/gemma-instruct.gotmpl/system-user-assistant-user
old mode 100755
new mode 100644
diff --git a/template/testdata/gemma-instruct.gotmpl/user b/template/testdata/gemma-instruct.gotmpl/user
old mode 100755
new mode 100644
diff --git a/template/testdata/gemma-instruct.gotmpl/user-assistant-user b/template/testdata/gemma-instruct.gotmpl/user-assistant-user
old mode 100755
new mode 100644
diff --git a/template/testdata/granite-instruct.gotmpl/system-user-assistant-user b/template/testdata/granite-instruct.gotmpl/system-user-assistant-user
old mode 100755
new mode 100644
diff --git a/template/testdata/granite-instruct.gotmpl/user b/template/testdata/granite-instruct.gotmpl/user
old mode 100755
new mode 100644
diff --git a/template/testdata/granite-instruct.gotmpl/user-assistant-user b/template/testdata/granite-instruct.gotmpl/user-assistant-user
old mode 100755
new mode 100644
diff --git a/template/testdata/llama2-chat.gotmpl/system-user-assistant-user b/template/testdata/llama2-chat.gotmpl/system-user-assistant-user
old mode 100755
new mode 100644
diff --git a/template/testdata/llama2-chat.gotmpl/user b/template/testdata/llama2-chat.gotmpl/user
old mode 100755
new mode 100644
diff --git a/template/testdata/llama2-chat.gotmpl/user-assistant-user b/template/testdata/llama2-chat.gotmpl/user-assistant-user
old mode 100755
new mode 100644
diff --git a/template/testdata/llama3-instruct.gotmpl/system-user-assistant-user b/template/testdata/llama3-instruct.gotmpl/system-user-assistant-user
old mode 100755
new mode 100644
diff --git a/template/testdata/llama3-instruct.gotmpl/user b/template/testdata/llama3-instruct.gotmpl/user
old mode 100755
new mode 100644
diff --git a/template/testdata/llama3-instruct.gotmpl/user-assistant-user b/template/testdata/llama3-instruct.gotmpl/user-assistant-user
old mode 100755
new mode 100644
diff --git a/template/testdata/magicoder.gotmpl/system-user-assistant-user b/template/testdata/magicoder.gotmpl/system-user-assistant-user
old mode 100755
new mode 100644
diff --git a/template/testdata/magicoder.gotmpl/user b/template/testdata/magicoder.gotmpl/user
old mode 100755
new mode 100644
diff --git a/template/testdata/magicoder.gotmpl/user-assistant-user b/template/testdata/magicoder.gotmpl/user-assistant-user
old mode 100755
new mode 100644
diff --git a/template/testdata/mistral-instruct.gotmpl/system-user-assistant-user b/template/testdata/mistral-instruct.gotmpl/system-user-assistant-user
old mode 100755
new mode 100644
diff --git a/template/testdata/mistral-instruct.gotmpl/user b/template/testdata/mistral-instruct.gotmpl/user
old mode 100755
new mode 100644
diff --git a/template/testdata/mistral-instruct.gotmpl/user-assistant-user b/template/testdata/mistral-instruct.gotmpl/user-assistant-user
old mode 100755
new mode 100644
diff --git a/template/testdata/openchat.gotmpl/system-user-assistant-user b/template/testdata/openchat.gotmpl/system-user-assistant-user
old mode 100755
new mode 100644
diff --git a/template/testdata/openchat.gotmpl/user b/template/testdata/openchat.gotmpl/user
old mode 100755
new mode 100644
diff --git a/template/testdata/openchat.gotmpl/user-assistant-user b/template/testdata/openchat.gotmpl/user-assistant-user
old mode 100755
new mode 100644
diff --git a/template/testdata/phi-3.gotmpl/system-user-assistant-user b/template/testdata/phi-3.gotmpl/system-user-assistant-user
old mode 100755
new mode 100644
diff --git a/template/testdata/phi-3.gotmpl/user b/template/testdata/phi-3.gotmpl/user
old mode 100755
new mode 100644
diff --git a/template/testdata/phi-3.gotmpl/user-assistant-user b/template/testdata/phi-3.gotmpl/user-assistant-user
old mode 100755
new mode 100644
diff --git a/template/testdata/solar-instruct.gotmpl/system-user-assistant-user b/template/testdata/solar-instruct.gotmpl/system-user-assistant-user
old mode 100755
new mode 100644
diff --git a/template/testdata/solar-instruct.gotmpl/user b/template/testdata/solar-instruct.gotmpl/user
old mode 100755
new mode 100644
diff --git a/template/testdata/solar-instruct.gotmpl/user-assistant-user b/template/testdata/solar-instruct.gotmpl/user-assistant-user
old mode 100755
new mode 100644
diff --git a/template/testdata/starcoder2-instruct.gotmpl/system-user-assistant-user b/template/testdata/starcoder2-instruct.gotmpl/system-user-assistant-user
old mode 100755
new mode 100644
diff --git a/template/testdata/starcoder2-instruct.gotmpl/user b/template/testdata/starcoder2-instruct.gotmpl/user
old mode 100755
new mode 100644
diff --git a/template/testdata/starcoder2-instruct.gotmpl/user-assistant-user b/template/testdata/starcoder2-instruct.gotmpl/user-assistant-user
old mode 100755
new mode 100644
diff --git a/template/testdata/templates.jsonl b/template/testdata/templates.jsonl
old mode 100755
new mode 100644
diff --git a/template/testdata/vicuna.gotmpl/system-user-assistant-user b/template/testdata/vicuna.gotmpl/system-user-assistant-user
old mode 100755
new mode 100644
diff --git a/template/testdata/vicuna.gotmpl/user b/template/testdata/vicuna.gotmpl/user
old mode 100755
new mode 100644
diff --git a/template/testdata/vicuna.gotmpl/user-assistant-user b/template/testdata/vicuna.gotmpl/user-assistant-user
old mode 100755
new mode 100644
diff --git a/template/testdata/zephyr.gotmpl/system-user-assistant-user b/template/testdata/zephyr.gotmpl/system-user-assistant-user
old mode 100755
new mode 100644
diff --git a/template/testdata/zephyr.gotmpl/user b/template/testdata/zephyr.gotmpl/user
old mode 100755
new mode 100644
diff --git a/template/testdata/zephyr.gotmpl/user-assistant-user b/template/testdata/zephyr.gotmpl/user-assistant-user
old mode 100755
new mode 100644
diff --git a/template/vicuna.gotmpl b/template/vicuna.gotmpl
old mode 100755
new mode 100644
diff --git a/template/vicuna.json b/template/vicuna.json
old mode 100755
new mode 100644
diff --git a/template/zephyr.gotmpl b/template/zephyr.gotmpl
old mode 100755
new mode 100644
diff --git a/template/zephyr.json b/template/zephyr.json
old mode 100755
new mode 100644
diff --git a/types/errtypes/errtypes.go b/types/errtypes/errtypes.go
old mode 100755
new mode 100644
diff --git a/types/model/name.go b/types/model/name.go
old mode 100755
new mode 100644
diff --git a/types/model/name_test.go b/types/model/name_test.go
old mode 100755
new mode 100644
diff --git a/types/model/testdata/fuzz/FuzzName/d37463aa416f6bab b/types/model/testdata/fuzz/FuzzName/d37463aa416f6bab
old mode 100755
new mode 100644
diff --git a/util/bufioutil/buffer_seeker.go b/util/bufioutil/buffer_seeker.go
old mode 100755
new mode 100644
diff --git a/util/bufioutil/buffer_seeker_test.go b/util/bufioutil/buffer_seeker_test.go
old mode 100755
new mode 100644
diff --git a/version/version.go b/version/version.go
old mode 100755
new mode 100644