update v0.3.9

1e1ec87b · wangkx1 · 1611dedb · 1611dedb · 1611dedb · 1611dedb
Commit 1e1ec87b authored Sep 11, 2024 by wangkx1
20 changed files
--- a/ollama/llm/llama.cpp/.github/workflows/gguf-publish.yml
+++ b/ollama/llm/llama.cpp/.github/workflows/gguf-publish.yml
-# This workflow will upload a Python Package using Twine when a GGUF release is created
-# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
-
-# See `gguf-py/README.md` for how to make a release.
-
-# This workflow uses actions that are not certified by GitHub.
-# They are provided by a third-party and are governed by
-# separate terms of service, privacy policy, and support
-# documentation.
-
-name: Upload Python Package
-
-on:
-  workflow_dispatch:
-  push:
-    # Pattern matched against refs/tags
-    tags:
-      - 'gguf-v*'           # Push events to every version tag
-
-
-jobs:
-  deploy:
-
-    runs-on: ubuntu-latest
-
-    steps:
-    - uses: actions/checkout@v4
-    - name: Set up Python
-      uses: actions/setup-python@v5
-      with:
-        python-version: '3.9.x'
-    - name: Install dependencies
-      run: |
-        cd gguf-py
-        python -m pip install poetry
-        poetry install
-
-    - name: Build package
-      run: cd gguf-py && poetry build
-    - name: Publish package
-      uses: pypa/gh-action-pypi-publish@release/v1
-      with:
-        password: ${{ secrets.PYPI_API_TOKEN }}
-        packages-dir: gguf-py/dist
--- a/ollama/llm/llama.cpp/.github/workflows/labeler.yml
+++ b/ollama/llm/llama.cpp/.github/workflows/labeler.yml
-name: "Pull Request Labeler"
-on:
- pull_request_target
-
-jobs:
-  labeler:
-    permissions:
-      contents: read
-      pull-requests: write
-    runs-on: ubuntu-latest
-    steps:
-    - uses: actions/checkout@v4
-      with:
-        repository: "ggerganov/llama.cpp"
-    - uses: actions/labeler@v5
-      with:
-        configuration-path: '.github/labeler.yml'
--- a/ollama/llm/llama.cpp/.github/workflows/nix-ci-aarch64.yml
+++ b/ollama/llm/llama.cpp/.github/workflows/nix-ci-aarch64.yml
-name: Nix aarch64 builds
-
-on:
-  workflow_dispatch: # allows manual triggering
-  schedule:
-    # Rebuild daily rather than on every push because QEMU is expensive (e.g.
-    # 1.5h instead of minutes with the cold cache).
-    #
-    # randint(0, 59), randint(0, 23)
-    - cron: '26 12 * * *'
-  # But also rebuild if we touched any of the Nix expressions:
-  push:
-    branches:
-      - master
-    paths: ['**/*.nix', 'flake.lock']
-  pull_request:
-    types: [opened, synchronize, reopened]
-    paths: ['**/*.nix', 'flake.lock']
-
-concurrency:
-  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
-  cancel-in-progress: true
-
-jobs:
-  nix-build-aarch64:
-    runs-on: ubuntu-latest
-    steps:
-    - name: Checkout repository
-      uses: actions/checkout@v4
-    - name: Install QEMU
-      # Copy-paste from https://github.com/orgs/community/discussions/8305#discussioncomment-5888654
-      run: |
-        sudo apt-get update
-        sudo apt-get install -y qemu-user-static qemu-system-aarch64
-        sudo usermod -a -G kvm $USER
-    - name: Install Nix
-      uses: DeterminateSystems/nix-installer-action@v9
-      with:
-        github-token: ${{ secrets.GITHUB_TOKEN }}
-        extra-conf: |
-          extra-platforms = aarch64-linux
-          extra-system-features = nixos-test kvm
-          extra-substituters = https://llama-cpp.cachix.org https://cuda-maintainers.cachix.org
-          extra-trusted-public-keys = llama-cpp.cachix.org-1:H75X+w83wUKTIPSO1KWy9ADUrzThyGs8P5tmAbkWhQc= cuda-maintainers.cachix.org-1:0dq3bujKpuEPMCX6U4WylrUDZ9JyUG0VpVZa7CNfq5E=
-    - uses: DeterminateSystems/magic-nix-cache-action@v2
-      with:
-        upstream-cache: https://${{ matrix.cachixName }}.cachix.org
-    - name: Set-up cachix to push the results to
-      uses: cachix/cachix-action@v13
-      with:
-        authToken: '${{ secrets.CACHIX_AUTH_TOKEN }}'
-        name: llama-cpp
-    - name: Show all output paths
-      run: >
-          nix run github:nix-community/nix-eval-jobs
-          -- --gc-roots-dir gcroot
-          --flake
-          ".#packages.aarch64-linux"
-    - name: Build
-      run: >
-          nix run github:Mic92/nix-fast-build
-          -- --skip-cached --no-nom
-          --systems aarch64-linux
-          --flake
-          ".#checks.aarch64-linux"
--- a/ollama/llm/llama.cpp/.github/workflows/nix-ci.yml
+++ b/ollama/llm/llama.cpp/.github/workflows/nix-ci.yml
-name: Nix CI
-
-on:
-  workflow_dispatch: # allows manual triggering
-  push:
-    branches:
-      - master
-  pull_request:
-    types: [opened, synchronize, reopened]
-
-concurrency:
-  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
-  cancel-in-progress: true
-
-jobs:
-  nix-eval:
-    strategy:
-      fail-fast: false
-      matrix:
-        os: [ ubuntu-latest, macos-latest ]
-    runs-on: ${{ matrix.os }}
-    steps:
-    - name: Checkout repository
-      uses: actions/checkout@v4
-    - name: Install Nix
-      uses: DeterminateSystems/nix-installer-action@v9
-      with:
-        github-token: ${{ secrets.GITHUB_TOKEN }}
-        extra-conf: |
-          extra-substituters = https://llama-cpp.cachix.org https://cuda-maintainers.cachix.org
-          extra-trusted-public-keys = llama-cpp.cachix.org-1:H75X+w83wUKTIPSO1KWy9ADUrzThyGs8P5tmAbkWhQc= cuda-maintainers.cachix.org-1:0dq3bujKpuEPMCX6U4WylrUDZ9JyUG0VpVZa7CNfq5E=
-    - uses: DeterminateSystems/magic-nix-cache-action@v2
-      with:
-        upstream-cache: https://${{ matrix.cachixName }}.cachix.org
-    - name: List all flake outputs
-      run: nix flake show --all-systems
-    - name: Show all output paths
-      run: >
-          nix run github:nix-community/nix-eval-jobs
-          -- --gc-roots-dir gcroot
-          --flake
-          ".#packages.$(nix eval --raw --impure --expr builtins.currentSystem)"
-  nix-build:
-    strategy:
-      fail-fast: false
-      matrix:
-        os: [ ubuntu-latest, macos-latest ]
-    runs-on: ${{ matrix.os }}
-    steps:
-    - name: Checkout repository
-      uses: actions/checkout@v4
-    - name: Install Nix
-      uses: DeterminateSystems/nix-installer-action@v9
-      with:
-        github-token: ${{ secrets.GITHUB_TOKEN }}
-        extra-conf: |
-          extra-substituters = https://llama-cpp.cachix.org https://cuda-maintainers.cachix.org
-          extra-trusted-public-keys = llama-cpp.cachix.org-1:H75X+w83wUKTIPSO1KWy9ADUrzThyGs8P5tmAbkWhQc= cuda-maintainers.cachix.org-1:0dq3bujKpuEPMCX6U4WylrUDZ9JyUG0VpVZa7CNfq5E=
-    - uses: DeterminateSystems/magic-nix-cache-action@v2
-      with:
-        upstream-cache: https://${{ matrix.cachixName }}.cachix.org
-    - name: Set-up cachix to push the results to
-      uses: cachix/cachix-action@v13
-      with:
-        authToken: '${{ secrets.CACHIX_AUTH_TOKEN }}'
-        name: llama-cpp
-    - name: Build
-      run: >
-          nix run github:Mic92/nix-fast-build
-          -- --skip-cached --no-nom
-          --flake
-          ".#checks.$(nix eval --raw --impure --expr builtins.currentSystem)"
--- a/ollama/llm/llama.cpp/.github/workflows/nix-flake-update.yml
+++ b/ollama/llm/llama.cpp/.github/workflows/nix-flake-update.yml
-name: update-flake-lock
-on:
-  workflow_dispatch:
-  schedule:
-    - cron: '0 0 * * 0' # runs weekly on Sunday at 00:00
-
-jobs:
-  lockfile:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v4
-      - name: Install Nix
-        uses: DeterminateSystems/nix-installer-action@main
-      - name: Update flake.lock
-        uses: DeterminateSystems/update-flake-lock@main
-        with:
-          pr-title: "nix: update flake.lock"
-          pr-labels: |
-            nix
-          pr-reviewers: philiptaron,SomeoneSerge
-          token: ${{ secrets.FLAKE_TOKEN }}
--- a/ollama/llm/llama.cpp/.github/workflows/nix-publish-flake.yml
+++ b/ollama/llm/llama.cpp/.github/workflows/nix-publish-flake.yml
-# Make the flake discoverable on https://flakestry.dev and https://flakehub.com/flakes
-name: "Publish a flake to flakestry & flakehub"
-on:
-    push:
-        tags:
-        - "*"
-    workflow_dispatch:
-        inputs:
-            tag:
-                description: "The existing tag to publish"
-                type: "string"
-                required: true
-jobs:
-    flakestry-publish:
-        runs-on: ubuntu-latest
-        permissions:
-            id-token: "write"
-            contents: "read"
-        steps:
-            - uses: flakestry/flakestry-publish@main
-              with:
-                version: "${{ inputs.tag || github.ref_name }}"
-    flakehub-publish:
-      runs-on: "ubuntu-latest"
-      permissions:
-        id-token: "write"
-        contents: "read"
-      steps:
-        - uses: "actions/checkout@v4"
-          with:
-            ref: "${{ (inputs.tag != null) && format('refs/tags/{0}', inputs.tag) || '' }}"
-        - uses: "DeterminateSystems/nix-installer-action@main"
-        - uses: "DeterminateSystems/flakehub-push@main"
-          with:
-            visibility: "public"
-            tag: "${{ inputs.tag }}"
--- a/ollama/llm/llama.cpp/.github/workflows/python-check-requirements.yml
+++ b/ollama/llm/llama.cpp/.github/workflows/python-check-requirements.yml
-name: Python check requirements.txt
-
-on:
-  push:
-    paths:
-      - '.github/workflows/python-check-requirements.yml'
-      - 'scripts/check-requirements.sh'
-      - 'convert*.py'
-      - 'requirements.txt'
-      - 'requirements/*.txt'
-  pull_request:
-    paths:
-      - '.github/workflows/python-check-requirements.yml'
-      - 'scripts/check-requirements.sh'
-      - 'convert*.py'
-      - 'requirements.txt'
-      - 'requirements/*.txt'
-
-concurrency:
-  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
-  cancel-in-progress: true
-
-jobs:
-  python-check-requirements:
-    runs-on: ubuntu-latest
-    name: check-requirements
-    steps:
-      - name: Check out source repository
-        uses: actions/checkout@v4
-      - name: Set up Python environment
-        uses: actions/setup-python@v5
-        with:
-          python-version: "3.11"
-      - name: Run check-requirements.sh script
-        run:  bash scripts/check-requirements.sh
--- a/ollama/llm/llama.cpp/.github/workflows/python-lint.yml
+++ b/ollama/llm/llama.cpp/.github/workflows/python-lint.yml
-name: flake8 Lint
-
-on: [push, pull_request]
-
-concurrency:
-  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
-  cancel-in-progress: true
-
-jobs:
-  flake8-lint:
-    runs-on: ubuntu-latest
-    name: Lint
-    steps:
-      - name: Check out source repository
-        uses: actions/checkout@v4
-      - name: Set up Python environment
-        uses: actions/setup-python@v5
-        with:
-          python-version: "3.11"
-      - name: flake8 Lint
-        uses: py-actions/flake8@v2
-        with:
-            plugins: "flake8-no-print"
--- a/ollama/llm/llama.cpp/.github/workflows/server.yml
+++ b/ollama/llm/llama.cpp/.github/workflows/server.yml
-# Server build and tests
-name: Server
-
-on:
-  workflow_dispatch: # allows manual triggering
-    inputs:
-      sha:
-        description: 'Commit SHA1 to build'
-        required: false
-        type: string
-      slow_tests:
-        description: 'Run slow tests'
-        required: true
-        type: boolean
-  push:
-    branches:
-      - master
-    paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/**.*']
-  pull_request:
-    types: [opened, synchronize, reopened]
-    paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/**.*']
-
-concurrency:
-  group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.run_id }}
-  cancel-in-progress: true
-
-jobs:
-  server:
-    runs-on: ubuntu-latest
-
-    strategy:
-      matrix:
-        sanitizer: [ADDRESS, THREAD, UNDEFINED]
-        build_type: [RelWithDebInfo]
-        include:
-          - build_type: Release
-            sanitizer: ""
-      fail-fast: false # While -DLLAMA_SANITIZE_THREAD=ON is broken
-
-    steps:
-      - name: Dependencies
-        id: depends
-        run: |
-          sudo apt-get update
-          sudo apt-get -y install \
-            build-essential \
-            xxd \
-            git \
-            cmake \
-            curl \
-            wget \
-            language-pack-en \
-            libcurl4-openssl-dev
-
-      - name: Clone
-        id: checkout
-        uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-          ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}
-
-      - name: Python setup
-        id: setup_python
-        uses: actions/setup-python@v5
-        with:
-          python-version: '3.11'
-
-      - name: Tests dependencies
-        id: test_dependencies
-        run: |
-          pip install -r examples/server/tests/requirements.txt
-
-      - name: Verify server deps
-        id: verify_server_deps
-        run: |
-          git config --global --add safe.directory $(realpath .)
-          cd examples/server
-          git ls-files --others --modified
-          git status
-          ./deps.sh
-          git status
-          not_ignored_files="$(git ls-files --others --modified)"
-          echo "Modified files: ${not_ignored_files}"
-          if [ -n "${not_ignored_files}" ]; then
-            echo "Repository is dirty or server deps are not built as expected"
-            echo "${not_ignored_files}"
-            exit 1
-          fi
-
-      - name: Build
-        id: cmake_build
-        run: |
-          cmake -B build \
-              -DLLAMA_NATIVE=OFF \
-              -DLLAMA_BUILD_SERVER=ON \
-              -DLLAMA_CURL=ON \
-              -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
-              -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON ;
-          cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server
-
-      - name: Tests
-        id: server_integration_tests
-        run: |
-          cd examples/server/tests
-          PORT=8888 ./tests.sh
-
-      - name: Slow tests
-        id: server_integration_tests_slow
-        if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }}
-        run: |
-          cd examples/server/tests
-          PORT=8888 ./tests.sh --stop --no-skipped --no-capture --tags slow
-
-
-  server-windows:
-    runs-on: windows-2019
-
-    steps:
-      - name: Clone
-        id: checkout
-        uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-          ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}
-
-      - name: libCURL
-        id: get_libcurl
-        env:
-          CURL_VERSION: 8.6.0_6
-        run: |
-          curl.exe -o $env:RUNNER_TEMP/curl.zip -L "https://curl.se/windows/dl-${env:CURL_VERSION}/curl-${env:CURL_VERSION}-win64-mingw.zip"
-          mkdir $env:RUNNER_TEMP/libcurl
-          tar.exe -xvf $env:RUNNER_TEMP/curl.zip --strip-components=1 -C $env:RUNNER_TEMP/libcurl
-
-      - name: Build
-        id: cmake_build
-        run: |
-          cmake -B build -DLLAMA_CURL=ON -DCURL_LIBRARY="$env:RUNNER_TEMP/libcurl/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:RUNNER_TEMP/libcurl/include"
-          cmake --build build --config Release -j ${env:NUMBER_OF_PROCESSORS} --target llama-server
-
-      - name: Python setup
-        id: setup_python
-        uses: actions/setup-python@v5
-        with:
-          python-version: '3.11'
-
-      - name: Tests dependencies
-        id: test_dependencies
-        run: |
-          pip install -r examples/server/tests/requirements.txt
-
-      - name: Copy Libcurl
-        id: prepare_libcurl
-        run: |
-          cp $env:RUNNER_TEMP/libcurl/bin/libcurl-x64.dll ./build/bin/Release/libcurl-x64.dll
-
-      - name: Tests
-        id: server_integration_tests
-        if: ${{ !matrix.disabled_on_pr || !github.event.pull_request }}
-        run: |
-          cd examples/server/tests
-          behave.exe --summary --stop --no-capture --exclude 'issues|wrong_usages|passkey' --tags llama.cpp
-
-      - name: Slow tests
-        id: server_integration_tests_slow
-        if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }}
-        run: |
-          cd examples/server/tests
-          behave.exe --stop --no-skipped --no-capture --tags slow
--- a/ollama/llm/llama.cpp/.gitignore
+++ b/ollama/llm/llama.cpp/.gitignore
-*.o
-*.a
-*.so
-*.gguf
-*.gguf.json
-*.bin
-*.exe
-*.dll
-*.log
-*.gcov
-*.gcno
-*.gcda
-*.dot
-*.bat
-*.tmp
-*.metallib
-*.etag
-*.lastModified
-.DS_Store
-.build/
-.cache/
-.ccls-cache/
-.direnv/
-.envrc
-.swiftpm
-.venv
-.clang-tidy
-.vs/
-.vscode/
-.idea/
-
-ggml-metal-embed.metal
-
-lcov-report/
-gcovr-report/
-
-tags
-build*
-!build.zig
-cmake-build-*
-android-ndk-*
-out/
-tmp/
-
-models/*
-models-mnt
-
-/Pipfile
-/libllama.so
-/llama-*
-llama-batched-swift
-/common/build-info.cpp
-arm_neon.h
-compile_commands.json
-CMakeSettings.json
-
-__pycache__
-dist
-
-zig-out/
-zig-cache/
-
-ppl-*.txt
-qnt-*.txt
-perf-*.txt
-
-examples/jeopardy/results.txt
-examples/server/*.html.hpp
-examples/server/*.js.hpp
-examples/server/*.mjs.hpp
-examples/server/*.css.hpp
-
-poetry.lock
-poetry.toml
-nppBackup
-
-# Test binaries
-/tests/test-grammar-parser
-/tests/test-llama-grammar
-/tests/test-double-float
-/tests/test-grad0
-/tests/test-opt
-/tests/test-quantize-fns
-/tests/test-quantize-perf
-/tests/test-sampling
-/tests/test-tokenizer-0
-/tests/test-tokenizer-1-spm
-/tests/test-tokenizer-1-bpe
-/tests/test-rope
-/tests/test-backend-ops
--- a/ollama/llm/llama.cpp/.gitmodules
+++ b/ollama/llm/llama.cpp/.gitmodules
-[submodule "kompute"]
-	path = kompute
-	url = https://github.com/nomic-ai/kompute.git
--- a/ollama/llm/llama.cpp/.pre-commit-config.yaml
+++ b/ollama/llm/llama.cpp/.pre-commit-config.yaml
-# See https://pre-commit.com for more information
-# See https://pre-commit.com/hooks.html for more hooks
-exclude: prompts/.*.txt
-repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
-  rev: v4.6.0
-  hooks:
-  - id: trailing-whitespace
-  - id: end-of-file-fixer
-  - id: check-yaml
-  - id: check-added-large-files
- repo: https://github.com/PyCQA/flake8
-  rev: 7.0.0
-  hooks:
-  -   id: flake8
-      additional_dependencies: [flake8-no-print]
--- a/ollama/llm/llama.cpp/AUTHORS
+++ b/ollama/llm/llama.cpp/AUTHORS
-# date: Tue Apr  9 09:17:14 EEST 2024
-# this file is auto-generated by scripts/gen-authors.sh
-
-0cc4m <picard12@live.de>
-0xspringtime <110655352+0xspringtime@users.noreply.github.com>
-2f38b454 <dxf@protonmail.com>
-3ooabkhxtn <31479382+3ooabkhxtn@users.noreply.github.com>
-44670 <44670@users.noreply.github.com>
-AN Long <aisk@users.noreply.github.com>
-AT <manyoso@users.noreply.github.com>
-Aarni Koskela <akx@iki.fi>
-Aaron Miller <apage43@ninjawhale.com>
-Aaryaman Vasishta <aaryaman.vasishta@amd.com>
-Abhilash Majumder <30946547+abhilash1910@users.noreply.github.com>
-Abhishek Gopinath K <31348521+overtunned@users.noreply.github.com>
-Adithya Balaji <adithya.b94@gmail.com>
-AdithyanI <adithyan.i4internet@gmail.com>
-Adrian <smith.adriane@gmail.com>
-Adrian Hesketh <a-h@users.noreply.github.com>
-AidanBeltonS <87009434+AidanBeltonS@users.noreply.github.com>
-Aisuko <urakiny@gmail.com>
-Alberto <57916483+albbus-stack@users.noreply.github.com>
-Alex <awhill19@icloud.com>
-Alex Azarov <alex@azarov.by>
-Alex Azarov <alexander.azarov@mapbox.com>
-Alex Klinkhamer <from.github.com.917@grencez.dev>
-Alex Klinkhamer <git@grencez.dev>
-Alex Nguyen <tiendung@users.noreply.github.com>
-Alex Petenchea <alex.petenchea@gmail.com>
-Alex Renda <alexrenda@users.noreply.github.com>
-Alex von Gluck IV <kallisti5@unixzen.com>
-Alexey Parfenov <zxed@alkatrazstudio.net>
-Ali Chraghi <63465728+alichraghi@users.noreply.github.com>
-Ali Nehzat <ali.nehzat@thanks.dev>
-Ali Tariq <ali.tariq@10xengineers.ai>
-Alon <alonfaraj@gmail.com>
-AlpinDale <52078762+AlpinDale@users.noreply.github.com>
-AmirAli Mirian <37371367+amiralimi@users.noreply.github.com>
-Ananta Bastola <anantarajbastola@gmail.com>
-Anas Ahouzi <112881240+aahouzi@users.noreply.github.com>
-András Salamon <ott2@users.noreply.github.com>
-Andrei <abetlen@gmail.com>
-Andrew Canis <andrew.canis@gmail.com>
-Andrew Duffy <a10y@users.noreply.github.com>
-Andrew Godfrey <AndrewGodfrey@users.noreply.github.com>
-Arik Poznanski <arikpoz@users.noreply.github.com>
-Artem <guinmoon@gmail.com>
-Artyom Lebedev <vagran.ast@gmail.com>
-Asbjørn Olling <asbjornolling@gmail.com>
-Ásgeir Bjarni Ingvarsson <asgeir@fundinn.org>
-Ashok Gelal <401055+ashokgelal@users.noreply.github.com>
-Ashraful Islam <ashraful.meche@gmail.com>
-Atsushi Tatsuma <yoshoku@outlook.com>
-Austin <77757836+teleprint-me@users.noreply.github.com>
-AustinMroz <austinmroz@utexas.edu>
-BADR <contact@pythops.com>
-Bach Le <bach@bullno1.com>
-Bailey Chittle <39804642+bachittle@users.noreply.github.com>
-BarfingLemurs <128182951+BarfingLemurs@users.noreply.github.com>
-Behnam M <58621210+ibehnam@users.noreply.github.com>
-Ben Garney <bengarney@users.noreply.github.com>
-Ben Siraphob <bensiraphob@gmail.com>
-Ben Williams <ben@719ben.com>
-Benjamin Lecaillon <84293038+blecaillon@users.noreply.github.com>
-Bernat Vadell <hounter.caza@gmail.com>
-Bodo Graumann <mail@bodograumann.de>
-Bono Lv <lvscar@users.noreply.github.com>
-Borislav Stanimirov <b.stanimirov@abv.bg>
-Branden Butler <bwtbutler@hotmail.com>
-Brian <mofosyne@gmail.com>
-Bruce MacDonald <brucewmacdonald@gmail.com>
-CJ Pais <cj@cjpais.com>
-CRD716 <crd716@gmail.com>
-Cameron <csteele@steelecameron.com>
-Cameron Kaiser <classilla@users.noreply.github.com>
-Casey Primozic <casey@cprimozic.net>
-Casey Primozic <me@ameo.link>
-CausalLM <148736309+CausalLM@users.noreply.github.com>
-Cebtenzzre <cebtenzzre@gmail.com>
-Chad Brewbaker <crb002@gmail.com>
-Cheng Shao <terrorjack@type.dance>
-Chris Kuehl <ckuehl@ckuehl.me>
-Christian Demsar <christian@github.email.demsar.us>
-Christian Demsar <crasm@git.vczf.us>
-Christian Falch <875252+chrfalch@users.noreply.github.com>
-Christian Kögler <ck3d@gmx.de>
-Clark Saben <76020733+csaben@users.noreply.github.com>
-Clint Herron <hanclinto@gmail.com>
-Cuong Trinh Manh <nguoithichkhampha@gmail.com>
-DAN™ <dranger003@gmail.com>
-Damian Stewart <d@damianstewart.com>
-Dane Madsen <dane_madsen@hotmail.com>
-DaniAndTheWeb <57776841+DaniAndTheWeb@users.noreply.github.com>
-Daniel Bevenius <daniel.bevenius@gmail.com>
-Daniel Drake <drake@endlessos.org>
-Daniel Hiltgen <dhiltgen@users.noreply.github.com>
-Daniel Illescas Romero <illescas.daniel@protonmail.com>
-DannyDaemonic <DannyDaemonic@gmail.com>
-Dat Quoc Nguyen <2412555+datquocnguyen@users.noreply.github.com>
-Dave Della Costa <ddellacosta+github@gmail.com>
-David Friehs <david@friehs.info>
-David Kennedy <dakennedyd@gmail.com>
-David Pflug <david@pflug.email>
-David Renshaw <dwrenshaw@gmail.com>
-David Sommers <12738+databyte@users.noreply.github.com>
-David Yang <davidyang6us@gmail.com>
-Dawid Wysocki <62249621+TortillaZHawaii@users.noreply.github.com>
-Dean <Dean.Sinaean@gmail.com>
-Deins <deinsegle@gmail.com>
-Didzis Gosko <didzis@users.noreply.github.com>
-Don Mahurin <dmahurin@users.noreply.github.com>
-DooWoong Lee (David) <manics99@naver.com>
-Doomsdayrs <38189170+Doomsdayrs@users.noreply.github.com>
-Douglas Hanley <thesecretaryofwar@gmail.com>
-Dr. Tom Murphy VII Ph.D <499244+tom7@users.noreply.github.com>
-Ebey Abraham <ebey97@gmail.com>
-Ed Lee <edilee@mozilla.com>
-Ed Lepedus <ed.lepedus@googlemail.com>
-Edward Taylor <edeetee@gmail.com>
-Elbios <141279586+Elbios@users.noreply.github.com>
-Engininja2 <139037756+Engininja2@users.noreply.github.com>
-Equim <sayaka@ekyu.moe>
-Eric Sommerlade <es0m@users.noreply.github.com>
-Eric Zhang <34133756+EZForever@users.noreply.github.com>
-Erik Garrison <erik.garrison@gmail.com>
-Erik Scholz <Green-Sky@users.noreply.github.com>
-Ettore Di Giacinto <mudler@users.noreply.github.com>
-Evan Jones <evan.q.jones@gmail.com>
-Evan Miller <emmiller@gmail.com>
-Eve <139727413+netrunnereve@users.noreply.github.com>
-Evgeny Kurnevsky <kurnevsky@gmail.com>
-Ewout ter Hoeven <E.M.terHoeven@student.tudelft.nl>
-ExtReMLapin <3909752+ExtReMLapin@users.noreply.github.com>
-FK <sozforex@gmail.com>
-Fabian <cmdrf@users.noreply.github.com>
-Fabio R. Sluzala <Fabio3rs@users.noreply.github.com>
-Faez Shakil <faez.shakil@gmail.com>
-FantasyGmm <16450052+FantasyGmm@users.noreply.github.com>
-Fattire <528174+fat-tire@users.noreply.github.com>
-Felix <stenbackfelix@gmail.com>
-Finn Voorhees <finnvoorhees@gmail.com>
-Firat <firatkiral@gmail.com>
-Folko-Ven <71110216+Folko-Ven@users.noreply.github.com>
-Foul-Tarnished <107711110+Foul-Tarnished@users.noreply.github.com>
-Francisco Melo <43780565+francis2tm@users.noreply.github.com>
-FrankHB <frankhb1989@gmail.com>
-Frederik Vogel <Schaltfehler@users.noreply.github.com>
-Gabe Goodhart <gabe.l.hart@gmail.com>
-GainLee <perfecter.gen@gmail.com>
-Galunid <karolek1231456@gmail.com>
-Gary Linscott <glinscott@gmail.com>
-Gary Mulder <gjmulder@gmail.com>
-Genkagaku.GPT <hlhr202@163.com>
-Georgi Gerganov <ggerganov@gmail.com>
-Gilad S <giladgd@users.noreply.github.com>
-GiviMAD <GiviMAD@users.noreply.github.com>
-Govlzkoy <gotope@users.noreply.github.com>
-Guillaume "Vermeille" Sanchez <Guillaume.V.Sanchez@gmail.com>
-Guillaume Wenzek <gwenzek@users.noreply.github.com>
-Guoteng <32697156+SolenoidWGT@users.noreply.github.com>
-Gustavo Rocha Dias <91472747+gustrd@users.noreply.github.com>
-Halalaluyafail3 <55773281+Halalaluyafail3@users.noreply.github.com>
-Haohui Mai <ricetons@gmail.com>
-Haoxiang Fei <tonyfettes@tonyfettes.com>
-Harald Fernengel <harald.fernengel@here.com>
-Hatsune Miku <129688334+at8u@users.noreply.github.com>
-Henk Poley <HenkPoley@gmail.com>
-Henri Vasserman <henv@hot.ee>
-Henrik Forstén <henrik.forsten@gmail.com>
-Herman Semenov <GermanAizek@yandex.ru>
-Hesen Peng <hesen.peng@gmail.com>
-Hoang Nguyen <hugo53@users.noreply.github.com>
-Hongyu Ouyang <96765450+casavaca@users.noreply.github.com>
-Howard Su <howard0su@gmail.com>
-Hua Jiang <allenhjiang@outlook.com>
-Huawei Lin <huaweilin.cs@gmail.com>
-Ian Bull <irbull@eclipsesource.com>
-Ian Bull <irbull@gmail.com>
-Ian Scrivener <github@zilogy.asia>
-Ido S <ido.pluto@gmail.com>
-IgnacioFDM <ignaciofdm@gmail.com>
-Igor Okulist <okigan@gmail.com>
-Ikko Eltociear Ashimine <eltociear@gmail.com>
-Ilya Kurdyukov <59548320+ilyakurdyukov@users.noreply.github.com>
-Ionoclast Laboratories <brigham@ionoclast.com>
-Isaac McFadyen <isaac@imcf.me>
-IsaacDynamo <61521674+IsaacDynamo@users.noreply.github.com>
-Ivan Komarov <Ivan.Komarov@dfyz.info>
-Ivan Stepanov <ivanstepanovftw@gmail.com>
-JH23X <165871467+JH23X@users.noreply.github.com>
-Jack Mousseau <jmousseau@users.noreply.github.com>
-JackJollimore <130917767+JackJollimore@users.noreply.github.com>
-Jag Chadha <jagtesh@gmail.com>
-Jakub N <jakubniemczyk97@gmail.com>
-James Reynolds <magnusviri@users.noreply.github.com>
-Jan Boon <jan.boon@kaetemi.be>
-Jan Boon <kaetemi@gmail.com>
-Jan Ploski <jpl@plosquare.com>
-Jannis Schönleber <joennlae@gmail.com>
-Jared Van Bortel <cebtenzzre@gmail.com>
-Jared Van Bortel <jared@nomic.ai>
-Jason McCartney <jmac@theroot.org>
-Jean-Christophe Hoelt <hoelt@fovea.cc>
-Jean-Michaël Celerier <jeanmichael.celerier+github@gmail.com>
-Jed Fox <git@jedfox.com>
-Jeffrey Quesnelle <emozilla@nousresearch.com>
-Jesse Jojo Johnson <williamsaintgeorge@gmail.com>
-Jhen-Jie Hong <iainst0409@gmail.com>
-Jiahao Li <liplus17@163.com>
-Jian Liao <jianliao@users.noreply.github.com>
-JidongZhang-THU <1119708529@qq.com>
-Jinwoo Jeong <33892306+williamjeong2@users.noreply.github.com>
-Jiří Podivín <66251151+jpodivin@users.noreply.github.com>
-Johannes Gäßler <johannesg@5d6.de>
-Johannes Rudolph <johannes.rudolph@gmail.com>
-John <78893154+cmp-nct@users.noreply.github.com>
-John Balis <phobossystems@gmail.com>
-John Smith <67539080+kingsidelee@users.noreply.github.com>
-JohnnyB <jboero@users.noreply.github.com>
-Jonas Wunderlich <32615971+jonas-w@users.noreply.github.com>
-Jorge A <161275481+jorgealias@users.noreply.github.com>
-Jose Maldonado <63384398+yukiteruamano@users.noreply.github.com>
-Joseph Stahl <1269177+josephst@users.noreply.github.com>
-Joyce <joycebrum@google.com>
-Juan Calderon-Perez <835733+gaby@users.noreply.github.com>
-Judd <foldl@users.noreply.github.com>
-Julius Arkenberg <arki05@users.noreply.github.com>
-Jun Jie <71215065+junnjiee16@users.noreply.github.com>
-Juraj Bednar <juraj@bednar.io>
-Justin Parker <jparkerweb@gmail.com>
-Justin Suess <justin.suess@westpoint.edu>
-Justine Tunney <jtunney@gmail.com>
-Juuso Alasuutari <juuso.alasuutari@gmail.com>
-KASR <karim.asrih@gmail.com>
-Kamil Tomšík <info@tomsik.cz>
-Karsten Weiss <knweiss@gmail.com>
-Karthick <j.karthic2004@gmail.com>
-Karthik Kumar Viswanathan <195178+guilt@users.noreply.github.com>
-Karthik Sethuraman <k.seth1993@gmail.com>
-Kasumi <90275229+kasumi-1@users.noreply.github.com>
-Kawrakow <48489457+ikawrakow@users.noreply.github.com>
-Keiichi Tabata <keiichi.tabata@outlook.com>
-Kenvix ⭐ <kenvixzure@live.com>
-Kerfuffle <44031344+KerfuffleV2@users.noreply.github.com>
-Kevin Ji <1146876+kevinji@users.noreply.github.com>
-Kevin Kwok <antimatter15@gmail.com>
-Kevin Lo <kevlo@kevlo.org>
-Kolen Cheung <ickc@users.noreply.github.com>
-Konstantin Herud <konstantin.herud@denkbares.com>
-Konstantin Zhuravlyov <konstantin.zhuravlyov@amd.com>
-Kunshang Ji <kunshang.ji@intel.com>
-Kyle Liang <liangmanlai@gmail.com>
-Kyle Mistele <kyle@mistele.com>
-Kylin <56434533+KyL0N@users.noreply.github.com>
-Lars Grammel <lars.grammel@gmail.com>
-Laura <Tijntje_7@msn.com>
-Lee <44310445+lx200916@users.noreply.github.com>
-Lee Drake <b.lee.drake@gmail.com>
-Leng Yue <lengyue@lengyue.me>
-LeonEricsson <70749762+LeonEricsson@users.noreply.github.com>
-Leonardo Neumann <leonardo@neumann.dev.br>
-Li Tan <tanliboy@gmail.com>
-Linwei Wang <wanix1988@gmail.com>
-LoganDark <github@logandark.mozmail.com>
-LostRuins <39025047+LostRuins@users.noreply.github.com>
-Luciano <lucianostrika44@gmail.com>
-Luo Tian <lt@basecity.com>
-M. Yusuf Sarıgöz <yusufsarigoz@gmail.com>
-Maarten ter Huurne <maarten@treewalker.org>
-Mack Straight <eiz@users.noreply.github.com>
-Maël Kerbiriou <m431.kerbiriou@gmail.com>
-MaggotHATE <clay1326@gmail.com>
-Marc Köhlbrugge <subscriptions@marckohlbrugge.com>
-Marco Matthies <71844+marcom@users.noreply.github.com>
-Marcus Dunn <51931484+MarcusDunn@users.noreply.github.com>
-Marian Cepok <marian.cepok@gmail.com>
-Mark Fairbairn <thebaron88@gmail.com>
-Marko Tasic <mtasic85@gmail.com>
-Martin Krasser <krasserm@googlemail.com>
-Martin Schwaighofer <mschwaig@users.noreply.github.com>
-Marvin Gießing <marvin.giessing@gmail.com>
-Mateusz Charytoniuk <mateusz.charytoniuk@protonmail.com>
-Matheus C. França <matheus-catarino@hotmail.com>
-Matheus Gabriel Alves Silva <matheusgasource@gmail.com>
-Mathieu Nayrolles <MathieuNls@users.noreply.github.com>
-Mathijs de Bruin <mathijs@mathijsfietst.nl>
-Matt Clayton <156335168+mattjcly@users.noreply.github.com>
-Matt Pulver <matt.pulver@heavy.ai>
-Matteo Boschini <12133566+mbosc@users.noreply.github.com>
-Matthew Tejo <matthew.tejo@gmail.com>
-Matvey Soloviev <blackhole89@gmail.com>
-Maxime <672982+maximegmd@users.noreply.github.com>
-Maximilian Winter <maximilian.winter.91@gmail.com>
-Meng Zhang <meng@tabbyml.com>
-Meng, Hengyu <hengyu.meng@intel.com>
-Merrick Christensen <merrick.christensen@gmail.com>
-Michael Coppola <m18coppola@gmail.com>
-Michael Hueschen <m@mhueschen.dev>
-Michael Kesper <mkesper@schokokeks.org>
-Michael Klimenko <mklimenko29@gmail.com>
-Michael Podvitskiy <podvitskiymichael@gmail.com>
-Michael Potter <NanoTekGuy@Gmail.com>
-Michaël de Vries <vriesdemichael@gmail.com>
-Mihai <mihai.chirculescu@yahoo.com>
-Mike <ytianhui2004@gmail.com>
-Minsoo Cheong <54794500+mscheong01@users.noreply.github.com>
-Mirko185 <mirkosig@gmail.com>
-Mirror Azure <54669636+MirrorAzure@users.noreply.github.com>
-Miwa / Ensan <63481257+ensan-hcl@users.noreply.github.com>
-Mohammadreza Hendiani <hendiani.mohammadreza@gmail.com>
-Murilo Santana <mvrilo@gmail.com>
-Musab Gultekin <musabgultekin@users.noreply.github.com>
-Nam D. Tran <42194884+namtranase@users.noreply.github.com>
-NawafAlansari <72708095+NawafAlansari@users.noreply.github.com>
-Nebula <infinitewormhole@gmail.com>
-Neo Zhang Jianyu <jianyu.zhang@intel.com>
-Neuman Vong <neuman.vong@gmail.com>
-Nexesenex <124105151+Nexesenex@users.noreply.github.com>
-Niall Coates <1349685+Niall-@users.noreply.github.com>
-Nicolai Weitkemper <kontakt@nicolaiweitkemper.de>
-Nigel Bosch <pnigelb@gmail.com>
-Niklas Korz <niklas@niklaskorz.de>
-Nindaleth <Nindaleth@users.noreply.github.com>
-Oleksandr Nikitin <oleksandr@tvori.info>
-Oleksii Maryshchenko <oleksii.maryshchenko@gmail.com>
-Olivier Chafik <ochafik@users.noreply.github.com>
-Ondřej Čertík <ondrej@certik.us>
-Ouadie EL FAROUKI <ouadie.elfarouki@codeplay.com>
-Paul Tsochantaris <ptsochantaris@icloud.com>
-Pavol Rusnak <pavol@rusnak.io>
-Pedro Cuenca <pedro@huggingface.co>
-Peter Sugihara <peter@campsh.com>
-Phil H <5756783+phiharri@users.noreply.github.com>
-Philip Taron <philip.taron@gmail.com>
-Phillip Kravtsov <phillip@kravtsov.net>
-Pierre Alexandre SCHEMBRI <pa.schembri@gmail.com>
-Pierrick Hymbert <pierrick.hymbert@gmail.com>
-Przemysław Pawełczyk <przemoc@gmail.com>
-Qin Yue Chen <71813199+chenqiny@users.noreply.github.com>
-Qingyou Meng <meng.qingyou@gmail.com>
-Qu Zongfu <43257352+yancaoweidaode@users.noreply.github.com>
-RJ Adriaansen <adriaansen@eshcc.eur.nl>
-Radoslav Gerganov <rgerganov@gmail.com>
-Radosław Gryta <radek.gryta@gmail.com>
-Rahul Vivek Nair <68507071+RahulVivekNair@users.noreply.github.com>
-Rand Xie <randxiexyy29@gmail.com>
-Randall Fitzgerald <randall@dasaku.net>
-Reinforce-II <fate@eastal.com>
-Riceball LEE <snowyu.lee@gmail.com>
-Richard Kiss <him@richardkiss.com>
-Richard Roberson <richardr1126@gmail.com>
-Rick G <26732651+TheFlipbook@users.noreply.github.com>
-Rickard Edén <rickardeden@gmail.com>
-Rickard Hallerbäck <rickard.hallerback@gmail.com>
-Rickey Bowers Jr <bitRAKE@gmail.com>
-Riley Stewart <ristew@users.noreply.github.com>
-Rinne <AsakusaRinne@gmail.com>
-Rinne <liu_yaohui1998@126.com>
-Robert Brisita <986796+rbrisita@users.noreply.github.com>
-Robert Sung-wook Shin <edp1096@users.noreply.github.com>
-Robey Holderith <robey@flaminglunchbox.net>
-Robyn <robyngraf@users.noreply.github.com>
-Roger Meier <r.meier@siemens.com>
-Roland <14355895+rbur0425@users.noreply.github.com>
-Romain D <90720+Artefact2@users.noreply.github.com>
-Romain Neutron <romain@neutron.io>
-Roman Parykin <donderom@gmail.com>
-Ron Evans <ron@hybridgroup.com>
-Ron Jailall <rojailal@gmail.com>
-Ronny Brendel <ronnybrendel@gmail.com>
-Ronsor <ronsor@ronsor.pw>
-Rowan Hart <rowanbhart@gmail.com>
-Rune <43761327+Rune-AI@users.noreply.github.com>
-Ryan Landay <rlanday@gmail.com>
-Ryder Wishart <ryderwishart@gmail.com>
-Rőczey Barnabás <31726601+An0nie@users.noreply.github.com>
-SakuraUmi <yukinon244@gmail.com>
-Salvador E. Tropea <stropea@inti.gob.ar>
-Sam Spilsbury <smspillaz@gmail.com>
-Sami Farin <3876865+Safari77@users.noreply.github.com>
-Samuel Maynard <samwmaynard@gmail.com>
-Sang-Kil Park <sang.park@42dot.ai>
-Seb C <47074056+Sebby37@users.noreply.github.com>
-Sebastián A <sebastian.aedo29@gmail.com>
-SebastianApel <13675545+SebastianApel@users.noreply.github.com>
-Senemu <10880819+Senemu@users.noreply.github.com>
-Sergey Alirzaev <zl29ah@gmail.com>
-Sergio López <slp@sinrega.org>
-SeungWon Jeong <65549245+redlion0929@users.noreply.github.com>
-ShadovvBeast <ShadovvBeast@gmail.com>
-Shakhar Dasgupta <shakhardasgupta@gmail.com>
-Shangning Xu <32517059+xushangning@users.noreply.github.com>
-Shijie <821898965@qq.com>
-Shintarou Okada <kokuzen@gmail.com>
-Shouzheng Liu <61452103+lshzh-ww@users.noreply.github.com>
-Shouzheng Liu <lshzh.hi@gmail.com>
-Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
-Simon Willison <swillison@gmail.com>
-Siwen Yu <yusiwen@gmail.com>
-Sky Yan <skyan83@gmail.com>
-Slaren <2141330+slaren@users.noreply.github.com>
-Slava Primenko <primenko.s@gmail.com>
-SoftwareRenderer <138734813+SoftwareRenderer@users.noreply.github.com>
-Someone <sergei.kozlukov@aalto.fi>
-Someone Serge <sergei.kozlukov@aalto.fi>
-Sourab Mangrulkar <13534540+pacman100@users.noreply.github.com>
-Spencer Sutton <spencersutton@users.noreply.github.com>
-Srinivas Billa <nivibilla@gmail.com>
-Stefan Sydow <stefan@sydow.email>
-Stephan Walter <stephan@walter.name>
-Stephen Nichols <snichols@users.noreply.github.com>
-Steve Grubb <ausearch.1@gmail.com>
-Steven Roussey <sroussey@gmail.com>
-Steward Garcia <57494570+FSSRepo@users.noreply.github.com>
-Suaj Carrot <72162667+SuajCarrot@users.noreply.github.com>
-SuperUserNameMan <yoann@terminajones.com>
-Tai Duc Nguyen <taiducnguyen.drexel@gmail.com>
-Taikono-Himazin <kazu@po.harenet.ne.jp>
-Tameem <113388789+AhmadTameem@users.noreply.github.com>
-Tamotsu Takahashi <ttakah+github@gmail.com>
-Thái Hoàng Tâm <75922889+RoyalHeart@users.noreply.github.com>
-Thatcher Chamberlin <j.thatcher.c@gmail.com>
-Theia Vogel <theia@vgel.me>
-Thérence <13496987+Royalphax@users.noreply.github.com>
-Thibault Terrasson <thibault.terrasson@gmail.com>
-Thomas Klausner <wiz@gatalith.at>
-Tim Miller <drasticactions@users.noreply.github.com>
-Timmy Knight <r2d2fish@gmail.com>
-Timothy Cronin <40186632+4imothy@users.noreply.github.com>
-Ting Lou <ting.lou@gmail.com>
-Ting Sun <suntcrick@gmail.com>
-Tobias Lütke <tobi@shopify.com>
-Tom C <tom.corelis@gmail.com>
-Tom Jobbins <784313+TheBloke@users.noreply.github.com>
-Tomas <tom.tomas.36478119@gmail.com>
-Tomáš Pazdiora <tomas.pazdiora@gmail.com>
-Tristan Ross <rosscomputerguy@protonmail.com>
-Tungsten842 <886724vf@anonaddy.me>
-Tungsten842 <quantmint@protonmail.com>
-Tushar <ditsuke@protonmail.com>
-UEXTM.com <84163508+uextm@users.noreply.github.com>
-Uzo Nweke <uzoechi@gmail.com>
-Vaibhav Srivastav <vaibhavs10@gmail.com>
-Val Kharitonov <mail@kharvd.com>
-Valentin Konovalov <valle.ketsujin@gmail.com>
-Valentyn Bezshapkin <61702053+valentynbez@users.noreply.github.com>
-Victor Z. Peng <ziliangdotme@gmail.com>
-Vlad <spitfireage@gmail.com>
-Vladimir <bogdad@gmail.com>
-Vladimir Malyutin <first-leon@yandex.ru>
-Vladimir Zorin <vladimir@deviant.guru>
-Volodymyr Vitvitskyi <72226+signalpillar@users.noreply.github.com>
-WangHaoranRobin <56047610+WangHaoranRobin@users.noreply.github.com>
-Weird Constructor <weirdconstructor@gmail.com>
-Welby Seely <welbyseely@gmail.com>
-Wentai Zhang <rchardx@gmail.com>
-WillCorticesAI <150854901+WillCorticesAI@users.noreply.github.com>
-Willy Tarreau <w@1wt.eu>
-Wu Jian Ping <wujjpp@hotmail.com>
-Wu Jian Ping <wujp@greatld.com>
-Xiake Sun <xiake.sun@intel.com>
-Xiang (Kevin) Li <kevinli020508@gmail.com>
-Xiao-Yong Jin <jinxiaoyong@gmail.com>
-XiaotaoChen <chenxiaotao1234@gmail.com>
-Xiaoyi Chen <cxychina@gmail.com>
-Xingchen Song(宋星辰) <xingchensong1996@163.com>
-Xuan Son Nguyen <thichthat@gmail.com>
-Yann Follet <131855179+YannFollet@users.noreply.github.com>
-Yiming Cui <conandiy@vip.qq.com>
-Yishuo Wang <MeouSker77@outlook.com>
-Yueh-Po Peng <94939112+y10ab1@users.noreply.github.com>
-Yui <dev@sleepyyui.com>
-Yusuf Kağan Hanoğlu <hanoglu@yahoo.com>
-Yuval Peled <31162840+Yuval-Peled@users.noreply.github.com>
-ZHAOKAI WANG <sanxianwei@163.com>
-Zane Shannon <z@zcs.me>
-Zay <95888118+isaiahbjork@users.noreply.github.com>
-Zenix <zenixls2@gmail.com>
-Zhang Peiyuan <a1286225768@gmail.com>
-ZhouYuChen <zhouyuchen@naver.com>
-Ziad Ben Hadj-Alouane <zied.benhadjalouane@gmail.com>
-Ziang Wu <97337387+ZiangWu-77@users.noreply.github.com>
-Zsapi <martin1.zsapka@gmail.com>
-a-n-n-a-l-e-e <150648636+a-n-n-a-l-e-e@users.noreply.github.com>
-adel boussaken <netdur@gmail.com>
-afrideva <95653597+afrideva@users.noreply.github.com>
-akawrykow <142945436+akawrykow@users.noreply.github.com>
-alexpinel <93524949+alexpinel@users.noreply.github.com>
-alonfaraj <alonfaraj@gmail.com>
-andrijdavid <david@geek.mg>
-anon998 <131767832+anon998@users.noreply.github.com>
-anzz1 <anzz1@live.com>
-apaz <aarpazdera@gmail.com>
-apcameron <37645737+apcameron@users.noreply.github.com>
-arcrank <arcrank@gmail.com>
-arlo-phoenix <140345165+arlo-phoenix@users.noreply.github.com>
-at8u <129688334+at8u@users.noreply.github.com>
-automaticcat <daogiatuank54@gmail.com>
-bandoti <141645996+bandoti@users.noreply.github.com>
-beiller <beiller@gmail.com>
-bhubbb <79117352+bhubbb@users.noreply.github.com>
-bmwl <brian.marshall@tolko.com>
-bobqianic <129547291+bobqianic@users.noreply.github.com>
-bryanSwk <93190252+bryanSwk@users.noreply.github.com>
-bsilvereagle <bsilvereagle@users.noreply.github.com>
-bssrdf <merlintiger@hotmail.com>
-byte-6174 <88070277+byte-6174@users.noreply.github.com>
-cebtenzzre <cebtenzzre@gmail.com>
-chaihahaha <chai836275709@gmail.com>
-chiranko <96988916+chiranko@users.noreply.github.com>
-clibdev <52199778+clibdev@users.noreply.github.com>
-clyang <clyang@clyang.net>
-cocktailpeanut <121128867+cocktailpeanut@users.noreply.github.com>
-coezbek <c.oezbek@gmail.com>
-comex <comexk@gmail.com>
-compilade <113953597+compilade@users.noreply.github.com>
-crasm <crasm@git.vczf.net>
-crasm <crasm@git.vczf.us>
-daboe01 <daboe01@googlemail.com>
-david raistrick <keen99@users.noreply.github.com>
-ddpasa <112642920+ddpasa@users.noreply.github.com>
-deepdiffuser <112834445+deepdiffuser@users.noreply.github.com>
-divinity76 <divinity76@gmail.com>
-dotpy314 <33351922+dotpy314@users.noreply.github.com>
-drbh <david.richard.holtz@gmail.com>
-ds5t5 <145942675+ds5t5@users.noreply.github.com>
-dylan <canardleteer@users.noreply.github.com>
-eastriver <lee@eastriver.dev>
-ebraminio <ebraminio@gmail.com>
-eiery <19350831+eiery@users.noreply.github.com>
-eric8607242 <e0928021388@gmail.com>
-fraxy-v <65565042+fraxy-v@users.noreply.github.com>
-github-actions[bot] <github-actions[bot]@users.noreply.github.com>
-gliptic <gliptic@users.noreply.github.com>
-goerch <jhr.walter@t-online.de>
-grahameth <96447521+grahameth@users.noreply.github.com>
-gwjr <502526+gwjr@users.noreply.github.com>
-h-h-h-h <13482553+h-h-h-h@users.noreply.github.com>
-hankcs <cnhankmc@gmail.com>
-hoangmit <hoangmit@users.noreply.github.com>
-hongbo.mo <352280764@qq.com>
-howlger <eclipse@voormann.de>
-howlger <github@voormann.de>
-hutli <6594598+hutli@users.noreply.github.com>
-hutli <hutli@hutli.hu>
-hutli <jensstaermose@hotmail.com>
-hxer7963 <hxer7963@gmail.com>
-hydai <z54981220@gmail.com>
-iSma <ismail.senhaji@gmail.com>
-iacore <74560659+iacore@users.noreply.github.com>
-igarnier <igarnier@protonmail.com>
-iohub <rickyang.pro@gmail.com>
-jacobi petrucciani <8117202+jpetrucciani@users.noreply.github.com>
-jameswu2014 <545426914@qq.com>
-jneem <joeneeman@gmail.com>
-johnson442 <56517414+johnson442@users.noreply.github.com>
-jon-chuang <9093549+jon-chuang@users.noreply.github.com>
-jp-x-g <jpxg-dev@protonmail.com>
-jwj7140 <32943891+jwj7140@users.noreply.github.com>
-kaizau <kaizau@users.noreply.github.com>
-kalomaze <66376113+kalomaze@users.noreply.github.com>
-kang <tpdns9032100@gmail.com>
-katsu560 <118887472+katsu560@users.noreply.github.com>
-kchro3 <62481661+kchro3@users.noreply.github.com>
-khimaros <me@khimaros.com>
-kiltyj <kiltyj@gmail.com>
-klosax <131523366+klosax@users.noreply.github.com>
-kunal-vaishnavi <115581922+kunal-vaishnavi@users.noreply.github.com>
-kunnis <kunnis@users.noreply.github.com>
-kuronekosaiko <EvanChanJ@163.com>
-kuvaus <22169537+kuvaus@users.noreply.github.com>
-kwin1412 <42286931+kwin1412@users.noreply.github.com>
-l3utterfly <gc.pthzfoldr@gmail.com>
-ldwang <ftgreat@163.com>
-le.chang <cljs118@126.com>
-leejet <leejet714@gmail.com>
-limitedAtonement <limitedAtonement@users.noreply.github.com>
-lon <114724657+longregen@users.noreply.github.com>
-m3ndax <adrian.goessl@outlook.com>
-maddes8cht <55592906+maddes8cht@users.noreply.github.com>
-makomk <makosoft@googlemail.com>
-manikbhandari <mbbhandarimanik2@gmail.com>
-mdrokz <mohammadmunshi@gmail.com>
-mgroeber9110 <45620825+mgroeber9110@users.noreply.github.com>
-minarchist <minarchist@users.noreply.github.com>
-mj-shifu <77107165+mj-shifu@users.noreply.github.com>
-mmyjona <jonathan.gonse@gmail.com>
-momonga <115213907+mmnga@users.noreply.github.com>
-moritzbrantner <31051084+moritzbrantner@users.noreply.github.com>
-mzcu <milos.cubrilo@gmail.com>
-nanahi <130121847+na-na-hi@users.noreply.github.com>
-ngc92 <7938269+ngc92@users.noreply.github.com>
-nhamanasu <45545786+nhamanasu@users.noreply.github.com>
-niansa/tuxifan <anton-sa@web.de>
-niansa/tuxifan <tuxifan@posteo.de>
-ningshanwutuobang <ningshanwutuobang@gmail.com>
-nold <Nold360@users.noreply.github.com>
-nopperl <54780682+nopperl@users.noreply.github.com>
-nusu-github <29514220+nusu-github@users.noreply.github.com>
-olexiyb <olexiyb@gmail.com>
-oobabooga <112222186+oobabooga@users.noreply.github.com>
-opparco <parco.opaai@gmail.com>
-ostix360 <55257054+ostix360@users.noreply.github.com>
-perserk <perserk@gmail.com>
-postmasters <namnguyen@google.com>
-pudepiedj <pudepiedj@gmail.com>
-qingfengfenga <41416092+qingfengfenga@users.noreply.github.com>
-qouoq <qouoq@fastmail.com>
-qunash <anzoria@gmail.com>
-rabidcopy <rabidcopy@yahoo.com>
-rankaiyx <rankaiyx@rankaiyx.com>
-rhjdvsgsgks <26178113+rhjdvsgsgks@users.noreply.github.com>
-rhuddleston <ryan.huddleston@percona.com>
-rimoliga <53384203+rimoliga@users.noreply.github.com>
-runfuture <runfuture@users.noreply.github.com>
-sandyiscool <sandyiscool@gmail.com>
-semidark <me@semidark.net>
-sharpHL <132747147+sharpHL@users.noreply.github.com>
-shibe2 <shibe@tuta.io>
-singularity <12184989+singularity-s0@users.noreply.github.com>
-sjinzh <sjinzh@gmail.com>
-slaren <2141330+slaren@users.noreply.github.com>
-slaren <slarengh@gmail.com>
-snadampal <87143774+snadampal@users.noreply.github.com>
-staviq <staviq@gmail.com>
-stduhpf <stephduh@live.fr>
-swittk <switt1995@gmail.com>
-takov751 <40316768+takov751@users.noreply.github.com>
-tarcey <cey.tarik@gmail.com>
-texmex76 <40733439+texmex76@users.noreply.github.com>
-thement <40525767+thement@users.noreply.github.com>
-tjohnman <tjohnman@users.noreply.github.com>
-tslmy <tslmy@users.noreply.github.com>
-ubik2 <ubik2@users.noreply.github.com>
-uint256_t <konndennsa@gmail.com>
-uint256_t <maekawatoshiki1017@gmail.com>
-unbounded <haakon@likedan.net>
-valiray <133289098+valiray@users.noreply.github.com>
-vodkaslime <646329483@qq.com>
-vvhg1 <94630311+vvhg1@users.noreply.github.com>
-vxiiduu <73044267+vxiiduu@users.noreply.github.com>
-wbpxre150 <100937007+wbpxre150@users.noreply.github.com>
-whoreson <139810751+whoreson@users.noreply.github.com>
-wonjun Jang <strutive07@gmail.com>
-wzy <32936898+Freed-Wu@users.noreply.github.com>
-xaedes <xaedes@gmail.com>
-xaedes <xaedes@googlemail.com>
-xloem <0xloem@gmail.com>
-yangli2 <yangli2@gmail.com>
-yuiseki <yuiseki@gmail.com>
-zakkor <edward.partenie@gmail.com>
-zhouwg <6889919+zhouwg@users.noreply.github.com>
-zrm <trustiosity.zrm@gmail.com>
-源文雨 <41315874+fumiama@users.noreply.github.com>
-Нияз Гарифзянов <112617865+garrnizon@users.noreply.github.com>
--- a/ollama/llm/llama.cpp/CMakeLists.txt
+++ b/ollama/llm/llama.cpp/CMakeLists.txt
-cmake_minimum_required(VERSION 3.14) # for add_link_options and implicit target directories.
-project("llama.cpp" C CXX)
-include(CheckIncludeFileCXX)
-
-set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
-
-if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)
-    set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
-    set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
-endif()
-
-set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
-
-if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
-    set(LLAMA_STANDALONE ON)
-
-    # configure project version
-    # TODO
-else()
-    set(LLAMA_STANDALONE OFF)
-endif()
-
-if (EMSCRIPTEN)
-    set(BUILD_SHARED_LIBS_DEFAULT OFF)
-
-    option(LLAMA_WASM_SINGLE_FILE "llama: embed WASM inside the generated llama.js" ON)
-else()
-    if (MINGW)
-        set(BUILD_SHARED_LIBS_DEFAULT OFF)
-    else()
-        set(BUILD_SHARED_LIBS_DEFAULT ON)
-    endif()
-endif()
-
-
-#
-# Option list
-#
-
-if (APPLE)
-    set(LLAMA_METAL_DEFAULT ON)
-    set(LLAMA_BLAS_DEFAULT ON)
-    set(LLAMA_BLAS_VENDOR_DEFAULT "Apple")
-else()
-    set(LLAMA_METAL_DEFAULT OFF)
-    set(LLAMA_BLAS_DEFAULT OFF)
-    set(LLAMA_BLAS_VENDOR_DEFAULT "Generic")
-endif()
-
-set(LLAMA_LLAMAFILE_DEFAULT ON)
-
-# general
-option(BUILD_SHARED_LIBS                "build shared libraries"                                OFF)
-option(LLAMA_STATIC                     "llama: static link libraries"                          OFF)
-option(LLAMA_NATIVE                     "llama: enable -march=native flag"                      ON)
-option(LLAMA_LTO                        "llama: enable link time optimization"                  OFF)
-option(LLAMA_CCACHE                     "llama: use ccache if available"                        ON)
-
-# debug
-option(LLAMA_ALL_WARNINGS               "llama: enable all compiler warnings"                   ON)
-option(LLAMA_ALL_WARNINGS_3RD_PARTY     "llama: enable all compiler warnings in 3rd party libs" OFF)
-option(LLAMA_GPROF                      "llama: enable gprof"                                   OFF)
-
-# build
-option(LLAMA_FATAL_WARNINGS             "llama: enable -Werror flag"                            OFF)
-
-# sanitizers
-option(LLAMA_SANITIZE_THREAD            "llama: enable thread sanitizer"                        OFF)
-option(LLAMA_SANITIZE_ADDRESS           "llama: enable address sanitizer"                       OFF)
-option(LLAMA_SANITIZE_UNDEFINED         "llama: enable undefined sanitizer"                     OFF)
-
-# instruction set specific
-if (LLAMA_NATIVE)
-    set(INS_ENB OFF)
-else()
-    set(INS_ENB ON)
-endif()
-
-option(LLAMA_SVE                             "llama: enable SVE"                                OFF)
-option(LLAMA_AVX                             "llama: enable AVX"                                ${INS_ENB})
-option(LLAMA_AVX2                            "llama: enable AVX2"                               ${INS_ENB})
-option(LLAMA_AVX512                          "llama: enable AVX512"                             OFF)
-option(LLAMA_AVX512_VBMI                     "llama: enable AVX512-VBMI"                        OFF)
-option(LLAMA_AVX512_VNNI                     "llama: enable AVX512-VNNI"                        OFF)
-option(LLAMA_AVX512_BF16                     "llama: enable AVX512-BF16"                        OFF)
-option(LLAMA_FMA                             "llama: enable FMA"                                ${INS_ENB})
-# in MSVC F16C is implied with AVX2/AVX512
-if (NOT MSVC)
-    option(LLAMA_F16C                        "llama: enable F16C"                               ${INS_ENB})
-endif()
-
-if (WIN32)
-    set(LLAMA_WIN_VER "0x602" CACHE STRING "llama: Windows Version")
-endif()
-
-# 3rd party libs
-option(LLAMA_ACCELERATE                      "llama: enable Accelerate framework"               ON)
-option(LLAMA_BLAS                            "llama: use BLAS"                                  ${LLAMA_BLAS_DEFAULT})
-set(LLAMA_BLAS_VENDOR ${LLAMA_BLAS_VENDOR_DEFAULT} CACHE STRING
-                                             "llama: BLAS library vendor")
-option(LLAMA_LLAMAFILE                       "llama: use llamafile SGEMM"                       ${LLAMA_LLAMAFILE_DEFAULT})
-option(LLAMA_CUDA                            "llama: use CUDA"                                  OFF)
-option(LLAMA_CUBLAS                          "llama: use CUDA (deprecated, use LLAMA_CUDA)"     OFF)
-option(LLAMA_CUDA_FORCE_DMMV                 "llama: use dmmv instead of mmvq CUDA kernels"     OFF)
-option(LLAMA_CUDA_FORCE_MMQ                  "llama: use mmq kernels instead of cuBLAS"         OFF)
-set(LLAMA_CUDA_DMMV_X      "32" CACHE STRING "llama: x stride for dmmv CUDA kernels")
-set(LLAMA_CUDA_MMV_Y        "1" CACHE STRING "llama: y block size for mmv CUDA kernels")
-option(LLAMA_CUDA_F16                        "llama: use 16 bit floats for some calculations"   OFF)
-set(LLAMA_CUDA_KQUANTS_ITER "2" CACHE STRING "llama: iters./thread per block for Q2_K/Q6_K")
-set(LLAMA_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING
-                                             "llama: max. batch size for using peer access")
-option(LLAMA_CUDA_NO_PEER_COPY               "llama: do not use peer to peer copies"            OFF)
-option(LLAMA_CUDA_NO_VMM                     "llama: do not try to use CUDA VMM"                OFF)
-option(LLAMA_CUDA_FA_ALL_QUANTS              "llama: compile all quants for FlashAttention"     OFF)
-
-option(LLAMA_CURL                            "llama: use libcurl to download model from an URL" OFF)
-option(LLAMA_HIPBLAS                         "llama: use hipBLAS"                               OFF)
-option(LLAMA_HIP_UMA                         "llama: use HIP unified memory architecture"       OFF)
-option(LLAMA_VULKAN                          "llama: use Vulkan"                                OFF)
-option(LLAMA_VULKAN_CHECK_RESULTS            "llama: run Vulkan op checks"                      OFF)
-option(LLAMA_VULKAN_DEBUG                    "llama: enable Vulkan debug output"                OFF)
-option(LLAMA_VULKAN_MEMORY_DEBUG             "llama: enable Vulkan memory debug output"         OFF)
-option(LLAMA_VULKAN_VALIDATE                 "llama: enable Vulkan validation"                  OFF)
-option(LLAMA_VULKAN_RUN_TESTS                "llama: run Vulkan tests"                          OFF)
-option(LLAMA_METAL                           "llama: use Metal"                                 ${LLAMA_METAL_DEFAULT})
-option(LLAMA_METAL_NDEBUG                    "llama: disable Metal debugging"                   OFF)
-option(LLAMA_METAL_SHADER_DEBUG              "llama: compile Metal with -fno-fast-math"         OFF)
-option(LLAMA_METAL_EMBED_LIBRARY             "llama: embed Metal library"                       OFF)
-set(LLAMA_METAL_MACOSX_VERSION_MIN "" CACHE STRING
-                                             "llama: metal minimum macOS version")
-set(LLAMA_METAL_STD "" CACHE STRING          "llama: metal standard version (-std flag)")
-option(LLAMA_KOMPUTE                         "llama: use Kompute"                               OFF)
-option(LLAMA_RPC                             "llama: use RPC"                                   OFF)
-option(LLAMA_OPENMP                          "llama: use OpenMP"                                ON)
-option(LLAMA_SYCL                            "llama: use SYCL"                                  OFF)
-option(LLAMA_SYCL_F16                        "llama: use 16 bit floats for sycl calculations"   OFF)
-set(LLAMA_SYCL_TARGET   "INTEL" CACHE STRING "llama: sycl target device")
-option(LLAMA_CPU_HBM                         "llama: use memkind for CPU HBM"                   OFF)
-set(LLAMA_SCHED_MAX_COPIES  "4" CACHE STRING "llama: max input copies for pipeline parallelism")
-
-option(LLAMA_BUILD_TESTS                     "llama: build tests"    ${LLAMA_STANDALONE})
-option(LLAMA_BUILD_EXAMPLES                  "llama: build examples" ${LLAMA_STANDALONE})
-option(LLAMA_BUILD_SERVER                    "llama: build server example"                      ON)
-option(LLAMA_LASX                            "llama: enable lasx"                               ON)
-option(LLAMA_LSX                             "llama: enable lsx"                                ON)
-
-# add perf arguments
-option(LLAMA_PERF                            "llama: enable perf"                               OFF)
-
-# Required for relocatable CMake package
-include(${CMAKE_CURRENT_SOURCE_DIR}/scripts/build-info.cmake)
-
-#
-# Compile flags
-#
-
-if (LLAMA_SYCL)
-    set(CMAKE_CXX_STANDARD 17)
-else()
-    set(CMAKE_CXX_STANDARD 11)
-endif()
-
-set(CMAKE_CXX_STANDARD_REQUIRED true)
-set(CMAKE_C_STANDARD 11)
-set(CMAKE_C_STANDARD_REQUIRED true)
-set(THREADS_PREFER_PTHREAD_FLAG ON)
-
-find_package(Threads REQUIRED)
-include(CheckCXXCompilerFlag)
-
-add_compile_definitions(GGML_SCHED_MAX_COPIES=${LLAMA_SCHED_MAX_COPIES})
-
-# enable libstdc++ assertions for debug builds
-if (CMAKE_SYSTEM_NAME MATCHES "Linux")
-    add_compile_definitions($<$<CONFIG:Debug>:_GLIBCXX_ASSERTIONS>)
-endif()
-
-if (NOT MSVC)
-    if (LLAMA_SANITIZE_THREAD)
-        add_compile_options(-fsanitize=thread)
-        link_libraries     (-fsanitize=thread)
-    endif()
-
-    if (LLAMA_SANITIZE_ADDRESS)
-        add_compile_options(-fsanitize=address -fno-omit-frame-pointer)
-        link_libraries     (-fsanitize=address)
-    endif()
-
-    if (LLAMA_SANITIZE_UNDEFINED)
-        add_compile_options(-fsanitize=undefined)
-        link_libraries     (-fsanitize=undefined)
-    endif()
-endif()
-
-if (APPLE AND LLAMA_ACCELERATE)
-    find_library(ACCELERATE_FRAMEWORK Accelerate)
-    if (ACCELERATE_FRAMEWORK)
-        message(STATUS "Accelerate framework found")
-
-        add_compile_definitions(GGML_USE_ACCELERATE)
-        add_compile_definitions(ACCELERATE_NEW_LAPACK)
-        add_compile_definitions(ACCELERATE_LAPACK_ILP64)
-        set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ${ACCELERATE_FRAMEWORK})
-    else()
-        message(WARNING "Accelerate framework not found")
-    endif()
-endif()
-
-if (LLAMA_METAL)
-    find_library(FOUNDATION_LIBRARY Foundation REQUIRED)
-    find_library(METAL_FRAMEWORK    Metal      REQUIRED)
-    find_library(METALKIT_FRAMEWORK MetalKit   REQUIRED)
-
-    message(STATUS "Metal framework found")
-    set(GGML_HEADERS_METAL ggml-metal.h)
-    set(GGML_SOURCES_METAL ggml-metal.m)
-
-    add_compile_definitions(GGML_USE_METAL)
-    if (LLAMA_METAL_NDEBUG)
-        add_compile_definitions(GGML_METAL_NDEBUG)
-    endif()
-
-    # copy ggml-common.h and ggml-metal.metal to bin directory
-    configure_file(ggml-common.h    ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-common.h    COPYONLY)
-    configure_file(ggml-metal.metal ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal COPYONLY)
-
-    if (LLAMA_METAL_EMBED_LIBRARY)
-        enable_language(ASM)
-        add_compile_definitions(GGML_METAL_EMBED_LIBRARY)
-
-        set(METALLIB_COMMON "${CMAKE_CURRENT_SOURCE_DIR}/ggml-common.h")
-        set(METALLIB_SOURCE "${CMAKE_CURRENT_SOURCE_DIR}/ggml-metal.metal")
-
-        file(MAKE_DIRECTORY "${CMAKE_BINARY_DIR}/autogenerated")
-
-        # merge ggml-common.h and ggml-metal.metal into a single file
-        set(METALLIB_EMBED_ASM    "${CMAKE_BINARY_DIR}/autogenerated/ggml-metal-embed.s")
-        set(METALLIB_SOURCE_EMBED "${CMAKE_BINARY_DIR}/autogenerated/ggml-metal-embed.metal")
-
-        add_custom_command(
-            OUTPUT ${METALLIB_EMBED_ASM}
-            COMMAND echo "Embedding Metal library"
-            COMMAND sed -e '/\#include \"ggml-common.h\"/r ${METALLIB_COMMON}' -e '/\#include \"ggml-common.h\"/d' < ${METALLIB_SOURCE} > ${METALLIB_SOURCE_EMBED}
-            COMMAND echo ".section __DATA,__ggml_metallib"          >  ${METALLIB_EMBED_ASM}
-            COMMAND echo ".globl _ggml_metallib_start"              >> ${METALLIB_EMBED_ASM}
-            COMMAND echo "_ggml_metallib_start:"                    >> ${METALLIB_EMBED_ASM}
-            COMMAND echo ".incbin \\\"${METALLIB_SOURCE_EMBED}\\\"" >> ${METALLIB_EMBED_ASM}
-            COMMAND echo ".globl _ggml_metallib_end"                >> ${METALLIB_EMBED_ASM}
-            COMMAND echo "_ggml_metallib_end:"                      >> ${METALLIB_EMBED_ASM}
-            DEPENDS ggml-metal.metal ggml-common.h
-            COMMENT "Generate assembly for embedded Metal library"
-        )
-
-        set(GGML_SOURCES_METAL ${GGML_SOURCES_METAL} ${METALLIB_EMBED_ASM})
-    else()
-        if (LLAMA_METAL_SHADER_DEBUG)
-            # custom command to do the following:
-            #   xcrun -sdk macosx metal    -fno-fast-math -c ggml-metal.metal -o ggml-metal.air
-            #   xcrun -sdk macosx metallib                   ggml-metal.air   -o default.metallib
-            #
-            # note: this is the only way I found to disable fast-math in Metal. it's ugly, but at least it works
-            #       disabling fast math is needed in order to pass tests/test-backend-ops
-            # note: adding -fno-inline fixes the tests when using MTL_SHADER_VALIDATION=1
-            # note: unfortunately, we have to call it default.metallib instead of ggml.metallib
-            #       ref: https://github.com/ggerganov/whisper.cpp/issues/1720
-            set(XC_FLAGS -fno-fast-math -fno-inline -g)
-        else()
-            set(XC_FLAGS -O3)
-        endif()
-
-        # Append macOS metal versioning flags
-        if (LLAMA_METAL_MACOSX_VERSION_MIN)
-            message(STATUS "Adding -mmacosx-version-min=${LLAMA_METAL_MACOSX_VERSION_MIN} flag to metal compilation")
-            list(APPEND XC_FLAGS -mmacosx-version-min=${LLAMA_METAL_MACOSX_VERSION_MIN})
-        endif()
-        if (LLAMA_METAL_STD)
-            message(STATUS "Adding -std=${LLAMA_METAL_STD} flag to metal compilation")
-            list(APPEND XC_FLAGS -std=${LLAMA_METAL_STD})
-        endif()
-
-        add_custom_command(
-            OUTPUT ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib
-            COMMAND xcrun -sdk macosx metal    ${XC_FLAGS} -c ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal -o ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.air
-            COMMAND xcrun -sdk macosx metallib                ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.air   -o ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib
-            COMMAND rm -f ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.air
-            COMMAND rm -f ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-common.h
-            COMMAND rm -f ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal
-            DEPENDS ggml-metal.metal ggml-common.h
-            COMMENT "Compiling Metal kernels"
-            )
-
-        add_custom_target(
-            ggml-metal ALL
-            DEPENDS ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib
-            )
-    endif() # LLAMA_METAL_EMBED_LIBRARY
-
-    set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS}
-        ${FOUNDATION_LIBRARY}
-        ${METAL_FRAMEWORK}
-        ${METALKIT_FRAMEWORK}
-        )
-endif()
-
-if (LLAMA_OPENMP)
-    find_package(OpenMP)
-    if (OpenMP_FOUND)
-        message(STATUS "OpenMP found")
-        add_compile_definitions(GGML_USE_OPENMP)
-        set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} OpenMP::OpenMP_C OpenMP::OpenMP_CXX)
-    else()
-        message(WARNING "OpenMP not found")
-    endif()
-endif()
-
-if (LLAMA_BLAS)
-    if (LLAMA_STATIC)
-        set(BLA_STATIC ON)
-    endif()
-    #if (CMAKE_VERSION VERSION_GREATER_EQUAL 3.22)
-    #    set(BLA_SIZEOF_INTEGER 8)
-    #endif()
-
-    set(BLA_VENDOR ${LLAMA_BLAS_VENDOR})
-    find_package(BLAS)
-
-    if (BLAS_FOUND)
-        message(STATUS "BLAS found, Libraries: ${BLAS_LIBRARIES}")
-
-        if (("${BLAS_INCLUDE_DIRS}" STREQUAL "") AND NOT (${LLAMA_BLAS_VENDOR} MATCHES "Apple"))
-            # BLAS_INCLUDE_DIRS is missing in FindBLAS.cmake.
-            # see https://gitlab.kitware.com/cmake/cmake/-/issues/20268
-            find_package(PkgConfig REQUIRED)
-            if (${LLAMA_BLAS_VENDOR} MATCHES "Generic")
-                pkg_check_modules(DepBLAS REQUIRED blas)
-            elseif (${LLAMA_BLAS_VENDOR} MATCHES "OpenBLAS")
-                # As of openblas v0.3.22, the 64-bit is named openblas64.pc
-                pkg_check_modules(DepBLAS openblas64)
-                if (NOT DepBLAS_FOUND)
-                    pkg_check_modules(DepBLAS REQUIRED openblas)
-                endif()
-            elseif (${LLAMA_BLAS_VENDOR} MATCHES "FLAME")
-                pkg_check_modules(DepBLAS REQUIRED blis)
-            elseif (${LLAMA_BLAS_VENDOR} MATCHES "ATLAS")
-                pkg_check_modules(DepBLAS REQUIRED blas-atlas)
-            elseif (${LLAMA_BLAS_VENDOR} MATCHES "FlexiBLAS")
-                pkg_check_modules(DepBLAS REQUIRED flexiblas_api)
-            elseif (${LLAMA_BLAS_VENDOR} MATCHES "Intel")
-                # all Intel* libraries share the same include path
-                pkg_check_modules(DepBLAS REQUIRED mkl-sdl)
-            elseif (${LLAMA_BLAS_VENDOR} MATCHES "NVHPC")
-                # this doesn't provide pkg-config
-                # suggest to assign BLAS_INCLUDE_DIRS on your own
-                if ("${NVHPC_VERSION}" STREQUAL "")
-                    message(WARNING "Better to set NVHPC_VERSION")
-                else()
-                    set(DepBLAS_FOUND ON)
-                    set(DepBLAS_INCLUDE_DIRS "/opt/nvidia/hpc_sdk/${CMAKE_SYSTEM_NAME}_${CMAKE_SYSTEM_PROCESSOR}/${NVHPC_VERSION}/math_libs/include")
-                endif()
-            endif()
-            if (DepBLAS_FOUND)
-                set(BLAS_INCLUDE_DIRS ${DepBLAS_INCLUDE_DIRS})
-            else()
-                message(WARNING "BLAS_INCLUDE_DIRS neither been provided nor been automatically"
-                " detected by pkgconfig, trying to find cblas.h from possible paths...")
-                find_path(BLAS_INCLUDE_DIRS
-                    NAMES cblas.h
-                    HINTS
-                        /usr/include
-                        /usr/local/include
-                        /usr/include/openblas
-                        /opt/homebrew/opt/openblas/include
-                        /usr/local/opt/openblas/include
-                        /usr/include/x86_64-linux-gnu/openblas/include
-                )
-            endif()
-        endif()
-
-        message(STATUS "BLAS found, Includes: ${BLAS_INCLUDE_DIRS}")
-
-        add_compile_options(${BLAS_LINKER_FLAGS})
-
-        add_compile_definitions(GGML_USE_BLAS)
-
-        if (${BLAS_INCLUDE_DIRS} MATCHES "mkl" AND (${LLAMA_BLAS_VENDOR} MATCHES "Generic" OR ${LLAMA_BLAS_VENDOR} MATCHES "Intel"))
-            add_compile_definitions(GGML_BLAS_USE_MKL)
-        endif()
-
-        set(GGML_HEADERS_BLAS ggml-blas.h)
-        set(GGML_SOURCES_BLAS ggml-blas.cpp)
-
-        set(LLAMA_EXTRA_LIBS     ${LLAMA_EXTRA_LIBS}     ${BLAS_LIBRARIES})
-        set(LLAMA_EXTRA_INCLUDES ${LLAMA_EXTRA_INCLUDES} ${BLAS_INCLUDE_DIRS})
-    else()
-        message(WARNING "BLAS not found, please refer to "
-        "https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors"
-        " to set correct LLAMA_BLAS_VENDOR")
-    endif()
-endif()
-
-if (LLAMA_LLAMAFILE)
-    add_compile_definitions(GGML_USE_LLAMAFILE)
-
-    set(GGML_HEADERS_LLAMAFILE sgemm.h)
-    set(GGML_SOURCES_LLAMAFILE sgemm.cpp)
-endif()
-
-if (LLAMA_CUBLAS)
-    message(WARNING "LLAMA_CUBLAS is deprecated and will be removed in the future.\nUse LLAMA_CUDA instead")
-    set(LLAMA_CUDA ON)
-endif()
-
-if (LLAMA_CUDA)
-    cmake_minimum_required(VERSION 3.18)  # for CMAKE_CUDA_ARCHITECTURES
-
-    find_package(CUDAToolkit)
-    if (CUDAToolkit_FOUND)
-        message(STATUS "CUDA found")
-
-        if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
-            # 52 == lowest CUDA 12 standard
-            # 60 == f16 CUDA intrinsics
-            # 61 == integer CUDA intrinsics
-            # 70 == compute capability at which unrolling a loop in mul_mat_q kernels is faster
-            if (LLAMA_CUDA_F16 OR LLAMA_CUDA_DMMV_F16)
-                set(CMAKE_CUDA_ARCHITECTURES "60;61;70") # needed for f16 CUDA intrinsics
-            else()
-                set(CMAKE_CUDA_ARCHITECTURES "52;61;70") # lowest CUDA 12 standard + lowest for integer intrinsics
-                #set(CMAKE_CUDA_ARCHITECTURES "OFF") # use this to compile much faster, but only F16 models work
-            endif()
-        endif()
-        message(STATUS "Using CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}")
-
-        enable_language(CUDA)
-
-        set(GGML_HEADERS_CUDA ggml-cuda.h)
-
-        file(GLOB GGML_SOURCES_CUDA "ggml-cuda/*.cu")
-        list(APPEND GGML_SOURCES_CUDA "ggml-cuda.cu")
-        file(GLOB SRCS "ggml-cuda/template-instances/fattn-wmma*.cu")
-        list(APPEND GGML_SOURCES_CUDA ${SRCS})
-        file(GLOB SRCS "ggml-cuda/template-instances/mmq*.cu")
-        list(APPEND GGML_SOURCES_CUDA ${SRCS})
-
-        add_compile_definitions(GGML_USE_CUDA)
-        add_compile_definitions(GGML_CUDA_USE_GRAPHS)
-        if (LLAMA_CUDA_FORCE_DMMV)
-            add_compile_definitions(GGML_CUDA_FORCE_DMMV)
-        endif()
-        if (LLAMA_CUDA_FORCE_MMQ)
-            add_compile_definitions(GGML_CUDA_FORCE_MMQ)
-        endif()
-        if (LLAMA_CUDA_NO_VMM)
-            add_compile_definitions(GGML_CUDA_NO_VMM)
-        endif()
-        add_compile_definitions(GGML_CUDA_DMMV_X=${LLAMA_CUDA_DMMV_X})
-        add_compile_definitions(GGML_CUDA_MMV_Y=${LLAMA_CUDA_MMV_Y})
-        if (DEFINED LLAMA_CUDA_DMMV_Y)
-            add_compile_definitions(GGML_CUDA_MMV_Y=${LLAMA_CUDA_DMMV_Y}) # for backwards compatibility
-        endif()
-        if (LLAMA_CUDA_F16 OR LLAMA_CUDA_DMMV_F16)
-            add_compile_definitions(GGML_CUDA_F16)
-        endif()
-        add_compile_definitions(K_QUANTS_PER_ITERATION=${LLAMA_CUDA_KQUANTS_ITER})
-        add_compile_definitions(GGML_CUDA_PEER_MAX_BATCH_SIZE=${LLAMA_CUDA_PEER_MAX_BATCH_SIZE})
-        if (LLAMA_CUDA_NO_PEER_COPY)
-            add_compile_definitions(GGML_CUDA_NO_PEER_COPY)
-        endif()
-        if (LLAMA_CUDA_FA_ALL_QUANTS)
-            file(GLOB SRCS "ggml-cuda/template-instances/fattn-vec*.cu")
-            list(APPEND GGML_SOURCES_CUDA ${SRCS})
-            add_compile_definitions(GGML_CUDA_FA_ALL_QUANTS)
-        else()
-            file(GLOB SRCS "ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu")
-            list(APPEND GGML_SOURCES_CUDA ${SRCS})
-            file(GLOB SRCS "ggml-cuda/template-instances/fattn-vec*q8_0-q8_0.cu")
-            list(APPEND GGML_SOURCES_CUDA ${SRCS})
-            file(GLOB SRCS "ggml-cuda/template-instances/fattn-vec*f16-f16.cu")
-            list(APPEND GGML_SOURCES_CUDA ${SRCS})
-        endif()
-
-        if (LLAMA_STATIC)
-            if (WIN32)
-                # As of 12.3.1 CUDA Toolkit for Windows does not offer a static cublas library
-                set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} CUDA::cudart_static CUDA::cublas CUDA::cublasLt)
-            else ()
-                set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} CUDA::cudart_static CUDA::cublas_static CUDA::cublasLt_static)
-            endif()
-        else()
-            set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} CUDA::cudart CUDA::cublas CUDA::cublasLt)
-        endif()
-
-        if (LLAMA_CUDA_NO_VMM)
-            # No VMM requested, no need to link directly with the cuda driver lib (libcuda.so)
-        else()
-            set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} CUDA::cuda_driver) # required by cuDeviceGetAttribute(), cuMemGetAllocationGranularity(...), ...
-        endif()
-    else()
-        message(WARNING "CUDA not found")
-    endif()
-endif()
-
-if (LLAMA_RPC)
-    add_compile_definitions(GGML_USE_RPC)
-
-    if (WIN32)
-        set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ws2_32)
-    endif()
-
-    set(GGML_HEADERS_RPC ggml-rpc.h)
-    set(GGML_SOURCES_RPC ggml-rpc.cpp)
-endif()
-
-if (LLAMA_VULKAN)
-    find_package(Vulkan)
-    if (Vulkan_FOUND)
-        message(STATUS "Vulkan found")
-
-        set(GGML_HEADERS_VULKAN ggml-vulkan.h)
-        set(GGML_SOURCES_VULKAN ggml-vulkan.cpp)
-
-        add_compile_definitions(GGML_USE_VULKAN)
-
-        # Workaround to the "can't dereference invalidated vector iterator" bug in clang-cl debug build
-        # Posssibly relevant: https://stackoverflow.com/questions/74748276/visual-studio-no-displays-the-correct-length-of-stdvector
-        if (MSVC AND CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
-            add_compile_definitions(_ITERATOR_DEBUG_LEVEL=0)
-        endif()
-
-        if (LLAMA_VULKAN_CHECK_RESULTS)
-            add_compile_definitions(GGML_VULKAN_CHECK_RESULTS)
-        endif()
-
-        if (LLAMA_VULKAN_DEBUG)
-            add_compile_definitions(GGML_VULKAN_DEBUG)
-        endif()
-
-        if (LLAMA_VULKAN_MEMORY_DEBUG)
-            add_compile_definitions(GGML_VULKAN_MEMORY_DEBUG)
-        endif()
-
-        if (LLAMA_VULKAN_VALIDATE)
-            add_compile_definitions(GGML_VULKAN_VALIDATE)
-        endif()
-
-        if (LLAMA_VULKAN_RUN_TESTS)
-            add_compile_definitions(GGML_VULKAN_RUN_TESTS)
-        endif()
-
-        set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} Vulkan::Vulkan)
-    else()
-        message(WARNING "Vulkan not found")
-    endif()
-endif()
-
-if (LLAMA_HIPBLAS)
-    if (NOT EXISTS $ENV{ROCM_PATH})
-        if (NOT EXISTS /opt/rocm)
-            set(ROCM_PATH /usr)
-        else()
-            set(ROCM_PATH /opt/rocm)
-        endif()
-    else()
-        set(ROCM_PATH $ENV{ROCM_PATH})
-    endif()
-    list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH})
-    list(APPEND CMAKE_PREFIX_PATH "${ROCM_PATH}/lib64/cmake")
-
-    # CMake on Windows doesn't support the HIP language yet
-    if(WIN32)
-        set(CXX_IS_HIPCC TRUE)
-    else()
-        string(REGEX MATCH "hipcc(\.bat)?$" CXX_IS_HIPCC "${CMAKE_CXX_COMPILER}")
-    endif()
-
-    if(CXX_IS_HIPCC)
-        if(LINUX)
-            if (NOT ${CMAKE_CXX_COMPILER_ID} MATCHES "Clang")
-                message(WARNING "Only LLVM is supported for HIP, hint: CXX=/opt/rocm/llvm/bin/clang++")
-            endif()
-
-            message(WARNING "Setting hipcc as the C++ compiler is legacy behavior."
-                    " Prefer setting the HIP compiler directly. See README for details.")
-        endif()
-    else()
-        # Forward AMDGPU_TARGETS to CMAKE_HIP_ARCHITECTURES.
-        if(AMDGPU_TARGETS AND NOT CMAKE_HIP_ARCHITECTURES)
-            set(CMAKE_HIP_ARCHITECTURES ${AMDGPU_TARGETS})
-        endif()
-        cmake_minimum_required(VERSION 3.21)
-        enable_language(HIP)
-    endif()
-    find_package(hip     REQUIRED)
-    find_package(hipblas REQUIRED)
-    find_package(rocblas REQUIRED)
-
-    message(STATUS "HIP and hipBLAS found")
-
-    set(GGML_HEADERS_ROCM ggml-cuda.h)
-
-    file(GLOB GGML_SOURCES_ROCM "ggml-cuda/*.cu")
-    list(APPEND GGML_SOURCES_ROCM "ggml-cuda.cu")
-    file(GLOB SRCS "ggml-cuda/template-instances/fattn-wmma*.cu")
-    list(APPEND GGML_SOURCES_ROCM ${SRCS})
-    file(GLOB SRCS "ggml-cuda/template-instances/mmq*.cu")
-    list(APPEND GGML_SOURCES_ROCM ${SRCS})
-
-    add_compile_definitions(GGML_USE_HIPBLAS GGML_USE_CUDA)
-
-    if (LLAMA_HIP_UMA)
-        add_compile_definitions(GGML_HIP_UMA)
-    endif()
-
-    if (LLAMA_CUDA_FORCE_DMMV)
-        add_compile_definitions(GGML_CUDA_FORCE_DMMV)
-    endif()
-
-    if (LLAMA_CUDA_FORCE_MMQ)
-        add_compile_definitions(GGML_CUDA_FORCE_MMQ)
-    endif()
-
-    if (LLAMA_CUDA_NO_PEER_COPY)
-        add_compile_definitions(GGML_CUDA_NO_PEER_COPY)
-    endif()
-
-    if (LLAMA_CUDA_FA_ALL_QUANTS)
-        file(GLOB SRCS "ggml-cuda/template-instances/fattn-vec*.cu")
-        list(APPEND GGML_SOURCES_ROCM ${SRCS})
-        add_compile_definitions(GGML_CUDA_FA_ALL_QUANTS)
-    else()
-        file(GLOB SRCS "ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu")
-        list(APPEND GGML_SOURCES_ROCM ${SRCS})
-        file(GLOB SRCS "ggml-cuda/template-instances/fattn-vec*q8_0-q8_0.cu")
-        list(APPEND GGML_SOURCES_ROCM ${SRCS})
-        file(GLOB SRCS "ggml-cuda/template-instances/fattn-vec*f16-f16.cu")
-        list(APPEND GGML_SOURCES_ROCM ${SRCS})
-    endif()
-
-    add_compile_definitions(GGML_CUDA_DMMV_X=${LLAMA_CUDA_DMMV_X})
-    add_compile_definitions(GGML_CUDA_MMV_Y=${LLAMA_CUDA_MMV_Y})
-    add_compile_definitions(K_QUANTS_PER_ITERATION=${LLAMA_CUDA_KQUANTS_ITER})
-
-    if (CXX_IS_HIPCC)
-        set_source_files_properties(${GGML_SOURCES_ROCM} PROPERTIES LANGUAGE CXX)
-        set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} hip::device)
-    else()
-        set_source_files_properties(${GGML_SOURCES_ROCM} PROPERTIES LANGUAGE HIP)
-    endif()
-
-    if (LLAMA_STATIC)
-        message(FATAL_ERROR "Static linking not supported for HIP/ROCm")
-    endif()
-
-    set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} PUBLIC hip::host roc::rocblas roc::hipblas)
-endif()
-
-if (LLAMA_SYCL)
-    if (NOT LLAMA_SYCL_TARGET MATCHES "^(INTEL|NVIDIA)$")
-        message(FATAL_ERROR "Invalid backend chosen, supported options are INTEL or NVIDIA")
-    endif()
-
-    if ( NOT DEFINED ENV{ONEAPI_ROOT})
-        message(FATAL_ERROR "Not detect ENV {ONEAPI_ROOT}, please install oneAPI & source it, like: source /opt/intel/oneapi/setvars.sh")
-    endif()
-    #todo: AOT
-
-    find_package(IntelSYCL REQUIRED)
-
-    message(STATUS "SYCL found")
-
-    add_compile_definitions(GGML_USE_SYCL)
-
-    if (LLAMA_SYCL_F16)
-        add_compile_definitions(GGML_SYCL_F16)
-    endif()
-
-    if (LLAMA_CUDA_FORCE_MMQ)
-        add_compile_definitions(GGML_SYCL_FORCE_MMQ)
-    endif()
-
-    add_compile_options(-I./) #include DPCT
-    add_compile_options(-I/${SYCL_INCLUDE_DIR})
-
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl -L${MKLROOT}/lib")
-    if (LLAMA_SYCL_TARGET STREQUAL "NVIDIA")
-        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl-targets=nvptx64-nvidia-cuda")
-    endif()
-
-    set(GGML_HEADERS_SYCL ggml-sycl.h)
-    file(GLOB GGML_SOURCES_SYCL "ggml-sycl/*.cpp")
-    list(APPEND GGML_SOURCES_SYCL "ggml-sycl.cpp")
-
-    if (WIN32)
-        set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} -fsycl sycl7 OpenCL mkl_sycl_blas_dll.lib mkl_intel_ilp64_dll.lib mkl_sequential_dll.lib mkl_core_dll.lib)
-    else()
-        if (LLAMA_SYCL_TARGET STREQUAL "INTEL")
-            set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} -fsycl OpenCL mkl_core pthread m dl mkl_sycl_blas mkl_intel_ilp64 mkl_tbb_thread)
-        elseif (LLAMA_SYCL_TARGET STREQUAL "NVIDIA")
-            set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} -fsycl pthread m dl onemkl)
-        endif()
-    endif()
-endif()
-
-if (LLAMA_KOMPUTE)
-    add_compile_definitions(VULKAN_HPP_DISPATCH_LOADER_DYNAMIC=1)
-    find_package(Vulkan COMPONENTS glslc REQUIRED)
-    find_program(glslc_executable NAMES glslc HINTS Vulkan::glslc)
-    if (NOT glslc_executable)
-        message(FATAL_ERROR "glslc not found")
-    endif()
-
-    function(compile_shader)
-        set(options)
-        set(oneValueArgs)
-        set(multiValueArgs SOURCES)
-        cmake_parse_arguments(compile_shader "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
-        foreach(source ${compile_shader_SOURCES})
-            get_filename_component(filename ${source} NAME)
-            set(spv_file ${filename}.spv)
-            add_custom_command(
-                OUTPUT ${spv_file}
-                DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/${source}
-                ${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/common.comp
-                ${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/op_getrows.comp
-                ${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/op_mul_mv_q_n_pre.comp
-                ${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/op_mul_mv_q_n.comp
-                COMMAND ${glslc_executable} --target-env=vulkan1.2 -o ${spv_file} ${CMAKE_CURRENT_SOURCE_DIR}/${source}
-                COMMENT "Compiling ${source} to ${spv_file}"
-                )
-
-            get_filename_component(RAW_FILE_NAME ${spv_file} NAME)
-            set(FILE_NAME "shader${RAW_FILE_NAME}")
-            string(REPLACE ".comp.spv" ".h" HEADER_FILE ${FILE_NAME})
-            string(TOUPPER ${HEADER_FILE} HEADER_FILE_DEFINE)
-            string(REPLACE "." "_" HEADER_FILE_DEFINE "${HEADER_FILE_DEFINE}")
-            set(OUTPUT_HEADER_FILE "${HEADER_FILE}")
-            message(STATUS "${HEADER_FILE} generating ${HEADER_FILE_DEFINE}")
-            if(CMAKE_GENERATOR MATCHES "Visual Studio")
-                add_custom_command(
-                    OUTPUT ${OUTPUT_HEADER_FILE}
-                    COMMAND ${CMAKE_COMMAND} -E echo "/*THIS FILE HAS BEEN AUTOMATICALLY GENERATED - DO NOT EDIT*/" > ${OUTPUT_HEADER_FILE}
-                    COMMAND ${CMAKE_COMMAND} -E echo \"\#ifndef ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
-                    COMMAND ${CMAKE_COMMAND} -E echo \"\#define ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
-                    COMMAND ${CMAKE_COMMAND} -E echo "namespace kp {" >> ${OUTPUT_HEADER_FILE}
-                    COMMAND ${CMAKE_COMMAND} -E echo "namespace shader_data {" >> ${OUTPUT_HEADER_FILE}
-                    COMMAND ${CMAKE_BINARY_DIR}/bin/$<CONFIG>/xxd -i ${RAW_FILE_NAME} >> ${OUTPUT_HEADER_FILE}
-                    COMMAND ${CMAKE_COMMAND} -E echo "}}" >> ${OUTPUT_HEADER_FILE}
-                    COMMAND ${CMAKE_COMMAND} -E echo \"\#endif // define ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
-                    DEPENDS ${spv_file} xxd
-                    COMMENT "Converting to hpp: ${FILE_NAME} ${CMAKE_BINARY_DIR}/bin/$<CONFIG>/xxd"
-                    )
-            else()
-                add_custom_command(
-                    OUTPUT ${OUTPUT_HEADER_FILE}
-                    COMMAND ${CMAKE_COMMAND} -E echo "/*THIS FILE HAS BEEN AUTOMATICALLY GENERATED - DO NOT EDIT*/" > ${OUTPUT_HEADER_FILE}
-                    COMMAND ${CMAKE_COMMAND} -E echo \"\#ifndef ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
-                    COMMAND ${CMAKE_COMMAND} -E echo \"\#define ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
-                    COMMAND ${CMAKE_COMMAND} -E echo "namespace kp {" >> ${OUTPUT_HEADER_FILE}
-                    COMMAND ${CMAKE_COMMAND} -E echo "namespace shader_data {" >> ${OUTPUT_HEADER_FILE}
-                    COMMAND ${CMAKE_BINARY_DIR}/bin/xxd -i ${RAW_FILE_NAME} >> ${OUTPUT_HEADER_FILE}
-                    COMMAND ${CMAKE_COMMAND} -E echo "}}" >> ${OUTPUT_HEADER_FILE}
-                    COMMAND ${CMAKE_COMMAND} -E echo \"\#endif // define ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
-                    DEPENDS ${spv_file} xxd
-                    COMMENT "Converting to hpp: ${FILE_NAME} ${CMAKE_BINARY_DIR}/bin/xxd"
-                    )
-            endif()
-        endforeach()
-    endfunction()
-
-    if (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/kompute/CMakeLists.txt")
-        message(STATUS "Kompute found")
-        set(KOMPUTE_OPT_LOG_LEVEL Error CACHE STRING "Kompute log level")
-        add_subdirectory(kompute)
-
-        # Compile our shaders
-        compile_shader(SOURCES
-            kompute-shaders/op_scale.comp
-            kompute-shaders/op_scale_8.comp
-            kompute-shaders/op_add.comp
-            kompute-shaders/op_addrow.comp
-            kompute-shaders/op_mul.comp
-            kompute-shaders/op_silu.comp
-            kompute-shaders/op_relu.comp
-            kompute-shaders/op_gelu.comp
-            kompute-shaders/op_softmax.comp
-            kompute-shaders/op_norm.comp
-            kompute-shaders/op_rmsnorm.comp
-            kompute-shaders/op_diagmask.comp
-            kompute-shaders/op_mul_mat_mat_f32.comp
-            kompute-shaders/op_mul_mat_f16.comp
-            kompute-shaders/op_mul_mat_q8_0.comp
-            kompute-shaders/op_mul_mat_q4_0.comp
-            kompute-shaders/op_mul_mat_q4_1.comp
-            kompute-shaders/op_mul_mat_q6_k.comp
-            kompute-shaders/op_getrows_f32.comp
-            kompute-shaders/op_getrows_f16.comp
-            kompute-shaders/op_getrows_q4_0.comp
-            kompute-shaders/op_getrows_q4_1.comp
-            kompute-shaders/op_getrows_q6_k.comp
-            kompute-shaders/op_rope_f16.comp
-            kompute-shaders/op_rope_f32.comp
-            kompute-shaders/op_cpy_f16_f16.comp
-            kompute-shaders/op_cpy_f16_f32.comp
-            kompute-shaders/op_cpy_f32_f16.comp
-            kompute-shaders/op_cpy_f32_f32.comp
-        )
-
-        # Create a custom target for our generated shaders
-        add_custom_target(generated_shaders DEPENDS
-            shaderop_scale.h
-            shaderop_scale_8.h
-            shaderop_add.h
-            shaderop_addrow.h
-            shaderop_mul.h
-            shaderop_silu.h
-            shaderop_relu.h
-            shaderop_gelu.h
-            shaderop_softmax.h
-            shaderop_norm.h
-            shaderop_rmsnorm.h
-            shaderop_diagmask.h
-            shaderop_mul_mat_mat_f32.h
-            shaderop_mul_mat_f16.h
-            shaderop_mul_mat_q8_0.h
-            shaderop_mul_mat_q4_0.h
-            shaderop_mul_mat_q4_1.h
-            shaderop_mul_mat_q6_k.h
-            shaderop_getrows_f32.h
-            shaderop_getrows_f16.h
-            shaderop_getrows_q4_0.h
-            shaderop_getrows_q4_1.h
-            shaderop_getrows_q6_k.h
-            shaderop_rope_f16.h
-            shaderop_rope_f32.h
-            shaderop_cpy_f16_f16.h
-            shaderop_cpy_f16_f32.h
-            shaderop_cpy_f32_f16.h
-            shaderop_cpy_f32_f32.h
-        )
-
-        # Create a custom command that depends on the generated_shaders
-        add_custom_command(
-            OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp
-            COMMAND ${CMAKE_COMMAND} -E touch ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp
-            DEPENDS generated_shaders
-            COMMENT "Ensuring shaders are generated before compiling ggml-kompute.cpp"
-        )
-
-        # Add the stamp to the main sources to ensure dependency tracking
-        set(GGML_SOURCES_KOMPUTE ggml-kompute.cpp ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp)
-        set(GGML_HEADERS_KOMPUTE ggml-kompute.h   ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp)
-
-        add_compile_definitions(GGML_USE_KOMPUTE)
-
-        set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} kompute)
-        set(LLAMA_EXTRA_INCLUDES ${LLAMA_EXTRA_INCLUDES} ${CMAKE_BINARY_DIR})
-    else()
-        message(WARNING "Kompute not found")
-    endif()
-endif()
-
-if (LLAMA_CPU_HBM)
-    find_library(memkind memkind REQUIRED)
-
-    add_compile_definitions(GGML_USE_CPU_HBM)
-
-    target_link_libraries(ggml PUBLIC memkind)
-endif()
-
-if (LLAMA_PERF)
-    add_compile_definitions(GGML_PERF)
-endif()
-
-function(get_flags CCID CCVER)
-    set(C_FLAGS "")
-    set(CXX_FLAGS "")
-
-    if (CCID MATCHES "Clang")
-        set(C_FLAGS   -Wunreachable-code-break -Wunreachable-code-return)
-        set(CXX_FLAGS -Wunreachable-code-break -Wunreachable-code-return -Wmissing-prototypes -Wextra-semi)
-
-        if (
-            (CCID STREQUAL "Clang"      AND CCVER VERSION_GREATER_EQUAL 3.8.0) OR
-            (CCID STREQUAL "AppleClang" AND CCVER VERSION_GREATER_EQUAL 7.3.0)
-        )
-            list(APPEND C_FLAGS -Wdouble-promotion)
-        endif()
-    elseif (CCID STREQUAL "GNU")
-        set(C_FLAGS   -Wdouble-promotion)
-        set(CXX_FLAGS -Wno-array-bounds)
-
-        if (CCVER VERSION_GREATER_EQUAL 7.1.0)
-            list(APPEND CXX_FLAGS -Wno-format-truncation)
-        endif()
-        if (CCVER VERSION_GREATER_EQUAL 8.1.0)
-            list(APPEND CXX_FLAGS -Wextra-semi)
-        endif()
-    endif()
-
-    set(GF_C_FLAGS   ${C_FLAGS}   PARENT_SCOPE)
-    set(GF_CXX_FLAGS ${CXX_FLAGS} PARENT_SCOPE)
-endfunction()
-
-if (LLAMA_FATAL_WARNINGS)
-    if (CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
-        list(APPEND C_FLAGS   -Werror)
-        list(APPEND CXX_FLAGS -Werror)
-    elseif (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
-        add_compile_options(/WX)
-    endif()
-endif()
-
-if (LLAMA_ALL_WARNINGS)
-    if (NOT MSVC)
-        list(APPEND WARNING_FLAGS -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function)
-        list(APPEND C_FLAGS       -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes
-                                  -Werror=implicit-int -Werror=implicit-function-declaration)
-        list(APPEND CXX_FLAGS     -Wmissing-declarations -Wmissing-noreturn)
-
-        list(APPEND C_FLAGS   ${WARNING_FLAGS})
-        list(APPEND CXX_FLAGS ${WARNING_FLAGS})
-
-        get_flags(${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION})
-
-        add_compile_options("$<$<COMPILE_LANGUAGE:C>:${C_FLAGS};${GF_C_FLAGS}>"
-                            "$<$<COMPILE_LANGUAGE:CXX>:${CXX_FLAGS};${GF_CXX_FLAGS}>")
-    else()
-        # todo : msvc
-        set(C_FLAGS   "")
-        set(CXX_FLAGS "")
-    endif()
-endif()
-
-set(CUDA_CXX_FLAGS "")
-
-if (LLAMA_CUDA)
-    set(CUDA_FLAGS -use_fast_math)
-
-    if (LLAMA_FATAL_WARNINGS)
-        list(APPEND CUDA_FLAGS -Werror all-warnings)
-    endif()
-
-    if (LLAMA_ALL_WARNINGS AND NOT MSVC)
-        set(NVCC_CMD ${CMAKE_CUDA_COMPILER} .c)
-        if (NOT CMAKE_CUDA_HOST_COMPILER STREQUAL "")
-            list(APPEND NVCC_CMD -ccbin ${CMAKE_CUDA_HOST_COMPILER})
-        endif()
-
-        execute_process(
-            COMMAND ${NVCC_CMD} -Xcompiler --version
-            OUTPUT_VARIABLE CUDA_CCFULLVER
-            ERROR_QUIET
-        )
-
-        if (NOT CUDA_CCFULLVER MATCHES clang)
-            set(CUDA_CCID "GNU")
-            execute_process(
-                COMMAND ${NVCC_CMD} -Xcompiler "-dumpfullversion -dumpversion"
-                OUTPUT_VARIABLE CUDA_CCVER
-                ERROR_QUIET
-            )
-        else()
-            if (CUDA_CCFULLVER MATCHES Apple)
-                set(CUDA_CCID "AppleClang")
-            else()
-                set(CUDA_CCID "Clang")
-            endif()
-            string(REGEX REPLACE "^.* version ([0-9.]*).*$" "\\1" CUDA_CCVER ${CUDA_CCFULLVER})
-        endif()
-
-        message("-- CUDA host compiler is ${CUDA_CCID} ${CUDA_CCVER}")
-
-        get_flags(${CUDA_CCID} ${CUDA_CCVER})
-        list(APPEND CUDA_CXX_FLAGS ${CXX_FLAGS} ${GF_CXX_FLAGS})  # This is passed to -Xcompiler later
-    endif()
-
-    if (NOT MSVC)
-        list(APPEND CUDA_CXX_FLAGS -Wno-pedantic)
-    endif()
-endif()
-
-if (WIN32)
-    add_compile_definitions(_CRT_SECURE_NO_WARNINGS)
-
-    if (BUILD_SHARED_LIBS)
-        set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
-    endif()
-endif()
-
-if (LLAMA_LTO)
-    include(CheckIPOSupported)
-    check_ipo_supported(RESULT result OUTPUT output)
-    if (result)
-        set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE)
-    else()
-        message(WARNING "IPO is not supported: ${output}")
-    endif()
-endif()
-
-if (LLAMA_CCACHE)
-    find_program(LLAMA_CCACHE_FOUND ccache)
-    if (LLAMA_CCACHE_FOUND)
-        set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ccache)
-        set(ENV{CCACHE_SLOPPINESS} time_macros)
-        message(STATUS "ccache found, compilation results will be cached. Disable with LLAMA_CCACHE=OFF.")
-    else()
-        message(STATUS "Warning: ccache not found - consider installing it for faster compilation or disable this warning with LLAMA_CCACHE=OFF")
-    endif ()
-endif()
-
-# this version of Apple ld64 is buggy
-execute_process(
-    COMMAND ${CMAKE_C_COMPILER} ${CMAKE_EXE_LINKER_FLAGS} -Wl,-v
-    ERROR_VARIABLE output
-    OUTPUT_QUIET
-)
-
-if (output MATCHES "dyld-1015\.7")
-    add_compile_definitions(HAVE_BUGGY_APPLE_LINKER)
-endif()
-
-# Architecture specific
-# TODO: probably these flags need to be tweaked on some architectures
-#       feel free to update the Makefile for your architecture and send a pull request or issue
-message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")
-if (MSVC)
-    string(TOLOWER "${CMAKE_GENERATOR_PLATFORM}" CMAKE_GENERATOR_PLATFORM_LWR)
-    message(STATUS "CMAKE_GENERATOR_PLATFORM: ${CMAKE_GENERATOR_PLATFORM}")
-else ()
-    set(CMAKE_GENERATOR_PLATFORM_LWR "")
-endif ()
-
-if (NOT MSVC)
-    if (LLAMA_STATIC)
-        add_link_options(-static)
-        if (MINGW)
-            add_link_options(-static-libgcc -static-libstdc++)
-        endif()
-    endif()
-    if (LLAMA_GPROF)
-        add_compile_options(-pg)
-    endif()
-endif()
-
-set(ARCH_FLAGS "")
-
-if (CMAKE_OSX_ARCHITECTURES STREQUAL "arm64" OR CMAKE_GENERATOR_PLATFORM_LWR STREQUAL "arm64" OR
-    (NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND
-     CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm.*|ARM64)$"))
-    message(STATUS "ARM detected")
-    if (MSVC)
-        add_compile_definitions(__aarch64__) # MSVC defines _M_ARM64 instead
-        add_compile_definitions(__ARM_NEON)
-        add_compile_definitions(__ARM_FEATURE_FMA)
-
-        set(CMAKE_REQUIRED_FLAGS_PREV ${CMAKE_REQUIRED_FLAGS})
-        string(JOIN " " CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS} "/arch:armv8.2")
-        check_cxx_source_compiles("#include <arm_neon.h>\nint main() { int8x16_t _a, _b; int32x4_t _s = vdotq_s32(_s, _a, _b); return 0; }" GGML_COMPILER_SUPPORT_DOTPROD)
-        if (GGML_COMPILER_SUPPORT_DOTPROD)
-            add_compile_definitions(__ARM_FEATURE_DOTPROD)
-        endif ()
-        check_cxx_source_compiles("#include <arm_neon.h>\nint main() { int8x16_t _a, _b; int32x4_t _s = vmlaq_f32(_s, _a, _b); return 0; }" GGML_COMPILER_SUPPORT_MATMUL_INT8)
-        if (GGML_COMPILER_SUPPORT_MATMUL_INT8)
-            add_compile_definitions(__ARM_FEATURE_MATMUL_INT8)
-        endif ()
-
-        check_cxx_source_compiles("#include <arm_neon.h>\nint main() { float16_t _a; float16x8_t _s = vdupq_n_f16(_a); return 0; }" GGML_COMPILER_SUPPORT_FP16_VECTOR_ARITHMETIC)
-        if (GGML_COMPILER_SUPPORT_FP16_VECTOR_ARITHMETIC)
-            add_compile_definitions(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
-        endif ()
-        set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_PREV})
-    else()
-        check_cxx_compiler_flag(-mfp16-format=ieee COMPILER_SUPPORTS_FP16_FORMAT_I3E)
-        if (NOT "${COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "")
-            list(APPEND ARCH_FLAGS -mfp16-format=ieee)
-        endif()
-        if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv6")
-            # Raspberry Pi 1, Zero
-            list(APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access)
-        endif()
-        if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv7")
-            if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Android")
-                # Android armeabi-v7a
-                list(APPEND ARCH_FLAGS -mfpu=neon-vfpv4 -mno-unaligned-access -funsafe-math-optimizations)
-            else()
-                # Raspberry Pi 2
-                list(APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access -funsafe-math-optimizations)
-            endif()
-        endif()
-        if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv8")
-            # Android arm64-v8a
-            # Raspberry Pi 3, 4, Zero 2 (32-bit)
-            list(APPEND ARCH_FLAGS -mno-unaligned-access)
-        endif()
-        if (LLAMA_SVE)
-            list(APPEND ARCH_FLAGS -march=armv8.6-a+sve)
-        endif()
-    endif()
-elseif (CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" OR CMAKE_GENERATOR_PLATFORM_LWR MATCHES "^(x86_64|i686|amd64|x64|win32)$" OR
-        (NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND
-         CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|i686|AMD64)$"))
-    message(STATUS "x86 detected")
-    if (MSVC)
-        # instruction set detection for MSVC only
-        if (LLAMA_NATIVE)
-            include(cmake/FindSIMD.cmake)
-        endif ()
-        if (LLAMA_AVX512)
-            list(APPEND ARCH_FLAGS /arch:AVX512)
-            # MSVC has no compile-time flags enabling specific
-            # AVX512 extensions, neither it defines the
-            # macros corresponding to the extensions.
-            # Do it manually.
-            if (LLAMA_AVX512_VBMI)
-                add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512VBMI__>)
-                add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512VBMI__>)
-            endif()
-            if (LLAMA_AVX512_VNNI)
-                add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512VNNI__>)
-                add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512VNNI__>)
-            endif()
-            if (LLAMA_AVX512_BF16)
-                add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512BF16__>)
-                add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512BF16__>)
-            endif()
-        elseif (LLAMA_AVX2)
-            list(APPEND ARCH_FLAGS /arch:AVX2)
-        elseif (LLAMA_AVX)
-            list(APPEND ARCH_FLAGS /arch:AVX)
-        endif()
-    else()
-        if (LLAMA_NATIVE)
-            list(APPEND ARCH_FLAGS -march=native)
-        endif()
-        if (LLAMA_F16C)
-            list(APPEND ARCH_FLAGS -mf16c)
-        endif()
-        if (LLAMA_FMA)
-            list(APPEND ARCH_FLAGS -mfma)
-        endif()
-        if (LLAMA_AVX)
-            list(APPEND ARCH_FLAGS -mavx)
-        endif()
-        if (LLAMA_AVX2)
-            list(APPEND ARCH_FLAGS -mavx2)
-        endif()
-        if (LLAMA_AVX512)
-            list(APPEND ARCH_FLAGS -mavx512f)
-            list(APPEND ARCH_FLAGS -mavx512bw)
-        endif()
-        if (LLAMA_AVX512_VBMI)
-            list(APPEND ARCH_FLAGS -mavx512vbmi)
-        endif()
-        if (LLAMA_AVX512_VNNI)
-            list(APPEND ARCH_FLAGS -mavx512vnni)
-        endif()
-        if (LLAMA_AVX512_BF16)
-            list(APPEND ARCH_FLAGS -mavx512bf16)
-        endif()
-    endif()
-elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64")
-    message(STATUS "PowerPC detected")
-    if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64le")
-        list(APPEND ARCH_FLAGS -mcpu=powerpc64le)
-    else()
-        list(APPEND ARCH_FLAGS -mcpu=native -mtune=native)
-        #TODO: Add  targets for Power8/Power9 (Altivec/VSX) and Power10(MMA) and query for big endian systems (ppc64/le/be)
-    endif()
-elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "loongarch64")
-    message(STATUS "loongarch64 detected")
-
-    list(APPEND ARCH_FLAGS -march=loongarch64)
-    if (LLAMA_LASX)
-        list(APPEND ARCH_FLAGS -mlasx)
-    endif()
-    if (LLAMA_LSX)
-        list(APPEND ARCH_FLAGS -mlsx)
-    endif()
-
-else()
-    message(STATUS "Unknown architecture")
-endif()
-
-add_compile_options("$<$<COMPILE_LANGUAGE:CXX>:${ARCH_FLAGS}>")
-add_compile_options("$<$<COMPILE_LANGUAGE:C>:${ARCH_FLAGS}>")
-
-if (LLAMA_CUDA)
-    list(APPEND CUDA_CXX_FLAGS ${ARCH_FLAGS})
-    list(JOIN CUDA_CXX_FLAGS " " CUDA_CXX_FLAGS_JOINED)  # pass host compiler flags as a single argument
-    if (NOT CUDA_CXX_FLAGS_JOINED STREQUAL "")
-        list(APPEND CUDA_FLAGS -Xcompiler ${CUDA_CXX_FLAGS_JOINED})
-    endif()
-    add_compile_options("$<$<COMPILE_LANGUAGE:CUDA>:${CUDA_FLAGS}>")
-endif()
-
-if (MINGW)
-    # Target Windows 8 for PrefetchVirtualMemory
-    add_compile_definitions(_WIN32_WINNT=${LLAMA_WIN_VER})
-endif()
-
-#
-# POSIX conformance
-#
-
-# clock_gettime came in POSIX.1b (1993)
-# CLOCK_MONOTONIC came in POSIX.1-2001 / SUSv3 as optional
-# posix_memalign came in POSIX.1-2001 / SUSv3
-# M_PI is an XSI extension since POSIX.1-2001 / SUSv3, came in XPG1 (1985)
-add_compile_definitions(_XOPEN_SOURCE=600)
-
-# Somehow in OpenBSD whenever POSIX conformance is specified
-# some string functions rely on locale_t availability,
-# which was introduced in POSIX.1-2008, forcing us to go higher
-if (CMAKE_SYSTEM_NAME MATCHES "OpenBSD")
-    remove_definitions(-D_XOPEN_SOURCE=600)
-    add_compile_definitions(_XOPEN_SOURCE=700)
-endif()
-
-# Data types, macros and functions related to controlling CPU affinity and
-# some memory allocation are available on Linux through GNU extensions in libc
-if (CMAKE_SYSTEM_NAME MATCHES "Linux")
-    add_compile_definitions(_GNU_SOURCE)
-endif()
-
-# RLIMIT_MEMLOCK came in BSD, is not specified in POSIX.1,
-# and on macOS its availability depends on enabling Darwin extensions
-# similarly on DragonFly, enabling BSD extensions is necessary
-if (
-    CMAKE_SYSTEM_NAME MATCHES "Darwin" OR
-    CMAKE_SYSTEM_NAME MATCHES "iOS" OR
-    CMAKE_SYSTEM_NAME MATCHES "tvOS" OR
-    CMAKE_SYSTEM_NAME MATCHES "DragonFly"
-)
-    add_compile_definitions(_DARWIN_C_SOURCE)
-endif()
-
-# alloca is a non-standard interface that is not visible on BSDs when
-# POSIX conformance is specified, but not all of them provide a clean way
-# to enable it in such cases
-if (CMAKE_SYSTEM_NAME MATCHES "FreeBSD")
-    add_compile_definitions(__BSD_VISIBLE)
-endif()
-if (CMAKE_SYSTEM_NAME MATCHES "NetBSD")
-    add_compile_definitions(_NETBSD_SOURCE)
-endif()
-if (CMAKE_SYSTEM_NAME MATCHES "OpenBSD")
-    add_compile_definitions(_BSD_SOURCE)
-endif()
-
-#
-# libraries
-#
-
-# ggml
-
-add_library(ggml OBJECT
-            ggml.c
-            ggml.h
-            ggml-alloc.c
-            ggml-alloc.h
-            ggml-backend.c
-            ggml-backend.h
-            ggml-quants.c
-            ggml-quants.h
-            ${GGML_SOURCES_CUDA}      ${GGML_HEADERS_CUDA}
-            ${GGML_SOURCES_METAL}     ${GGML_HEADERS_METAL}
-            ${GGML_SOURCES_RPC}       ${GGML_HEADERS_RPC}
-            ${GGML_SOURCES_EXTRA}     ${GGML_HEADERS_EXTRA}
-            ${GGML_SOURCES_SYCL}      ${GGML_HEADERS_SYCL}
-            ${GGML_SOURCES_KOMPUTE}   ${GGML_HEADERS_KOMPUTE}
-            ${GGML_SOURCES_VULKAN}    ${GGML_HEADERS_VULKAN}
-            ${GGML_SOURCES_ROCM}      ${GGML_HEADERS_ROCM}
-            ${GGML_SOURCES_BLAS}      ${GGML_HEADERS_BLAS}
-            ${GGML_SOURCES_LLAMAFILE} ${GGML_HEADERS_LLAMAFILE}
-            )
-
-target_include_directories(ggml PUBLIC . ${LLAMA_EXTRA_INCLUDES})
-target_compile_features   (ggml PUBLIC c_std_11) # don't bump
-
-target_link_libraries(ggml PUBLIC Threads::Threads ${LLAMA_EXTRA_LIBS})
-
-add_library(ggml_static STATIC $<TARGET_OBJECTS:ggml>)
-
-if (BUILD_SHARED_LIBS)
-    set_target_properties(ggml PROPERTIES POSITION_INDEPENDENT_CODE ON)
-    add_library(ggml_shared SHARED $<TARGET_OBJECTS:ggml>)
-    target_link_libraries(ggml_shared PUBLIC Threads::Threads ${LLAMA_EXTRA_LIBS})
-    install(TARGETS ggml_shared LIBRARY)
-endif()
-
-# llama
-
-add_library(llama
-            llama.cpp
-            llama.h
-            unicode.h
-            unicode.cpp
-            unicode-data.cpp
-            )
-
-target_include_directories(llama PUBLIC .)
-target_compile_features   (llama PUBLIC cxx_std_11) # don't bump
-
-target_link_libraries(llama PRIVATE
-    ggml
-    ${LLAMA_EXTRA_LIBS}
-    )
-
-if (BUILD_SHARED_LIBS)
-    set_target_properties(llama PROPERTIES POSITION_INDEPENDENT_CODE ON)
-    target_compile_definitions(llama PRIVATE LLAMA_SHARED LLAMA_BUILD)
-    if (LLAMA_METAL)
-        set_target_properties(llama PROPERTIES RESOURCE "${CMAKE_CURRENT_SOURCE_DIR}/ggml-metal.metal")
-    endif()
-endif()
-
-
-#
-# install
-#
-
-include(GNUInstallDirs)
-include(CMakePackageConfigHelpers)
-
-set(LLAMA_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR}
-    CACHE PATH "Location of header files")
-set(LLAMA_LIB_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR}
-    CACHE PATH "Location of library files")
-set(LLAMA_BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR}
-    CACHE PATH "Location of binary files")
-set(LLAMA_BUILD_NUMBER ${BUILD_NUMBER})
-set(LLAMA_BUILD_COMMIT ${BUILD_COMMIT})
-set(LLAMA_INSTALL_VERSION 0.0.${BUILD_NUMBER})
-get_directory_property(LLAMA_TRANSIENT_DEFINES COMPILE_DEFINITIONS)
-
-configure_package_config_file(
-        ${CMAKE_CURRENT_SOURCE_DIR}/scripts/LlamaConfig.cmake.in
-        ${CMAKE_CURRENT_BINARY_DIR}/LlamaConfig.cmake
-    INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/Llama
-    PATH_VARS LLAMA_INCLUDE_INSTALL_DIR
-              LLAMA_LIB_INSTALL_DIR
-              LLAMA_BIN_INSTALL_DIR )
-
-write_basic_package_version_file(
-        ${CMAKE_CURRENT_BINARY_DIR}/LlamaConfigVersion.cmake
-    VERSION ${LLAMA_INSTALL_VERSION}
-    COMPATIBILITY SameMajorVersion)
-
-install(FILES ${CMAKE_CURRENT_BINARY_DIR}/LlamaConfig.cmake
-              ${CMAKE_CURRENT_BINARY_DIR}/LlamaConfigVersion.cmake
-        DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/Llama)
-
-set(GGML_PUBLIC_HEADERS "ggml.h" "ggml-alloc.h" "ggml-backend.h"
-        "${GGML_HEADERS_CUDA}"
-        "${GGML_HEADERS_METAL}"
-        "${GGML_HEADERS_EXTRA}")
-
-set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}")
-install(TARGETS ggml PUBLIC_HEADER)
-
-set_target_properties(llama PROPERTIES PUBLIC_HEADER ${CMAKE_CURRENT_SOURCE_DIR}/llama.h)
-install(TARGETS llama LIBRARY PUBLIC_HEADER)
-
-install(
-    FILES convert-hf-to-gguf.py
-    PERMISSIONS
-        OWNER_READ
-        OWNER_WRITE
-        OWNER_EXECUTE
-        GROUP_READ
-        GROUP_EXECUTE
-        WORLD_READ
-        WORLD_EXECUTE
-    DESTINATION ${CMAKE_INSTALL_BINDIR})
-if (LLAMA_METAL)
-    install(
-        FILES ggml-metal.metal
-        PERMISSIONS
-            OWNER_READ
-            OWNER_WRITE
-            GROUP_READ
-            WORLD_READ
-        DESTINATION ${CMAKE_INSTALL_BINDIR})
-    if (NOT LLAMA_METAL_EMBED_LIBRARY)
-        install(
-            FILES ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib
-            DESTINATION ${CMAKE_INSTALL_BINDIR}
-        )
-    endif()
-endif()
-
-configure_file(cmake/llama.pc.in
-        "${CMAKE_CURRENT_BINARY_DIR}/llama.pc"
-        @ONLY)
-
-install(FILES "${CMAKE_CURRENT_BINARY_DIR}/llama.pc"
-        DESTINATION lib/pkgconfig)
-
-#
-# programs, examples and tests
-#
-
-add_subdirectory(common)
-
-if (LLAMA_BUILD_TESTS AND NOT CMAKE_JS_VERSION)
-    include(CTest)
-    add_subdirectory(tests)
-endif ()
-
-if (LLAMA_BUILD_EXAMPLES)
-    add_subdirectory(examples)
-    add_subdirectory(pocs)
-endif()
--- a/ollama/llm/llama.cpp/CMakePresets.json
+++ b/ollama/llm/llama.cpp/CMakePresets.json
-{
-  "version": 4,
-  "configurePresets": [
-    {
-        "name":  "base",
-        "hidden": true,
-        "generator":   "Ninja",
-        "binaryDir":   "${sourceDir}/build-${presetName}",
-        "cacheVariables": {
-            "CMAKE_EXPORT_COMPILE_COMMANDS": "ON",
-            "CMAKE_INSTALL_RPATH": "$ORIGIN;$ORIGIN/.."
-        }
-    },
-
-    { "name": "debug",   "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "Debug" } },
-    { "name": "release", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "RelWithDebInfo" } },
-    { "name": "static",  "hidden": true, "cacheVariables": { "LLAMA_STATIC": "ON" } },
-
-    {
-        "name": "arm64-windows-msvc", "hidden": true,
-        "architecture": { "value": "arm64",       "strategy": "external" },
-        "toolset":      { "value": "host=x86_64", "strategy": "external" },
-        "cacheVariables": {
-            "CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/arm64-windows-msvc.cmake"
-        }
-    },
-
-    {
-        "name": "arm64-windows-llvm", "hidden": true,
-        "architecture": { "value": "arm64",       "strategy": "external" },
-        "toolset":      { "value": "host=x86_64", "strategy": "external" },
-        "cacheVariables": {
-            "CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/arm64-windows-llvm.cmake"
-        }
-    },
-
-    { "name": "arm64-windows-llvm-debug"  , "inherits": [ "base", "arm64-windows-llvm",  "debug"   ] },
-    { "name": "arm64-windows-llvm-release", "inherits": [ "base", "arm64-windows-llvm",  "release" ] },
-    { "name": "arm64-windows-llvm+static-release", "inherits": [ "base", "arm64-windows-llvm",  "release", "static" ] },
-
-    { "name": "arm64-windows-msvc-debug"  , "inherits": [ "base", "arm64-windows-msvc",  "debug"   ] },
-    { "name": "arm64-windows-msvc-release", "inherits": [ "base", "arm64-windows-msvc",  "release" ] },
-    { "name": "arm64-windows-msvc+static-release", "inherits": [ "base", "arm64-windows-msvc",  "release", "static" ] },
-
-    { "name": "x64-windows-msvc-debug"  , "inherits": [ "base", "debug"   ] },
-    { "name": "x64-windows-msvc-release", "inherits": [ "base", "release" ] },
-    { "name": "x64-windows-msvc+static-release", "inherits": [ "base", "release", "static" ] }
-  ]
-}
--- a/ollama/llm/llama.cpp/CONTRIBUTING.md
+++ b/ollama/llm/llama.cpp/CONTRIBUTING.md
-# Contributing Guidelines
-
-## Checklist
-
-* Make sure your PR follows the [coding guidelines](https://github.com/ggerganov/llama.cpp/blob/master/README.md#coding-guidelines)
-* Test your changes using the commands in the [`tests`](tests) folder. For instance, running the `./tests/test-backend-ops` command tests different backend implementations of the GGML library
-* Execute [the full CI locally on your machine](ci/README.md) before publishing
-
-## PR formatting
-
-* Please rate the complexity of your PR (i.e. `Review Complexity : Low`, `Review Complexity : Medium`, `Review Complexity : High`). This makes it easier for maintainers to triage the PRs.
-    - The PR template has a series of review complexity checkboxes `[ ]` that you can mark as `[X]` for your conveience. Refer to [About task lists](https://docs.github.com/en/get-started/writing-on-github/working-with-advanced-formatting/about-task-lists) for more information.
-* If the pull request only contains documentation changes (e.g., updating READMEs, adding new wiki pages), please add `[no ci]` to the commit title. This will skip unnecessary CI checks and help reduce build times.
-* When squashing multiple commits on merge, use the following format for your commit title: `<module> : <commit title> (#<issue_number>)`. For example: `utils : Fix typo in utils.py (#1234)`
--- a/ollama/llm/llama.cpp/LICENSE
+++ b/ollama/llm/llama.cpp/LICENSE
-MIT License
-
-Copyright (c) 2023-2024 The ggml authors
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
--- a/ollama/llm/llama.cpp/Makefile
+++ b/ollama/llm/llama.cpp/Makefile
-# Define the default target now so that it is always the first target
-BUILD_TARGETS = \
-	libllava.a \
-	llama-baby-llama \
-	llama-batched \
-	llama-batched-bench \
-	llama-bench \
-	llama-benchmark-matmult \
-	llama-cli \
-	llama-convert-llama2c-to-ggml \
-	llama-embedding \
-	llama-eval-callback \
-	llama-export-lora \
-	llama-finetune \
-	llama-gbnf-validator \
-	llama-gguf \
-	llama-gguf-split \
-	llama-gritlm \
-	llama-imatrix \
-	llama-infill \
-	llama-llava-cli \
-	llama-lookahead \
-	llama-lookup \
-	llama-lookup-create \
-	llama-lookup-merge \
-	llama-lookup-stats \
-	llama-parallel \
-	llama-passkey \
-	llama-perplexity \
-	llama-q8dot \
-	llama-quantize \
-	llama-quantize-stats \
-	llama-retrieval \
-	llama-save-load-state \
-	llama-server \
-	llama-simple \
-	llama-speculative \
-	llama-tokenize \
-	llama-train-text-from-scratch \
-	llama-vdot \
-	llama-cvector-generator \
-	tests/test-c.o
-
-# Binaries only useful for tests
-TEST_TARGETS = \
-	tests/test-autorelease \
-	tests/test-backend-ops \
-	tests/test-double-float \
-	tests/test-grad0 \
-	tests/test-grammar-integration \
-	tests/test-grammar-parser \
-	tests/test-json-schema-to-grammar \
-	tests/test-llama-grammar \
-	tests/test-model-load-cancel \
-	tests/test-opt \
-	tests/test-quantize-fns \
-	tests/test-quantize-perf \
-	tests/test-rope \
-	tests/test-sampling \
-	tests/test-tokenizer-0 \
-	tests/test-tokenizer-1-bpe \
-	tests/test-tokenizer-1-spm
-
-# Code coverage output files
-COV_TARGETS = *.gcno tests/*.gcno *.gcda tests/*.gcda *.gcov tests/*.gcov lcov-report gcovr-report
-
-ifndef UNAME_S
-UNAME_S := $(shell uname -s)
-endif
-
-ifndef UNAME_P
-UNAME_P := $(shell uname -p)
-endif
-
-ifndef UNAME_M
-UNAME_M := $(shell uname -m)
-endif
-
-# In GNU make default CXX is g++ instead of c++.  Let's fix that so that users
-# of non-gcc compilers don't have to provide g++ alias or wrapper.
-DEFCC  := cc
-DEFCXX := c++
-ifeq ($(origin CC),default)
-CC  := $(DEFCC)
-endif
-ifeq ($(origin CXX),default)
-CXX := $(DEFCXX)
-endif
-
-# Mac OS + Arm can report x86_64
-# ref: https://github.com/ggerganov/whisper.cpp/issues/66#issuecomment-1282546789
-ifeq ($(UNAME_S),Darwin)
-	ifndef LLAMA_NO_METAL
-		LLAMA_METAL := 1
-	endif
-
-	LLAMA_NO_OPENMP := 1
-
-	ifneq ($(UNAME_P),arm)
-		SYSCTL_M := $(shell sysctl -n hw.optional.arm64 2>/dev/null)
-		ifeq ($(SYSCTL_M),1)
-			# UNAME_P := arm
-			# UNAME_M := arm64
-			warn := $(warning Your arch is announced as x86_64, but it seems to actually be ARM64. Not fixing that can lead to bad performance. For more info see: https://github.com/ggerganov/whisper.cpp/issues/66\#issuecomment-1282546789)
-		endif
-	endif
-endif
-
-ifdef LLAMA_RPC
-	BUILD_TARGETS += rpc-server
-endif
-
-default: $(BUILD_TARGETS)
-
-test: $(TEST_TARGETS)
-	@failures=0; \
-	for test_target in $(TEST_TARGETS); do \
-		if [ "$$test_target" = "tests/test-tokenizer-0" ]; then \
-			./$$test_target $(CURDIR)/models/ggml-vocab-llama-spm.gguf; \
-			./$$test_target $(CURDIR)/models/ggml-vocab-llama-bpe.gguf; \
-			./$$test_target $(CURDIR)/models/ggml-vocab-phi-3.gguf; \
-			./$$test_target $(CURDIR)/models/ggml-vocab-falcon.gguf; \
-			./$$test_target $(CURDIR)/models/ggml-vocab-bert-bge.gguf; \
-			./$$test_target $(CURDIR)/models/ggml-vocab-starcoder.gguf; \
-			./$$test_target $(CURDIR)/models/ggml-vocab-gpt-2.gguf; \
-			./$$test_target $(CURDIR)/models/ggml-vocab-refact.gguf; \
-		elif [ "$$test_target" = "tests/test-tokenizer-1-spm" ]; then \
-			continue; \
-		elif [ "$$test_target" = "tests/test-tokenizer-1-bpe" ]; then \
-			continue; \
-		else \
-			echo "Running test $$test_target..."; \
-			./$$test_target; \
-		fi; \
-		if [ $$? -ne 0 ]; then \
-			printf 'Test %s FAILED!\n\n' $$test_target; \
-			failures=$$(( failures + 1 )); \
-		else \
-			printf 'Test %s passed.\n\n' $$test_target; \
-		fi; \
-	done; \
-	if [ $$failures -gt 0 ]; then \
-		printf '\n%s tests failed.\n' $$failures; \
-		exit 1; \
-	fi
-	@echo 'All tests passed.'
-
-all: $(BUILD_TARGETS) $(TEST_TARGETS)
-
-coverage: ## Run code coverage
-	gcov -pb tests/*.cpp
-
-lcov-report: coverage ## Generate lcov report
-	mkdir -p lcov-report
-	lcov --capture --directory . --output-file lcov-report/coverage.info
-	genhtml lcov-report/coverage.info --output-directory lcov-report
-
-gcovr-report: coverage ## Generate gcovr report
-	mkdir -p gcovr-report
-	gcovr --root . --html --html-details --output gcovr-report/coverage.html
-
-ifdef RISCV_CROSS_COMPILE
-CC	:= riscv64-unknown-linux-gnu-gcc
-CXX	:= riscv64-unknown-linux-gnu-g++
-endif
-
-#
-# Compile flags
-#
-
-# keep standard at C11 and C++11
-MK_CPPFLAGS  = -I. -Icommon
-MK_CFLAGS    = -std=c11   -fPIC
-MK_CXXFLAGS  = -std=c++11 -fPIC
-MK_NVCCFLAGS = -std=c++11
-
-# -Ofast tends to produce faster code, but may not be available for some compilers.
-ifdef LLAMA_FAST
-MK_CFLAGS     += -Ofast
-HOST_CXXFLAGS += -Ofast
-ifndef LLAMA_DEBUG
-MK_NVCCFLAGS  += -O3
-endif # LLAMA_DEBUG
-else
-MK_CFLAGS     += -O3
-MK_CXXFLAGS   += -O3
-ifndef LLAMA_DEBUG
-MK_NVCCFLAGS  += -O3
-endif # LLAMA_DEBUG
-endif # LLAMA_FAST
-
-ifndef LLAMA_NO_CCACHE
-CCACHE := $(shell which ccache)
-ifdef CCACHE
-export CCACHE_SLOPPINESS = time_macros
-$(info I ccache found, compilation results will be cached. Disable with LLAMA_NO_CCACHE.)
-CC    := $(CCACHE) $(CC)
-CXX   := $(CCACHE) $(CXX)
-else
-$(info I ccache not found. Consider installing it for faster compilation.)
-endif # CCACHE
-endif # LLAMA_NO_CCACHE
-
-# clock_gettime came in POSIX.1b (1993)
-# CLOCK_MONOTONIC came in POSIX.1-2001 / SUSv3 as optional
-# posix_memalign came in POSIX.1-2001 / SUSv3
-# M_PI is an XSI extension since POSIX.1-2001 / SUSv3, came in XPG1 (1985)
-MK_CPPFLAGS += -D_XOPEN_SOURCE=600
-
-# Somehow in OpenBSD whenever POSIX conformance is specified
-# some string functions rely on locale_t availability,
-# which was introduced in POSIX.1-2008, forcing us to go higher
-ifeq ($(UNAME_S),OpenBSD)
-	MK_CPPFLAGS += -U_XOPEN_SOURCE -D_XOPEN_SOURCE=700
-endif
-
-# Data types, macros and functions related to controlling CPU affinity and
-# some memory allocation are available on Linux through GNU extensions in libc
-ifeq ($(UNAME_S),Linux)
-	MK_CPPFLAGS += -D_GNU_SOURCE
-endif
-
-# RLIMIT_MEMLOCK came in BSD, is not specified in POSIX.1,
-# and on macOS its availability depends on enabling Darwin extensions
-# similarly on DragonFly, enabling BSD extensions is necessary
-ifeq ($(UNAME_S),Darwin)
-	MK_CPPFLAGS += -D_DARWIN_C_SOURCE
-endif
-ifeq ($(UNAME_S),DragonFly)
-	MK_CPPFLAGS += -D__BSD_VISIBLE
-endif
-
-# alloca is a non-standard interface that is not visible on BSDs when
-# POSIX conformance is specified, but not all of them provide a clean way
-# to enable it in such cases
-ifeq ($(UNAME_S),FreeBSD)
-	MK_CPPFLAGS += -D__BSD_VISIBLE
-endif
-ifeq ($(UNAME_S),NetBSD)
-	MK_CPPFLAGS += -D_NETBSD_SOURCE
-endif
-ifeq ($(UNAME_S),OpenBSD)
-	MK_CPPFLAGS += -D_BSD_SOURCE
-endif
-
-ifdef LLAMA_SCHED_MAX_COPIES
-	MK_CPPFLAGS += -DGGML_SCHED_MAX_COPIES=$(LLAMA_SCHED_MAX_COPIES)
-endif
-
-ifdef LLAMA_DEBUG
-	MK_CFLAGS    += -O0 -g
-	MK_CXXFLAGS  += -O0 -g
-	MK_LDFLAGS   += -g
-	MK_NVCCFLAGS += -O0 -g
-
-	ifeq ($(UNAME_S),Linux)
-		MK_CPPFLAGS += -D_GLIBCXX_ASSERTIONS
-	endif
-else
-	MK_CPPFLAGS += -DNDEBUG
-endif
-
-ifdef LLAMA_SANITIZE_THREAD
-	MK_CFLAGS   += -fsanitize=thread -g
-	MK_CXXFLAGS += -fsanitize=thread -g
-	MK_LDFLAGS  += -fsanitize=thread -g
-endif
-
-ifdef LLAMA_SANITIZE_ADDRESS
-	MK_CFLAGS   += -fsanitize=address -fno-omit-frame-pointer -g
-	MK_CXXFLAGS += -fsanitize=address -fno-omit-frame-pointer -g
-	MK_LDFLAGS  += -fsanitize=address -fno-omit-frame-pointer -g
-endif
-
-ifdef LLAMA_SANITIZE_UNDEFINED
-	MK_CFLAGS   += -fsanitize=undefined -g
-	MK_CXXFLAGS += -fsanitize=undefined -g
-	MK_LDFLAGS  += -fsanitize=undefined -g
-endif
-
-ifdef LLAMA_SERVER_VERBOSE
-	MK_CPPFLAGS += -DSERVER_VERBOSE=$(LLAMA_SERVER_VERBOSE)
-endif
-
-ifdef LLAMA_SERVER_SSL
-	MK_CPPFLAGS += -DCPPHTTPLIB_OPENSSL_SUPPORT
-	MK_LDFLAGS += -lssl -lcrypto
-endif
-
-ifdef LLAMA_CODE_COVERAGE
-	MK_CXXFLAGS += -fprofile-arcs -ftest-coverage -dumpbase ''
-endif
-
-ifdef LLAMA_DISABLE_LOGS
-	MK_CPPFLAGS += -DLOG_DISABLE_LOGS
-endif # LLAMA_DISABLE_LOGS
-
-# warnings
-WARN_FLAGS    = -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function
-MK_CFLAGS    += $(WARN_FLAGS) -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int \
-				-Werror=implicit-function-declaration
-MK_CXXFLAGS  += $(WARN_FLAGS) -Wmissing-declarations -Wmissing-noreturn
-
-ifeq ($(LLAMA_FATAL_WARNINGS),1)
-	MK_CFLAGS   += -Werror
-	MK_CXXFLAGS += -Werror
-endif
-
-# this version of Apple ld64 is buggy
-ifneq '' '$(findstring dyld-1015.7,$(shell $(CC) $(LDFLAGS) -Wl,-v 2>&1))'
-	MK_CPPFLAGS += -DHAVE_BUGGY_APPLE_LINKER
-endif
-
-# OS specific
-# TODO: support Windows
-ifneq '' '$(filter $(UNAME_S),Linux Darwin FreeBSD NetBSD OpenBSD Haiku)'
-	MK_CFLAGS   += -pthread
-	MK_CXXFLAGS += -pthread
-endif
-
-# detect Windows
-ifneq ($(findstring _NT,$(UNAME_S)),)
-	_WIN32 := 1
-endif
-
-# library name prefix
-ifneq ($(_WIN32),1)
-	LIB_PRE := lib
-endif
-
-# Dynamic Shared Object extension
-ifneq ($(_WIN32),1)
-	DSO_EXT := .so
-else
-	DSO_EXT := .dll
-endif
-
-# Windows Sockets 2 (Winsock) for network-capable apps
-ifeq ($(_WIN32),1)
-	LWINSOCK2 := -lws2_32
-endif
-
-ifdef LLAMA_GPROF
-	MK_CFLAGS   += -pg
-	MK_CXXFLAGS += -pg
-endif
-ifdef LLAMA_PERF
-	MK_CPPFLAGS += -DGGML_PERF
-endif
-
-# Architecture specific
-# TODO: probably these flags need to be tweaked on some architectures
-#       feel free to update the Makefile for your architecture and send a pull request or issue
-
-ifndef RISCV
-
-ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686 amd64))
-	# Use all CPU extensions that are available:
-	MK_CFLAGS     += -march=native -mtune=native
-	HOST_CXXFLAGS += -march=native -mtune=native
-
-	# Usage AVX-only
-	#MK_CFLAGS   += -mfma -mf16c -mavx
-	#MK_CXXFLAGS += -mfma -mf16c -mavx
-
-	# Usage SSSE3-only (Not is SSE3!)
-	#MK_CFLAGS   += -mssse3
-	#MK_CXXFLAGS += -mssse3
-endif
-
-ifneq '' '$(findstring mingw,$(shell $(CC) -dumpmachine))'
-	# The stack is only 16-byte aligned on Windows, so don't let gcc emit aligned moves.
-	# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=54412
-	# https://github.com/ggerganov/llama.cpp/issues/2922
-	MK_CFLAGS   += -Xassembler -muse-unaligned-vector-move
-	MK_CXXFLAGS += -Xassembler -muse-unaligned-vector-move
-
-	# Target Windows 8 for PrefetchVirtualMemory
-	MK_CPPFLAGS += -D_WIN32_WINNT=0x602
-endif
-
-ifneq ($(filter aarch64%,$(UNAME_M)),)
-	# Apple M1, M2, etc.
-	# Raspberry Pi 3, 4, Zero 2 (64-bit)
-	# Nvidia Jetson
-	MK_CFLAGS   += -mcpu=native
-	MK_CXXFLAGS += -mcpu=native
-	JETSON_RELEASE_INFO = $(shell jetson_release)
-	ifdef JETSON_RELEASE_INFO
-		ifneq ($(filter TX2%,$(JETSON_RELEASE_INFO)),)
-			JETSON_EOL_MODULE_DETECT = 1
-			CC = aarch64-unknown-linux-gnu-gcc
-			cxx = aarch64-unknown-linux-gnu-g++
-		endif
-	endif
-endif
-
-ifneq ($(filter armv6%,$(UNAME_M)),)
-	# Raspberry Pi 1, Zero
-	MK_CFLAGS   += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access
-	MK_CXXFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access
-endif
-
-ifneq ($(filter armv7%,$(UNAME_M)),)
-	# Raspberry Pi 2
-	MK_CFLAGS   += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
-	MK_CXXFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
-endif
-
-ifneq ($(filter armv8%,$(UNAME_M)),)
-	# Raspberry Pi 3, 4, Zero 2 (32-bit)
-	MK_CFLAGS   += -mfp16-format=ieee -mno-unaligned-access
-	MK_CXXFLAGS += -mfp16-format=ieee -mno-unaligned-access
-endif
-
-ifneq ($(filter ppc64%,$(UNAME_M)),)
-	POWER9_M := $(shell grep "POWER9" /proc/cpuinfo)
-	ifneq (,$(findstring POWER9,$(POWER9_M)))
-		MK_CFLAGS   += -mcpu=power9
-		MK_CXXFLAGS += -mcpu=power9
-	endif
-endif
-
-ifneq ($(filter ppc64le%,$(UNAME_M)),)
-	MK_CFLAGS   += -mcpu=powerpc64le
-	MK_CXXFLAGS += -mcpu=powerpc64le
-	CUDA_POWER_ARCH = 1
-endif
-
-ifneq ($(filter loongarch64%,$(UNAME_M)),)
-	MK_CFLAGS   += -mlasx
-	MK_CXXFLAGS += -mlasx
-endif
-
-else
-	MK_CFLAGS   += -march=rv64gcv -mabi=lp64d
-	MK_CXXFLAGS += -march=rv64gcv -mabi=lp64d
-endif
-
-ifndef LLAMA_NO_ACCELERATE
-	# Mac OS - include Accelerate framework.
-	# `-framework Accelerate` works both with Apple Silicon and Mac Intel
-	ifeq ($(UNAME_S),Darwin)
-		MK_CPPFLAGS += -DGGML_USE_ACCELERATE -DGGML_USE_BLAS
-		MK_CPPFLAGS += -DACCELERATE_NEW_LAPACK
-		MK_CPPFLAGS += -DACCELERATE_LAPACK_ILP64
-		MK_LDFLAGS  += -framework Accelerate
-		OBJS        += ggml-blas.o
-	endif
-endif # LLAMA_NO_ACCELERATE
-
-ifndef LLAMA_NO_OPENMP
-	MK_CPPFLAGS += -DGGML_USE_OPENMP
-	MK_CFLAGS   += -fopenmp
-	MK_CXXFLAGS += -fopenmp
-endif # LLAMA_NO_OPENMP
-
-ifdef LLAMA_OPENBLAS
-	MK_CPPFLAGS += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas)
-	MK_CFLAGS   += $(shell pkg-config --cflags-only-other openblas)
-	MK_LDFLAGS  += $(shell pkg-config --libs openblas)
-	OBJS        += ggml-blas.o
-endif # LLAMA_OPENBLAS
-
-ifdef LLAMA_OPENBLAS64
-	MK_CPPFLAGS += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas64)
-	MK_CFLAGS   += $(shell pkg-config --cflags-only-other openblas64)
-	MK_LDFLAGS  += $(shell pkg-config --libs openblas64)
-	OBJS        += ggml-blas.o
-endif # LLAMA_OPENBLAS64
-
-ifdef LLAMA_BLIS
-	MK_CPPFLAGS += -DGGML_USE_BLAS -I/usr/local/include/blis -I/usr/include/blis
-	MK_LDFLAGS  += -lblis -L/usr/local/lib
-	OBJS        += ggml-blas.o
-endif # LLAMA_BLIS
-
-ifndef LLAMA_NO_LLAMAFILE
-	MK_CPPFLAGS += -DGGML_USE_LLAMAFILE
-	OBJS        += sgemm.o
-endif
-
-ifdef LLAMA_RPC
-	MK_CPPFLAGS   += -DGGML_USE_RPC
-	OBJS          += ggml-rpc.o
-endif # LLAMA_RPC
-
-ifdef LLAMA_CUBLAS
-# LLAMA_CUBLAS is deprecated and will be removed in the future
-	LLAMA_CUDA := 1
-endif
-
-OBJS_CUDA_TEMP_INST      = $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/fattn-wmma*.cu))
-OBJS_CUDA_TEMP_INST     += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/mmq*.cu))
-ifdef LLAMA_CUDA_FA_ALL_QUANTS
-	OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/fattn-vec*.cu))
-else
-	OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu))
-	OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/fattn-vec*q8_0-q8_0.cu))
-	OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/fattn-vec*f16-f16.cu))
-endif # LLAMA_CUDA_FA_ALL_QUANTS
-
-ifdef LLAMA_CUDA
-	ifneq ('', '$(wildcard /opt/cuda)')
-		CUDA_PATH ?= /opt/cuda
-	else
-		CUDA_PATH ?= /usr/local/cuda
-	endif
-	MK_CPPFLAGS  += -DGGML_USE_CUDA -I$(CUDA_PATH)/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include -DGGML_CUDA_USE_GRAPHS
-	MK_LDFLAGS   += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L$(CUDA_PATH)/lib64 -L/usr/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L/usr/lib/wsl/lib
-	OBJS         += ggml-cuda.o
-	OBJS         += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/*.cu))
-	OBJS         += $(OBJS_CUDA_TEMP_INST)
-	MK_NVCCFLAGS += -use_fast_math
-ifdef LLAMA_FATAL_WARNINGS
-	MK_NVCCFLAGS += -Werror all-warnings
-endif # LLAMA_FATAL_WARNINGS
-ifndef JETSON_EOL_MODULE_DETECT
-	MK_NVCCFLAGS += --forward-unknown-to-host-compiler
-endif # JETSON_EOL_MODULE_DETECT
-ifdef LLAMA_DEBUG
-	MK_NVCCFLAGS += -lineinfo
-endif # LLAMA_DEBUG
-ifdef LLAMA_CUDA_DEBUG
-	MK_NVCCFLAGS += --device-debug
-endif # LLAMA_CUDA_DEBUG
-ifdef LLAMA_CUDA_NVCC
-	NVCC = $(CCACHE) $(LLAMA_CUDA_NVCC)
-else
-	NVCC = $(CCACHE) nvcc
-endif #LLAMA_CUDA_NVCC
-ifdef CUDA_DOCKER_ARCH
-	MK_NVCCFLAGS += -Wno-deprecated-gpu-targets -arch=$(CUDA_DOCKER_ARCH)
-else ifndef CUDA_POWER_ARCH
-	MK_NVCCFLAGS += -arch=native
-endif # CUDA_DOCKER_ARCH
-ifdef LLAMA_CUDA_FORCE_DMMV
-	MK_NVCCFLAGS += -DGGML_CUDA_FORCE_DMMV
-endif # LLAMA_CUDA_FORCE_DMMV
-ifdef LLAMA_CUDA_FORCE_MMQ
-	MK_NVCCFLAGS += -DGGML_CUDA_FORCE_MMQ
-endif # LLAMA_CUDA_FORCE_MMQ
-ifdef LLAMA_CUDA_DMMV_X
-	MK_NVCCFLAGS += -DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X)
-else
-	MK_NVCCFLAGS += -DGGML_CUDA_DMMV_X=32
-endif # LLAMA_CUDA_DMMV_X
-ifdef LLAMA_CUDA_MMV_Y
-	MK_NVCCFLAGS += -DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_MMV_Y)
-else ifdef LLAMA_CUDA_DMMV_Y
-	MK_NVCCFLAGS += -DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_DMMV_Y) # for backwards compatibility
-else
-	MK_NVCCFLAGS += -DGGML_CUDA_MMV_Y=1
-endif # LLAMA_CUDA_MMV_Y
-ifdef LLAMA_CUDA_F16
-	MK_NVCCFLAGS += -DGGML_CUDA_F16
-endif # LLAMA_CUDA_F16
-ifdef LLAMA_CUDA_DMMV_F16
-	MK_NVCCFLAGS += -DGGML_CUDA_F16
-endif # LLAMA_CUDA_DMMV_F16
-ifdef LLAMA_CUDA_KQUANTS_ITER
-	MK_NVCCFLAGS += -DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER)
-else
-	MK_NVCCFLAGS += -DK_QUANTS_PER_ITERATION=2
-endif
-ifdef LLAMA_CUDA_PEER_MAX_BATCH_SIZE
-	MK_NVCCFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=$(LLAMA_CUDA_PEER_MAX_BATCH_SIZE)
-else
-	MK_NVCCFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128
-endif # LLAMA_CUDA_PEER_MAX_BATCH_SIZE
-ifdef LLAMA_CUDA_NO_PEER_COPY
-	MK_NVCCFLAGS += -DGGML_CUDA_NO_PEER_COPY
-endif # LLAMA_CUDA_NO_PEER_COPY
-ifdef LLAMA_CUDA_CCBIN
-	MK_NVCCFLAGS += -ccbin $(LLAMA_CUDA_CCBIN)
-endif # LLAMA_CUDA_CCBIN
-ifdef LLAMA_CUDA_FA_ALL_QUANTS
-	MK_NVCCFLAGS += -DGGML_CUDA_FA_ALL_QUANTS
-endif # LLAMA_CUDA_FA_ALL_QUANTS
-
-ifdef JETSON_EOL_MODULE_DETECT
-define NVCC_COMPILE
-	$(NVCC) -I. -Icommon -D_XOPEN_SOURCE=600 -D_GNU_SOURCE -DNDEBUG -DGGML_USE_CUDA -I/usr/local/cuda/include -I/opt/cuda/include -I/usr/local/cuda/targets/aarch64-linux/include -std=c++11 -O3 $(NVCCFLAGS) $(CPPFLAGS) -Xcompiler "$(CUDA_CXXFLAGS)" -c $< -o $@
-endef # NVCC_COMPILE
-else
-define NVCC_COMPILE
-	$(NVCC) $(NVCCFLAGS) $(CPPFLAGS) -Xcompiler "$(CUDA_CXXFLAGS)" -c $< -o $@
-endef # NVCC_COMPILE
-endif # JETSON_EOL_MODULE_DETECT
-
-ggml-cuda/%.o: ggml-cuda/%.cu ggml.h ggml-common.h ggml-cuda/common.cuh
-	$(NVCC_COMPILE)
-
-ggml-cuda.o: ggml-cuda.cu ggml-cuda.h ggml.h ggml-backend.h ggml-backend-impl.h ggml-common.h $(wildcard ggml-cuda/*.cuh)
-	$(NVCC_COMPILE)
-endif # LLAMA_CUDA
-
-ifdef LLAMA_VULKAN
-	MK_CPPFLAGS  += -DGGML_USE_VULKAN
-	MK_LDFLAGS += -lvulkan
-	OBJS    += ggml-vulkan.o
-
-ifdef LLAMA_VULKAN_CHECK_RESULTS
-	MK_CPPFLAGS  += -DGGML_VULKAN_CHECK_RESULTS
-endif
-
-ifdef LLAMA_VULKAN_DEBUG
-	MK_CPPFLAGS  += -DGGML_VULKAN_DEBUG
-endif
-
-ifdef LLAMA_VULKAN_MEMORY_DEBUG
-	MK_CPPFLAGS  += -DGGML_VULKAN_MEMORY_DEBUG
-endif
-
-ifdef LLAMA_VULKAN_VALIDATE
-	MK_CPPFLAGS  += -DGGML_VULKAN_VALIDATE
-endif
-
-ifdef LLAMA_VULKAN_RUN_TESTS
-	MK_CPPFLAGS  += -DGGML_VULKAN_RUN_TESTS
-endif
-
-ggml-vulkan.o: ggml-vulkan.cpp ggml-vulkan.h
-	$(CXX) $(CXXFLAGS) -c $< -o $@
-endif # LLAMA_VULKAN
-
-ifdef LLAMA_HIPBLAS
-	ifeq ($(wildcard /opt/rocm),)
-		ROCM_PATH	?= /usr
-		AMDGPU_TARGETS ?= $(shell $(shell which amdgpu-arch))
-	else
-		ROCM_PATH	?= /opt/rocm
-		AMDGPU_TARGETS ?= $(shell $(ROCM_PATH)/llvm/bin/amdgpu-arch)
-	endif
-	HIPCC                   ?= $(CCACHE) $(ROCM_PATH)/bin/hipcc
-	LLAMA_CUDA_DMMV_X       ?= 32
-	LLAMA_CUDA_MMV_Y        ?= 1
-	LLAMA_CUDA_KQUANTS_ITER ?= 2
-	MK_CPPFLAGS += -DGGML_USE_HIPBLAS -DGGML_USE_CUDA
-ifdef LLAMA_HIP_UMA
-	MK_CPPFLAGS += -DGGML_HIP_UMA
-endif # LLAMA_HIP_UMA
-	MK_LDFLAGS  += -L$(ROCM_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib
-	MK_LDFLAGS  += -L$(ROCM_PATH)/lib64 -Wl,-rpath=$(ROCM_PATH)/lib64
-	MK_LDFLAGS	+= -lhipblas -lamdhip64 -lrocblas
-	HIPFLAGS    += $(addprefix --offload-arch=,$(AMDGPU_TARGETS))
-	HIPFLAGS    += -DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X)
-	HIPFLAGS    += -DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_MMV_Y)
-	HIPFLAGS    += -DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER)
-ifdef LLAMA_CUDA_FORCE_DMMV
-	HIPFLAGS 	+= -DGGML_CUDA_FORCE_DMMV
-endif # LLAMA_CUDA_FORCE_DMMV
-ifdef LLAMA_CUDA_NO_PEER_COPY
-	HIPFLAGS 	+= -DGGML_CUDA_NO_PEER_COPY
-endif # LLAMA_CUDA_NO_PEER_COPY
-	OBJS        += ggml-cuda.o
-	OBJS        += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/*.cu))
-	OBJS        += $(OBJS_CUDA_TEMP_INST)
-
-ggml-cuda.o: ggml-cuda.cu ggml-cuda.h ggml.h ggml-backend.h ggml-backend-impl.h ggml-common.h $(wildcard ggml-cuda/*.cuh)
-	$(HIPCC) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $<
-
-ggml-cuda/%.o: ggml-cuda/%.cu ggml.h ggml-common.h ggml-cuda/common.cuh
-	$(HIPCC) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $<
-
-endif # LLAMA_HIPBLAS
-
-ifdef LLAMA_METAL
-	MK_CPPFLAGS += -DGGML_USE_METAL
-	MK_LDFLAGS  += -framework Foundation -framework Metal -framework MetalKit
-	OBJS		+= ggml-metal.o
-ifdef LLAMA_METAL_NDEBUG
-	MK_CPPFLAGS += -DGGML_METAL_NDEBUG
-endif
-ifdef LLAMA_METAL_EMBED_LIBRARY
-	MK_CPPFLAGS += -DGGML_METAL_EMBED_LIBRARY
-	OBJS        += ggml-metal-embed.o
-endif
-endif # LLAMA_METAL
-
-ifdef LLAMA_METAL
-ggml-metal.o: ggml-metal.m ggml-metal.h ggml.h
-	$(CC) $(CFLAGS) -c $< -o $@
-
-ifdef LLAMA_METAL_EMBED_LIBRARY
-ggml-metal-embed.o: ggml-metal.metal ggml-common.h
-	@echo "Embedding Metal library"
-	@sed -e '/#include "ggml-common.h"/r ggml-common.h' -e '/#include "ggml-common.h"/d' < ggml-metal.metal > ggml-metal-embed.metal
-	$(eval TEMP_ASSEMBLY=$(shell mktemp))
-	@echo ".section __DATA, __ggml_metallib"   >  $(TEMP_ASSEMBLY)
-	@echo ".globl _ggml_metallib_start"        >> $(TEMP_ASSEMBLY)
-	@echo "_ggml_metallib_start:"              >> $(TEMP_ASSEMBLY)
-	@echo ".incbin \"ggml-metal-embed.metal\"" >> $(TEMP_ASSEMBLY)
-	@echo ".globl _ggml_metallib_end"          >> $(TEMP_ASSEMBLY)
-	@echo "_ggml_metallib_end:"                >> $(TEMP_ASSEMBLY)
-	@$(AS) $(TEMP_ASSEMBLY) -o $@
-	@rm -f ${TEMP_ASSEMBLY}
-endif
-endif # LLAMA_METAL
-
-OBJS += ggml-alloc.o ggml-backend.o ggml-quants.o unicode.o unicode-data.o
-COMMON_H_DEPS = common/common.h common/sampling.h common/log.h llama.h
-COMMON_DEPS   = common.o sampling.o grammar-parser.o build-info.o json-schema-to-grammar.o
-
-ifndef LLAMA_NO_LLAMAFILE
-sgemm.o: sgemm.cpp sgemm.h ggml.h
-	$(CXX) $(CXXFLAGS) -c $< -o $@
-endif
-
-ifdef LLAMA_RPC
-ggml-rpc.o: ggml-rpc.cpp ggml-rpc.h
-	$(CXX) $(CXXFLAGS) -c $< -o $@
-
-rpc-server.o: examples/rpc/rpc-server.cpp ggml-rpc.h
-	$(CXX) $(CXXFLAGS) -c $< -o $@
-
-rpc-server: rpc-server.o ggml.o llama.o $(COMMON_DEPS) $(OBJS)
-	$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
-endif # LLAMA_RPC
-
-GF_CC := $(CC)
-include scripts/get-flags.mk
-
-# combine build flags with cmdline overrides
-override CPPFLAGS  := $(MK_CPPFLAGS) $(CPPFLAGS)
-override CFLAGS    := $(CPPFLAGS) $(MK_CFLAGS) $(GF_CFLAGS) $(CFLAGS)
-BASE_CXXFLAGS      := $(MK_CXXFLAGS) $(CXXFLAGS)
-override CXXFLAGS  := $(BASE_CXXFLAGS) $(HOST_CXXFLAGS) $(GF_CXXFLAGS) $(CPPFLAGS)
-override NVCCFLAGS := $(MK_NVCCFLAGS) $(NVCCFLAGS)
-override LDFLAGS   := $(MK_LDFLAGS) $(LDFLAGS)
-
-# identify CUDA host compiler
-ifdef LLAMA_CUDA
-GF_CC := $(NVCC) $(NVCCFLAGS) 2>/dev/null .c -Xcompiler
-include scripts/get-flags.mk
-CUDA_CXXFLAGS := $(BASE_CXXFLAGS) $(GF_CXXFLAGS) -Wno-pedantic
-endif
-
-ifdef LLAMA_CURL
-override CXXFLAGS := $(CXXFLAGS) -DLLAMA_USE_CURL
-override LDFLAGS  := $(LDFLAGS) -lcurl
-endif
-
-#
-# Print build information
-#
-
-$(info I llama.cpp build info: )
-$(info I UNAME_S:   $(UNAME_S))
-$(info I UNAME_P:   $(UNAME_P))
-$(info I UNAME_M:   $(UNAME_M))
-$(info I CFLAGS:    $(CFLAGS))
-$(info I CXXFLAGS:  $(CXXFLAGS))
-$(info I NVCCFLAGS: $(NVCCFLAGS))
-$(info I LDFLAGS:   $(LDFLAGS))
-$(info I CC:        $(shell $(CC)   --version | head -n 1))
-$(info I CXX:       $(shell $(CXX)  --version | head -n 1))
-ifdef LLAMA_CUDA
-$(info I NVCC:      $(shell $(NVCC) --version | tail -n 1))
-CUDA_VERSION := $(shell $(NVCC) --version | grep -oP 'release (\K[0-9]+\.[0-9])')
-ifeq ($(shell awk -v "v=$(CUDA_VERSION)" 'BEGIN { print (v < 11.7) }'),1)
-ifndef CUDA_DOCKER_ARCH
-ifndef CUDA_POWER_ARCH
-$(error I ERROR: For CUDA versions < 11.7 a target CUDA architecture must be explicitly provided via environment variable CUDA_DOCKER_ARCH, e.g. by running "export CUDA_DOCKER_ARCH=compute_XX" on Unix-like systems, where XX is the minimum compute capability that the code needs to run on. A list with compute capabilities can be found here: https://developer.nvidia.com/cuda-gpus )
-endif # CUDA_POWER_ARCH
-endif # CUDA_DOCKER_ARCH
-endif # eq ($(shell echo "$(CUDA_VERSION) < 11.7" | bc),1)
-endif # LLAMA_CUDA
-$(info )
-
-ifdef LLAMA_CUBLAS
-$(info !!!!)
-$(info LLAMA_CUBLAS is deprecated and will be removed in the future. Use LLAMA_CUDA instead.)
-$(info !!!!)
-$(info )
-endif
-
-#
-# Build library
-#
-
-ggml.o: ggml.c ggml.h ggml-cuda.h
-	$(CC)  $(CFLAGS)   -c $< -o $@
-
-ggml-alloc.o: ggml-alloc.c ggml.h ggml-alloc.h
-	$(CC)  $(CFLAGS)   -c $< -o $@
-
-ggml-backend.o: ggml-backend.c ggml.h ggml-backend.h
-	$(CC)  $(CFLAGS)   -c $< -o $@
-
-ggml-quants.o: ggml-quants.c ggml.h ggml-quants.h ggml-common.h
-	$(CC) $(CFLAGS)    -c $< -o $@
-
-ggml-blas.o: ggml-blas.cpp ggml-blas.h
-	$(CXX) $(CXXFLAGS) -c $< -o $@
-
-unicode.o: unicode.cpp unicode.h
-	$(CXX) $(CXXFLAGS) -c $< -o $@
-
-unicode-data.o: unicode-data.cpp unicode-data.h
-	$(CXX) $(CXXFLAGS) -c $< -o $@
-
-llama.o: llama.cpp unicode.h ggml.h ggml-alloc.h ggml-backend.h ggml-cuda.h ggml-metal.h llama.h
-	$(CXX) $(CXXFLAGS) -c $< -o $@
-
-common.o: common/common.cpp $(COMMON_H_DEPS)
-	$(CXX) $(CXXFLAGS) -c $< -o $@
-
-sampling.o: common/sampling.cpp $(COMMON_H_DEPS)
-	$(CXX) $(CXXFLAGS) -c $< -o $@
-
-console.o: common/console.cpp common/console.h
-	$(CXX) $(CXXFLAGS) -c $< -o $@
-
-grammar-parser.o: common/grammar-parser.cpp common/grammar-parser.h
-	$(CXX) $(CXXFLAGS) -c $< -o $@
-
-json-schema-to-grammar.o: common/json-schema-to-grammar.cpp common/json-schema-to-grammar.h
-	$(CXX) $(CXXFLAGS) -c $< -o $@
-
-train.o: common/train.cpp common/train.h
-	$(CXX) $(CXXFLAGS) -c $< -o $@
-
-ngram-cache.o: common/ngram-cache.cpp common/ngram-cache.h
-	$(CXX) $(CXXFLAGS) -c $< -o $@
-
-libllama.so: llama.o ggml.o $(OBJS)
-	$(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)
-
-libllama.a: llama.o ggml.o $(OBJS) $(COMMON_DEPS)
-	ar rcs libllama.a llama.o ggml.o $(OBJS) $(COMMON_DEPS)
-
-clean:
-	rm -vrf *.o tests/*.o *.so *.a *.dll common/build-info.cpp *.dot $(COV_TARGETS) $(BUILD_TARGETS) $(TEST_TARGETS)
-	rm -vrf ggml-cuda/*.o
-	rm -vrf ggml-cuda/template-instances/*.o
-	find examples pocs -type f -name "*.o" -delete
-
-#
-# Examples
-#
-
-# $< is the first prerequisite, i.e. the source file.
-# Explicitly compile this to an object file so that it can be cached with ccache.
-# The source file is then filtered out from $^ (the list of all prerequisites) and the object file is added instead.
-
-# Helper function that replaces .c, .cpp, and .cu file endings with .o:
-GET_OBJ_FILE = $(patsubst %.c,%.o,$(patsubst %.cpp,%.o,$(patsubst %.cu,%.o,$(1))))
-
-llama-cli: examples/main/main.cpp                                  ggml.o llama.o $(COMMON_DEPS) console.o grammar-parser.o $(OBJS)
-	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
-	@echo
-	@echo '====  Run ./llama-cli -h for help.  ===='
-	@echo
-
-llama-infill: examples/infill/infill.cpp                            ggml.o llama.o $(COMMON_DEPS) console.o grammar-parser.o $(OBJS)
-	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
-
-llama-simple: examples/simple/simple.cpp                            ggml.o llama.o $(COMMON_DEPS) $(OBJS)
-	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
-
-llama-tokenize: examples/tokenize/tokenize.cpp                      ggml.o llama.o $(COMMON_DEPS) $(OBJS)
-	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
-
-llama-batched: examples/batched/batched.cpp                         ggml.o llama.o $(COMMON_DEPS) $(OBJS)
-	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
-
-llama-batched-bench: examples/batched-bench/batched-bench.cpp       build-info.o ggml.o llama.o $(COMMON_DEPS) $(OBJS)
-	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
-
-llama-quantize: examples/quantize/quantize.cpp                      ggml.o llama.o $(COMMON_DEPS) $(OBJS)
-	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
-
-llama-quantize-stats: examples/quantize-stats/quantize-stats.cpp    build-info.o ggml.o llama.o $(OBJS)
-	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
-
-llama-perplexity: examples/perplexity/perplexity.cpp                ggml.o llama.o $(COMMON_DEPS) $(OBJS)
-	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
-
-llama-imatrix: examples/imatrix/imatrix.cpp                         ggml.o llama.o $(COMMON_DEPS) $(OBJS)
-	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
-
-llama-embedding: examples/embedding/embedding.cpp                   ggml.o llama.o $(COMMON_DEPS) $(OBJS)
-	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
-
-llama-gritlm: examples/gritlm/gritlm.cpp                         ggml.o llama.o $(COMMON_DEPS) $(OBJS)
-	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
-
-llama-save-load-state: examples/save-load-state/save-load-state.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
-	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
-
-llama-server: examples/server/server.cpp examples/server/utils.hpp examples/server/httplib.h common/json.hpp examples/server/colorthemes.css.hpp examples/server/style.css.hpp examples/server/theme-beeninorder.css.hpp examples/server/theme-ketivah.css.hpp examples/server/theme-mangotango.css.hpp examples/server/theme-playground.css.hpp examples/server/theme-polarnight.css.hpp examples/server/theme-snowstorm.css.hpp examples/server/index.html.hpp examples/server/index-new.html.hpp examples/server/index.js.hpp examples/server/completion.js.hpp examples/server/system-prompts.js.hpp examples/server/prompt-formats.js.hpp examples/server/json-schema-to-grammar.mjs.hpp common/stb_image.h ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
-	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h %.hpp $<,$^) -Iexamples/server $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) $(LWINSOCK2)
-
-# Portable equivalent of `cd examples/server/public && xxd -i $(notdir $<) ../$(notdir $<).hpp`:
-examples/server/%.hpp: examples/server/public/% Makefile
-	@( export NAME=$(subst .,_,$(subst -,_,$(notdir $<))) && \
-		echo "unsigned char $${NAME}[] = {" && \
-		cat $< | od -v -t x1 -An | sed -E 's/([0-9a-fA-F]+)/0x\1, /g' && \
-		echo "};" && \
-		echo "unsigned int $${NAME}_len = $(shell cat $< | wc -c );" \
-	) > $@
-
-llama-gguf: examples/gguf/gguf.cpp ggml.o $(OBJS)
-	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
-
-llama-gguf-split: examples/gguf-split/gguf-split.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
-	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
-
-llama-eval-callback: examples/eval-callback/eval-callback.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
-	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
-
-llama-cvector-generator: examples/cvector-generator/cvector-generator.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
-	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
-
-llama-train-text-from-scratch: examples/train-text-from-scratch/train-text-from-scratch.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
-	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
-
-llama-convert-llama2c-to-ggml: examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp ggml.o llama.o $(OBJS)
-	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
-
-llama-bench: examples/llama-bench/llama-bench.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
-	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
-
-libllava.a: examples/llava/llava.cpp examples/llava/llava.h examples/llava/clip.cpp examples/llava/clip.h common/stb_image.h common/base64.hpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
-	$(CXX) $(CXXFLAGS) -static -fPIC -c $< -o $@ -Wno-cast-qual
-
-llama-llava-cli: examples/llava/llava-cli.cpp examples/llava/clip.h examples/llava/clip.cpp examples/llava/llava.h examples/llava/llava.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
-	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) -c examples/llava/clip.cpp  -o $(call GET_OBJ_FILE, examples/llava/clip.cpp) -Wno-cast-qual
-	$(CXX) $(CXXFLAGS) -c examples/llava/llava.cpp -o $(call GET_OBJ_FILE, examples/llava/llava.cpp)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h $< examples/llava/clip.cpp examples/llava/llava.cpp,$^) $(call GET_OBJ_FILE, $<) $(call GET_OBJ_FILE, examples/llava/clip.cpp) $(call GET_OBJ_FILE, examples/llava/llava.cpp) -o $@ $(LDFLAGS)
-
-llama-baby-llama: examples/baby-llama/baby-llama.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
-	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
-
-llama-finetune: examples/finetune/finetune.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
-	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
-
-llama-export-lora: examples/export-lora/export-lora.cpp ggml.o common/common.h $(OBJS)
-	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
-
-llama-retrieval: examples/retrieval/retrieval.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
-	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
-
-llama-speculative: examples/speculative/speculative.cpp ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
-	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
-
-llama-parallel: examples/parallel/parallel.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
-	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
-
-llama-lookahead: examples/lookahead/lookahead.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
-	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
-
-llama-lookup: examples/lookup/lookup.cpp ggml.o llama.o ngram-cache.o $(COMMON_DEPS) $(OBJS)
-	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
-
-llama-lookup-create: examples/lookup/lookup-create.cpp ggml.o llama.o ngram-cache.o $(COMMON_DEPS) $(OBJS)
-	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
-
-llama-lookup-merge: examples/lookup/lookup-merge.cpp ggml.o llama.o ngram-cache.o $(COMMON_DEPS) $(OBJS)
-	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
-
-llama-lookup-stats: examples/lookup/lookup-stats.cpp ggml.o llama.o ngram-cache.o $(COMMON_DEPS) $(OBJS)
-	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
-
-llama-passkey: examples/passkey/passkey.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
-	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
-
-llama-gbnf-validator: examples/gbnf-validator/gbnf-validator.cpp ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
-	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
-
-ifeq ($(UNAME_S),Darwin)
-swift: examples/batched.swift
-	(cd examples/batched.swift; make build)
-endif
-
-common/build-info.cpp: $(wildcard .git/index) scripts/build-info.sh
-	@sh scripts/build-info.sh "$(CC)" > $@.tmp
-	@if ! cmp -s $@.tmp $@; then \
-		mv $@.tmp $@; \
-	else \
-		rm $@.tmp; \
-	fi
-
-build-info.o: common/build-info.cpp
-	$(CXX) $(CXXFLAGS) -c $(filter-out %.h,$^) -o $@
-
-#
-# Tests
-#
-
-tests: $(TEST_TARGETS)
-
-llama-benchmark-matmult: examples/benchmark/benchmark-matmult.cpp build-info.o ggml.o $(OBJS)
-	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
-
-run-benchmark-matmult: llama-benchmark-matmult
-	./$@
-
-.PHONY: run-benchmark-matmult swift
-
-llama-vdot: pocs/vdot/vdot.cpp ggml.o $(OBJS)
-	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) $(filter-out $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
-
-llama-q8dot: pocs/vdot/q8dot.cpp ggml.o $(OBJS)
-	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) $(filter-out $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
-
-tests/test-llama-grammar: tests/test-llama-grammar.cpp ggml.o grammar-parser.o $(OBJS)
-	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
-
-tests/test-grammar-parser: tests/test-grammar-parser.cpp ggml.o llama.o grammar-parser.o $(OBJS)
-	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
-
-tests/test-grammar-integration: tests/test-grammar-integration.cpp ggml.o llama.o grammar-parser.o $(OBJS)
-	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
-
-tests/test-double-float: tests/test-double-float.cpp ggml.o $(OBJS)
-	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
-
-tests/test-json-schema-to-grammar: tests/test-json-schema-to-grammar.cpp json-schema-to-grammar.o ggml.o llama.o grammar-parser.o $(OBJS)
-	$(CXX) $(CXXFLAGS) -Iexamples/server -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
-
-tests/test-grad0: tests/test-grad0.cpp ggml.o $(OBJS)
-	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
-
-tests/test-opt: tests/test-opt.cpp ggml.o $(OBJS)
-	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
-
-tests/test-quantize-fns: tests/test-quantize-fns.cpp ggml.o $(OBJS)
-	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
-
-tests/test-quantize-perf: tests/test-quantize-perf.cpp ggml.o $(OBJS)
-	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
-
-tests/test-sampling: tests/test-sampling.cpp ggml.o llama.o $(OBJS)
-	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
-
-tests/test-tokenizer-0: tests/test-tokenizer-0.cpp ggml.o llama.o $(COMMON_DEPS) console.o $(OBJS)
-	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
-
-tests/test-tokenizer-1-bpe: tests/test-tokenizer-1-bpe.cpp ggml.o llama.o $(COMMON_DEPS) console.o $(OBJS)
-	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
-
-tests/test-tokenizer-1-spm: tests/test-tokenizer-1-spm.cpp ggml.o llama.o $(COMMON_DEPS) console.o $(OBJS)
-	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
-
-tests/test-rope: tests/test-rope.cpp ggml.o $(OBJS)
-	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
-
-tests/test-c.o: tests/test-c.c llama.h
-	$(CC) $(CFLAGS) -c $(filter-out %.h,$^) -o $@
-
-tests/test-backend-ops: tests/test-backend-ops.cpp ggml.o $(OBJS)
-	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
-
-tests/test-model-load-cancel: tests/test-model-load-cancel.cpp ggml.o llama.o tests/get-model.cpp $(COMMON_DEPS) $(OBJS)
-	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
-
-tests/test-autorelease: tests/test-autorelease.cpp ggml.o llama.o tests/get-model.cpp $(COMMON_DEPS) $(OBJS)
-	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
-
-tests/test-chat-template: tests/test-chat-template.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
-	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
--- a/ollama/llm/llama.cpp/Package.swift
+++ b/ollama/llm/llama.cpp/Package.swift
-// swift-tools-version:5.5
-
-import PackageDescription
-
-var sources = [
-    "ggml.c",
-    "sgemm.cpp",
-    "llama.cpp",
-    "unicode.cpp",
-    "unicode-data.cpp",
-    "ggml-alloc.c",
-    "ggml-backend.c",
-    "ggml-quants.c",
-]
-
-var resources: [Resource] = []
-var linkerSettings: [LinkerSetting] = []
-var cSettings: [CSetting] =  [
-    .unsafeFlags(["-Wno-shorten-64-to-32", "-O3", "-DNDEBUG"]),
-    .unsafeFlags(["-fno-objc-arc"]),
-    // NOTE: NEW_LAPACK will required iOS version 16.4+
-    // We should consider add this in the future when we drop support for iOS 14
-    // (ref: ref: https://developer.apple.com/documentation/accelerate/1513264-cblas_sgemm?language=objc)
-    // .define("ACCELERATE_NEW_LAPACK"),
-    // .define("ACCELERATE_LAPACK_ILP64")
-]
-
-#if canImport(Darwin)
-sources.append("ggml-metal.m")
-resources.append(.process("ggml-metal.metal"))
-linkerSettings.append(.linkedFramework("Accelerate"))
-cSettings.append(
-    contentsOf: [
-        .define("GGML_USE_ACCELERATE"),
-        .define("GGML_USE_METAL")
-    ]
-)
-#endif
-
-#if os(Linux)
-    cSettings.append(.define("_GNU_SOURCE"))
-#endif
-
-let package = Package(
-    name: "llama",
-    platforms: [
-        .macOS(.v12),
-        .iOS(.v14),
-        .watchOS(.v4),
-        .tvOS(.v14)
-    ],
-    products: [
-        .library(name: "llama", targets: ["llama"]),
-    ],
-    targets: [
-        .target(
-            name: "llama",
-            path: ".",
-            exclude: [
-               "cmake",
-               "examples",
-               "scripts",
-               "models",
-               "tests",
-               "CMakeLists.txt",
-               "ggml-cuda.cu",
-               "ggml-cuda.h",
-               "Makefile"
-            ],
-            sources: sources,
-            resources: resources,
-            publicHeadersPath: "spm-headers",
-            cSettings: cSettings,
-            linkerSettings: linkerSettings
-        )
-    ],
-    cxxLanguageStandard: .cxx11
-)
--- a/ollama/llm/llama.cpp/README-sycl.md
+++ b/ollama/llm/llama.cpp/README-sycl.md
-# llama.cpp for SYCL
-
- [Background](#background)
- [Recommended Release](#recommended-release)
- [News](#news)
- [OS](#os)
- [Hardware](#hardware)
- [Docker](#docker)
- [Linux](#linux)
- [Windows](#windows)
- [Environment Variable](#environment-variable)
- [Known Issue](#known-issues)
- [Q&A](#qa)
- [TODO](#todo)
-
-## Background
-
-**SYCL** is a high-level parallel programming model designed to improve developers productivity writing code across various hardware accelerators such as CPUs, GPUs, and FPGAs. It is a single-source language designed for heterogeneous computing and based on standard C++17.
-
-**oneAPI** is an open ecosystem and a standard-based specification, supporting multiple architectures including but not limited to intel CPUs, GPUs and FPGAs. The key components of the oneAPI ecosystem include:
-
- **DPCPP** *(Data Parallel C++)*: The primary oneAPI SYCL implementation, which includes the icpx/icx Compilers.
- **oneAPI Libraries**: A set of highly optimized libraries targeting multiple domains *(e.g. oneMKL - Math Kernel Library)*.
- **oneAPI LevelZero**: A high performance low level interface for fine-grained control over intel iGPUs and dGPUs.
- **Nvidia & AMD Plugins**: These are plugins extending oneAPI's DPCPP support to SYCL on Nvidia and AMD GPU targets.
-
-### Llama.cpp + SYCL
-
-The llama.cpp SYCL backend is designed to support **Intel GPU** firstly. Based on the cross-platform feature of SYCL, it could support other vendor GPUs: Nvidia GPU (*AMD GPU coming*).
-
-When targeting **Intel CPU**, it is recommended to use llama.cpp for [Intel oneMKL](README.md#intel-onemkl) backend.
-
-It has the similar design of other llama.cpp BLAS-based paths such as *OpenBLAS, cuBLAS, etc..*. In beginning work, the oneAPI's [SYCLomatic](https://github.com/oneapi-src/SYCLomatic) open-source migration tool (Commercial release [Intel® DPC++ Compatibility Tool](https://www.intel.com/content/www/us/en/developer/tools/oneapi/dpc-compatibility-tool.html)) was used for this purpose.
-
-## Recommended Release
-
-The SYCL backend would be broken by some PRs due to no online CI.
-
-The following release is verified with good quality:
-
-|Commit ID|Tag|Release|Verified  Platform|
-|-|-|-|-|
-|fb76ec31a9914b7761c1727303ab30380fd4f05c|b3038 |[llama-b3038-bin-win-sycl-x64.zip](https://github.com/ggerganov/llama.cpp/releases/download/b3038/llama-b3038-bin-win-sycl-x64.zip) |Arc770/Linux/oneAPI 2024.1<br>MTL Arc GPU/Windows 11/oneAPI 2024.1|
-
-
-## News
-
- 2024.5
-  - Performance is increased: 34 -> 37 tokens/s of llama-2-7b.Q4_0 on Arc770.
-  - Arch Linux is verified successfully.
-
- 2024.4
-  - Support data types: GGML_TYPE_IQ4_NL, GGML_TYPE_IQ4_XS, GGML_TYPE_IQ3_XXS, GGML_TYPE_IQ3_S, GGML_TYPE_IQ2_XXS, GGML_TYPE_IQ2_XS, GGML_TYPE_IQ2_S, GGML_TYPE_IQ1_S, GGML_TYPE_IQ1_M.
-
- 2024.3
-  - Release binary files of Windows.
-  - A blog is published: **Run LLM on all Intel GPUs Using llama.cpp**: [intel.com](https://www.intel.com/content/www/us/en/developer/articles/technical/run-llm-on-all-gpus-using-llama-cpp-artical.html) or [medium.com](https://medium.com/@jianyu_neo/run-llm-on-all-intel-gpus-using-llama-cpp-fd2e2dcbd9bd).
-  - New base line is ready: [tag b2437](https://github.com/ggerganov/llama.cpp/tree/b2437).
-  - Support multiple cards: **--split-mode**: [none|layer]; not support [row], it's on developing.
-  - Support to assign main GPU by **--main-gpu**, replace $GGML_SYCL_DEVICE.
-  - Support detecting all GPUs with level-zero and same top **Max compute units**.
-  - Support OPs
-    - hardsigmoid
-    - hardswish
-    - pool2d
-
- 2024.1
-  - Create SYCL backend for Intel GPU.
-  - Support Windows build
-
-## OS
-
-| OS      | Status  | Verified                                       |
-|---------|---------|------------------------------------------------|
-| Linux   | Support | Ubuntu 22.04, Fedora Silverblue 39, Arch Linux |
-| Windows | Support | Windows 11                                     |
-
-
-## Hardware
-
-### Intel GPU
-
-**Verified devices**
-
-| Intel GPU                     | Status  | Verified Model                        |
-|-------------------------------|---------|---------------------------------------|
-| Intel Data Center Max Series  | Support | Max 1550, 1100                        |
-| Intel Data Center Flex Series | Support | Flex 170                              |
-| Intel Arc Series              | Support | Arc 770, 730M, Arc A750               |
-| Intel built-in Arc GPU        | Support | built-in Arc GPU in Meteor Lake       |
-| Intel iGPU                    | Support | iGPU in i5-1250P, i7-1260P, i7-1165G7 |
-
-*Notes:*
-
- **Memory**
-  - The device memory is a limitation when running a large model. The loaded model size, *`llm_load_tensors: buffer_size`*, is displayed in the log when running `./bin/llama-cli`.
-
-  - Please make sure the GPU shared memory from the host is large enough to account for the model's size. For e.g. the *llama-2-7b.Q4_0* requires at least 8.0GB for integrated GPU and 4.0GB for discrete GPU.
-
- **Execution Unit (EU)**
-  - If the iGPU has less than 80 EUs, the inference speed will likely be too slow for practical use.
-
-### Other Vendor GPU
-
-**Verified devices**
-
-| Nvidia GPU               | Status  | Verified Model |
-|--------------------------|---------|----------------|
-| Ampere Series            | Support | A100, A4000    |
-| Ampere Series *(Mobile)* | Support | RTX 40 Series  |
-
-## Docker
-The docker build option is currently limited to *intel GPU* targets.
-
-### Build image
-```sh
-# Using FP16
-docker build -t llama-cpp-sycl --build-arg="LLAMA_SYCL_F16=ON" -f .devops/llama-cli-intel.Dockerfile .
-```
-
-*Notes*:
-
-To build in default FP32 *(Slower than FP16 alternative)*, you can remove the `--build-arg="LLAMA_SYCL_F16=ON"` argument from the previous command.
-
-You can also use the `.devops/llama-server-intel.Dockerfile`, which builds the *"server"* alternative.
-
-### Run container
-
-```sh
-# First, find all the DRI cards
-ls -la /dev/dri
-# Then, pick the card that you want to use (here for e.g. /dev/dri/card1).
-docker run -it --rm -v "$(pwd):/app:Z" --device /dev/dri/renderD128:/dev/dri/renderD128 --device /dev/dri/card1:/dev/dri/card1 llama-cpp-sycl -m "/app/models/YOUR_MODEL_FILE" -p "Building a website can be done in 10 simple steps:" -n 400 -e -ngl 33
-```
-
-*Notes:*
- Docker has been tested successfully on native Linux. WSL support has not been verified yet.
- You may need to install Intel GPU driver on the **host** machine *(Please refer to the [Linux configuration](#linux) for details)*.
-
-## Linux
-
-### I. Setup Environment
-
-1. **Install GPU drivers**
-
-  - **Intel GPU**
-
-Intel data center GPUs drivers installation guide and download page can be found here: [Get intel dGPU Drivers](https://dgpu-docs.intel.com/driver/installation.html#ubuntu-install-steps).
-
-*Note*: for client GPUs *(iGPU & Arc A-Series)*, please refer to the [client iGPU driver installation](https://dgpu-docs.intel.com/driver/client/overview.html).
-
-Once installed, add the user(s) to the `video` and `render` groups.
-
-```sh
-sudo usermod -aG render $USER
-sudo usermod -aG video $USER
-```
-
-*Note*: logout/re-login for the changes to take effect.
-
-Verify installation through `clinfo`:
-
-```sh
-sudo apt install clinfo
-sudo clinfo -l
-```
-
-Sample output:
-
-```sh
-Platform #0: Intel(R) OpenCL Graphics
- `-- Device #0: Intel(R) Arc(TM) A770 Graphics
-
-Platform #0: Intel(R) OpenCL HD Graphics
- `-- Device #0: Intel(R) Iris(R) Xe Graphics [0x9a49]
-```
-
- **Nvidia GPU**
-
-In order to target Nvidia GPUs through SYCL, please make sure the CUDA/CUBLAS native requirements *-found [here](README.md#cuda)-* are installed.
-
-2. **Install Intel® oneAPI Base toolkit**
-
- **For Intel GPU**
-
-The base toolkit can be obtained from the official [Intel® oneAPI Base Toolkit](https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit.html) page.
-
-Please follow the instructions for downloading and installing the Toolkit for Linux, and preferably keep the default installation values unchanged, notably the installation path *(`/opt/intel/oneapi` by default)*.
-
-Following guidelines/code snippets assume the default installation values. Otherwise, please make sure the necessary changes are reflected where applicable.
-
-Upon a successful installation, SYCL is enabled for the available intel devices, along with relevant libraries such as oneAPI MKL for intel GPUs.
-
- **Adding support to Nvidia GPUs**
-
-**oneAPI Plugin**: In order to enable SYCL support on Nvidia GPUs, please install the [Codeplay oneAPI Plugin for Nvidia GPUs](https://developer.codeplay.com/products/oneapi/nvidia/download). User should also make sure the plugin version matches the installed base toolkit one *(previous step)* for a seamless "oneAPI on Nvidia GPU" setup.
-
-
-**oneMKL for cuBlas**: The current oneMKL releases *(shipped with the oneAPI base-toolkit)* do not contain the cuBLAS backend. A build from source of the upstream [oneMKL](https://github.com/oneapi-src/oneMKL) with the *cuBLAS* backend enabled is thus required to run it on Nvidia GPUs.
-
-```sh
-git clone https://github.com/oneapi-src/oneMKL
-cd oneMKL
-cmake -B buildWithCublas -DCMAKE_CXX_COMPILER=icpx -DCMAKE_C_COMPILER=icx -DENABLE_MKLGPU_BACKEND=OFF -DENABLE_MKLCPU_BACKEND=OFF -DENABLE_CUBLAS_BACKEND=ON -DTARGET_DOMAINS=blas
-cmake --build buildWithCublas --config Release
-```
-
-
-3. **Verify installation and environment**
-
-In order to check the available SYCL devices on the machine, please use the `sycl-ls` command.
-```sh
-source /opt/intel/oneapi/setvars.sh
-sycl-ls
-```
-
- **Intel GPU**
-
-When targeting an intel GPU, the user should expect one or more level-zero devices among the available SYCL devices. Please make sure that at least one GPU is present, for instance [`ext_oneapi_level_zero:gpu:0`] in the sample output below:
-
-```
-[opencl:acc:0] Intel(R) FPGA Emulation Platform for OpenCL(TM), Intel(R) FPGA Emulation Device OpenCL 1.2  [2023.16.10.0.17_160000]
-[opencl:cpu:1] Intel(R) OpenCL, 13th Gen Intel(R) Core(TM) i7-13700K OpenCL 3.0 (Build 0) [2023.16.10.0.17_160000]
-[opencl:gpu:2] Intel(R) OpenCL Graphics, Intel(R) Arc(TM) A770 Graphics OpenCL 3.0 NEO  [23.30.26918.50]
-[ext_oneapi_level_zero:gpu:0] Intel(R) Level-Zero, Intel(R) Arc(TM) A770 Graphics 1.3 [1.3.26918]
-```
-
- **Nvidia GPU**
-
-Similarly, user targeting Nvidia GPUs should expect at least one SYCL-CUDA device [`ext_oneapi_cuda:gpu`] as bellow:
-```
-[opencl:acc:0] Intel(R) FPGA Emulation Platform for OpenCL(TM), Intel(R) FPGA Emulation Device OpenCL 1.2  [2023.16.12.0.12_195853.xmain-hotfix]
-[opencl:cpu:1] Intel(R) OpenCL, Intel(R) Xeon(R) Gold 6326 CPU @ 2.90GHz OpenCL 3.0 (Build 0) [2023.16.12.0.12_195853.xmain-hotfix]
-[ext_oneapi_cuda:gpu:0] NVIDIA CUDA BACKEND, NVIDIA A100-PCIE-40GB 8.0 [CUDA 12.2]
-```
-
-### II. Build llama.cpp
-
-#### Intel GPU
-```sh
-# Export relevant ENV variables
-source /opt/intel/oneapi/setvars.sh
-
-# Build LLAMA with MKL BLAS acceleration for intel GPU
-
-# Option 1: Use FP32 (recommended for better performance in most cases)
-cmake -B build -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
-
-# Option 2: Use FP16
-cmake -B build -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON
-
-# build all binary
-cmake --build build --config Release -j -v
-```
-
-#### Nvidia GPU
-```sh
-# Export relevant ENV variables
-export LD_LIBRARY_PATH=/path/to/oneMKL/buildWithCublas/lib:$LD_LIBRARY_PATH
-export LIBRARY_PATH=/path/to/oneMKL/buildWithCublas/lib:$LIBRARY_PATH
-export CPLUS_INCLUDE_DIR=/path/to/oneMKL/buildWithCublas/include:$CPLUS_INCLUDE_DIR
-export CPLUS_INCLUDE_DIR=/path/to/oneMKL/include:$CPLUS_INCLUDE_DIR
-
-# Build LLAMA with Nvidia BLAS acceleration through SYCL
-
-# Option 1: Use FP32 (recommended for better performance in most cases)
-cmake -B build -DLLAMA_SYCL=ON -DLLAMA_SYCL_TARGET=NVIDIA -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
-
-# Option 2: Use FP16
-cmake -B build -DLLAMA_SYCL=ON -DLLAMA_SYCL_TARGET=NVIDIA -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON
-
-# build all binary
-cmake --build build --config Release -j -v
-
-```
-
-### III. Run the inference
-
-1. Retrieve and prepare model
-
-You can refer to the general [*Prepare and Quantize*](README.md#prepare-and-quantize) guide for model prepration, or simply download [llama-2-7b.Q4_0.gguf](https://huggingface.co/TheBloke/Llama-2-7B-GGUF/blob/main/llama-2-7b.Q4_0.gguf) model as example.
-
-2. Enable oneAPI running environment
-
-```sh
-source /opt/intel/oneapi/setvars.sh
-```
-
-3. List devices information
-
-Similar to the native `sycl-ls`, available SYCL devices can be queried as follow:
-
-```sh
-./build/bin/llama-ls-sycl-device
-```
-A example of such log in a system with 1 *intel CPU* and 1 *intel GPU* can look like the following:
-```
-found 6 SYCL devices:
-|  |                  |                                             |Compute   |Max compute|Max work|Max sub|               |
-|ID|       Device Type|                                         Name|capability|units      |group   |group  |Global mem size|
-|--|------------------|---------------------------------------------|----------|-----------|--------|-------|---------------|
-| 0|[level_zero:gpu:0]|               Intel(R) Arc(TM) A770 Graphics|       1.3|        512|    1024|     32|    16225243136|
-| 1|[level_zero:gpu:1]|                    Intel(R) UHD Graphics 770|       1.3|         32|     512|     32|    53651849216|
-| 2|    [opencl:gpu:0]|               Intel(R) Arc(TM) A770 Graphics|       3.0|        512|    1024|     32|    16225243136|
-| 3|    [opencl:gpu:1]|                    Intel(R) UHD Graphics 770|       3.0|         32|     512|     32|    53651849216|
-| 4|    [opencl:cpu:0]|         13th Gen Intel(R) Core(TM) i7-13700K|       3.0|         24|    8192|     64|    67064815616|
-| 5|    [opencl:acc:0]|               Intel(R) FPGA Emulation Device|       1.2|         24|67108864|     64|    67064815616|
-```
-
-| Attribute              | Note                                                        |
-|------------------------|-------------------------------------------------------------|
-| compute capability 1.3 | Level-zero driver/runtime, recommended                      |
-| compute capability 3.0 | OpenCL driver/runtime, slower than level-zero in most cases |
-
-4. Launch inference
-
-There are two device selection modes:
-
- Single device: Use one device target specified by the user.
- Multiple devices: Automatically select the devices with the same largest Max compute-units.
-
-| Device selection | Parameter                              |
-|------------------|----------------------------------------|
-| Single device    | --split-mode none --main-gpu DEVICE_ID |
-| Multiple devices | --split-mode layer (default)           |
-
-Examples:
-
- Use device 0:
-
-```sh
-ZES_ENABLE_SYSMAN=1 ./build/bin/llama-cli -m models/llama-2-7b.Q4_0.gguf -p "Building a website can be done in 10 simple steps:" -n 400 -e -ngl 33 -sm none -mg 0
-```
-or run by script:
-
-```sh
-./examples/sycl/run_llama2.sh 0
-```
-
- Use multiple devices:
-
-```sh
-ZES_ENABLE_SYSMAN=1 ./build/bin/llama-cli -m models/llama-2-7b.Q4_0.gguf -p "Building a website can be done in 10 simple steps:" -n 400 -e -ngl 33 -sm layer
-```
-
-Otherwise, you can run the script:
-
-```sh
-./examples/sycl/run_llama2.sh
-```
-
-*Notes:*
-
- Upon execution, verify the selected device(s) ID(s) in the output log, which can for instance be displayed as follow:
-
-```sh
-detect 1 SYCL GPUs: [0] with top Max compute units:512
-```
-Or
-```sh
-use 1 SYCL GPUs: [0] with Max compute units:512
-```
-
-## Windows
-
-### I. Setup Environment
-
-1. Install GPU driver
-
-Intel GPU drivers instructions guide and download page can be found here: [Get intel GPU Drivers](https://www.intel.com/content/www/us/en/products/docs/discrete-gpus/arc/software/drivers.html).
-
-2. Install Visual Studio
-
-If you already have a recent version of Microsoft Visual Studio, you can skip this step. Otherwise, please refer to the official download page for [Microsoft Visual Studio](https://visualstudio.microsoft.com/).
-
-3. Install Intel® oneAPI Base toolkit
-
-The base toolkit can be obtained from the official [Intel® oneAPI Base Toolkit](https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit.html) page.
-
-Please follow the instructions for downloading and installing the Toolkit for Windows, and preferably keep the default installation values unchanged, notably the installation path *(`C:\Program Files (x86)\Intel\oneAPI` by default)*.
-
-Following guidelines/code snippets assume the default installation values. Otherwise, please make sure the necessary changes are reflected where applicable.
-
-b. Enable oneAPI running environment:
-
- Type "oneAPI" in the search bar, then open the `Intel oneAPI command prompt for Intel 64 for Visual Studio 2022` App.
-
- On the command prompt, enable the runtime environment with the following:
-```
-"C:\Program Files (x86)\Intel\oneAPI\setvars.bat" intel64
-```
-
-c. Verify installation
-
-In the oneAPI command line, run the following to print the available SYCL devices:
-
-```
-sycl-ls
-```
-
-There should be one or more *level-zero* GPU devices displayed as **[ext_oneapi_level_zero:gpu]**. Below is example of such output detecting an *intel Iris Xe* GPU as a Level-zero SYCL device:
-
-Output (example):
-```
-[opencl:acc:0] Intel(R) FPGA Emulation Platform for OpenCL(TM), Intel(R) FPGA Emulation Device OpenCL 1.2  [2023.16.10.0.17_160000]
-[opencl:cpu:1] Intel(R) OpenCL, 11th Gen Intel(R) Core(TM) i7-1185G7 @ 3.00GHz OpenCL 3.0 (Build 0) [2023.16.10.0.17_160000]
-[opencl:gpu:2] Intel(R) OpenCL Graphics, Intel(R) Iris(R) Xe Graphics OpenCL 3.0 NEO  [31.0.101.5186]
-[ext_oneapi_level_zero:gpu:0] Intel(R) Level-Zero, Intel(R) Iris(R) Xe Graphics 1.3 [1.3.28044]
-```
-
-4. Install build tools
-
-a. Download & install cmake for Windows: https://cmake.org/download/
-
-b. Download & install mingw-w64 make for Windows provided by w64devkit
-
- Download the 1.19.0 version of [w64devkit](https://github.com/skeeto/w64devkit/releases/download/v1.19.0/w64devkit-1.19.0.zip).
-
- Extract `w64devkit` on your pc.
-
- Add the **bin** folder path in the Windows system PATH environment (for e.g. `C:\xxx\w64devkit\bin\`).
-
-### II. Build llama.cpp
-
-On the oneAPI command line window, step into the llama.cpp main directory and run the following:
-
-```
-@call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" intel64 --force
-
-# Option 1: Use FP32 (recommended for better performance in most cases)
-cmake -B build -G "MinGW Makefiles" -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icx  -DCMAKE_BUILD_TYPE=Release
-
-# Option 2: Or FP16
-cmake -B build -G "MinGW Makefiles" -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icx  -DCMAKE_BUILD_TYPE=Release -DLLAMA_SYCL_F16=ON
-
-cmake --build build --config Release -j
-```
-
-Otherwise, run the `win-build-sycl.bat` wrapper which encapsulates the former instructions:
-```sh
-.\examples\sycl\win-build-sycl.bat
-```
-
-*Notes:*
-
- By default, calling `make` will build all target binary files. In case of a minimal experimental setup, the user can build the inference executable only through `make llama-cli`.
-
-### III. Run the inference
-
-1. Retrieve and prepare model
-
-You can refer to the general [*Prepare and Quantize*](README#prepare-and-quantize) guide for model prepration, or simply download [llama-2-7b.Q4_0.gguf](https://huggingface.co/TheBloke/Llama-2-7B-GGUF/blob/main/llama-2-7b.Q4_0.gguf) model as example.
-
-2. Enable oneAPI running environment
-
-On the oneAPI command line window, run the following and step into the llama.cpp directory:
-```
-"C:\Program Files (x86)\Intel\oneAPI\setvars.bat" intel64
-```
-
-3. List devices information
-
-Similar to the native `sycl-ls`, available SYCL devices can be queried as follow:
-
-```
-build\bin\ls-sycl-device.exe
-```
-
-The output of this command in a system with 1 *intel CPU* and 1 *intel GPU* would look like the following:
-```
-found 6 SYCL devices:
-|  |                  |                                             |Compute   |Max compute|Max work|Max sub|               |
-|ID|       Device Type|                                         Name|capability|units      |group   |group  |Global mem size|
-|--|------------------|---------------------------------------------|----------|-----------|--------|-------|---------------|
-| 0|[level_zero:gpu:0]|               Intel(R) Arc(TM) A770 Graphics|       1.3|        512|    1024|     32|    16225243136|
-| 1|[level_zero:gpu:1]|                    Intel(R) UHD Graphics 770|       1.3|         32|     512|     32|    53651849216|
-| 2|    [opencl:gpu:0]|               Intel(R) Arc(TM) A770 Graphics|       3.0|        512|    1024|     32|    16225243136|
-| 3|    [opencl:gpu:1]|                    Intel(R) UHD Graphics 770|       3.0|         32|     512|     32|    53651849216|
-| 4|    [opencl:cpu:0]|         13th Gen Intel(R) Core(TM) i7-13700K|       3.0|         24|    8192|     64|    67064815616|
-| 5|    [opencl:acc:0]|               Intel(R) FPGA Emulation Device|       1.2|         24|67108864|     64|    67064815616|
-
-```
-
-| Attribute              | Note                                                      |
-|------------------------|-----------------------------------------------------------|
-| compute capability 1.3 | Level-zero running time, recommended                      |
-| compute capability 3.0 | OpenCL running time, slower than level-zero in most cases |
-
-
-4. Launch inference
-
-There are two device selection modes:
-
- Single device: Use one device assigned by user.
- Multiple devices: Automatically choose the devices with the same biggest Max compute units.
-
-| Device selection | Parameter                              |
-|------------------|----------------------------------------|
-| Single device    | --split-mode none --main-gpu DEVICE_ID |
-| Multiple devices | --split-mode layer (default)           |
-
-Examples:
-
- Use device 0:
-
-```
-build\bin\llama-cli.exe -m models\llama-2-7b.Q4_0.gguf -p "Building a website can be done in 10 simple steps:\nStep 1:" -n 400 -e -ngl 33 -s 0 -sm none -mg 0
-```
-
- Use multiple devices:
-
-```
-build\bin\llama-cli.exe -m models\llama-2-7b.Q4_0.gguf -p "Building a website can be done in 10 simple steps:\nStep 1:" -n 400 -e -ngl 33 -s 0 -sm layer
-```
-Otherwise, run the following wrapper script:
-
-```
-.\examples\sycl\win-run-llama2.bat
-```
-
-Note:
-
- Upon execution, verify the selected device(s) ID(s) in the output log, which can for instance be displayed as follow:
-
-```sh
-detect 1 SYCL GPUs: [0] with top Max compute units:512
-```
-Or
-```sh
-use 1 SYCL GPUs: [0] with Max compute units:512
-```
-
-## Environment Variable
-
-#### Build
-
-| Name               | Value                             | Function                                    |
-|--------------------|-----------------------------------|---------------------------------------------|
-| LLAMA_SYCL         | ON (mandatory)                    | Enable build with SYCL code path.           |
-| LLAMA_SYCL_TARGET  | INTEL *(default)* \| NVIDIA       | Set the SYCL target device type.            |
-| LLAMA_SYCL_F16     | OFF *(default)* \|ON *(optional)* | Enable FP16 build with SYCL code path.      |
-| CMAKE_C_COMPILER   | icx                               | Set *icx* compiler for SYCL code path.      |
-| CMAKE_CXX_COMPILER | icpx *(Linux)*, icx *(Windows)*   | Set `icpx/icx` compiler for SYCL code path. |
-
-#### Runtime
-
-| Name              | Value            | Function                                                                                                                  |
-|-------------------|------------------|---------------------------------------------------------------------------------------------------------------------------|
-| GGML_SYCL_DEBUG   | 0 (default) or 1 | Enable log function by macro: GGML_SYCL_DEBUG                                                                             |
-| ZES_ENABLE_SYSMAN | 0 (default) or 1 | Support to get free memory of GPU by sycl::aspect::ext_intel_free_memory.<br>Recommended to use when --split-mode = layer |
-
-## Known Issues
-
- `Split-mode:[row]` is not supported.
-
-## Q&A
-
- Error:  `error while loading shared libraries: libsycl.so.7: cannot open shared object file: No such file or directory`.
-
-  - Potential cause: Unavailable oneAPI installation or not set ENV variables.
-  - Solution: Install *oneAPI base toolkit* and enable its ENV through: `source /opt/intel/oneapi/setvars.sh`.
-
- General compiler error:
-
-  - Remove **build** folder or try a clean-build.
-
- I can **not** see `[ext_oneapi_level_zero:gpu]` afer installing the GPU driver on Linux.
-
-  Please double-check with `sudo sycl-ls`.
-
-  If it's present in the list, please add video/render group to your user then **logout/login** or restart your system:
-
-  ```
-  sudo usermod -aG render $USER
-  sudo usermod -aG video $USER
-  ```
-  Otherwise, please double-check the GPU driver installation steps.
-
-### **GitHub contribution**:
-Please add the **[SYCL]** prefix/tag in issues/PRs titles to help the SYCL-team check/address them without delay.
-
-## TODO
-
- Support row layer split for multiple card runs.