init

4cc1a614 · xuxzh1 · 4cc1a614 · 4cc1a614 · 4cc1a614 · 4cc1a614
Commit 4cc1a614 authored Nov 11, 2024 by xuxzh1 🎱
20 changed files
--- a/CMakePresets.json
+++ b/CMakePresets.json
+{
+  "version": 4,
+  "configurePresets": [
+    {
+        "name":  "base",
+        "hidden": true,
+        "generator":   "Ninja",
+        "binaryDir":   "${sourceDir}/build-${presetName}",
+        "cacheVariables": {
+            "CMAKE_EXPORT_COMPILE_COMMANDS": "ON",
+            "CMAKE_INSTALL_RPATH": "$ORIGIN;$ORIGIN/.."
+        }
+    },
+    {
+        "name": "sycl-base",
+        "hidden": true,
+        "generator": "Ninja",
+        "binaryDir": "${sourceDir}/build-${presetName}",
+        "cacheVariables": {
+            "CMAKE_EXPORT_COMPILE_COMMANDS": "ON",
+            "CMAKE_CXX_COMPILER": "icx",
+            "CMAKE_C_COMPILER": "cl",
+            "GGML_SYCL": "ON",
+            "CMAKE_INSTALL_RPATH": "$ORIGIN;$ORIGIN/.."
+        }
+    },
+    { "name": "debug",   "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "Debug" } },
+    { "name": "release", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "Release" } },
+    { "name": "reldbg",  "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "RelWithDebInfo" } },
+    { "name": "static",  "hidden": true, "cacheVariables": { "GGML_STATIC": "ON" } },
+    {
+        "name": "arm64-windows-msvc", "hidden": true,
+        "architecture": { "value": "arm64",       "strategy": "external" },
+        "toolset":      { "value": "host=x86_64", "strategy": "external" },
+        "cacheVariables": {
+            "CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/arm64-windows-msvc.cmake"
+        }
+    },
+    {
+        "name": "arm64-windows-llvm", "hidden": true,
+        "architecture": { "value": "arm64",       "strategy": "external" },
+        "toolset":      { "value": "host=x86_64", "strategy": "external" },
+        "cacheVariables": {
+            "CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/arm64-windows-llvm.cmake"
+        }
+    },
+    { "name": "arm64-windows-llvm-debug"  , "inherits": [ "base", "arm64-windows-llvm",  "debug"   ] },
+    { "name": "arm64-windows-llvm-release", "inherits": [ "base", "arm64-windows-llvm",  "reldbg" ] },
+    { "name": "arm64-windows-llvm+static-release", "inherits": [ "base", "arm64-windows-llvm",  "reldbg", "static" ] },
+    { "name": "arm64-windows-msvc-debug"  , "inherits": [ "base", "arm64-windows-msvc",  "debug"   ] },
+    { "name": "arm64-windows-msvc-release", "inherits": [ "base", "arm64-windows-msvc",  "reldbg" ] },
+    { "name": "arm64-windows-msvc+static-release", "inherits": [ "base", "arm64-windows-msvc",  "reldbg", "static" ] },
+    { "name": "x64-windows-msvc-debug"  , "inherits": [ "base", "debug"   ] },
+    { "name": "x64-windows-msvc-release", "inherits": [ "base", "reldbg" ] },
+    { "name": "x64-windows-msvc+static-release", "inherits": [ "base", "reldbg", "static" ] },
+    { "name": "x64-windows-sycl-debug"  , "inherits": [ "sycl-base", "debug"   ] },
+    { "name": "x64-windows-sycl-release", "inherits": [ "sycl-base", "release" ] }
+  ]
+}
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
+# Pull requests (for contributors)
+- Test your changes:
+  - Using the commands in the [`tests`](tests) folder. For instance, running the `./tests/test-backend-ops` command tests different backend implementations of the GGML library
+  - Execute [the full CI locally on your machine](ci/README.md) before publishing
+- Please rate the complexity of your PR (i.e. `Review Complexity : Low`, `Review Complexity : Medium`, `Review Complexity : High`). This makes it easier for maintainers to triage the PRs.
+  - The PR template has a series of review complexity checkboxes `[ ]` that [you can mark as](https://docs.github.com/en/get-started/writing-on-github/working-with-advanced-formatting/about-task-lists) `[X]` for your convenience
+- Consider allowing write access to your branch for faster review
+- If your PR becomes stale, don't hesitate to ping the maintainers in the comments
+# Pull requests (for collaborators)
+- Squash-merge PRs
+- Use the following format for the squashed commit title: `<module> : <commit title> (#<issue_number>)`. For example: `utils : fix typo in utils.py (#1234)`
+- Optionally, pick a `<module>` from here: https://github.com/ggerganov/llama.cpp/wiki/Modules
+# Coding guidelines
+- Avoid adding third-party dependencies, extra files, extra headers, etc.
+- Always consider cross-compatibility with other operating systems and architectures
+- Avoid fancy looking modern STL constructs, use basic `for` loops, avoid templates, keep it simple
+- There are no strict rules for the code style, but try to follow the patterns in the code (indentation, spaces, etc.). Vertical alignment makes things more readable and easier to batch edit
+- Clean up any trailing whitespace, use 4 spaces for indentation, brackets on the same line, `void * ptr`, `int & a`
+- Naming usually optimizes for common prefix (see https://github.com/ggerganov/ggml/pull/302#discussion_r1243240963)
+- Tensors store data in row-major order. We refer to dimension 0 as columns, 1 as rows, 2 as matrices
+- Matrix multiplication is unconventional: [`C = ggml_mul_mat(ctx, A, B)`](https://github.com/ggerganov/llama.cpp/blob/880e352277fc017df4d5794f0c21c44e1eae2b84/ggml.h#L1058-L1064) means $C^T = A B^T \Leftrightarrow C = B A^T.$
+![matmul](media/matmul.png)
--- a/LICENSE
+++ b/LICENSE
+MIT License
+Copyright (c) 2023-2024 The ggml authors
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
--- a/Makefile
+++ b/Makefile
--- a/Package.swift
+++ b/Package.swift
+// swift-tools-version:5.5
+import PackageDescription
+// Source files compiled into the `llama` target. Declared with `var` so the
+// platform-specific sections further down can append to the list.
+var sources: [String] = [
+    // llama sources
+    "src/llama.cpp",
+    "src/llama-vocab.cpp",
+    "src/llama-grammar.cpp",
+    "src/llama-sampling.cpp",
+    "src/unicode.cpp",
+    "src/unicode-data.cpp",
+    // ggml sources
+    "ggml/src/ggml.c",
+    "ggml/src/ggml-alloc.c",
+    "ggml/src/ggml-backend.c",
+    "ggml/src/ggml-quants.c",
+    "ggml/src/ggml-aarch64.c",
+]
+// Bundled resources and linker settings start out empty.
+var resources = [Resource]()
+var linkerSettings = [LinkerSetting]()
+var cSettings: [CSetting] =  [
+    .unsafeFlags(["-Wno-shorten-64-to-32", "-O3", "-DNDEBUG"]),
+    .unsafeFlags(["-fno-objc-arc"]),
+    // NOTE: NEW_LAPACK requires iOS 16.4+.
+    // We should consider adding this in the future when we drop support for iOS 14
+    // (ref: https://developer.apple.com/documentation/accelerate/1513264-cblas_sgemm?language=objc)
+    // .define("ACCELERATE_NEW_LAPACK"),
+    // .define("ACCELERATE_LAPACK_ILP64")
+]
+#if canImport(Darwin)
+// Darwin is importable: add the Metal backend source and shader, link the
+// Accelerate framework, and define the matching GGML_USE_* macros.
+sources.append("ggml/src/ggml-metal.m")
+resources.append(.process("ggml/src/ggml-metal.metal"))
+linkerSettings.append(.linkedFramework("Accelerate"))
+cSettings.append(.define("GGML_USE_ACCELERATE"))
+cSettings.append(.define("GGML_USE_METAL"))
+#endif
+#if os(Linux)
+// Linux: define _GNU_SOURCE for the C sources.
+cSettings.append(.define("_GNU_SOURCE"))
+#endif
+// Package manifest: one library product named "llama", backed by a single
+// target rooted at the repository top level.
+let package = Package(
+    name: "llama",
+    platforms: [.macOS(.v12), .iOS(.v14), .watchOS(.v4), .tvOS(.v14)],
+    products: [.library(name: "llama", targets: ["llama"])],
+    targets: [
+        .target(
+            name: "llama",
+            path: ".",
+            // Directories and build files excluded from the target.
+            exclude: [
+                "cmake", "examples", "scripts", "models", "tests",
+                "CMakeLists.txt", "Makefile",
+            ],
+            // The lists below are assembled earlier in this manifest.
+            sources: sources,
+            resources: resources,
+            publicHeadersPath: "spm-headers",
+            cSettings: cSettings,
+            linkerSettings: linkerSettings
+        )
+    ],
+    cxxLanguageStandard: .cxx11
+)
--- a/README.md
+++ b/README.md
--- a/SECURITY.md
+++ b/SECURITY.md
+# Security Policy
+ - [**Using llama.cpp securely**](#using-llamacpp-securely)
+   - [Untrusted models](#untrusted-models)
+   - [Untrusted inputs](#untrusted-inputs)
+   - [Data privacy](#data-privacy)
+   - [Untrusted environments or networks](#untrusted-environments-or-networks)
+   - [Multi-Tenant environments](#multi-tenant-environments)
+ - [**Reporting a vulnerability**](#reporting-a-vulnerability)
+## Using llama.cpp securely
+### Untrusted models
+Be careful when running untrusted models. This classification includes models created by unknown developers or utilizing data obtained from unknown sources.
+*Always execute untrusted models within a secure, isolated environment such as a sandbox* (e.g., containers, virtual machines). This helps protect your system from potentially malicious code.
+> [!NOTE]
+> The trustworthiness of a model is not binary. You must always determine the proper level of caution depending on the specific model and how it matches your use case and risk tolerance.
+### Untrusted inputs
+Some models accept various input formats (text, images, audio, etc.). The libraries converting these inputs have varying security levels, so it's crucial to isolate the model and carefully pre-process inputs to mitigate script injection risks.
+For maximum security when handling untrusted inputs, you may need to employ the following:
+* Sandboxing: Isolate the environment where the inference happens.
+* Pre-analysis: Check how the model performs by default when exposed to prompt injection (e.g. using [fuzzing for prompt injection](https://github.com/FonduAI/awesome-prompt-injection?tab=readme-ov-file#tools)). This will give you leads on how hard you will have to work on the next topics.
+* Updates: Keep both LLaMA C++ and your libraries updated with the latest security patches.
+* Input Sanitation: Before feeding data to the model, sanitize inputs rigorously. This involves techniques such as:
+    * Validation: Enforce strict rules on allowed characters and data types.
+    * Filtering: Remove potentially malicious scripts or code fragments.
+    * Encoding: Convert special characters into safe representations.
+    * Verification: Run tooling that identifies potential script injections (e.g. [models that detect prompt injection attempts](https://python.langchain.com/docs/guides/safety/hugging_face_prompt_injection)).
+### Data privacy
+To protect sensitive data from potential leaks or unauthorized access, it is crucial to sandbox the model execution. This means running the model in a secure, isolated environment, which helps mitigate many attack vectors.
+### Untrusted environments or networks
+If you can't run your models in a secure and isolated environment or if it must be exposed to an untrusted network, make sure to take the following security precautions:
+* Confirm the hash of any downloaded artifact (e.g. pre-trained model weights) matches a known-good value
+* Encrypt your data if sending it over the network.
+### Multi-Tenant environments
+If you intend to run multiple models in parallel with shared memory, it is your responsibility to ensure the models do not interact or access each other's data. The primary areas of concern are tenant isolation, resource allocation, model sharing and hardware attacks.
+1. Tenant Isolation: Models should run separately with strong isolation methods to prevent unwanted data access. Separating networks is crucial for isolation, as it prevents unauthorized access to data or models and malicious users from sending graphs to execute under another tenant's identity.
+2. Resource Allocation: A denial of service caused by one model can impact the overall system health. Implement safeguards like rate limits, access controls, and health monitoring.
+3. Model Sharing: In a multitenant model sharing design, tenants and users must understand the security risks of running code provided by others. Since there are no reliable methods to detect malicious models, sandboxing the model execution is the recommended approach to mitigate the risk.
+4. Hardware Attacks: GPUs or TPUs can also be attacked. [Research](https://scholar.google.com/scholar?q=gpu+side+channel) has shown that side channel attacks on GPUs are possible, which can make data leak from other models or processes running on the same system at the same time.
+## Reporting a vulnerability
+Beware that none of the topics under [Using llama.cpp securely](#using-llamacpp-securely) are considered vulnerabilities of LLaMA C++.
+<!-- normal version -->
+However, if you have discovered a security vulnerability in this project, please report it privately. **Do not disclose it as a public issue.** This gives us time to work with you to fix the issue before public exposure, reducing the chance that the exploit will be used before a patch is released.
+Please disclose it as a private [security advisory](https://github.com/ggerganov/llama.cpp/security/advisories/new).
+This project is maintained by a team of volunteers on a reasonable-effort basis. As such, please give us at least 90 days to work on a fix before public exposure.
--- a/ci/README.md
+++ b/ci/README.md
+# CI
+In addition to [Github Actions](https://github.com/ggerganov/llama.cpp/actions) `llama.cpp` uses a custom CI framework:
+https://github.com/ggml-org/ci
+It monitors the `master` branch for new commits and runs the
+[ci/run.sh](https://github.com/ggerganov/llama.cpp/blob/master/ci/run.sh) script on dedicated cloud instances. This allows us
+to execute heavier workloads compared to just using Github Actions. Also with time, the cloud instances will be scaled
+to cover various hardware architectures, including GPU and Apple Silicon instances.
+Collaborators can optionally trigger the CI run by adding the `ggml-ci` keyword to their commit message.
+Only the branches of this repo are monitored for this keyword.
+It is good practice to execute the full CI locally on your machine before publishing changes:
+```bash
+mkdir tmp
+# CPU-only build
+bash ./ci/run.sh ./tmp/results ./tmp/mnt
+# with CUDA support
+GG_BUILD_CUDA=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
+# with SYCL support
+source /opt/intel/oneapi/setvars.sh
+GG_BUILD_SYCL=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
+```
--- a/ci/run.sh
+++ b/ci/run.sh
--- a/cmake/arm64-windows-llvm.cmake
+++ b/cmake/arm64-windows-llvm.cmake
+# Toolchain file: target Windows on arm64 using clang / clang++.
+set(CMAKE_SYSTEM_NAME      Windows)
+set(CMAKE_SYSTEM_PROCESSOR arm64)
+# Target triple handed to both the C and the C++ compiler.
+set(target arm64-pc-windows-msvc)
+set(CMAKE_C_COMPILER          clang)
+set(CMAKE_C_COMPILER_TARGET   "${target}")
+set(CMAKE_CXX_COMPILER        clang++)
+set(CMAKE_CXX_COMPILER_TARGET "${target}")
+# Architecture/code-generation flags and suppressed warnings, kept in separate variables.
+set(arch_c_flags "-march=armv8.7-a -fvectorize -ffp-model=fast -fno-finite-math-only")
+set(warn_c_flags "-Wno-format -Wno-unused-variable -Wno-unused-function -Wno-gnu-zero-variadic-macro-arguments")
+# C and C++ start from the same initial flag string.
+set(CMAKE_C_FLAGS_INIT   "${arch_c_flags} ${warn_c_flags}")
+set(CMAKE_CXX_FLAGS_INIT "${CMAKE_C_FLAGS_INIT}")
--- a/cmake/arm64-windows-msvc.cmake
+++ b/cmake/arm64-windows-msvc.cmake
+# Toolchain file: target Windows on arm64; no compiler is selected here.
+set(CMAKE_SYSTEM_NAME      Windows)
+set(CMAKE_SYSTEM_PROCESSOR arm64)
+# Target triple handed to both the C and the C++ compiler.
+set(target arm64-pc-windows-msvc)
+set(CMAKE_C_COMPILER_TARGET   "${target}")
+set(CMAKE_CXX_COMPILER_TARGET "${target}")
--- a/cmake/build-info.cmake
+++ b/cmake/build-info.cmake
+# Default build-info values; the sections that follow overwrite them when the
+# real information can be obtained.
+set(BUILD_NUMBER   0)
+set(BUILD_COMMIT   "unknown")
+set(BUILD_COMPILER "unknown")
+set(BUILD_TARGET   "unknown")
+# Locate git: try the FindGit module first, then search for the executable directly.
+find_package(Git)
+if(NOT Git_FOUND)
+    find_program(GIT_EXECUTABLE NAMES git git.exe)
+    if(NOT GIT_EXECUTABLE)
+        message(WARNING "Git not found. Build info will not be accurate.")
+    else()
+        set(Git_FOUND TRUE)
+        message(STATUS "Found Git: ${GIT_EXECUTABLE}")
+    endif()
+endif()
+# Ask git for the abbreviated HEAD hash (BUILD_COMMIT) and the commit count of
+# HEAD (BUILD_NUMBER). Each value is only overwritten when git exits with 0.
+if(Git_FOUND)
+    execute_process(
+        COMMAND ${GIT_EXECUTABLE} rev-parse --short HEAD
+        WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+        RESULT_VARIABLE RES
+        OUTPUT_VARIABLE HEAD
+        OUTPUT_STRIP_TRAILING_WHITESPACE
+    )
+    if(RES EQUAL 0)
+        set(BUILD_COMMIT ${HEAD})
+    endif()
+    execute_process(
+        COMMAND ${GIT_EXECUTABLE} rev-list --count HEAD
+        WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+        RESULT_VARIABLE RES
+        OUTPUT_VARIABLE COUNT
+        OUTPUT_STRIP_TRAILING_WHITESPACE
+    )
+    if(RES EQUAL 0)
+        set(BUILD_NUMBER ${COUNT})
+    endif()
+endif()
+# Describe the C compiler (BUILD_COMPILER) and the platform it targets
+# (BUILD_TARGET). Note that set(VAR ${EMPTY}) *unsets* VAR, so every assignment
+# from a possibly-empty value is guarded to keep the previously set value.
+if(MSVC)
+    set(BUILD_COMPILER "${CMAKE_C_COMPILER_ID} ${CMAKE_C_COMPILER_VERSION}")
+    # CMAKE_VS_PLATFORM_NAME is only provided by Visual Studio generators; with
+    # other generators (e.g. Ninja) fall back to the system name and processor.
+    if(CMAKE_VS_PLATFORM_NAME)
+        set(BUILD_TARGET "${CMAKE_VS_PLATFORM_NAME}")
+    else()
+        set(BUILD_TARGET "${CMAKE_SYSTEM_NAME} ${CMAKE_SYSTEM_PROCESSOR}")
+    endif()
+else()
+    # First line of the compiler's --version output. The pipeline's exit status
+    # is that of `head`, so success is judged by whether anything was printed.
+    execute_process(
+        COMMAND sh -c "$@ --version | head -1" _ ${CMAKE_C_COMPILER}
+        OUTPUT_VARIABLE OUT
+        OUTPUT_STRIP_TRAILING_WHITESPACE
+    )
+    if(NOT "${OUT}" STREQUAL "")
+        set(BUILD_COMPILER "${OUT}")
+    endif()
+    # Target reported by the compiler itself via -dumpmachine.
+    execute_process(
+        COMMAND ${CMAKE_C_COMPILER} -dumpmachine
+        OUTPUT_VARIABLE OUT
+        OUTPUT_STRIP_TRAILING_WHITESPACE
+        RESULT_VARIABLE RES
+    )
+    if(RES EQUAL 0 AND NOT "${OUT}" STREQUAL "")
+        set(BUILD_TARGET "${OUT}")
+    endif()
+endif()
--- a/cmake/git-vars.cmake
+++ b/cmake/git-vars.cmake
+# Capture the current commit's hash, date and subject from git. Errors are
+# silenced (ERROR_QUIET) and exit codes are not inspected.
+find_package(Git)
+# GIT_SHA1: commit hash abbreviated to 8 characters. The --match pattern is
+# presumably chosen to match no tag, so that --always yields the bare hash.
+execute_process(
+    COMMAND "${GIT_EXECUTABLE}" describe --match=NeVeRmAtCh --always --abbrev=8
+    WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
+    OUTPUT_VARIABLE GIT_SHA1
+    OUTPUT_STRIP_TRAILING_WHITESPACE
+    ERROR_QUIET
+)
+# GIT_DATE: author date of the latest commit, in local time.
+execute_process(
+    COMMAND "${GIT_EXECUTABLE}" log -1 --format=%ad --date=local
+    WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
+    OUTPUT_VARIABLE GIT_DATE
+    OUTPUT_STRIP_TRAILING_WHITESPACE
+    ERROR_QUIET
+)
+# GIT_COMMIT_SUBJECT: subject line of the latest commit.
+execute_process(
+    COMMAND "${GIT_EXECUTABLE}" log -1 --format=%s
+    WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
+    OUTPUT_VARIABLE GIT_COMMIT_SUBJECT
+    OUTPUT_STRIP_TRAILING_WHITESPACE
+    ERROR_QUIET
+)
--- a/cmake/llama-config.cmake.in
+++ b/cmake/llama-config.cmake.in
+# Version and build identification; the @VAR@ placeholders are filled in when
+# this template is configured.
+set(LLAMA_VERSION      @LLAMA_INSTALL_VERSION@)
+set(LLAMA_BUILD_COMMIT @LLAMA_BUILD_COMMIT@)
+set(LLAMA_BUILD_NUMBER @LLAMA_BUILD_NUMBER@)
+set(LLAMA_SHARED_LIB   @BUILD_SHARED_LIBS@)
+# GGML feature switches recorded from the build; they select which
+# dependencies are looked up further down in this file.
+set(GGML_BLAS                 @GGML_BLAS@)
+set(GGML_CUDA                 @GGML_CUDA@)
+set(GGML_METAL                @GGML_METAL@)
+set(GGML_HIPBLAS              @GGML_HIPBLAS@)
+set(GGML_ACCELERATE           @GGML_ACCELERATE@)
+set(GGML_VULKAN               @GGML_VULKAN@)
+set(GGML_VULKAN_CHECK_RESULTS @GGML_VULKAN_CHECK_RESULTS@)
+set(GGML_VULKAN_DEBUG         @GGML_VULKAN_DEBUG@)
+set(GGML_VULKAN_MEMORY_DEBUG  @GGML_VULKAN_MEMORY_DEBUG@)
+set(GGML_VULKAN_VALIDATE      @GGML_VULKAN_VALIDATE@)
+set(GGML_SYCL                 @GGML_SYCL@)
+set(GGML_OPENMP               @GGML_OPENMP@)
+@PACKAGE_INIT@
+# Install locations; set_and_check() is called on each one.
+set_and_check(LLAMA_INCLUDE_DIR "@PACKAGE_LLAMA_INCLUDE_INSTALL_DIR@")
+set_and_check(LLAMA_LIB_DIR     "@PACKAGE_LLAMA_LIB_INSTALL_DIR@")
+set_and_check(LLAMA_BIN_DIR     "@PACKAGE_LLAMA_BIN_INSTALL_DIR@")
+# Ensure transitive dependencies are satisfied: Threads is always required,
+# everything else only when the corresponding GGML feature switch is set.
+find_package(Threads REQUIRED)
+if(APPLE AND GGML_ACCELERATE)
+    find_library(ACCELERATE_FRAMEWORK Accelerate REQUIRED)
+endif()
+if(GGML_BLAS)
+    find_package(BLAS REQUIRED)
+endif()
+if(GGML_CUDA)
+    find_package(CUDAToolkit REQUIRED)
+endif()
+if(GGML_METAL)
+    find_library(FOUNDATION_LIBRARY Foundation REQUIRED)
+    find_library(METAL_FRAMEWORK    Metal      REQUIRED)
+    find_library(METALKIT_FRAMEWORK MetalKit   REQUIRED)
+endif()
+if(GGML_VULKAN)
+    find_package(Vulkan REQUIRED)
+endif()
+if(GGML_HIPBLAS)
+    find_package(hip     REQUIRED)
+    find_package(hipblas REQUIRED)
+    find_package(rocblas REQUIRED)
+endif()
+if(GGML_SYCL)
+    find_package(IntelSYCL REQUIRED)
+    find_package(MKL       REQUIRED)
+endif()
+if(GGML_OPENMP)
+    find_package(OpenMP REQUIRED)
+endif()
+# Locate the ggml and llama libraries, using LLAMA_LIB_DIR as a hint.
+find_library(ggml_LIBRARY  ggml  REQUIRED HINTS ${LLAMA_LIB_DIR})
+find_library(llama_LIBRARY llama REQUIRED HINTS ${LLAMA_LIB_DIR})
+# Link dependencies and compile definitions that consumers of llama inherit.
+set(_llama_link_deps "${ggml_LIBRARY}" "@GGML_LINK_LIBRARIES@")
+set(_llama_transient_defines "@GGML_TRANSIENT_DEFINES@")
+# Expose the library found above as the imported target `llama`.
+add_library(llama UNKNOWN IMPORTED)
+set_target_properties(llama
+    PROPERTIES
+        IMPORTED_LOCATION "${llama_LIBRARY}"
+        IMPORTED_LINK_INTERFACE_LANGUAGES "CXX"
+        INTERFACE_INCLUDE_DIRECTORIES "${LLAMA_INCLUDE_DIR}"
+        INTERFACE_LINK_LIBRARIES "${_llama_link_deps}"
+        INTERFACE_COMPILE_DEFINITIONS "${_llama_transient_defines}"
+        INTERFACE_COMPILE_FEATURES cxx_std_11
+        POSITION_INDEPENDENT_CODE ON )
+check_required_components(Llama)
--- a/cmake/llama.pc.in
+++ b/cmake/llama.pc.in
+prefix=@CMAKE_INSTALL_PREFIX@
+exec_prefix=${prefix}
+libdir=${exec_prefix}/lib
+includedir=${prefix}/include
+Name: llama
+Description: Port of Facebook's LLaMA model in C/C++
+Version: @PROJECT_VERSION@
+Libs: -L${libdir} -lllama
+Cflags: -I${includedir}
--- a/common/CMakeLists.txt
+++ b/common/CMakeLists.txt
--- a/common/base64.hpp
+++ b/common/base64.hpp
--- a/common/build-info.cpp.in
+++ b/common/build-info.cpp.in
+// Build metadata globals; each @...@ placeholder is replaced when this
+// template is configured.
+int          LLAMA_BUILD_NUMBER = @BUILD_NUMBER@;
+const char * LLAMA_COMMIT       = "@BUILD_COMMIT@";
+const char * LLAMA_COMPILER     = "@BUILD_COMPILER@";
+const char * LLAMA_BUILD_TARGET = "@BUILD_TARGET@";
--- a/common/common.cpp
+++ b/common/common.cpp
--- a/common/common.h
+++ b/common/common.h