Commit 0ce8bcfd authored by xuxzh1

init

parent b0135f4b
ARG UBUNTU_VERSION=22.04

FROM ubuntu:$UBUNTU_VERSION AS build

RUN apt-get update && \
    apt-get install -y build-essential git

WORKDIR /app

COPY . .

RUN make -j$(nproc) llama-cli

FROM ubuntu:$UBUNTU_VERSION AS runtime

RUN apt-get update && \
    apt-get install -y libgomp1

COPY --from=build /app/llama-cli /llama-cli

ENV LC_ALL=C.utf8

ENTRYPOINT [ "/llama-cli" ]
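A quick build-and-run sketch for this image; the tag, Dockerfile path, model mount, and prompt below are hypothetical examples rather than names used by the project:

# Hedged usage sketch -- adjust the Dockerfile path, tag, and model path for your setup.
docker build -t llama-cpp-cli -f <path-to-this-Dockerfile> .
docker run --rm -v /path/to/models:/models llama-cpp-cli -m /models/model.gguf -p "Hello"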
@@ -32,13 +32,13 @@ CPU inference for Meta's Lllama2 models using default options.
 %setup -n llama.cpp-master
 %build
-make -j LLAMA_CUDA=1
+make -j GGML_CUDA=1
 %install
 mkdir -p %{buildroot}%{_bindir}/
-cp -p main %{buildroot}%{_bindir}/llamacppcuda
-cp -p server %{buildroot}%{_bindir}/llamacppcudaserver
-cp -p simple %{buildroot}%{_bindir}/llamacppcudasimple
+cp -p llama-cli %{buildroot}%{_bindir}/llama-cuda-cli
+cp -p llama-server %{buildroot}%{_bindir}/llama-cuda-server
+cp -p llama-simple %{buildroot}%{_bindir}/llama-cuda-simple
 mkdir -p %{buildroot}/usr/lib/systemd/system
 %{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/llamacuda.service
@@ -49,7 +49,7 @@ After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.t
 [Service]
 Type=simple
 EnvironmentFile=/etc/sysconfig/llama
-ExecStart=/usr/bin/llamacppcudaserver $LLAMA_ARGS
+ExecStart=/usr/bin/llama-cuda-server $LLAMA_ARGS
 ExecReload=/bin/kill -s HUP $MAINPID
 Restart=never
@@ -67,9 +67,9 @@ rm -rf %{buildroot}
 rm -rf %{_builddir}/*
 %files
-%{_bindir}/llamacppcuda
-%{_bindir}/llamacppcudaserver
-%{_bindir}/llamacppcudasimple
+%{_bindir}/llama-cuda-cli
+%{_bindir}/llama-cuda-server
+%{_bindir}/llama-cuda-simple
 /usr/lib/systemd/system/llamacuda.service
 %config /etc/sysconfig/llama
......
@@ -38,9 +38,9 @@ make -j
 %install
 mkdir -p %{buildroot}%{_bindir}/
-cp -p main %{buildroot}%{_bindir}/llama
-cp -p server %{buildroot}%{_bindir}/llamaserver
-cp -p simple %{buildroot}%{_bindir}/llamasimple
+cp -p llama-cli %{buildroot}%{_bindir}/llama-cli
+cp -p llama-server %{buildroot}%{_bindir}/llama-server
+cp -p llama-simple %{buildroot}%{_bindir}/llama-simple
 mkdir -p %{buildroot}/usr/lib/systemd/system
 %{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/llama.service
@@ -51,7 +51,7 @@ After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.t
 [Service]
 Type=simple
 EnvironmentFile=/etc/sysconfig/llama
-ExecStart=/usr/bin/llamaserver $LLAMA_ARGS
+ExecStart=/usr/bin/llama-server $LLAMA_ARGS
 ExecReload=/bin/kill -s HUP $MAINPID
 Restart=never
@@ -69,9 +69,9 @@ rm -rf %{buildroot}
 rm -rf %{_builddir}/*
 %files
-%{_bindir}/llama
-%{_bindir}/llamaserver
-%{_bindir}/llamasimple
+%{_bindir}/llama-cli
+%{_bindir}/llama-server
+%{_bindir}/llama-simple
 /usr/lib/systemd/system/llama.service
 %config /etc/sysconfig/llama
......
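Both generated unit files start the server through EnvironmentFile=/etc/sysconfig/llama, so $LLAMA_ARGS has to be defined there before the service can run. A hedged example of that file; the model path and flags are illustrative placeholders, not part of the spec:

# /etc/sysconfig/llama -- illustrative only; point -m at a real GGUF model.
LLAMA_ARGS="-m /var/lib/llama/model.gguf --host 0.0.0.0 --port 8080"
# Then, after installing the package: systemctl daemon-reload && systemctl enable --now llama.service
# (or llamacuda.service for the CUDA build).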
ARG UBUNTU_VERSION=22.04
# This needs to generally match the container host's environment.
ARG CUDA_VERSION=11.7.1
# Target the CUDA build image
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
# Target the CUDA runtime image
ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}

FROM ${BASE_CUDA_DEV_CONTAINER} AS build

# Unless otherwise specified, we make a fat build.
ARG CUDA_DOCKER_ARCH=all

RUN apt-get update && \
    apt-get install -y build-essential git libcurl4-openssl-dev

WORKDIR /app

COPY . .

# Set nvcc architecture
ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
# Enable CUDA
ENV GGML_CUDA=1
# Enable cURL
ENV LLAMA_CURL=1

RUN make -j$(nproc) llama-server

FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime

RUN apt-get update && \
    apt-get install -y libcurl4-openssl-dev libgomp1 curl

COPY --from=build /app/llama-server /llama-server

HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/llama-server" ]
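Running the resulting image needs GPU access from the host; a hedged sketch assuming the NVIDIA Container Toolkit is installed (the tag and model path are placeholders). CUDA_DOCKER_ARCH can also be narrowed with --build-arg to shorten the fat build:

# Hedged usage sketch -- requires the NVIDIA Container Toolkit on the host; names are illustrative.
docker build -t llama-cpp-server-cuda -f <path-to-this-Dockerfile> .
docker run --rm --gpus all -p 8080:8080 -v /path/to/models:/models \
    llama-cpp-server-cuda -m /models/model.gguf --host 0.0.0.0 --port 8080 -ngl 99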
ARG ONEAPI_VERSION=2024.1.1-devel-ubuntu22.04

FROM intel/oneapi-basekit:$ONEAPI_VERSION AS build

ARG GGML_SYCL_F16=OFF

RUN apt-get update && \
    apt-get install -y git libcurl4-openssl-dev

WORKDIR /app

COPY . .

RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \
        echo "GGML_SYCL_F16 is set" && \
        export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \
    fi && \
    echo "Building with dynamic libs" && \
    cmake -B build -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON ${OPT_SYCL_F16} && \
    cmake --build build --config Release --target llama-server

FROM intel/oneapi-basekit:$ONEAPI_VERSION AS runtime

RUN apt-get update && \
    apt-get install -y libcurl4-openssl-dev curl

COPY --from=build /app/build/bin/llama-server /llama-server

ENV LC_ALL=C.utf8

HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/llama-server" ]
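FP16 support is opt-in here through the GGML_SYCL_F16 build argument, and an Intel GPU is usually exposed to the container via /dev/dri; a hedged sketch with a placeholder tag and model path:

# Hedged usage sketch -- tag and model path are illustrative only.
docker build -t llama-cpp-server-sycl --build-arg GGML_SYCL_F16=ON -f <path-to-this-Dockerfile> .
docker run --rm --device /dev/dri -p 8080:8080 -v /path/to/models:/models \
    llama-cpp-server-sycl -m /models/model.gguf --host 0.0.0.0 --port 8080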
ARG UBUNTU_VERSION=22.04
# This needs to generally match the container host's environment.
ARG ROCM_VERSION=5.6
# Target the ROCm build image
ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete

FROM ${BASE_ROCM_DEV_CONTAINER} AS build

# Unless otherwise specified, we make a fat build.
# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
# This is mostly tied to rocBLAS supported archs.
ARG ROCM_DOCKER_ARCH=\
    gfx803 \
    gfx900 \
    gfx906 \
    gfx908 \
    gfx90a \
    gfx1010 \
    gfx1030 \
    gfx1100 \
    gfx1101 \
    gfx1102

COPY requirements.txt requirements.txt
COPY requirements requirements

RUN pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt

WORKDIR /app

COPY . .

# Set the GPU targets for the HIP/ROCm build
ENV GPU_TARGETS=${ROCM_DOCKER_ARCH}
# Enable ROCm
ENV GGML_HIPBLAS=1
ENV CC=/opt/rocm/llvm/bin/clang
ENV CXX=/opt/rocm/llvm/bin/clang++
# Enable cURL
ENV LLAMA_CURL=1

RUN apt-get update && \
    apt-get install -y libcurl4-openssl-dev curl

RUN make -j$(nproc) llama-server

HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/app/llama-server" ]
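ROCm containers generally need the host's KFD and DRI devices plus membership in the video group; a hedged run sketch where the tag and model path are illustrative:

# Hedged usage sketch -- device flags follow the usual ROCm container conventions.
docker run --rm --device /dev/kfd --device /dev/dri --group-add video \
    -p 8080:8080 -v /path/to/models:/models \
    llama-cpp-server-rocm -m /models/model.gguf --host 0.0.0.0 --port 8080 -ngl 99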
ARG UBUNTU_VERSION=jammy

FROM ubuntu:$UBUNTU_VERSION AS build

# Install build tools
RUN apt update && apt install -y git build-essential cmake wget

# Install Vulkan SDK and cURL
RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
    wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
    apt update -y && \
    apt-get install -y vulkan-sdk libcurl4-openssl-dev curl

# Build it
WORKDIR /app

COPY . .

RUN cmake -B build -DGGML_VULKAN=1 -DLLAMA_CURL=1 && \
    cmake --build build --config Release --target llama-server

# Clean up
WORKDIR /

RUN cp /app/build/bin/llama-server /llama-server && \
    rm -rf /app

ENV LC_ALL=C.utf8

HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/llama-server" ]
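Because the build stage is also the final image, the Vulkan SDK tools installed above remain available at runtime, which makes it easy to confirm the container actually sees a GPU before starting the server; a hedged sketch with a placeholder tag and model path:

# Hedged sketch -- vulkaninfo comes from the vulkan-sdk package installed above.
docker run --rm --device /dev/dri --entrypoint vulkaninfo llama-cpp-server-vulkan --summary
docker run --rm --device /dev/dri -p 8080:8080 -v /path/to/models:/models \
    llama-cpp-server-vulkan -m /models/model.gguf --host 0.0.0.0 --port 8080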
ARG UBUNTU_VERSION=22.04

FROM ubuntu:$UBUNTU_VERSION AS build

RUN apt-get update && \
    apt-get install -y build-essential git libcurl4-openssl-dev

WORKDIR /app

COPY . .

ENV LLAMA_CURL=1

RUN make -j$(nproc) llama-server

FROM ubuntu:$UBUNTU_VERSION AS runtime

RUN apt-get update && \
    apt-get install -y libcurl4-openssl-dev libgomp1 curl

COPY --from=build /app/llama-server /llama-server

ENV LC_ALL=C.utf8

HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/llama-server" ]
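The HEALTHCHECK above polls the server's /health endpoint, and Docker records the result in the container state; a hedged sketch of checking it by hand, with a placeholder tag and model path:

# Hedged usage sketch -- mirrors the HEALTHCHECK command defined above.
docker run -d --name llama-server -p 8080:8080 -v /path/to/models:/models \
    llama-cpp-server -m /models/model.gguf --host 0.0.0.0 --port 8080
curl -f http://localhost:8080/health
docker inspect --format '{{.State.Health.Status}}' llama-server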
@@ -6,11 +6,10 @@
 let
 inherit (config.packages) default;
 binaries = [
-"llama"
+"llama-cli"
 "llama-embedding"
 "llama-server"
-"quantize"
-"train-text-from-scratch"
+"llama-quantize"
 ];
 mkApp = name: {
 type = "app";
......
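Each entry in binaries becomes a flake app via mkApp, so the renamed tools can be launched straight from the flake. A hedged sketch, assuming flakes are enabled and using an illustrative model path:

# Hedged sketch -- assumes the flake's apps follow the binary names listed above.
nix run .#llama-cli -- -m /path/to/model.gguf -p "Hello"
nix run .#llama-server -- -m /path/to/model.gguf --port 8080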
@@ -17,19 +17,19 @@
 rocmPackages,
 vulkan-headers,
 vulkan-loader,
-clblast,
+curl,
+shaderc,
 useBlas ? builtins.all (x: !x) [
 useCuda
 useMetalKit
-useOpenCL
 useRocm
 useVulkan
 ] && blas.meta.available,
 useCuda ? config.cudaSupport,
-useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin && !useOpenCL,
+useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin,
 useMpi ? false, # Increases the runtime closure size by ~700M
-useOpenCL ? false,
 useRocm ? config.rocmSupport,
+enableCurl ? true,
 useVulkan ? false,
 llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake
@@ -56,7 +56,6 @@ let
 ++ lib.optionals useCuda [ "CUDA" ]
 ++ lib.optionals useMetalKit [ "MetalKit" ]
 ++ lib.optionals useMpi [ "MPI" ]
-++ lib.optionals useOpenCL [ "OpenCL" ]
 ++ lib.optionals useRocm [ "ROCm" ]
 ++ lib.optionals useVulkan [ "Vulkan" ];
@@ -91,6 +90,22 @@ let
 ps.tiktoken
 ps.torchWithoutCuda
 ps.transformers
+# server bench
+ps.matplotlib
+# server tests
+ps.openai
+ps.behave
+ps.prometheus-client
+# for examples/pydantic-models-to-grammar-examples.py
+ps.docstring-parser
+ps.pydantic
+# for scripts/compare-llama-bench.py
+ps.gitpython
+ps.tabulate
 ]
 );
@@ -111,16 +126,9 @@ let
 ++ optionals useMetalKit [ MetalKit ];
 cudaBuildInputs = with cudaPackages; [
-cuda_cccl.dev # <nv/target>
-# A temporary hack for reducing the closure size, remove once cudaPackages
-# have stopped using lndir: https://github.com/NixOS/nixpkgs/issues/271792
-cuda_cudart.dev
-cuda_cudart.lib
-cuda_cudart.static
-libcublas.dev
-libcublas.lib
-libcublas.static
+cuda_cudart
+cuda_cccl # <nv/target>
+libcublas
 ];
 rocmBuildInputs = with rocmPackages; [
@@ -132,6 +140,7 @@ let
 vulkanBuildInputs = [
 vulkan-headers
 vulkan-loader
+shaderc
 ];
 in
@@ -160,9 +169,9 @@ effectiveStdenv.mkDerivation (
 };
 postPatch = ''
-substituteInPlace ./ggml-metal.m \
+substituteInPlace ./ggml/src/ggml-metal.m \
 --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
-substituteInPlace ./ggml-metal.m \
+substituteInPlace ./ggml/src/ggml-metal.m \
 --replace '[bundle pathForResource:@"default" ofType:@"metallib"];' "@\"$out/bin/default.metallib\";"
 '';
@@ -198,24 +207,24 @@ effectiveStdenv.mkDerivation (
 optionals effectiveStdenv.isDarwin darwinBuildInputs
 ++ optionals useCuda cudaBuildInputs
 ++ optionals useMpi [ mpi ]
-++ optionals useOpenCL [ clblast ]
 ++ optionals useRocm rocmBuildInputs
 ++ optionals useBlas [ blas ]
-++ optionals useVulkan vulkanBuildInputs;
+++ optionals useVulkan vulkanBuildInputs
+++ optionals enableCurl [ curl ];
 cmakeFlags =
 [
-(cmakeBool "LLAMA_NATIVE" false)
 (cmakeBool "LLAMA_BUILD_SERVER" true)
 (cmakeBool "BUILD_SHARED_LIBS" (!enableStatic))
 (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
-(cmakeBool "LLAMA_BLAS" useBlas)
-(cmakeBool "LLAMA_CLBLAST" useOpenCL)
-(cmakeBool "LLAMA_CUDA" useCuda)
-(cmakeBool "LLAMA_HIPBLAS" useRocm)
-(cmakeBool "LLAMA_METAL" useMetalKit)
-(cmakeBool "LLAMA_VULKAN" useVulkan)
-(cmakeBool "LLAMA_STATIC" enableStatic)
+(cmakeBool "LLAMA_CURL" enableCurl)
+(cmakeBool "GGML_NATIVE" false)
+(cmakeBool "GGML_BLAS" useBlas)
+(cmakeBool "GGML_CUDA" useCuda)
+(cmakeBool "GGML_HIPBLAS" useRocm)
+(cmakeBool "GGML_METAL" useMetalKit)
+(cmakeBool "GGML_VULKAN" useVulkan)
+(cmakeBool "GGML_STATIC" enableStatic)
 ]
 ++ optionals useCuda [
 (
@@ -231,7 +240,7 @@ effectiveStdenv.mkDerivation (
 ]
 ++ optionals useMetalKit [
 (lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
-(cmakeBool "LLAMA_METAL_EMBED_LIBRARY" (!precompileMetalShaders))
+(cmakeBool "GGML_METAL_EMBED_LIBRARY" (!precompileMetalShaders))
 ];
 # Environment variables needed for ROCm
@@ -243,10 +252,8 @@ effectiveStdenv.mkDerivation (
 # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level,
 # if they haven't been added yet.
 postInstall = ''
-mv $out/bin/main${executableSuffix} $out/bin/llama${executableSuffix}
-mv $out/bin/server${executableSuffix} $out/bin/llama-server${executableSuffix}
 mkdir -p $out/include
-cp $src/llama.h $out/include/
+cp $src/include/llama.h $out/include/
 '';
 # Define the shells here, but don't add in the inputsFrom to avoid recursion.
@@ -256,7 +263,6 @@ effectiveStdenv.mkDerivation (
 useCuda
 useMetalKit
 useMpi
-useOpenCL
 useRocm
 useVulkan
 ;
@@ -283,7 +289,7 @@ effectiveStdenv.mkDerivation (
 # Configurations we don't want even the CI to evaluate. Results in the
 # "unsupported platform" messages. This is mostly a no-op, because
 # cudaPackages would've refused to evaluate anyway.
-badPlatforms = optionals (useCuda || useOpenCL) lib.platforms.darwin;
+badPlatforms = optionals useCuda lib.platforms.darwin;
 # Configurations that are known to result in build failures. Can be
 # overridden by importing Nixpkgs with `allowBroken = true`.
@@ -294,7 +300,7 @@ effectiveStdenv.mkDerivation (
 license = lib.licenses.mit;
 # Accommodates `nix run` and `lib.getExe`
-mainProgram = "llama";
+mainProgram = "llama-cli";
 # These people might respond, on the best effort basis, if you ping them
 # in case of Nix-specific regressions or for reviewing Nix-specific PRs.
......
@@ -8,13 +8,11 @@ arg1="$1"
 shift
 if [[ "$arg1" == '--convert' || "$arg1" == '-c' ]]; then
-python3 ./convert-hf-to-gguf.py "$@"
+python3 ./convert_hf_to_gguf.py "$@"
 elif [[ "$arg1" == '--quantize' || "$arg1" == '-q' ]]; then
-./quantize "$@"
+./llama-quantize "$@"
 elif [[ "$arg1" == '--run' || "$arg1" == '-r' ]]; then
-./main "$@"
-elif [[ "$arg1" == '--finetune' || "$arg1" == '-f' ]]; then
-./finetune "$@"
+./llama-cli "$@"
 elif [[ "$arg1" == '--all-in-one' || "$arg1" == '-a' ]]; then
 echo "Converting PTH to GGML..."
 for i in `ls $1/$2/ggml-model-f16.bin*`; do
@@ -22,11 +20,11 @@ elif [[ "$arg1" == '--all-in-one' || "$arg1" == '-a' ]]; then
 echo "Skip model quantization, it already exists: ${i/f16/q4_0}"
 else
 echo "Converting PTH to GGML: $i into ${i/f16/q4_0}..."
-./quantize "$i" "${i/f16/q4_0}" q4_0
+./llama-quantize "$i" "${i/f16/q4_0}" q4_0
 fi
 done
 elif [[ "$arg1" == '--server' || "$arg1" == '-s' ]]; then
-./server "$@"
+./llama-server "$@"
 else
 echo "Unknown command: $arg1"
 echo "Available commands: "
@@ -36,8 +34,6 @@ else
 echo " ex: --outtype f16 \"/models/7B/\" "
 echo " --quantize (-q): Optimize with quantization process ggml"
 echo " ex: \"/models/7B/ggml-model-f16.bin\" \"/models/7B/ggml-model-q4_0.bin\" 2"
-echo " --finetune (-f): Run finetune command to create a lora finetune of the model"
-echo " See documentation for finetune for command-line parameters"
 echo " --all-in-one (-a): Execute --convert & --quantize"
 echo " ex: \"/models/\" 7B"
 echo " --server (-s): Run a model on the server"
......
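In the upstream images this script is typically wired up as the container entrypoint of the "full" image variants, so each subcommand is reached through docker run; a hedged sketch where the image name and model paths are placeholders:

# Hedged usage sketch -- assumes the script is the image entrypoint, as in the upstream full images.
docker run --rm -v /path/to/models:/models <full-image> --convert --outtype f16 /models/7B/
docker run --rm -v /path/to/models:/models <full-image> --run -m /models/7B/ggml-model-q4_0.gguf -p "Hello"
docker run --rm -p 8080:8080 -v /path/to/models:/models <full-image> --server -m /models/7B/ggml-model-q4_0.gguf --host 0.0.0.0 --port 8080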
@@ -12,8 +12,8 @@ build*/
 models/*
-/main
-/quantize
+/llama-cli
+/llama-quantize
 arm_neon.h
 compile_commands.json
......
@@ -26,3 +26,7 @@ indent_size = 2
 [examples/llama.swiftui/llama.swiftui.xcodeproj/*]
 indent_style = tab
+[examples/cvector-generator/*.txt]
+trim_trailing_whitespace = unset
+insert_final_newline = unset
@@ -24,7 +24,7 @@ body:
 label: Name and Version
 description: Which executable and which version of our software are you running? (use `--version` to get a version string)
 placeholder: |
-$./main --version
+$./llama-cli --version
 version: 2999 (42b4109e)
 built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
 validations:
......
@@ -24,7 +24,7 @@ body:
 label: Name and Version
 description: Which executable and which version of our software are you running? (use `--version` to get a version string)
 placeholder: |
-$./main --version
+$./llama-cli --version
 version: 2999 (42b4109e)
 built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
 validations:
......
@@ -24,7 +24,7 @@ body:
 label: Name and Version
 description: Which executable and which version of our software are you running? (use `--version` to get a version string)
 placeholder: |
-$./main --version
+$./llama-cli --version
 version: 2999 (42b4109e)
 built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
 validations:
......
@@ -24,7 +24,7 @@ body:
 label: Name and Version
 description: Which executable and which version of our software are you running? (use `--version` to get a version string)
 placeholder: |
-$./main --version
+$./llama-cli --version
 version: 2999 (42b4109e)
 built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
 validations:
......
@@ -9,5 +9,3 @@ contact_links:
 - name: Want to contribute?
 url: https://github.com/ggerganov/llama.cpp/wiki/contribute
 about: Head to the contribution guide page of the wiki for areas you can help with
@@ -2,31 +2,33 @@
 Kompute:
 - changed-files:
 - any-glob-to-any-file:
-- ggml-kompute.h
-- ggml-kompute.cpp
+- ggml/include/ggml-kompute.h
+- ggml/src/ggml-kompute.cpp
 - README-kompute.md
 Apple Metal:
 - changed-files:
 - any-glob-to-any-file:
-- ggml-metal.h
-- ggml-metal.cpp
+- ggml/include/ggml-metal.h
+- ggml/src/ggml-metal.cpp
 - README-metal.md
 SYCL:
 - changed-files:
 - any-glob-to-any-file:
-- ggml-sycl.h
-- ggml-sycl.cpp
-- README-sycl.md
+- ggml/include/ggml-sycl.h
+- ggml/src/ggml-sycl.cpp
+- ggml/src/ggml-sycl/**
+- docs/backend/SYCL.md
+- examples/sycl/**
 Nvidia GPU:
 - changed-files:
 - any-glob-to-any-file:
-- ggml-cuda.h
-- ggml-cuda/**
+- ggml/include/ggml-cuda.h
+- ggml/src/ggml-cuda/**
 Vulkan:
 - changed-files:
 - any-glob-to-any-file:
-- ggml_vk_generate_shaders.py
-- ggml-vulkan*
+- ggml/ggml_vk_generate_shaders.py
+- ggml/src/ggml-vulkan*
 documentation:
 - changed-files:
 - any-glob-to-any-file:
@@ -42,7 +44,6 @@ build:
 - cmake/**
 - CMakeLists.txt
 - CMakePresets.json
-- codecov.yml
 examples:
 - changed-files:
 - any-glob-to-any-file: examples/**
@@ -74,10 +75,10 @@ server:
 ggml:
 - changed-files:
 - any-glob-to-any-file:
-- ggml.c
-- ggml.h
-- ggml-*.c
-- ggml-*.h
+- ggml/include/ggml*.h
+- ggml/src/ggml*.c
+- ggml/src/ggml*.cpp
+- ggml/src/ggml*.h
 - ggml-cuda/**
 nix:
 - changed-files:
......
- [x] I have read the [contributing guidelines](https://github.com/ggerganov/llama.cpp/blob/master/CONTRIBUTING.md)
- Self-reported review complexity:
  - [ ] Low
  - [ ] Medium
  - [ ] High