OpenDAS / ollama

Commit 0ce8bcfd
authored Nov 12, 2024 by xuxzh1

init

parent b0135f4b
Changes: 361 files. Showing 20 changed files with 298 additions and 85 deletions (+298, -85):
llm/llama.cpp/.devops/llama-cli.Dockerfile                  +23   -0
llm/llama.cpp/.devops/llama-cpp-cuda.srpm.spec               +8   -8
llm/llama.cpp/.devops/llama-cpp.srpm.spec                    +7   -7
llm/llama.cpp/.devops/llama-server-cuda.Dockerfile          +39   -0
llm/llama.cpp/.devops/llama-server-intel.Dockerfile         +32   -0
llm/llama.cpp/.devops/llama-server-rocm.Dockerfile          +52   -0
llm/llama.cpp/.devops/llama-server-vulkan.Dockerfile        +29   -0
llm/llama.cpp/.devops/llama-server.Dockerfile               +27   -0
llm/llama.cpp/.devops/nix/apps.nix                           +2   -3
llm/llama.cpp/.devops/nix/package.nix                       +40  -34
llm/llama.cpp/.devops/tools.sh                               +5   -9
llm/llama.cpp/.dockerignore                                  +2   -2
llm/llama.cpp/.editorconfig                                  +4   -0
llm/llama.cpp/.github/ISSUE_TEMPLATE/01-bug-low.yml          +1   -1
llm/llama.cpp/.github/ISSUE_TEMPLATE/02-bug-medium.yml       +1   -1
llm/llama.cpp/.github/ISSUE_TEMPLATE/03-bug-high.yml         +1   -1
llm/llama.cpp/.github/ISSUE_TEMPLATE/04-bug-critical.yml     +1   -1
llm/llama.cpp/.github/ISSUE_TEMPLATE/config.yml              +0   -2
llm/llama.cpp/.github/labeler.yml                           +17  -16
llm/llama.cpp/.github/pull_request_template.md               +7   -0
llm/llama.cpp/.devops/llama-cli.Dockerfile (new file, mode 100644)

ARG UBUNTU_VERSION=22.04

FROM ubuntu:$UBUNTU_VERSION AS build

RUN apt-get update && \
    apt-get install -y build-essential git

WORKDIR /app

COPY . .

RUN make -j$(nproc) llama-cli

FROM ubuntu:$UBUNTU_VERSION AS runtime

RUN apt-get update && \
    apt-get install -y libgomp1

COPY --from=build /app/llama-cli /llama-cli

ENV LC_ALL=C.utf8

ENTRYPOINT [ "/llama-cli" ]
llm/llama.cpp/.devops/llama-cpp-cuda.srpm.spec

@@ -32,13 +32,13 @@ CPU inference for Meta's Lllama2 models using default options.
 %setup -n llama.cpp-master

 %build
-make -j LLAMA_CUDA=1
+make -j GGML_CUDA=1

 %install
 mkdir -p %{buildroot}%{_bindir}/
-cp -p main %{buildroot}%{_bindir}/llamacppcuda
-cp -p server %{buildroot}%{_bindir}/llamacppcudaserver
-cp -p simple %{buildroot}%{_bindir}/llamacppcudasimple
+cp -p llama-cli %{buildroot}%{_bindir}/llama-cuda-cli
+cp -p llama-server %{buildroot}%{_bindir}/llama-cuda-server
+cp -p llama-simple %{buildroot}%{_bindir}/llama-cuda-simple

 mkdir -p %{buildroot}/usr/lib/systemd/system
 %{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/llamacuda.service
@@ -49,7 +49,7 @@ After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.t
 [Service]
 Type=simple
 EnvironmentFile=/etc/sysconfig/llama
-ExecStart=/usr/bin/llamacppcudaserver $LLAMA_ARGS
+ExecStart=/usr/bin/llama-cuda-server $LLAMA_ARGS
 ExecReload=/bin/kill -s HUP $MAINPID
 Restart=never
@@ -67,9 +67,9 @@ rm -rf %{buildroot}
 rm -rf %{_builddir}/*

 %files
-%{_bindir}/llamacppcuda
-%{_bindir}/llamacppcudaserver
-%{_bindir}/llamacppcudasimple
+%{_bindir}/llama-cuda-cli
+%{_bindir}/llama-cuda-server
+%{_bindir}/llama-cuda-simple
 /usr/lib/systemd/system/llamacuda.service
 %config /etc/sysconfig/llama
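
As a hedged sketch of how the renamed CUDA server binary and unit installed by this spec might then be configured and started (the model path and server flags inside LLAMA_ARGS are illustrative assumptions):

# llamacuda.service reads LLAMA_ARGS from the EnvironmentFile declared above
echo 'LLAMA_ARGS="-m /var/lib/llama/model.gguf --host 0.0.0.0 --port 8080"' | sudo tee /etc/sysconfig/llama

# Reload units and start the service; ExecStart now points at /usr/bin/llama-cuda-server
sudo systemctl daemon-reload
sudo systemctl start llamacuda.service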
llm/llama.cpp/.devops/llama-cpp.srpm.spec

@@ -38,9 +38,9 @@ make -j

 %install
 mkdir -p %{buildroot}%{_bindir}/
-cp -p main %{buildroot}%{_bindir}/llama
-cp -p server %{buildroot}%{_bindir}/llamaserver
-cp -p simple %{buildroot}%{_bindir}/llamasimple
+cp -p llama-cli %{buildroot}%{_bindir}/llama-cli
+cp -p llama-server %{buildroot}%{_bindir}/llama-server
+cp -p llama-simple %{buildroot}%{_bindir}/llama-simple

 mkdir -p %{buildroot}/usr/lib/systemd/system
 %{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/llama.service
@@ -51,7 +51,7 @@ After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.t
 [Service]
 Type=simple
 EnvironmentFile=/etc/sysconfig/llama
-ExecStart=/usr/bin/llamaserver $LLAMA_ARGS
+ExecStart=/usr/bin/llama-server $LLAMA_ARGS
 ExecReload=/bin/kill -s HUP $MAINPID
 Restart=never
@@ -69,9 +69,9 @@ rm -rf %{buildroot}
 rm -rf %{_builddir}/*

 %files
-%{_bindir}/llama
-%{_bindir}/llamaserver
-%{_bindir}/llamasimple
+%{_bindir}/llama-cli
+%{_bindir}/llama-server
+%{_bindir}/llama-simple
 /usr/lib/systemd/system/llama.service
 %config /etc/sysconfig/llama
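
For reference, a spec file like this would normally be turned into packages with rpmbuild; a minimal sketch, assuming a standard ~/rpmbuild layout with the spec under SPECS and the source tarball under SOURCES:

# Build both the binary and source RPMs from the spec
rpmbuild -ba ~/rpmbuild/SPECS/llama-cpp.srpm.spec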
llm/llama.cpp/.devops/llama-server-cuda.Dockerfile (new file, mode 100644)

ARG UBUNTU_VERSION=22.04
# This needs to generally match the container host's environment.
ARG CUDA_VERSION=11.7.1
# Target the CUDA build image
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
# Target the CUDA runtime image
ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}

FROM ${BASE_CUDA_DEV_CONTAINER} AS build

# Unless otherwise specified, we make a fat build.
ARG CUDA_DOCKER_ARCH=all

RUN apt-get update && \
    apt-get install -y build-essential git libcurl4-openssl-dev

WORKDIR /app

COPY . .

# Set nvcc architecture
ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
# Enable CUDA
ENV GGML_CUDA=1
# Enable cURL
ENV LLAMA_CURL=1

RUN make -j$(nproc) llama-server

FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime

RUN apt-get update && \
    apt-get install -y libcurl4-openssl-dev libgomp1 curl

COPY --from=build /app/llama-server /llama-server

HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/llama-server" ]
llm/llama.cpp/.devops/llama-server-intel.Dockerfile (new file, mode 100644)

ARG ONEAPI_VERSION=2024.1.1-devel-ubuntu22.04

FROM intel/oneapi-basekit:$ONEAPI_VERSION AS build

ARG GGML_SYCL_F16=OFF

RUN apt-get update && \
    apt-get install -y git libcurl4-openssl-dev

WORKDIR /app

COPY . .

RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \
        echo "GGML_SYCL_F16 is set" && \
        export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \
    fi && \
    echo "Building with dynamic libs" && \
    cmake -B build -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON ${OPT_SYCL_F16} && \
    cmake --build build --config Release --target llama-server

FROM intel/oneapi-basekit:$ONEAPI_VERSION AS runtime

RUN apt-get update && \
    apt-get install -y libcurl4-openssl-dev curl

COPY --from=build /app/build/bin/llama-server /llama-server

ENV LC_ALL=C.utf8

HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/llama-server" ]
llm/llama.cpp/.devops/llama-server-rocm.Dockerfile (new file, mode 100644)

ARG UBUNTU_VERSION=22.04

# This needs to generally match the container host's environment.
ARG ROCM_VERSION=5.6

# Target the CUDA build image
ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete

FROM ${BASE_ROCM_DEV_CONTAINER} AS build

# Unless otherwise specified, we make a fat build.
# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
# This is mostly tied to rocBLAS supported archs.
ARG ROCM_DOCKER_ARCH=\
    gfx803 \
    gfx900 \
    gfx906 \
    gfx908 \
    gfx90a \
    gfx1010 \
    gfx1030 \
    gfx1100 \
    gfx1101 \
    gfx1102

COPY requirements.txt requirements.txt
COPY requirements requirements

RUN pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt

WORKDIR /app

COPY . .

# Set nvcc architecture
ENV GPU_TARGETS=${ROCM_DOCKER_ARCH}
# Enable ROCm
ENV GGML_HIPBLAS=1
ENV CC=/opt/rocm/llvm/bin/clang
ENV CXX=/opt/rocm/llvm/bin/clang++

# Enable cURL
ENV LLAMA_CURL=1

RUN apt-get update && \
    apt-get install -y libcurl4-openssl-dev curl

RUN make -j$(nproc) llama-server

HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/app/llama-server" ]
llm/llama.cpp/.devops/llama-server-vulkan.Dockerfile (new file, mode 100644)

ARG UBUNTU_VERSION=jammy

FROM ubuntu:$UBUNTU_VERSION AS build

# Install build tools
RUN apt update && apt install -y git build-essential cmake wget

# Install Vulkan SDK and cURL
RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
    wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
    apt update -y && \
    apt-get install -y vulkan-sdk libcurl4-openssl-dev curl

# Build it
WORKDIR /app

COPY . .

RUN cmake -B build -DGGML_VULKAN=1 -DLLAMA_CURL=1 && \
    cmake --build build --config Release --target llama-server

# Clean up
WORKDIR /

RUN cp /app/build/bin/llama-server /llama-server && \
    rm -rf /app

ENV LC_ALL=C.utf8

HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/llama-server" ]
llm/llama.cpp/.devops/llama-server.Dockerfile (new file, mode 100644)

ARG UBUNTU_VERSION=22.04

FROM ubuntu:$UBUNTU_VERSION AS build

RUN apt-get update && \
    apt-get install -y build-essential git libcurl4-openssl-dev

WORKDIR /app

COPY . .

ENV LLAMA_CURL=1

RUN make -j$(nproc) llama-server

FROM ubuntu:$UBUNTU_VERSION AS runtime

RUN apt-get update && \
    apt-get install -y libcurl4-openssl-dev libgomp1 curl

COPY --from=build /app/llama-server /llama-server

ENV LC_ALL=C.utf8

HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/llama-server" ]
llm/llama.cpp/.devops/nix/apps.nix

@@ -6,11 +6,10 @@
 let
   inherit (config.packages) default;
   binaries = [
-    "llama"
+    "llama-cli"
     "llama-embedding"
     "llama-server"
-    "quantize"
-    "train-text-from-scratch"
+    "llama-quantize"
   ];
   mkApp = name: {
     type = "app";
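
Since apps.nix exposes each entry in binaries as a flake app, the renamed entries would presumably be invoked through nix run; a hedged sketch (the flake reference and arguments are assumptions):

# Run the renamed CLI app from the flake in the current directory
nix run .#llama-cli -- -m ./models/model.gguf -p "Hello"

# The quantize app likewise follows the new llama- prefix
nix run .#llama-quantize -- --help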
llm/llama.cpp/.devops/nix/package.nix

@@ -17,19 +17,19 @@
   rocmPackages,
   vulkan-headers,
   vulkan-loader,
-  clblast,
+  curl,
+  shaderc,
   useBlas ? builtins.all (x: !x) [
     useCuda
     useMetalKit
-    useOpenCL
     useRocm
     useVulkan
   ] && blas.meta.available,
   useCuda ? config.cudaSupport,
-  useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin && !useOpenCL,
+  useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin,
   useMpi ? false, # Increases the runtime closure size by ~700M
-  useOpenCL ? false,
   useRocm ? config.rocmSupport,
+  enableCurl ? true,
   useVulkan ? false,
   llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake
@@ -56,7 +56,6 @@ let
     ++ lib.optionals useCuda [ "CUDA" ]
     ++ lib.optionals useMetalKit [ "MetalKit" ]
     ++ lib.optionals useMpi [ "MPI" ]
-    ++ lib.optionals useOpenCL [ "OpenCL" ]
     ++ lib.optionals useRocm [ "ROCm" ]
     ++ lib.optionals useVulkan [ "Vulkan" ];
@@ -91,6 +90,22 @@ let
         ps.tiktoken
         ps.torchWithoutCuda
         ps.transformers
+
+        # server bench
+        ps.matplotlib
+
+        # server tests
+        ps.openai
+        ps.behave
+        ps.prometheus-client
+
+        # for examples/pydantic-models-to-grammar-examples.py
+        ps.docstring-parser
+        ps.pydantic
+
+        # for scripts/compare-llama-bench.py
+        ps.gitpython
+        ps.tabulate
       ]
     );
@@ -111,16 +126,9 @@ let
     ++ optionals useMetalKit [ MetalKit ];

   cudaBuildInputs = with cudaPackages; [
-    cuda_cccl.dev # <nv/target>
-
-    # A temporary hack for reducing the closure size, remove once cudaPackages
-    # have stopped using lndir: https://github.com/NixOS/nixpkgs/issues/271792
-    cuda_cudart.dev
-    cuda_cudart.lib
-    cuda_cudart.static
-    libcublas.dev
-    libcublas.lib
-    libcublas.static
+    cuda_cudart
+    cuda_cccl # <nv/target>
+    libcublas
   ];

   rocmBuildInputs = with rocmPackages; [
@@ -132,6 +140,7 @@ let
   vulkanBuildInputs = [
     vulkan-headers
     vulkan-loader
+    shaderc
   ];
 in
@@ -160,9 +169,9 @@ effectiveStdenv.mkDerivation (
     };

     postPatch = ''
-      substituteInPlace ./ggml-metal.m \
+      substituteInPlace ./ggml/src/ggml-metal.m \
        --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
-      substituteInPlace ./ggml-metal.m \
+      substituteInPlace ./ggml/src/ggml-metal.m \
        --replace '[bundle pathForResource:@"default" ofType:@"metallib"];' "@\"$out/bin/default.metallib\";"
     '';
@@ -198,24 +207,24 @@ effectiveStdenv.mkDerivation (
       optionals effectiveStdenv.isDarwin darwinBuildInputs
       ++ optionals useCuda cudaBuildInputs
       ++ optionals useMpi [ mpi ]
-      ++ optionals useOpenCL [ clblast ]
       ++ optionals useRocm rocmBuildInputs
       ++ optionals useBlas [ blas ]
-      ++ optionals useVulkan vulkanBuildInputs;
+      ++ optionals useVulkan vulkanBuildInputs
+      ++ optionals enableCurl [ curl ];

     cmakeFlags =
       [
-        (cmakeBool "LLAMA_NATIVE" false)
         (cmakeBool "LLAMA_BUILD_SERVER" true)
         (cmakeBool "BUILD_SHARED_LIBS" (!enableStatic))
         (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
-        (cmakeBool "LLAMA_BLAS" useBlas)
-        (cmakeBool "LLAMA_CLBLAST" useOpenCL)
-        (cmakeBool "LLAMA_CUDA" useCuda)
-        (cmakeBool "LLAMA_HIPBLAS" useRocm)
-        (cmakeBool "LLAMA_METAL" useMetalKit)
-        (cmakeBool "LLAMA_VULKAN" useVulkan)
-        (cmakeBool "LLAMA_STATIC" enableStatic)
+        (cmakeBool "LLAMA_CURL" enableCurl)
+        (cmakeBool "GGML_NATIVE" false)
+        (cmakeBool "GGML_BLAS" useBlas)
+        (cmakeBool "GGML_CUDA" useCuda)
+        (cmakeBool "GGML_HIPBLAS" useRocm)
+        (cmakeBool "GGML_METAL" useMetalKit)
+        (cmakeBool "GGML_VULKAN" useVulkan)
+        (cmakeBool "GGML_STATIC" enableStatic)
       ]
       ++ optionals useCuda [
         (
@@ -231,7 +240,7 @@ effectiveStdenv.mkDerivation (
       ]
       ++ optionals useMetalKit [
         (lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
-        (cmakeBool "LLAMA_METAL_EMBED_LIBRARY" (!precompileMetalShaders))
+        (cmakeBool "GGML_METAL_EMBED_LIBRARY" (!precompileMetalShaders))
       ];

     # Environment variables needed for ROCm
@@ -243,10 +252,8 @@ effectiveStdenv.mkDerivation (
     # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level,
     # if they haven't been added yet.
     postInstall = ''
-      mv $out/bin/main${executableSuffix} $out/bin/llama${executableSuffix}
-      mv $out/bin/server${executableSuffix} $out/bin/llama-server${executableSuffix}
       mkdir -p $out/include
-      cp $src/llama.h $out/include/
+      cp $src/include/llama.h $out/include/
     '';

     # Define the shells here, but don't add in the inputsFrom to avoid recursion.
@@ -256,7 +263,6 @@ effectiveStdenv.mkDerivation (
       useCuda
       useMetalKit
       useMpi
-      useOpenCL
       useRocm
       useVulkan
       ;
@@ -283,7 +289,7 @@ effectiveStdenv.mkDerivation (
       # Configurations we don't want even the CI to evaluate. Results in the
       # "unsupported platform" messages. This is mostly a no-op, because
       # cudaPackages would've refused to evaluate anyway.
-      badPlatforms = optionals (useCuda || useOpenCL) lib.platforms.darwin;
+      badPlatforms = optionals useCuda lib.platforms.darwin;

       # Configurations that are known to result in build failures. Can be
       # overridden by importing Nixpkgs with `allowBroken = true`.
@@ -294,7 +300,7 @@ effectiveStdenv.mkDerivation (
       license = lib.licenses.mit;

       # Accommodates `nix run` and `lib.getExe`
-      mainProgram = "llama";
+      mainProgram = "llama-cli";

       # These people might respond, on the best effort basis, if you ping them
       # in case of Nix-specific regressions or for reviewing Nix-specific PRs.
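
Given the new enableCurl flag and the GGML_*/Vulkan options above, one plausible way to build this package with non-default settings is through .override on the derivation; a heavily hedged sketch, assuming the flake exposes it as packages.<system>.default built via callPackage:

# Impure mode is needed for getFlake on a local path and builtins.currentSystem
nix build --impure --expr '
  (builtins.getFlake (toString ./.)).packages.${builtins.currentSystem}.default.override {
    useVulkan = true;
    enableCurl = false;
  }'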
llm/llama.cpp/.devops/tools.sh

@@ -8,13 +8,11 @@ arg1="$1"
 shift

 if [[ "$arg1" == '--convert' || "$arg1" == '-c' ]]; then
-    python3 ./convert-hf-to-gguf.py "$@"
+    python3 ./convert_hf_to_gguf.py "$@"
 elif [[ "$arg1" == '--quantize' || "$arg1" == '-q' ]]; then
-    ./quantize "$@"
+    ./llama-quantize "$@"
 elif [[ "$arg1" == '--run' || "$arg1" == '-r' ]]; then
-    ./main "$@"
-elif [[ "$arg1" == '--finetune' || "$arg1" == '-f' ]]; then
-    ./finetune "$@"
+    ./llama-cli "$@"
 elif [[ "$arg1" == '--all-in-one' || "$arg1" == '-a' ]]; then
     echo "Converting PTH to GGML..."
     for i in `ls $1/$2/ggml-model-f16.bin*`; do
@@ -22,11 +20,11 @@ elif [[ "$arg1" == '--all-in-one' || "$arg1" == '-a' ]]; then
             echo "Skip model quantization, it already exists: ${i/f16/q4_0}"
         else
             echo "Converting PTH to GGML: $i into ${i/f16/q4_0}..."
-            ./quantize "$i" "${i/f16/q4_0}" q4_0
+            ./llama-quantize "$i" "${i/f16/q4_0}" q4_0
         fi
     done
 elif [[ "$arg1" == '--server' || "$arg1" == '-s' ]]; then
-    ./server "$@"
+    ./llama-server "$@"
 else
     echo "Unknown command: $arg1"
     echo "Available commands: "
@@ -36,8 +34,6 @@ else
     echo "  ex: --outtype f16 \"/models/7B/\" "
     echo "  --quantize (-q): Optimize with quantization process ggml"
     echo "  ex: \"/models/7B/ggml-model-f16.bin\" \"/models/7B/ggml-model-q4_0.bin\" 2"
-    echo "  --finetune (-f): Run finetune command to create a lora finetune of the model"
-    echo "  See documentation for finetune for command-line parameters"
     echo "  --all-in-one (-a): Execute --convert & --quantize"
     echo "  ex: \"/models/\" 7B"
     echo "  --server (-s): Run a model on the server"
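
After these renames, the wrapper's subcommands simply dispatch to the new binary names; a short usage sketch based on the help text above, with illustrative model paths (the script is invoked here as ./tools.sh):

# Convert a Hugging Face model directory to GGUF via convert_hf_to_gguf.py
./tools.sh --convert --outtype f16 "/models/7B/"

# Quantize with the renamed llama-quantize binary
./tools.sh --quantize "/models/7B/ggml-model-f16.bin" "/models/7B/ggml-model-q4_0.bin" 2

# Run inference through llama-cli, or start llama-server
./tools.sh --run -m "/models/7B/ggml-model-q4_0.bin" -p "Hello"
./tools.sh --server -m "/models/7B/ggml-model-q4_0.bin" --host 0.0.0.0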
llm/llama.cpp/.dockerignore

@@ -12,8 +12,8 @@ build*/
 models/*

-/main
-/quantize
+/llama-cli
+/llama-quantize

 arm_neon.h
 compile_commands.json
llm/llama.cpp/.editorconfig

@@ -26,3 +26,7 @@ indent_size = 2

 [examples/llama.swiftui/llama.swiftui.xcodeproj/*]
 indent_style = tab
+
+[examples/cvector-generator/*.txt]
+trim_trailing_whitespace = unset
+insert_final_newline = unset
llm/llama.cpp/.github/ISSUE_TEMPLATE/01-bug-low.yml

@@ -24,7 +24,7 @@ body:
       label: Name and Version
       description: Which executable and which version of our software are you running? (use `--version` to get a version string)
       placeholder: |
-        $./main --version
+        $./llama-cli --version
         version: 2999 (42b4109e)
         built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
     validations:
llm/llama.cpp/.github/ISSUE_TEMPLATE/02-bug-medium.yml

@@ -24,7 +24,7 @@ body:
       label: Name and Version
       description: Which executable and which version of our software are you running? (use `--version` to get a version string)
       placeholder: |
-        $./main --version
+        $./llama-cli --version
         version: 2999 (42b4109e)
         built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
     validations:
llm/llama.cpp/.github/ISSUE_TEMPLATE/03-bug-high.yml

@@ -24,7 +24,7 @@ body:
       label: Name and Version
       description: Which executable and which version of our software are you running? (use `--version` to get a version string)
       placeholder: |
-        $./main --version
+        $./llama-cli --version
         version: 2999 (42b4109e)
         built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
     validations:
llm/llama.cpp/.github/ISSUE_TEMPLATE/04-bug-critical.yml

@@ -24,7 +24,7 @@ body:
       label: Name and Version
       description: Which executable and which version of our software are you running? (use `--version` to get a version string)
       placeholder: |
-        $./main --version
+        $./llama-cli --version
         version: 2999 (42b4109e)
         built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
     validations:
llm/llama.cpp/.github/ISSUE_TEMPLATE/config.yml

@@ -9,5 +9,3 @@ contact_links:
   - name: Want to contribute?
     url: https://github.com/ggerganov/llama.cpp/wiki/contribute
     about: Head to the contribution guide page of the wiki for areas you can help with
llm/llama.cpp/.github/labeler.yml

@@ -2,31 +2,33 @@
 Kompute:
   - changed-files:
       - any-glob-to-any-file:
-          - ggml-kompute.h
-          - ggml-kompute.cpp
+          - ggml/include/ggml-kompute.h
+          - ggml/src/ggml-kompute.cpp
           - README-kompute.md
 Apple Metal:
   - changed-files:
       - any-glob-to-any-file:
-          - ggml-metal.h
-          - ggml-metal.cpp
+          - ggml/include/ggml-metal.h
+          - ggml/src/ggml-metal.cpp
           - README-metal.md
 SYCL:
   - changed-files:
       - any-glob-to-any-file:
-          - ggml-sycl.h
-          - ggml-sycl.cpp
-          - README-sycl.md
+          - ggml/include/ggml-sycl.h
+          - ggml/src/ggml-sycl.cpp
+          - ggml/src/ggml-sycl/**
+          - docs/backend/SYCL.md
+          - examples/sycl/**
 Nvidia GPU:
   - changed-files:
       - any-glob-to-any-file:
-          - ggml-cuda.h
-          - ggml-cuda/**
+          - ggml/include/ggml-cuda.h
+          - ggml/src/ggml-cuda/**
 Vulkan:
   - changed-files:
       - any-glob-to-any-file:
-          - ggml_vk_generate_shaders.py
-          - ggml-vulkan*
+          - ggml/ggml_vk_generate_shaders.py
+          - ggml/src/ggml-vulkan*
 documentation:
   - changed-files:
       - any-glob-to-any-file:
@@ -42,7 +44,6 @@ build:
           - cmake/**
           - CMakeLists.txt
           - CMakePresets.json
-          - codecov.yml
 examples:
   - changed-files:
       - any-glob-to-any-file: examples/**
@@ -74,10 +75,10 @@ server:
 ggml:
   - changed-files:
       - any-glob-to-any-file:
-          - ggml.c
-          - ggml.h
-          - ggml-*.c
-          - ggml-*.h
+          - ggml/include/ggml*.h
+          - ggml/src/ggml*.c
+          - ggml/src/ggml*.cpp
+          - ggml/src/ggml*.h
           - ggml-cuda/**
 nix:
   - changed-files:
llm/llama.cpp/.github/pull_request_template.md (new file, mode 100644)

- [x] I have read the [contributing guidelines](https://github.com/ggerganov/llama.cpp/blob/master/CONTRIBUTING.md)
- Self-reported review complexity:
  - [ ] Low
  - [ ] Medium
  - [ ] High