chenpangpang / voicechat2 / Commits
Commit 2b67188c authored Aug 09, 2024 by chenpangpang

refactor: llama.cpp

parent 8939e76b
Changes: 943

Showing 20 changed files with 733 additions and 0 deletions
voicechat2/llama.cpp/.clang-tidy  +24  -0
voicechat2/llama.cpp/.devops/cloud-v-pipeline  +22  -0
voicechat2/llama.cpp/.devops/full-cuda.Dockerfile  +36  -0
voicechat2/llama.cpp/.devops/full-rocm.Dockerfile  +50  -0
voicechat2/llama.cpp/.devops/full.Dockerfile  +25  -0
voicechat2/llama.cpp/.devops/llama-cli-cuda.Dockerfile  +35  -0
voicechat2/llama.cpp/.devops/llama-cli-intel.Dockerfile  +28  -0
voicechat2/llama.cpp/.devops/llama-cli-rocm.Dockerfile  +45  -0
voicechat2/llama.cpp/.devops/llama-cli-vulkan.Dockerfile  +27  -0
voicechat2/llama.cpp/.devops/llama-cli.Dockerfile  +23  -0
voicechat2/llama.cpp/.devops/llama-cpp-cuda.srpm.spec  +83  -0
voicechat2/llama.cpp/.devops/llama-cpp.srpm.spec  +85  -0
voicechat2/llama.cpp/.devops/llama-server-cuda.Dockerfile  +39  -0
voicechat2/llama.cpp/.devops/llama-server-intel.Dockerfile  +32  -0
voicechat2/llama.cpp/.devops/llama-server-rocm.Dockerfile  +52  -0
voicechat2/llama.cpp/.devops/llama-server-vulkan.Dockerfile  +29  -0
voicechat2/llama.cpp/.devops/llama-server.Dockerfile  +27  -0
voicechat2/llama.cpp/.devops/nix/apps.nix  +21  -0
voicechat2/llama.cpp/.devops/nix/devshells.nix  +13  -0
voicechat2/llama.cpp/.devops/nix/docker.nix  +37  -0
voicechat2/llama.cpp/.clang-tidy
0 → 100644
---
Checks: >
    bugprone-*,
    -bugprone-easily-swappable-parameters,
    -bugprone-implicit-widening-of-multiplication-result,
    -bugprone-misplaced-widening-cast,
    -bugprone-narrowing-conversions,
    readability-*,
    -readability-avoid-unconditional-preprocessor-if,
    -readability-function-cognitive-complexity,
    -readability-identifier-length,
    -readability-implicit-bool-conversion,
    -readability-magic-numbers,
    -readability-uppercase-literal-suffix,
    -readability-simplify-boolean-expr,
    clang-analyzer-*,
    -clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling,
    performance-*,
    portability-*,
    misc-*,
    -misc-const-correctness,
    -misc-non-private-member-variables-in-classes,
    -misc-no-recursion,
FormatStyle: none
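
A minimal usage sketch for this configuration, assuming a CMake build tree that exports compile_commands.json (the build directory and source path are placeholders): clang-tidy picks up the nearest .clang-tidy file automatically.

$ cmake -B build -DCMAKE_EXPORT_COMPILE_COMMANDS=ON
$ clang-tidy -p build src/llama.cpp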
voicechat2/llama.cpp/.devops/cloud-v-pipeline
0 → 100644
node('x86_runner1'){ // Running on x86 runner containing latest vector qemu, latest vector gcc and all the necessary libraries
    stage('Cleanup'){
        cleanWs() // Cleaning previous CI build in workspace
    }
    stage('checkout repo'){
        retry(5){ // Retry if the cloning fails due to some reason
            checkout scm // Clone the repo on Runner
        }
    }
    stage('Compiling llama.cpp'){
        sh'''#!/bin/bash
        make RISCV=1 RISCV_CROSS_COMPILE=1 # Compiling llama for RISC-V
        '''
    }
    stage('Running llama.cpp'){
        sh'''#!/bin/bash
        module load gnu-bin2/0.1 # loading latest versions of vector qemu and vector gcc
        qemu-riscv64 -L /softwares/gnu-bin2/sysroot -cpu rv64,v=true,vlen=256,elen=64,vext_spec=v1.0 ./llama-cli -m /home/alitariq/codellama-7b.Q4_K_M.gguf -p "Anything" -n 9 > llama_log.txt # Running llama.cpp on vector qemu-riscv64
        cat llama_log.txt # Printing results
        '''
    }
}
voicechat2/llama.cpp/.devops/full-cuda.Dockerfile
0 → 100644
ARG UBUNTU_VERSION=22.04
# This needs to generally match the container host's environment.
ARG CUDA_VERSION=11.7.1
# Target the CUDA build image
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}

FROM ${BASE_CUDA_DEV_CONTAINER} AS build

# Unless otherwise specified, we make a fat build.
ARG CUDA_DOCKER_ARCH=all

RUN apt-get update && \
    apt-get install -y build-essential python3 python3-pip git libcurl4-openssl-dev libgomp1

COPY requirements.txt requirements.txt
COPY requirements requirements

RUN pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt

WORKDIR /app

COPY . .

# Set nvcc architecture
ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
# Enable CUDA
ENV GGML_CUDA=1
# Enable cURL
ENV LLAMA_CURL=1

RUN make -j$(nproc)

ENTRYPOINT ["/app/.devops/tools.sh"]
voicechat2/llama.cpp/.devops/full-rocm.Dockerfile
0 → 100644
ARG UBUNTU_VERSION=22.04
# This needs to generally match the container host's environment.
ARG ROCM_VERSION=5.6
# Target the CUDA build image
ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete

FROM ${BASE_ROCM_DEV_CONTAINER} AS build

# Unless otherwise specified, we make a fat build.
# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
# This is mostly tied to rocBLAS supported archs.
ARG ROCM_DOCKER_ARCH=\
    gfx803 \
    gfx900 \
    gfx906 \
    gfx908 \
    gfx90a \
    gfx1010 \
    gfx1030 \
    gfx1100 \
    gfx1101 \
    gfx1102

COPY requirements.txt requirements.txt
COPY requirements requirements

RUN pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt

WORKDIR /app

COPY . .

# Set nvcc architecture
ENV GPU_TARGETS=${ROCM_DOCKER_ARCH}
# Enable ROCm
ENV GGML_HIPBLAS=1

ENV CC=/opt/rocm/llvm/bin/clang
ENV CXX=/opt/rocm/llvm/bin/clang++

# Enable cURL
ENV LLAMA_CURL=1

RUN apt-get update && \
    apt-get install -y libcurl4-openssl-dev

RUN make -j$(nproc)

ENTRYPOINT ["/app/.devops/tools.sh"]
voicechat2/llama.cpp/.devops/full.Dockerfile
0 → 100644
ARG UBUNTU_VERSION=22.04

FROM ubuntu:$UBUNTU_VERSION AS build

RUN apt-get update && \
    apt-get install -y build-essential python3 python3-pip git libcurl4-openssl-dev libgomp1

COPY requirements.txt requirements.txt
COPY requirements requirements

RUN pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt

WORKDIR /app

COPY . .

ENV LLAMA_CURL=1

RUN make -j$(nproc)

ENV LC_ALL=C.utf8

ENTRYPOINT ["/app/.devops/tools.sh"]
voicechat2/llama.cpp/.devops/llama-cli-cuda.Dockerfile
0 → 100644
ARG UBUNTU_VERSION=22.04
# This needs to generally match the container host's environment.
ARG CUDA_VERSION=11.7.1
# Target the CUDA build image
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
# Target the CUDA runtime image
ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}

FROM ${BASE_CUDA_DEV_CONTAINER} AS build

# Unless otherwise specified, we make a fat build.
ARG CUDA_DOCKER_ARCH=all

RUN apt-get update && \
    apt-get install -y build-essential git

WORKDIR /app

COPY . .

# Set nvcc architecture
ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
# Enable CUDA
ENV GGML_CUDA=1

RUN make -j$(nproc) llama-cli

FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime

RUN apt-get update && \
    apt-get install -y libgomp1

COPY --from=build /app/llama-cli /llama-cli

ENTRYPOINT [ "/llama-cli" ]
voicechat2/llama.cpp/.devops/llama-cli-intel.Dockerfile
0 → 100644
ARG ONEAPI_VERSION=2024.1.1-devel-ubuntu22.04

FROM intel/oneapi-basekit:$ONEAPI_VERSION AS build

ARG GGML_SYCL_F16=OFF

RUN apt-get update && \
    apt-get install -y git

WORKDIR /app

COPY . .

RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \
        echo "GGML_SYCL_F16 is set" && \
        export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \
    fi && \
    echo "Building with static libs" && \
    cmake -B build -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx \
    ${OPT_SYCL_F16} -DBUILD_SHARED_LIBS=OFF && \
    cmake --build build --config Release --target llama-cli

FROM intel/oneapi-basekit:$ONEAPI_VERSION AS runtime

COPY --from=build /app/build/bin/llama-cli /llama-cli

ENV LC_ALL=C.utf8

ENTRYPOINT [ "/llama-cli" ]
voicechat2/llama.cpp/.devops/llama-cli-rocm.Dockerfile
0 → 100644
ARG UBUNTU_VERSION=22.04
# This needs to generally match the container host's environment.
ARG ROCM_VERSION=5.6
# Target the CUDA build image
ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete

FROM ${BASE_ROCM_DEV_CONTAINER} AS build

# Unless otherwise specified, we make a fat build.
# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
# This is mostly tied to rocBLAS supported archs.
ARG ROCM_DOCKER_ARCH=\
    gfx803 \
    gfx900 \
    gfx906 \
    gfx908 \
    gfx90a \
    gfx1010 \
    gfx1030 \
    gfx1100 \
    gfx1101 \
    gfx1102

COPY requirements.txt requirements.txt
COPY requirements requirements

RUN pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt

WORKDIR /app

COPY . .

# Set nvcc architecture
ENV GPU_TARGETS=${ROCM_DOCKER_ARCH}
# Enable ROCm
ENV GGML_HIPBLAS=1

ENV CC=/opt/rocm/llvm/bin/clang
ENV CXX=/opt/rocm/llvm/bin/clang++

RUN make -j$(nproc) llama-cli

ENTRYPOINT [ "/app/llama-cli" ]
voicechat2/llama.cpp/.devops/llama-cli-vulkan.Dockerfile
0 → 100644
ARG UBUNTU_VERSION=jammy

FROM ubuntu:$UBUNTU_VERSION AS build

# Install build tools
RUN apt update && apt install -y git build-essential cmake wget libgomp1

# Install Vulkan SDK
RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
    wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
    apt update -y && \
    apt-get install -y vulkan-sdk

# Build it
WORKDIR /app

COPY . .

RUN cmake -B build -DGGML_VULKAN=1 && \
    cmake --build build --config Release --target llama-cli

# Clean up
WORKDIR /

RUN cp /app/build/bin/llama-cli /llama-cli && \
    rm -rf /app

ENV LC_ALL=C.utf8

ENTRYPOINT [ "/llama-cli" ]
voicechat2/llama.cpp/.devops/llama-cli.Dockerfile
0 → 100644
ARG UBUNTU_VERSION=22.04

FROM ubuntu:$UBUNTU_VERSION AS build

RUN apt-get update && \
    apt-get install -y build-essential git

WORKDIR /app

COPY . .

RUN make -j$(nproc) llama-cli

FROM ubuntu:$UBUNTU_VERSION AS runtime

RUN apt-get update && \
    apt-get install -y libgomp1

COPY --from=build /app/llama-cli /llama-cli

ENV LC_ALL=C.utf8

ENTRYPOINT [ "/llama-cli" ]
voicechat2/llama.cpp/.devops/llama-cpp-cuda.srpm.spec
0 → 100644
# SRPM for building from source and packaging an RPM for RPM-based distros.
# https://docs.fedoraproject.org/en-US/quick-docs/creating-rpm-packages
# Built and maintained by John Boero - boeroboy@gmail.com
# In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal
# Notes for llama.cpp:
# 1. Tags are currently based on hash - which will not sort asciibetically.
# We need to declare standard versioning if people want to sort latest releases.
# 2. Builds for CUDA/OpenCL support are separate, with different dependencies.
# 3. NVidia's developer repo must be enabled with nvcc, cublas, clblas, etc installed.
# Example: https://developer.download.nvidia.com/compute/cuda/repos/fedora37/x86_64/cuda-fedora37.repo
# 4. OpenCL/CLBLAST support simply requires the ICD loader and basic opencl libraries.
# It is up to the user to install the correct vendor-specific support.
Name: llama.cpp-cuda
Version: %( date "+%%Y%%m%%d" )
Release: 1%{?dist}
Summary: CPU Inference of LLaMA model in pure C/C++ (no CUDA/OpenCL)
License: MIT
Source0: https://github.com/ggerganov/llama.cpp/archive/refs/heads/master.tar.gz
BuildRequires: coreutils make gcc-c++ git cuda-toolkit
Requires: cuda-toolkit
URL: https://github.com/ggerganov/llama.cpp
%define debug_package %{nil}
%define source_date_epoch_from_changelog 0
%description
CPU inference for Meta's Llama 2 models using default options.
%prep
%setup -n llama.cpp-master
%build
make -j GGML_CUDA=1
%install
mkdir -p %{buildroot}%{_bindir}/
cp -p llama-cli %{buildroot}%{_bindir}/llama-cuda-cli
cp -p llama-server %{buildroot}%{_bindir}/llama-cuda-server
cp -p llama-simple %{buildroot}%{_bindir}/llama-cuda-simple
mkdir -p %{buildroot}/usr/lib/systemd/system
%{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/llamacuda.service
[Unit]
Description=Llama.cpp server, CPU only (no GPU support in this build).
After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.target
[Service]
Type=simple
EnvironmentFile=/etc/sysconfig/llama
ExecStart=/usr/bin/llama-cuda-server $LLAMA_ARGS
ExecReload=/bin/kill -s HUP $MAINPID
Restart=never
[Install]
WantedBy=default.target
EOF
mkdir -p %{buildroot}/etc/sysconfig
%{__cat} <<EOF > %{buildroot}/etc/sysconfig/llama
LLAMA_ARGS="-m /opt/llama2/ggml-model-f32.bin"
EOF
%clean
rm -rf %{buildroot}
rm -rf %{_builddir}/*
%files
%{_bindir}/llama-cuda-cli
%{_bindir}/llama-cuda-server
%{_bindir}/llama-cuda-simple
/usr/lib/systemd/system/llamacuda.service
%config /etc/sysconfig/llama
%pre
%post
%preun
%postun
%changelog
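
A minimal build sketch for this spec, assuming rpmdevtools, rpm-build, and the NVIDIA developer repository mentioned in the notes are already configured (commands are illustrative):

$ spectool -g -R llama-cpp-cuda.srpm.spec    # fetch Source0 into ~/rpmbuild/SOURCES
$ rpmbuild -ba llama-cpp-cuda.srpm.spec      # build the source and binary RPMs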
voicechat2/llama.cpp/.devops/llama-cpp.srpm.spec
0 → 100644
# SRPM for building from source and packaging an RPM for RPM-based distros.
# https://docs.fedoraproject.org/en-US/quick-docs/creating-rpm-packages
# Built and maintained by John Boero - boeroboy@gmail.com
# In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal
# Notes for llama.cpp:
# 1. Tags are currently based on hash - which will not sort asciibetically.
# We need to declare standard versioning if people want to sort latest releases.
# In the meantime, YYYYMMDD format will be used.
# 2. Builds for CUDA/OpenCL support are separate, with different dependencies.
# 3. NVidia's developer repo must be enabled with nvcc, cublas, clblas, etc installed.
# Example: https://developer.download.nvidia.com/compute/cuda/repos/fedora37/x86_64/cuda-fedora37.repo
# 4. OpenCL/CLBLAST support simply requires the ICD loader and basic opencl libraries.
# It is up to the user to install the correct vendor-specific support.
Name: llama.cpp
Version: %( date "+%%Y%%m%%d" )
Release: 1%{?dist}
Summary: CPU Inference of LLaMA model in pure C/C++ (no CUDA/OpenCL)
License: MIT
Source0: https://github.com/ggerganov/llama.cpp/archive/refs/heads/master.tar.gz
BuildRequires: coreutils make gcc-c++ git libstdc++-devel
Requires: libstdc++
URL: https://github.com/ggerganov/llama.cpp
%define debug_package %{nil}
%define source_date_epoch_from_changelog 0
%description
CPU inference for Meta's Llama 2 models using default options.
Models are not included in this package and must be downloaded separately.
%prep
%setup -n llama.cpp-master
%build
make -j
%install
mkdir -p %{buildroot}%{_bindir}/
cp -p llama-cli %{buildroot}%{_bindir}/llama-cli
cp -p llama-server %{buildroot}%{_bindir}/llama-server
cp -p llama-simple %{buildroot}%{_bindir}/llama-simple
mkdir -p %{buildroot}/usr/lib/systemd/system
%{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/llama.service
[Unit]
Description=Llama.cpp server, CPU only (no GPU support in this build).
After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.target
[Service]
Type=simple
EnvironmentFile=/etc/sysconfig/llama
ExecStart=/usr/bin/llama-server $LLAMA_ARGS
ExecReload=/bin/kill -s HUP $MAINPID
Restart=never
[Install]
WantedBy=default.target
EOF
mkdir -p %{buildroot}/etc/sysconfig
%{__cat} <<EOF > %{buildroot}/etc/sysconfig/llama
LLAMA_ARGS="-m /opt/llama2/ggml-model-f32.bin"
EOF
%clean
rm -rf %{buildroot}
rm -rf %{_builddir}/*
%files
%{_bindir}/llama-cli
%{_bindir}/llama-server
%{_bindir}/llama-simple
/usr/lib/systemd/system/llama.service
%config /etc/sysconfig/llama
%pre
%post
%preun
%postun
%changelog
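
A minimal deployment sketch using the systemd unit and sysconfig file installed by this spec (the package file name and model path are placeholders):

$ sudo dnf install ./llama.cpp-*.x86_64.rpm
$ sudoedit /etc/sysconfig/llama              # point LLAMA_ARGS at a real model file
$ sudo systemctl enable --now llama.service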
voicechat2/llama.cpp/.devops/llama-server-cuda.Dockerfile
0 → 100644
ARG UBUNTU_VERSION=22.04
# This needs to generally match the container host's environment.
ARG CUDA_VERSION=11.7.1
# Target the CUDA build image
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
# Target the CUDA runtime image
ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}

FROM ${BASE_CUDA_DEV_CONTAINER} AS build

# Unless otherwise specified, we make a fat build.
ARG CUDA_DOCKER_ARCH=all

RUN apt-get update && \
    apt-get install -y build-essential git libcurl4-openssl-dev

WORKDIR /app

COPY . .

# Set nvcc architecture
ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
# Enable CUDA
ENV GGML_CUDA=1
# Enable cURL
ENV LLAMA_CURL=1

RUN make -j$(nproc) llama-server

FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime

RUN apt-get update && \
    apt-get install -y libcurl4-openssl-dev libgomp1 curl

COPY --from=build /app/llama-server /llama-server

HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/llama-server" ]
voicechat2/llama.cpp/.devops/llama-server-intel.Dockerfile
0 → 100644
ARG ONEAPI_VERSION=2024.1.1-devel-ubuntu22.04

FROM intel/oneapi-basekit:$ONEAPI_VERSION AS build

ARG GGML_SYCL_F16=OFF

RUN apt-get update && \
    apt-get install -y git libcurl4-openssl-dev

WORKDIR /app

COPY . .

RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \
        echo "GGML_SYCL_F16 is set" && \
        export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \
    fi && \
    echo "Building with dynamic libs" && \
    cmake -B build -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON ${OPT_SYCL_F16} && \
    cmake --build build --config Release --target llama-server

FROM intel/oneapi-basekit:$ONEAPI_VERSION AS runtime

RUN apt-get update && \
    apt-get install -y libcurl4-openssl-dev curl

COPY --from=build /app/build/bin/llama-server /llama-server

ENV LC_ALL=C.utf8

HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/llama-server" ]
voicechat2/llama.cpp/.devops/llama-server-rocm.Dockerfile
0 → 100644
ARG UBUNTU_VERSION=22.04
# This needs to generally match the container host's environment.
ARG ROCM_VERSION=5.6
# Target the CUDA build image
ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete

FROM ${BASE_ROCM_DEV_CONTAINER} AS build

# Unless otherwise specified, we make a fat build.
# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
# This is mostly tied to rocBLAS supported archs.
ARG ROCM_DOCKER_ARCH=\
    gfx803 \
    gfx900 \
    gfx906 \
    gfx908 \
    gfx90a \
    gfx1010 \
    gfx1030 \
    gfx1100 \
    gfx1101 \
    gfx1102

COPY requirements.txt requirements.txt
COPY requirements requirements

RUN pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt

WORKDIR /app

COPY . .

# Set nvcc architecture
ENV GPU_TARGETS=${ROCM_DOCKER_ARCH}
# Enable ROCm
ENV GGML_HIPBLAS=1

ENV CC=/opt/rocm/llvm/bin/clang
ENV CXX=/opt/rocm/llvm/bin/clang++

# Enable cURL
ENV LLAMA_CURL=1

RUN apt-get update && \
    apt-get install -y libcurl4-openssl-dev curl

RUN make -j$(nproc) llama-server

HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/app/llama-server" ]
voicechat2/llama.cpp/.devops/llama-server-vulkan.Dockerfile
0 → 100644
ARG UBUNTU_VERSION=jammy

FROM ubuntu:$UBUNTU_VERSION AS build

# Install build tools
RUN apt update && apt install -y git build-essential cmake wget

# Install Vulkan SDK and cURL
RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
    wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
    apt update -y && \
    apt-get install -y vulkan-sdk libcurl4-openssl-dev curl

# Build it
WORKDIR /app

COPY . .

RUN cmake -B build -DGGML_VULKAN=1 -DLLAMA_CURL=1 && \
    cmake --build build --config Release --target llama-server

# Clean up
WORKDIR /

RUN cp /app/build/bin/llama-server /llama-server && \
    rm -rf /app

ENV LC_ALL=C.utf8

HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/llama-server" ]
voicechat2/llama.cpp/.devops/llama-server.Dockerfile
0 → 100644
ARG UBUNTU_VERSION=22.04

FROM ubuntu:$UBUNTU_VERSION AS build

RUN apt-get update && \
    apt-get install -y build-essential git libcurl4-openssl-dev

WORKDIR /app

COPY . .

ENV LLAMA_CURL=1

RUN make -j$(nproc) llama-server

FROM ubuntu:$UBUNTU_VERSION AS runtime

RUN apt-get update && \
    apt-get install -y libcurl4-openssl-dev libgomp1 curl

COPY --from=build /app/llama-server /llama-server

ENV LC_ALL=C.utf8

HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/llama-server" ]
voicechat2/llama.cpp/.devops/nix/apps.nix
0 → 100644
{
  perSystem =
    { config, lib, ... }:
    {
      apps =
        let
          inherit (config.packages) default;
          binaries = [
            "llama-cli"
            "llama-embedding"
            "llama-server"
            "llama-quantize"
          ];
          mkApp = name: {
            type = "app";
            program = "${default}/bin/${name}";
          };
        in
        lib.genAttrs binaries mkApp;
    };
}
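
A minimal usage sketch, assuming this module is imported by the repository's flake.nix (model paths are placeholders): each entry in binaries becomes a runnable flake app.

$ nix run .#llama-cli -- -m ./model.gguf -p "Hello"
$ nix run .#llama-server -- -m ./model.gguf --port 8080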
voicechat2/llama.cpp/.devops/nix/devshells.nix
0 → 100644
{
  perSystem =
    { config, lib, ... }:
    {
      devShells =
        lib.concatMapAttrs
          (name: package: {
            ${name} = package.passthru.shell;
            ${name + "-extra"} = package.passthru.shell-extra;
          })
          config.packages;
    };
}
voicechat2/llama.cpp/.devops/nix/docker.nix
0 → 100644
{
  lib,
  dockerTools,
  buildEnv,
  llama-cpp,
  interactive ? true,
  coreutils,
}:

# A tar that can be fed into `docker load`:
#
# $ nix build .#llamaPackages.docker
# $ docker load < result

# For details and variations cf.
# - https://nixos.org/manual/nixpkgs/unstable/#ssec-pkgs-dockerTools-buildLayeredImage
# - https://discourse.nixos.org/t/a-faster-dockertools-buildimage-prototype/16922
# - https://nixery.dev/

# Approximate (compressed) sizes, at the time of writing, are:
#
# .#llamaPackages.docker: 125M;
# .#llamaPackagesCuda.docker: 537M;
# .#legacyPackages.aarch64-linux.llamaPackagesXavier.docker: 415M.

dockerTools.buildLayeredImage {
  name = llama-cpp.pname;
  tag = "latest";
  contents =
    [ llama-cpp ]
    ++ lib.optionals interactive [
      coreutils
      dockerTools.binSh
      dockerTools.caCertificates
    ];
}
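
A minimal follow-up to the build/load commands in the comments above, assuming the image loads under the package's pname as llama-cpp:latest and a model directory is mounted in (names and paths are placeholders):

$ nix build .#llamaPackages.docker
$ docker load < result
$ docker run --rm -v /path/to/models:/models llama-cpp:latest llama-cli -m /models/model.gguf -p "Hello"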