wangkx1 / ollama_dcu · Commits

Commit ff27a817
authored Aug 13, 2024 by wangkx1

    init

Pipeline #1521 canceled with stages

Changes: 331 · Pipelines: 1
Showing 20 changed files with 1079 additions and 0 deletions (+1079 −0)
ollama/llm/llama.cpp/.devops/full.Dockerfile                  +25  −0
ollama/llm/llama.cpp/.devops/llama-cli-cuda.Dockerfile        +35  −0
ollama/llm/llama.cpp/.devops/llama-cli-intel.Dockerfile       +26  −0
ollama/llm/llama.cpp/.devops/llama-cli-rocm.Dockerfile        +45  −0
ollama/llm/llama.cpp/.devops/llama-cli-vulkan.Dockerfile      +27  −0
ollama/llm/llama.cpp/.devops/llama-cli.Dockerfile             +23  −0
ollama/llm/llama.cpp/.devops/llama-cpp-clblast.srpm.spec      +84  −0
ollama/llm/llama.cpp/.devops/llama-cpp-cuda.srpm.spec         +83  −0
ollama/llm/llama.cpp/.devops/llama-cpp.srpm.spec              +85  −0
ollama/llm/llama.cpp/.devops/llama-server-cuda.Dockerfile     +37  −0
ollama/llm/llama.cpp/.devops/llama-server-intel.Dockerfile    +29  −0
ollama/llm/llama.cpp/.devops/llama-server-rocm.Dockerfile     +50  −0
ollama/llm/llama.cpp/.devops/llama-server-vulkan.Dockerfile   +31  −0
ollama/llm/llama.cpp/.devops/llama-server.Dockerfile          +25  −0
ollama/llm/llama.cpp/.devops/nix/apps.nix                     +22  −0
ollama/llm/llama.cpp/.devops/nix/devshells.nix                +13  −0
ollama/llm/llama.cpp/.devops/nix/docker.nix                   +37  −0
ollama/llm/llama.cpp/.devops/nix/jetson-support.nix           +39  −0
ollama/llm/llama.cpp/.devops/nix/nixpkgs-instances.nix        +47  −0
ollama/llm/llama.cpp/.devops/nix/package.nix                  +316 −0
Too many changes to show. To preserve performance, only 331 of 331+ files are displayed.
ollama/llm/llama.cpp/.devops/full.Dockerfile (new file, mode 100755)

ARG UBUNTU_VERSION=22.04

FROM ubuntu:$UBUNTU_VERSION as build

RUN apt-get update && \
    apt-get install -y build-essential python3 python3-pip git libcurl4-openssl-dev libgomp1

COPY requirements.txt requirements.txt
COPY requirements requirements

RUN pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt

WORKDIR /app

COPY . .

ENV LLAMA_CURL=1

RUN make -j $(nproc)

ENV LC_ALL=C.utf8

ENTRYPOINT ["/app/.devops/tools.sh"]
ollama/llm/llama.cpp/.devops/llama-cli-cuda.Dockerfile (new file, mode 100755)

ARG UBUNTU_VERSION=22.04
# This needs to generally match the container host's environment.
ARG CUDA_VERSION=11.7.1
# Target the CUDA build image
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
# Target the CUDA runtime image
ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}

FROM ${BASE_CUDA_DEV_CONTAINER} as build

# Unless otherwise specified, we make a fat build.
ARG CUDA_DOCKER_ARCH=all

RUN apt-get update && \
    apt-get install -y build-essential git

WORKDIR /app

COPY . .

# Set nvcc architecture
ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
# Enable CUDA
ENV LLAMA_CUDA=1

RUN make -j $(nproc) llama-cli

FROM ${BASE_CUDA_RUN_CONTAINER} as runtime

RUN apt-get update && \
    apt-get install -y libgomp1

COPY --from=build /app/llama-cli /llama-cli

ENTRYPOINT [ "/llama-cli" ]
ollama/llm/llama.cpp/.devops/llama-cli-intel.Dockerfile (new file, mode 100755)

ARG ONEAPI_VERSION=2024.1.1-devel-ubuntu22.04

FROM intel/oneapi-basekit:$ONEAPI_VERSION as build

ARG LLAMA_SYCL_F16=OFF

RUN apt-get update && \
    apt-get install -y git

WORKDIR /app

COPY . .

RUN if [ "${LLAMA_SYCL_F16}" = "ON" ]; then \
        echo "LLAMA_SYCL_F16 is set" && \
        export OPT_SYCL_F16="-DLLAMA_SYCL_F16=ON"; \
    fi && \
    cmake -B build -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ${OPT_SYCL_F16} && \
    cmake --build build --config Release --target llama-cli

FROM intel/oneapi-basekit:$ONEAPI_VERSION as runtime

COPY --from=build /app/build/bin/llama-cli /llama-cli

ENV LC_ALL=C.utf8

ENTRYPOINT [ "/llama-cli" ]
ollama/llm/llama.cpp/.devops/llama-cli-rocm.Dockerfile (new file, mode 100755)

ARG UBUNTU_VERSION=22.04

# This needs to generally match the container host's environment.
ARG ROCM_VERSION=5.6

# Target the CUDA build image
ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete

FROM ${BASE_ROCM_DEV_CONTAINER} as build

# Unless otherwise specified, we make a fat build.
# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
# This is mostly tied to rocBLAS supported archs.
ARG ROCM_DOCKER_ARCH=\
    gfx803 \
    gfx900 \
    gfx906 \
    gfx908 \
    gfx90a \
    gfx1010 \
    gfx1030 \
    gfx1100 \
    gfx1101 \
    gfx1102

COPY requirements.txt requirements.txt
COPY requirements requirements

RUN pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt

WORKDIR /app

COPY . .

# Set nvcc architecture
ENV GPU_TARGETS=${ROCM_DOCKER_ARCH}
# Enable ROCm
ENV LLAMA_HIPBLAS=1
ENV CC=/opt/rocm/llvm/bin/clang
ENV CXX=/opt/rocm/llvm/bin/clang++

RUN make -j $(nproc) llama-cli

ENTRYPOINT [ "/app/llama-cli" ]
ollama/llm/llama.cpp/.devops/llama-cli-vulkan.Dockerfile (new file, mode 100755)

ARG UBUNTU_VERSION=jammy

FROM ubuntu:$UBUNTU_VERSION as build

# Install build tools
RUN apt update && apt install -y git build-essential cmake wget libgomp1

# Install Vulkan SDK
RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
    wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
    apt update -y && \
    apt-get install -y vulkan-sdk

# Build it
WORKDIR /app
COPY . .
RUN cmake -B build -DLLAMA_VULKAN=1 && \
    cmake --build build --config Release --target llama-cli

# Clean up
WORKDIR /
RUN cp /app/build/bin/llama-cli /llama-cli && \
    rm -rf /app

ENV LC_ALL=C.utf8

ENTRYPOINT [ "/llama-cli" ]
ollama/llm/llama.cpp/.devops/llama-cli.Dockerfile (new file, mode 100755)

ARG UBUNTU_VERSION=22.04

FROM ubuntu:$UBUNTU_VERSION as build

RUN apt-get update && \
    apt-get install -y build-essential git

WORKDIR /app

COPY . .

RUN make -j $(nproc) llama-cli

FROM ubuntu:$UBUNTU_VERSION as runtime

RUN apt-get update && \
    apt-get install -y libgomp1

COPY --from=build /app/llama-cli /llama-cli

ENV LC_ALL=C.utf8

ENTRYPOINT [ "/llama-cli" ]
ollama/llm/llama.cpp/.devops/llama-cpp-clblast.srpm.spec (new file, mode 100755)

# SRPM for building from source and packaging an RPM for RPM-based distros.
# https://docs.fedoraproject.org/en-US/quick-docs/creating-rpm-packages
# Built and maintained by John Boero - boeroboy@gmail.com
# In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal
# Notes for llama.cpp:
# 1. Tags are currently based on hash - which will not sort asciibetically.
# We need to declare standard versioning if people want to sort latest releases.
# 2. Builds for CUDA/OpenCL support are separate, with different depenedencies.
# 3. NVidia's developer repo must be enabled with nvcc, cublas, clblas, etc installed.
# Example: https://developer.download.nvidia.com/compute/cuda/repos/fedora37/x86_64/cuda-fedora37.repo
# 4. OpenCL/CLBLAST support simply requires the ICD loader and basic opencl libraries.
# It is up to the user to install the correct vendor-specific support.
Name: llama.cpp-clblast
Version: %( date "+%%Y%%m%%d" )
Release: 1%{?dist}
Summary: OpenCL Inference of LLaMA model in C/C++
License: MIT
Source0: https://github.com/ggerganov/llama.cpp/archive/refs/heads/master.tar.gz
BuildRequires: coreutils make gcc-c++ git mesa-libOpenCL-devel clblast-devel
Requires: clblast
URL: https://github.com/ggerganov/llama.cpp
%define debug_package %{nil}
%define source_date_epoch_from_changelog 0
%description
CPU inference for Meta's Lllama2 models using default options.
%prep
%setup -n llama.cpp-master
%build
make -j LLAMA_CLBLAST=1
%install
mkdir -p %{buildroot}%{_bindir}/
cp -p llama-cli %{buildroot}%{_bindir}/llama-clblast-cli
cp -p llama-server %{buildroot}%{_bindir}/llama-clblast-server
cp -p llama-simple %{buildroot}%{_bindir}/llama-clblast-simple
mkdir -p %{buildroot}/usr/lib/systemd/system
%{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/llamaclblast.service
[Unit]
Description=Llama.cpp server, CPU only (no GPU support in this build).
After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.target
[Service]
Type=simple
EnvironmentFile=/etc/sysconfig/llama
ExecStart=/usr/bin/llama-clblast-server $LLAMA_ARGS
ExecReload=/bin/kill -s HUP $MAINPID
Restart=never
[Install]
WantedBy=default.target
EOF
mkdir -p %{buildroot}/etc/sysconfig
%{__cat} <<EOF > %{buildroot}/etc/sysconfig/llama
LLAMA_ARGS="-m /opt/llama2/ggml-model-f32.bin"
EOF
%clean
rm -rf %{buildroot}
rm -rf %{_builddir}/*
%files
%{_bindir}/llama-clblast-cli
%{_bindir}/llama-clblast-server
%{_bindir}/llama-clblast-simple
/usr/lib/systemd/system/llamaclblast.service
%config /etc/sysconfig/llama
%pre
%post
%preun
%postun
%changelog
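
A hedged packaging sketch for these SRPM specs (assumes a prepared ~/rpmbuild tree and the build dependencies listed in BuildRequires; the spec pulls its source from the master tarball named in Source0):

# Fetch the source tarball the spec refers to, then build binary and source RPMs.
wget -O ~/rpmbuild/SOURCES/master.tar.gz \
    https://github.com/ggerganov/llama.cpp/archive/refs/heads/master.tar.gz
rpmbuild -ba .devops/llama-cpp-clblast.srpm.spec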
ollama/llm/llama.cpp/.devops/llama-cpp-cuda.srpm.spec (new file, mode 100755)

# SRPM for building from source and packaging an RPM for RPM-based distros.
# https://docs.fedoraproject.org/en-US/quick-docs/creating-rpm-packages
# Built and maintained by John Boero - boeroboy@gmail.com
# In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal
# Notes for llama.cpp:
# 1. Tags are currently based on hash - which will not sort asciibetically.
# We need to declare standard versioning if people want to sort latest releases.
# 2. Builds for CUDA/OpenCL support are separate, with different depenedencies.
# 3. NVidia's developer repo must be enabled with nvcc, cublas, clblas, etc installed.
# Example: https://developer.download.nvidia.com/compute/cuda/repos/fedora37/x86_64/cuda-fedora37.repo
# 4. OpenCL/CLBLAST support simply requires the ICD loader and basic opencl libraries.
# It is up to the user to install the correct vendor-specific support.
Name: llama.cpp-cuda
Version: %( date "+%%Y%%m%%d" )
Release: 1%{?dist}
Summary: CPU Inference of LLaMA model in pure C/C++ (no CUDA/OpenCL)
License: MIT
Source0: https://github.com/ggerganov/llama.cpp/archive/refs/heads/master.tar.gz
BuildRequires: coreutils make gcc-c++ git cuda-toolkit
Requires: cuda-toolkit
URL: https://github.com/ggerganov/llama.cpp
%define debug_package %{nil}
%define source_date_epoch_from_changelog 0
%description
CPU inference for Meta's Lllama2 models using default options.
%prep
%setup -n llama.cpp-master
%build
make -j LLAMA_CUDA=1
%install
mkdir -p %{buildroot}%{_bindir}/
cp -p llama-cli %{buildroot}%{_bindir}/llama-cuda-cli
cp -p llama-server %{buildroot}%{_bindir}/llama-cuda-server
cp -p llama-simple %{buildroot}%{_bindir}/llama-cuda-simple
mkdir -p %{buildroot}/usr/lib/systemd/system
%{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/llamacuda.service
[Unit]
Description=Llama.cpp server, CPU only (no GPU support in this build).
After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.target
[Service]
Type=simple
EnvironmentFile=/etc/sysconfig/llama
ExecStart=/usr/bin/llama-cuda-server $LLAMA_ARGS
ExecReload=/bin/kill -s HUP $MAINPID
Restart=never
[Install]
WantedBy=default.target
EOF
mkdir -p %{buildroot}/etc/sysconfig
%{__cat} <<EOF > %{buildroot}/etc/sysconfig/llama
LLAMA_ARGS="-m /opt/llama2/ggml-model-f32.bin"
EOF
%clean
rm -rf %{buildroot}
rm -rf %{_builddir}/*
%files
%{_bindir}/llama-cuda-cli
%{_bindir}/llama-cuda-server
%{_bindir}/llama-cuda-simple
/usr/lib/systemd/system/llamacuda.service
%config /etc/sysconfig/llama
%pre
%post
%preun
%postun
%changelog
ollama/llm/llama.cpp/.devops/llama-cpp.srpm.spec (new file, mode 100755)

# SRPM for building from source and packaging an RPM for RPM-based distros.
# https://docs.fedoraproject.org/en-US/quick-docs/creating-rpm-packages
# Built and maintained by John Boero - boeroboy@gmail.com
# In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal
# Notes for llama.cpp:
# 1. Tags are currently based on hash - which will not sort asciibetically.
# We need to declare standard versioning if people want to sort latest releases.
# In the meantime, YYYYMMDD format will be used.
# 2. Builds for CUDA/OpenCL support are separate, with different depenedencies.
# 3. NVidia's developer repo must be enabled with nvcc, cublas, clblas, etc installed.
# Example: https://developer.download.nvidia.com/compute/cuda/repos/fedora37/x86_64/cuda-fedora37.repo
# 4. OpenCL/CLBLAST support simply requires the ICD loader and basic opencl libraries.
# It is up to the user to install the correct vendor-specific support.
Name: llama.cpp
Version: %( date "+%%Y%%m%%d" )
Release: 1%{?dist}
Summary: CPU Inference of LLaMA model in pure C/C++ (no CUDA/OpenCL)
License: MIT
Source0: https://github.com/ggerganov/llama.cpp/archive/refs/heads/master.tar.gz
BuildRequires: coreutils make gcc-c++ git libstdc++-devel
Requires: libstdc++
URL: https://github.com/ggerganov/llama.cpp
%define debug_package %{nil}
%define source_date_epoch_from_changelog 0
%description
CPU inference for Meta's Lllama2 models using default options.
Models are not included in this package and must be downloaded separately.
%prep
%setup -n llama.cpp-master
%build
make -j
%install
mkdir -p %{buildroot}%{_bindir}/
cp -p llama-cli %{buildroot}%{_bindir}/llama-cli
cp -p llama-server %{buildroot}%{_bindir}/llama-server
cp -p llama-simple %{buildroot}%{_bindir}/llama-simple
mkdir -p %{buildroot}/usr/lib/systemd/system
%{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/llama.service
[Unit]
Description=Llama.cpp server, CPU only (no GPU support in this build).
After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.target
[Service]
Type=simple
EnvironmentFile=/etc/sysconfig/llama
ExecStart=/usr/bin/llama-server $LLAMA_ARGS
ExecReload=/bin/kill -s HUP $MAINPID
Restart=never
[Install]
WantedBy=default.target
EOF
mkdir -p %{buildroot}/etc/sysconfig
%{__cat} <<EOF > %{buildroot}/etc/sysconfig/llama
LLAMA_ARGS="-m /opt/llama2/ggml-model-f32.bin"
EOF
%clean
rm -rf %{buildroot}
rm -rf %{_builddir}/*
%files
%{_bindir}/llama-cli
%{_bindir}/llama-server
%{_bindir}/llama-simple
/usr/lib/systemd/system/llama.service
%config /etc/sysconfig/llama
%pre
%post
%preun
%postun
%changelog
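
Each spec installs a systemd unit plus an /etc/sysconfig/llama environment file; a hedged sketch of running the packaged server after installing the base RPM (the model path is whatever you place on disk):

# Point LLAMA_ARGS at a downloaded model, then enable the service.
sudoedit /etc/sysconfig/llama        # e.g. LLAMA_ARGS="-m /opt/llama2/ggml-model-f32.bin"
sudo systemctl enable --now llama.service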
ollama/llm/llama.cpp/.devops/llama-server-cuda.Dockerfile (new file, mode 100755)

ARG UBUNTU_VERSION=22.04
# This needs to generally match the container host's environment.
ARG CUDA_VERSION=11.7.1
# Target the CUDA build image
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
# Target the CUDA runtime image
ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}

FROM ${BASE_CUDA_DEV_CONTAINER} as build

# Unless otherwise specified, we make a fat build.
ARG CUDA_DOCKER_ARCH=all

RUN apt-get update && \
    apt-get install -y build-essential git libcurl4-openssl-dev

WORKDIR /app

COPY . .

# Set nvcc architecture
ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
# Enable CUDA
ENV LLAMA_CUDA=1
# Enable cURL
ENV LLAMA_CURL=1

RUN make -j $(nproc) llama-server

FROM ${BASE_CUDA_RUN_CONTAINER} as runtime

RUN apt-get update && \
    apt-get install -y libcurl4-openssl-dev libgomp1

COPY --from=build /app/llama-server /llama-server

ENTRYPOINT [ "/llama-server" ]
ollama/llm/llama.cpp/.devops/llama-server-intel.Dockerfile (new file, mode 100755)

ARG ONEAPI_VERSION=2024.1.1-devel-ubuntu22.04

FROM intel/oneapi-basekit:$ONEAPI_VERSION as build

ARG LLAMA_SYCL_F16=OFF

RUN apt-get update && \
    apt-get install -y git libcurl4-openssl-dev

WORKDIR /app

COPY . .

RUN if [ "${LLAMA_SYCL_F16}" = "ON" ]; then \
        echo "LLAMA_SYCL_F16 is set" && \
        export OPT_SYCL_F16="-DLLAMA_SYCL_F16=ON"; \
    fi && \
    cmake -B build -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON ${OPT_SYCL_F16} && \
    cmake --build build --config Release --target llama-server

FROM intel/oneapi-basekit:$ONEAPI_VERSION as runtime

RUN apt-get update && \
    apt-get install -y libcurl4-openssl-dev

COPY --from=build /app/build/bin/llama-server /llama-server

ENV LC_ALL=C.utf8

ENTRYPOINT [ "/llama-server" ]
ollama/llm/llama.cpp/.devops/llama-server-rocm.Dockerfile (new file, mode 100755)

ARG UBUNTU_VERSION=22.04

# This needs to generally match the container host's environment.
ARG ROCM_VERSION=5.6

# Target the CUDA build image
ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete

FROM ${BASE_ROCM_DEV_CONTAINER} as build

# Unless otherwise specified, we make a fat build.
# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
# This is mostly tied to rocBLAS supported archs.
ARG ROCM_DOCKER_ARCH=\
    gfx803 \
    gfx900 \
    gfx906 \
    gfx908 \
    gfx90a \
    gfx1010 \
    gfx1030 \
    gfx1100 \
    gfx1101 \
    gfx1102

COPY requirements.txt requirements.txt
COPY requirements requirements

RUN pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt

WORKDIR /app

COPY . .

# Set nvcc architecture
ENV GPU_TARGETS=${ROCM_DOCKER_ARCH}
# Enable ROCm
ENV LLAMA_HIPBLAS=1
ENV CC=/opt/rocm/llvm/bin/clang
ENV CXX=/opt/rocm/llvm/bin/clang++

# Enable cURL
ENV LLAMA_CURL=1
RUN apt-get update && \
    apt-get install -y libcurl4-openssl-dev

RUN make -j $(nproc) llama-server

ENTRYPOINT [ "/app/llama-server" ]
ollama/llm/llama.cpp/.devops/llama-server-vulkan.Dockerfile (new file, mode 100755)

ARG UBUNTU_VERSION=jammy

FROM ubuntu:$UBUNTU_VERSION as build

# Install build tools
RUN apt update && apt install -y git build-essential cmake wget

# Install Vulkan SDK
RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
    wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
    apt update -y && \
    apt-get install -y vulkan-sdk

# Install cURL
RUN apt-get update && \
    apt-get install -y libcurl4-openssl-dev

# Build it
WORKDIR /app
COPY . .
RUN cmake -B build -DLLAMA_VULKAN=1 -DLLAMA_CURL=1 && \
    cmake --build build --config Release --target llama-server

# Clean up
WORKDIR /
RUN cp /app/build/bin/llama-server /llama-server && \
    rm -rf /app

ENV LC_ALL=C.utf8

ENTRYPOINT [ "/llama-server" ]
ollama/llm/llama.cpp/.devops/llama-server.Dockerfile (new file, mode 100755)

ARG UBUNTU_VERSION=22.04

FROM ubuntu:$UBUNTU_VERSION as build

RUN apt-get update && \
    apt-get install -y build-essential git libcurl4-openssl-dev

WORKDIR /app

COPY . .

ENV LLAMA_CURL=1

RUN make -j $(nproc) llama-server

FROM ubuntu:$UBUNTU_VERSION as runtime

RUN apt-get update && \
    apt-get install -y libcurl4-openssl-dev libgomp1

COPY --from=build /app/llama-server /llama-server

ENV LC_ALL=C.utf8

ENTRYPOINT [ "/llama-server" ]
ollama/llm/llama.cpp/.devops/nix/apps.nix (new file, mode 100755)

{
  perSystem =
    { config, lib, ... }:
    {
      apps =
        let
          inherit (config.packages) default;
          binaries = [
            "llama-cli"
            "llama-embedding"
            "llama-server"
            "llama-quantize"
            "llama-train-text-from-scratch"
          ];
          mkApp = name: {
            type = "app";
            program = "${default}/bin/${name}";
          };
        in
        lib.genAttrs binaries mkApp;
    };
}
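
These flake apps wrap binaries from the default package, so they can be started with nix run; a hedged sketch (assumes the repository's flake imports this module and that flakes are enabled; model paths are placeholders):

nix run .#llama-cli -- -m /path/to/model.gguf -p "Hello" -n 64
nix run .#llama-server -- -m /path/to/model.gguf --port 8080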
ollama/llm/llama.cpp/.devops/nix/devshells.nix (new file, mode 100755)

{
  perSystem =
    { config, lib, ... }:
    {
      devShells =
        lib.concatMapAttrs
          (name: package: {
            ${name} = package.passthru.shell;
            ${name + "-extra"} = package.passthru.shell-extra;
          })
          config.packages;
    };
}
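
Every package thus exposes a "<name>" shell and a heavier "<name>-extra" shell; a hedged sketch of entering them (the "default" attribute name is an assumption about how the flake names its packages):

nix develop .#default          # numpy + sentencepiece
nix develop .#default-extra    # adds tiktoken, torchWithoutCuda, transformers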
ollama/llm/llama.cpp/.devops/nix/docker.nix (new file, mode 100755)

{
  lib,
  dockerTools,
  buildEnv,
  llama-cpp,
  interactive ? true,
  coreutils,
}:

# A tar that can be fed into `docker load`:
#
# $ nix build .#llamaPackages.docker
# $ docker load < result

# For details and variations cf.
# - https://nixos.org/manual/nixpkgs/unstable/#ssec-pkgs-dockerTools-buildLayeredImage
# - https://discourse.nixos.org/t/a-faster-dockertools-buildimage-prototype/16922
# - https://nixery.dev/

# Approximate (compressed) sizes, at the time of writing, are:
#
# .#llamaPackages.docker: 125M;
# .#llamaPackagesCuda.docker: 537M;
# .#legacyPackages.aarch64-linux.llamaPackagesXavier.docker: 415M.

dockerTools.buildLayeredImage {
  name = llama-cpp.pname;
  tag = "latest";

  contents =
    [ llama-cpp ]
    ++ lib.optionals interactive [
      coreutils
      dockerTools.binSh
      dockerTools.caCertificates
    ];
}
ollama/llm/llama.cpp/.devops/nix/jetson-support.nix (new file, mode 100755)

{ inputs, ... }:
{
  perSystem =
    {
      config,
      system,
      lib,
      pkgsCuda,
      ...
    }:
    {
      legacyPackages =
        let
          caps.llamaPackagesXavier = "7.2";
          caps.llamaPackagesOrin = "8.7";
          caps.llamaPackagesTX2 = "6.2";
          caps.llamaPackagesNano = "5.3";

          pkgsFor =
            cap:
            import inputs.nixpkgs {
              inherit system;
              config = {
                cudaSupport = true;
                cudaCapabilities = [ cap ];
                cudaEnableForwardCompat = false;
                inherit (pkgsCuda.config) allowUnfreePredicate;
              };
            };
        in
        builtins.mapAttrs (name: cap: (pkgsFor cap).callPackage ./scope.nix { }) caps;

      packages = lib.optionalAttrs (system == "aarch64-linux") {
        jetson-xavier = config.legacyPackages.llamaPackagesXavier.llama-cpp;
        jetson-orin = config.legacyPackages.llamaPackagesOrin.llama-cpp;
        jetson-nano = config.legacyPackages.llamaPackagesNano.llama-cpp;
      };
    };
}
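
On an aarch64-linux machine (or via a remote aarch64 builder), the Jetson-specific outputs above can be built directly; a hedged sketch:

# Build llama.cpp pinned to the Orin (SM 8.7) CUDA capability.
nix build .#jetson-orin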
ollama/llm/llama.cpp/.devops/nix/nixpkgs-instances.nix (new file, mode 100755)

{ inputs, ... }:
{
  # The _module.args definitions are passed on to modules as arguments. E.g.
  # the module `{ pkgs ... }: { /* config */ }` implicitly uses
  # `_module.args.pkgs` (defined in this case by flake-parts).
  perSystem =
    { system, ... }:
    {
      _module.args = {
        # Note: bringing up https://zimbatm.com/notes/1000-instances-of-nixpkgs
        # again, the below creates several nixpkgs instances which the
        # flake-centric CLI will be forced to evaluate e.g. on `nix flake show`.
        #
        # This is currently "slow" and "expensive", on a certain scale.
        # This also isn't "right" in that this hinders dependency injection at
        # the level of flake inputs. This might get removed in the foreseeable
        # future.
        #
        # Note that you can use these expressions without Nix
        # (`pkgs.callPackage ./devops/nix/scope.nix { }` is the entry point).

        pkgsCuda = import inputs.nixpkgs {
          inherit system;
          # Ensure dependencies use CUDA consistently (e.g. that openmpi, ucc,
          # and ucx are built with CUDA support)
          config.cudaSupport = true;
          config.allowUnfreePredicate =
            p:
            builtins.all
              (
                license:
                license.free
                || builtins.elem license.shortName [
                  "CUDA EULA"
                  "cuDNN EULA"
                ]
              )
              (p.meta.licenses or [ p.meta.license ]);
        };

        # Ensure dependencies use ROCm consistently
        pkgsRocm = import inputs.nixpkgs {
          inherit system;
          config.rocmSupport = true;
        };
      };
    };
}
ollama/llm/llama.cpp/.devops/nix/package.nix (new file, mode 100755)

{
  lib,
  glibc,
  config,
  stdenv,
  mkShell,
  runCommand,
  cmake,
  ninja,
  pkg-config,
  git,
  python3,
  mpi,
  blas,
  cudaPackages,
  darwin,
  rocmPackages,
  vulkan-headers,
  vulkan-loader,
  clblast,
  useBlas ?
    builtins.all (x: !x) [
      useCuda
      useMetalKit
      useOpenCL
      useRocm
      useVulkan
    ]
    && blas.meta.available,
  useCuda ? config.cudaSupport,
  useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin && !useOpenCL,
  useMpi ? false, # Increases the runtime closure size by ~700M
  useOpenCL ? false,
  useRocm ? config.rocmSupport,
  useVulkan ? false,
  llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake

  # It's necessary to consistently use backendStdenv when building with CUDA support,
  # otherwise we get libstdc++ errors downstream.
  effectiveStdenv ? if useCuda then cudaPackages.backendStdenv else stdenv,
  enableStatic ? effectiveStdenv.hostPlatform.isStatic,
  precompileMetalShaders ? false
}@inputs:

let
  inherit (lib)
    cmakeBool
    cmakeFeature
    optionals
    strings
    versionOlder
    ;

  stdenv = throw "Use effectiveStdenv instead";

  suffices =
    lib.optionals useBlas [ "BLAS" ]
    ++ lib.optionals useCuda [ "CUDA" ]
    ++ lib.optionals useMetalKit [ "MetalKit" ]
    ++ lib.optionals useMpi [ "MPI" ]
    ++ lib.optionals useOpenCL [ "OpenCL" ]
    ++ lib.optionals useRocm [ "ROCm" ]
    ++ lib.optionals useVulkan [ "Vulkan" ];

  pnameSuffix =
    strings.optionalString (suffices != [ ])
      "-${strings.concatMapStringsSep "-" strings.toLower suffices}";
  descriptionSuffix =
    strings.optionalString (suffices != [ ])
      ", accelerated with ${strings.concatStringsSep ", " suffices}";

  executableSuffix = effectiveStdenv.hostPlatform.extensions.executable;

  # TODO: package the Python in this repository in a Nix-like way.
  # It'd be nice to migrate to buildPythonPackage, as well as ensure this repo
  # is PEP 517-compatible, and ensure the correct .dist-info is generated.
  # https://peps.python.org/pep-0517/
  #
  # TODO: Package up each Python script or service appropriately, by making
  # them into "entrypoints"
  llama-python = python3.withPackages (ps: [
    ps.numpy
    ps.sentencepiece
  ]);

  # TODO(Green-Sky): find a better way to opt-into the heavy ml python runtime
  llama-python-extra = python3.withPackages (ps: [
    ps.numpy
    ps.sentencepiece
    ps.tiktoken
    ps.torchWithoutCuda
    ps.transformers
  ]);

  xcrunHost = runCommand "xcrunHost" {} ''
    mkdir -p $out/bin
    ln -s /usr/bin/xcrun $out/bin
  '';

  # apple_sdk is supposed to choose sane defaults, no need to handle isAarch64
  # separately
  darwinBuildInputs =
    with darwin.apple_sdk.frameworks;
    [
      Accelerate
      CoreVideo
      CoreGraphics
    ]
    ++ optionals useMetalKit [ MetalKit ];

  cudaBuildInputs = with cudaPackages; [
    cuda_cccl.dev # <nv/target>

    # A temporary hack for reducing the closure size, remove once cudaPackages
    # have stopped using lndir: https://github.com/NixOS/nixpkgs/issues/271792
    cuda_cudart.dev
    cuda_cudart.lib
    cuda_cudart.static
    libcublas.dev
    libcublas.lib
    libcublas.static
  ];

  rocmBuildInputs = with rocmPackages; [
    clr
    hipblas
    rocblas
  ];

  vulkanBuildInputs = [
    vulkan-headers
    vulkan-loader
  ];
in

effectiveStdenv.mkDerivation (
  finalAttrs: {
    pname = "llama-cpp${pnameSuffix}";
    version = llamaVersion;

    # Note: none of the files discarded here are visible in the sandbox or
    # affect the output hash. This also means they can be modified without
    # triggering a rebuild.
    src = lib.cleanSourceWith {
      filter =
        name: type:
        let
          noneOf = builtins.all (x: !x);
          baseName = baseNameOf name;
        in
        noneOf [
          (lib.hasSuffix ".nix" name) # Ignore *.nix files when computing outPaths
          (lib.hasSuffix ".md" name) # Ignore *.md changes whe computing outPaths
          (lib.hasPrefix "." baseName) # Skip hidden files and directories
          (baseName == "flake.lock")
        ];
      src = lib.cleanSource ../../.;
    };

    postPatch = ''
      substituteInPlace ./ggml-metal.m \
        --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
      substituteInPlace ./ggml-metal.m \
        --replace '[bundle pathForResource:@"default" ofType:@"metallib"];' "@\"$out/bin/default.metallib\";"
    '';

    # With PR#6015 https://github.com/ggerganov/llama.cpp/pull/6015,
    # `default.metallib` may be compiled with Metal compiler from XCode
    # and we need to escape sandbox on MacOS to access Metal compiler.
    # `xcrun` is used find the path of the Metal compiler, which is varible
    # and not on $PATH
    # see https://github.com/ggerganov/llama.cpp/pull/6118 for discussion
    __noChroot = effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders;

    nativeBuildInputs =
      [
        cmake
        ninja
        pkg-config
        git
      ]
      ++ optionals useCuda [
        cudaPackages.cuda_nvcc

        # TODO: Replace with autoAddDriverRunpath
        # once https://github.com/NixOS/nixpkgs/pull/275241 has been merged
        cudaPackages.autoAddOpenGLRunpathHook
      ]
      ++ optionals (effectiveStdenv.hostPlatform.isGnu && enableStatic) [ glibc.static ]
      ++ optionals (effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders) [ xcrunHost ];

    buildInputs =
      optionals effectiveStdenv.isDarwin darwinBuildInputs
      ++ optionals useCuda cudaBuildInputs
      ++ optionals useMpi [ mpi ]
      ++ optionals useOpenCL [ clblast ]
      ++ optionals useRocm rocmBuildInputs
      ++ optionals useBlas [ blas ]
      ++ optionals useVulkan vulkanBuildInputs;

    cmakeFlags =
      [
        (cmakeBool "LLAMA_NATIVE" false)
        (cmakeBool "LLAMA_BUILD_SERVER" true)
        (cmakeBool "BUILD_SHARED_LIBS" (!enableStatic))
        (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
        (cmakeBool "LLAMA_BLAS" useBlas)
        (cmakeBool "LLAMA_CLBLAST" useOpenCL)
        (cmakeBool "LLAMA_CUDA" useCuda)
        (cmakeBool "LLAMA_HIPBLAS" useRocm)
        (cmakeBool "LLAMA_METAL" useMetalKit)
        (cmakeBool "LLAMA_VULKAN" useVulkan)
        (cmakeBool "LLAMA_STATIC" enableStatic)
      ]
      ++ optionals useCuda [
        (
          with cudaPackages.flags;
          cmakeFeature "CMAKE_CUDA_ARCHITECTURES" (builtins.concatStringsSep ";" (map dropDot cudaCapabilities))
        )
      ]
      ++ optionals useRocm [
        (cmakeFeature "CMAKE_HIP_COMPILER" "${rocmPackages.llvm.clang}/bin/clang")
        (cmakeFeature "CMAKE_HIP_ARCHITECTURES" (builtins.concatStringsSep ";" rocmPackages.clr.gpuTargets))
      ]
      ++ optionals useMetalKit [
        (lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
        (cmakeBool "LLAMA_METAL_EMBED_LIBRARY" (!precompileMetalShaders))
      ];

    # Environment variables needed for ROCm
    env = optionals useRocm {
      ROCM_PATH = "${rocmPackages.clr}";
      HIP_DEVICE_LIB_PATH = "${rocmPackages.rocm-device-libs}/amdgcn/bitcode";
    };

    # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level,
    # if they haven't been added yet.
    postInstall = ''
      mkdir -p $out/include
      cp $src/llama.h $out/include/
    '';

    # Define the shells here, but don't add in the inputsFrom to avoid recursion.
    passthru = {
      inherit
        useBlas
        useCuda
        useMetalKit
        useMpi
        useOpenCL
        useRocm
        useVulkan
        ;

      shell = mkShell {
        name = "shell-${finalAttrs.finalPackage.name}";
        description = "contains numpy and sentencepiece";
        buildInputs = [ llama-python ];
        inputsFrom = [ finalAttrs.finalPackage ];
        shellHook = ''
          addToSearchPath "LD_LIBRARY_PATH" "${lib.getLib effectiveStdenv.cc.cc}/lib"
        '';
      };

      shell-extra = mkShell {
        name = "shell-extra-${finalAttrs.finalPackage.name}";
        description = "contains numpy, sentencepiece, torchWithoutCuda, and transformers";
        buildInputs = [ llama-python-extra ];
        inputsFrom = [ finalAttrs.finalPackage ];
      };
    };

    meta = {
      # Configurations we don't want even the CI to evaluate. Results in the
      # "unsupported platform" messages. This is mostly a no-op, because
      # cudaPackages would've refused to evaluate anyway.
      badPlatforms = optionals (useCuda || useOpenCL) lib.platforms.darwin;

      # Configurations that are known to result in build failures. Can be
      # overridden by importing Nixpkgs with `allowBroken = true`.
      broken = (useMetalKit && !effectiveStdenv.isDarwin);

      description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}";
      homepage = "https://github.com/ggerganov/llama.cpp/";
      license = lib.licenses.mit;

      # Accommodates `nix run` and `lib.getExe`
      mainProgram = "llama-cli";

      # These people might respond, on the best effort basis, if you ping them
      # in case of Nix-specific regressions or for reviewing Nix-specific PRs.
      # Consider adding yourself to this list if you want to ensure this flake
      # stays maintained and you're willing to invest your time. Do not add
      # other people without their consent. Consider removing people after
      # they've been unreachable for long periods of time.

      # Note that lib.maintainers is defined in Nixpkgs, but you may just add
      # an attrset following the same format as in
      # https://github.com/NixOS/nixpkgs/blob/f36a80e54da29775c78d7eff0e628c2b4e34d1d7/maintainers/maintainer-list.nix
      maintainers = with lib.maintainers; [
        philiptaron
        SomeoneSerge
      ];

      # Extend `badPlatforms` instead
      platforms = lib.platforms.all;
    };
  }
)
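
Since meta.mainProgram is "llama-cli", building the default package yields a directly runnable CLI; a hedged sketch (assumes the repository's flake exposes this derivation as its default package):

nix build .#default
./result/bin/llama-cli --help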