OpenDAS / llama.cpp, commit 97ef6ff8
Authored Dec 02, 2024 by xuxzh1
Commit message: update
Parent: 4cc1a614
Pipeline #2023: canceled with stages
Showing 20 changed files with 607 additions and 302 deletions (+607, -302).
.clang-format  (+161, -0)
.devops/full-cuda.Dockerfile  (+11, -14)
.devops/full-musa.Dockerfile  (+26, -0)
.devops/full-rocm.Dockerfile  (+3, -3)
.devops/llama-cli-cann.Dockerfile  (+44, -0)
.devops/llama-cli-cuda.Dockerfile  (+14, -11)
.devops/llama-cli-intel.Dockerfile  (+2, -2)
.devops/llama-cli-musa.Dockerfile  (+31, -0)
.devops/llama-cli-rocm.Dockerfile  (+3, -3)
.devops/llama-cli-vulkan.Dockerfile  (+1, -1)
.devops/llama-server-cuda.Dockerfile  (+17, -13)
.devops/llama-server-intel.Dockerfile  (+4, -2)
.devops/llama-server-musa.Dockerfile  (+36, -0)
.devops/llama-server-rocm.Dockerfile  (+5, -3)
.devops/llama-server-vulkan.Dockerfile  (+3, -1)
.devops/llama-server.Dockerfile  (+2, -0)
.devops/nix/devshells.nix  (+46, -7)
.devops/nix/nixpkgs-instances.nix  (+8, -10)
.devops/nix/package-gguf-py.nix  (+36, -0)
.devops/nix/package.nix  (+154, -232)
.clang-format (new file, mode 100644)

---
Language: Cpp
AlignAfterOpenBracket: Align
AlignArrayOfStructures: Left
AlignConsecutiveAssignments: AcrossComments
AlignConsecutiveBitFields: AcrossComments
AlignConsecutiveDeclarations: AcrossComments
AlignConsecutiveMacros: AcrossComments
# AlignConsecutiveShortCaseStatements: AcrossComments
AlignEscapedNewlines: Left # LeftWithLastLine
AlignOperands: Align
AlignTrailingComments:
  Kind: Always
  OverEmptyLines: 1
AllowAllArgumentsOnNextLine: true
AllowAllParametersOfDeclarationOnNextLine: false
# AllowBreakBeforeNoexceptSpecifier: OnlyWithParen
AllowShortBlocksOnASingleLine: Never
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: Inline
AllowShortIfStatementsOnASingleLine: Never
AllowShortLambdasOnASingleLine: Inline
AllowShortLoopsOnASingleLine: false
AlwaysBreakBeforeMultilineStrings: true
BinPackArguments: true
BinPackParameters: true # OnePerLine
BitFieldColonSpacing: Both
BreakBeforeBraces: Custom # Attach
BraceWrapping:
  AfterCaseLabel: true
  AfterClass: false
  AfterControlStatement: false
  AfterEnum: false
  AfterFunction: false
  AfterNamespace: false
  AfterObjCDeclaration: false
  AfterStruct: false
  AfterUnion: false
  AfterExternBlock: false
  BeforeCatch: false
  BeforeElse: false
  BeforeLambdaBody: false
  BeforeWhile: false
  IndentBraces: false
  SplitEmptyFunction: false
  SplitEmptyRecord: false
  SplitEmptyNamespace: false
# BreakAdjacentStringLiterals: true
BreakAfterAttributes: Never
BreakBeforeBinaryOperators: None
BreakBeforeInlineASMColon: OnlyMultiline
BreakBeforeTernaryOperators: false
# BreakBinaryOperations: Never
BreakConstructorInitializers: AfterColon
# BreakFunctionDefinitionParameters: false
BreakInheritanceList: AfterComma
BreakStringLiterals: true
# BreakTemplateDeclarations: Yes
ColumnLimit: 120
CommentPragmas: '^ IWYU pragma:'
CompactNamespaces: false
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
Cpp11BracedListStyle: false
DerivePointerAlignment: false
DisableFormat: false
EmptyLineBeforeAccessModifier: Leave
EmptyLineAfterAccessModifier: Never
ExperimentalAutoDetectBinPacking: false
FixNamespaceComments: true
IncludeBlocks: Regroup
IncludeCategories:
  - Regex: '^<.*\.h>'
    Priority: 1
    SortPriority: 0
  - Regex: '^<.*'
    Priority: 2
    SortPriority: 0
  - Regex: '.*'
    Priority: 3
    SortPriority: 0
IncludeIsMainRegex: '([-_](test|unittest))?$'
IncludeIsMainSourceRegex: ''
IndentAccessModifiers: false
IndentCaseBlocks: true
IndentCaseLabels: true
IndentExternBlock: NoIndent
IndentGotoLabels: false
IndentPPDirectives: AfterHash
IndentWidth: 4
IndentWrappedFunctionNames: false
InsertBraces: true # NOTE: may lead to incorrect formatting
InsertNewlineAtEOF: true
JavaScriptQuotes: Leave
JavaScriptWrapImports: true
KeepEmptyLinesAtTheStartOfBlocks: false
LambdaBodyIndentation: Signature
LineEnding: LF
MacroBlockBegin: ''
MacroBlockEnd: ''
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
ObjCBinPackProtocolList: Auto
ObjCBlockIndentWidth: 4
ObjCSpaceAfterProperty: true
ObjCSpaceBeforeProtocolList: true
PPIndentWidth: -1
PackConstructorInitializers: CurrentLine
PenaltyBreakAssignment: 2
PenaltyBreakBeforeFirstCallParameter: 1
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
PenaltyBreakString: 1000
PenaltyBreakTemplateDeclaration: 10
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 200
PointerAlignment: Middle
QualifierAlignment: Left
#QualifierOrder: ['static', 'inline', 'friend', 'constexpr', 'const', 'volatile', 'type', 'restrict']
RawStringFormats:
  - Language: Cpp
    Delimiters:
      - cc
      - CC
      - cpp
      - Cpp
      - CPP
      - 'c++'
      - 'C++'
    CanonicalDelimiter: ''
ReferenceAlignment: Middle
ReflowComments: false # IndentOnly
SeparateDefinitionBlocks: Always
SortIncludes: CaseInsensitive
SortUsingDeclarations: LexicographicNumeric
SpaceAfterCStyleCast: true
SpaceAfterLogicalNot: false
SpaceAfterTemplateKeyword: true
SpaceBeforeAssignmentOperators: true
SpaceBeforeCpp11BracedList: false
SpaceBeforeCtorInitializerColon: true
SpaceBeforeInheritanceColon: true
SpaceBeforeParens: ControlStatements
SpaceBeforeRangeBasedForLoopColon: true
SpaceInEmptyBlock: false
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 2
SpacesInAngles: Never
SpacesInContainerLiterals: true
SpacesInLineCommentPrefix:
  Minimum: 1
  Maximum: -1
SpacesInParentheses: false
SpacesInSquareBrackets: false
SpaceBeforeSquareBrackets: false
Standard: c++17
TabWidth: 4
UseTab: Never
WhitespaceSensitiveMacros: ['STRINGIZE']
...
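With this file at the repository root, contributors can apply the project style locally. A minimal usage sketch (the example path src/llama.cpp is an illustrative assumption, not part of this commit):

    clang-format -i src/llama.cpp    # reformat one file in place using the nearest .clang-format
    git clang-format                 # or reformat only the lines touched by staged changes
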
.devops/full-cuda.Dockerfile

 ARG UBUNTU_VERSION=22.04
 # This needs to generally match the container host's environment.
-ARG CUDA_VERSION=11.7.1
+ARG CUDA_VERSION=12.6.0
 # Target the CUDA build image
 ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}

 FROM ${BASE_CUDA_DEV_CONTAINER} AS build

-# Unless otherwise specified, we make a fat build.
-ARG CUDA_DOCKER_ARCH=all
+# CUDA architecture to build for (defaults to all supported archs)
+ARG CUDA_DOCKER_ARCH=default

 RUN apt-get update && \
-    apt-get install -y build-essential python3 python3-pip git libcurl4-openssl-dev libgomp1
+    apt-get install -y build-essential cmake python3 python3-pip git libcurl4-openssl-dev libgomp1

 COPY requirements.txt requirements.txt
 COPY requirements requirements

@@ -24,13 +22,12 @@ WORKDIR /app
 COPY . .

-# Set nvcc architecture
-ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
-# Enable CUDA
-ENV GGML_CUDA=1
-# Enable cURL
-ENV LLAMA_CURL=1
-
-RUN make -j$(nproc)
+# Use the default CUDA archs if not specified
+RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
+        export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
+    fi && \
+    cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
+    cmake --build build --config Release -j$(nproc) && \
+    cp build/bin/* .

 ENTRYPOINT ["/app/.devops/tools.sh"]

.devops/full-musa.Dockerfile (new file, mode 100644)

ARG UBUNTU_VERSION=22.04
# This needs to generally match the container host's environment.
ARG MUSA_VERSION=rc3.1.0
# Target the MUSA build image
ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION}

FROM ${BASE_MUSA_DEV_CONTAINER} AS build

RUN apt-get update && \
    apt-get install -y build-essential cmake python3 python3-pip git libcurl4-openssl-dev libgomp1

COPY requirements.txt requirements.txt
COPY requirements requirements

RUN pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt

WORKDIR /app

COPY . .

RUN cmake -B build -DGGML_NATIVE=OFF -DGGML_MUSA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
    cmake --build build --config Release -j$(nproc) && \
    cp build/bin/* .

ENTRYPOINT ["/app/.devops/tools.sh"]

.devops/full-rocm.Dockerfile

@@ -11,7 +11,7 @@ FROM ${BASE_ROCM_DEV_CONTAINER} AS build
 # Unless otherwise specified, we make a fat build.
 # List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
 # This is mostly tied to rocBLAS supported archs.
-ARG ROCM_DOCKER_ARCH=\
+ARG ROCM_DOCKER_ARCH="\
     gfx803 \
     gfx900 \
     gfx906 \
@@ -21,7 +21,7 @@ ARG ROCM_DOCKER_ARCH=\
     gfx1030 \
     gfx1100 \
     gfx1101 \
-    gfx1102
+    gfx1102"

 COPY requirements.txt requirements.txt
 COPY requirements requirements

@@ -34,7 +34,7 @@ WORKDIR /app
 COPY . .

 # Set nvcc architecture
-ENV GPU_TARGETS=${ROCM_DOCKER_ARCH}
+ENV AMDGPU_TARGETS=${ROCM_DOCKER_ARCH}
 # Enable ROCm
 ENV GGML_HIPBLAS=1
 ENV CC=/opt/rocm/llvm/bin/clang
...

.devops/llama-cli-cann.Dockerfile (new file, mode 100644)

ARG ASCEND_VERSION=8.0.rc2.alpha003-910b-openeuler22.03-py3.8

FROM ascendai/cann:$ASCEND_VERSION AS build

WORKDIR /app

COPY . .

RUN yum install -y gcc g++ cmake make
ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
ENV LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:$LIBRARY_PATH
ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/lib64/plugin/opskernel:${ASCEND_TOOLKIT_HOME}/lib64/plugin/nnengine:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe/op_tiling:${LD_LIBRARY_PATH}
ENV PYTHONPATH=${ASCEND_TOOLKIT_HOME}/python/site-packages:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe:${PYTHONPATH}
ENV PATH=${ASCEND_TOOLKIT_HOME}/bin:${ASCEND_TOOLKIT_HOME}/compiler/ccec_compiler/bin:${PATH}
ENV ASCEND_AICPU_PATH=${ASCEND_TOOLKIT_HOME}
ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp
ENV TOOLCHAIN_HOME=${ASCEND_TOOLKIT_HOME}/toolkit
ENV ASCEND_HOME_PATH=${ASCEND_TOOLKIT_HOME}

# find libascend_hal.so, because the drive hasn`t been mounted.
ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/runtime/lib64/stub:$LD_LIBRARY_PATH

RUN echo "Building with static libs" && \
    source /usr/local/Ascend/ascend-toolkit/set_env.sh --force && \
    cmake -B build -DGGML_NATIVE=OFF -DGGML_CANN=ON -DBUILD_SHARED_LIBS=OFF && \
    cmake --build build --config Release --target llama-cli

# TODO: use image with NNRT
FROM ascendai/cann:$ASCEND_VERSION AS runtime
COPY --from=build /app/build/bin/llama-cli /llama-cli

ENV LC_ALL=C.utf8

ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
ENV LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:$LIBRARY_PATH
ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/lib64/plugin/opskernel:${ASCEND_TOOLKIT_HOME}/lib64/plugin/nnengine:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe/op_tiling:${LD_LIBRARY_PATH}
ENV PYTHONPATH=${ASCEND_TOOLKIT_HOME}/python/site-packages:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe:${PYTHONPATH}
ENV PATH=${ASCEND_TOOLKIT_HOME}/bin:${ASCEND_TOOLKIT_HOME}/compiler/ccec_compiler/bin:${PATH}
ENV ASCEND_AICPU_PATH=${ASCEND_TOOLKIT_HOME}
ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp
ENV TOOLCHAIN_HOME=${ASCEND_TOOLKIT_HOME}/toolkit
ENV ASCEND_HOME_PATH=${ASCEND_TOOLKIT_HOME}

ENTRYPOINT ["/llama-cli" ]

.devops/llama-cli-cuda.Dockerfile

 ARG UBUNTU_VERSION=22.04
 # This needs to generally match the container host's environment.
-ARG CUDA_VERSION=11.7.1
+ARG CUDA_VERSION=12.6.0
 # Target the CUDA build image
 ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
 # Target the CUDA runtime image

@@ -8,28 +8,31 @@ ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_V
 FROM ${BASE_CUDA_DEV_CONTAINER} AS build

-# Unless otherwise specified, we make a fat build.
-ARG CUDA_DOCKER_ARCH=all
+# CUDA architecture to build for (defaults to all supported archs)
+ARG CUDA_DOCKER_ARCH=default

 RUN apt-get update && \
-    apt-get install -y build-essential git
+    apt-get install -y build-essential git cmake

 WORKDIR /app

 COPY . .

-# Set nvcc architecture
-ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
-# Enable CUDA
-ENV GGML_CUDA=1
-
-RUN make -j$(nproc) llama-cli
+# Use the default CUDA archs if not specified
+RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
+        export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
+    fi && \
+    cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
+    cmake --build build --config Release --target llama-cli -j$(nproc) && \
+    mkdir -p /app/lib && \
+    find build -name "*.so" -exec cp {} /app/lib \;

 FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime

 RUN apt-get update && \
     apt-get install -y libgomp1

-COPY --from=build /app/llama-cli /llama-cli
+COPY --from=build /app/lib/ /
+COPY --from=build /app/build/bin/llama-cli /

 ENTRYPOINT [ "/llama-cli" ]

.devops/llama-cli-intel.Dockerfile

-ARG ONEAPI_VERSION=2024.1.1-devel-ubuntu22.04
+ARG ONEAPI_VERSION=2025.0.0-0-devel-ubuntu22.04

 FROM intel/oneapi-basekit:$ONEAPI_VERSION AS build

@@ -15,7 +15,7 @@ RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \
         export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \
     fi && \
     echo "Building with static libs" && \
-    cmake -B build -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx \
+    cmake -B build -DGGML_NATIVE=OFF -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx \
     ${OPT_SYCL_F16} -DBUILD_SHARED_LIBS=OFF && \
     cmake --build build --config Release --target llama-cli
...

.devops/llama-cli-musa.Dockerfile (new file, mode 100644)

ARG UBUNTU_VERSION=22.04
# This needs to generally match the container host's environment.
ARG MUSA_VERSION=rc3.1.0
# Target the MUSA build image
ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
# Target the MUSA runtime image
ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}

FROM ${BASE_MUSA_DEV_CONTAINER} AS build

RUN apt-get update && \
    apt-get install -y build-essential git cmake

WORKDIR /app

COPY . .

RUN cmake -B build -DGGML_NATIVE=OFF -DGGML_MUSA=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
    cmake --build build --config Release --target llama-cli -j$(nproc) && \
    mkdir -p /app/lib && \
    find build -name "*.so" -exec cp {} /app/lib \;

FROM ${BASE_MUSA_RUN_CONTAINER} AS runtime

RUN apt-get update && \
    apt-get install -y libgomp1

COPY --from=build /app/lib/ /
COPY --from=build /app/build/bin/llama-cli /llama-cli

ENTRYPOINT [ "/llama-cli" ]

View file @
97ef6ff8
...
@@ -11,7 +11,7 @@ FROM ${BASE_ROCM_DEV_CONTAINER} AS build
...
@@ -11,7 +11,7 @@ FROM ${BASE_ROCM_DEV_CONTAINER} AS build
# Unless otherwise specified, we make a fat build.
# Unless otherwise specified, we make a fat build.
# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
# This is mostly tied to rocBLAS supported archs.
# This is mostly tied to rocBLAS supported archs.
ARG
ROCM_DOCKER_ARCH=\
ARG
ROCM_DOCKER_ARCH=
"
\
gfx803 \
gfx803 \
gfx900 \
gfx900 \
gfx906 \
gfx906 \
...
@@ -21,7 +21,7 @@ ARG ROCM_DOCKER_ARCH=\
...
@@ -21,7 +21,7 @@ ARG ROCM_DOCKER_ARCH=\
gfx1030 \
gfx1030 \
gfx1100 \
gfx1100 \
gfx1101 \
gfx1101 \
gfx1102
gfx1102
"
COPY
requirements.txt requirements.txt
COPY
requirements.txt requirements.txt
COPY
requirements requirements
COPY
requirements requirements
...
@@ -34,7 +34,7 @@ WORKDIR /app
...
@@ -34,7 +34,7 @@ WORKDIR /app
COPY
. .
COPY
. .
# Set nvcc architecture
# Set nvcc architecture
ENV
GPU_TARGETS=${ROCM_DOCKER_ARCH}
ENV
AMD
GPU_TARGETS=${ROCM_DOCKER_ARCH}
# Enable ROCm
# Enable ROCm
ENV
GGML_HIPBLAS=1
ENV
GGML_HIPBLAS=1
ENV
CC=/opt/rocm/llvm/bin/clang
ENV
CC=/opt/rocm/llvm/bin/clang
...
...
.devops/llama-cli-vulkan.Dockerfile

@@ -14,7 +14,7 @@ RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key
 # Build it
 WORKDIR /app
 COPY . .
-RUN cmake -B build -DGGML_VULKAN=1 && \
+RUN cmake -B build -DGGML_NATIVE=OFF -DGGML_VULKAN=1 && \
     cmake --build build --config Release --target llama-cli

 # Clean up
...

.devops/llama-server-cuda.Dockerfile

 ARG UBUNTU_VERSION=22.04
 # This needs to generally match the container host's environment.
-ARG CUDA_VERSION=11.7.1
+ARG CUDA_VERSION=12.6.0
 # Target the CUDA build image
 ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
 # Target the CUDA runtime image

@@ -8,31 +8,35 @@ ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_V
 FROM ${BASE_CUDA_DEV_CONTAINER} AS build

-# Unless otherwise specified, we make a fat build.
-ARG CUDA_DOCKER_ARCH=all
+# CUDA architecture to build for (defaults to all supported archs)
+ARG CUDA_DOCKER_ARCH=default

 RUN apt-get update && \
-    apt-get install -y build-essential git libcurl4-openssl-dev
+    apt-get install -y build-essential git cmake libcurl4-openssl-dev

 WORKDIR /app

 COPY . .

-# Set nvcc architecture
-ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
-# Enable CUDA
-ENV GGML_CUDA=1
-# Enable cURL
-ENV LLAMA_CURL=1
-
-RUN make -j$(nproc) llama-server
+# Use the default CUDA archs if not specified
+RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
+        export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
+    fi && \
+    cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
+    cmake --build build --config Release --target llama-server -j$(nproc) && \
+    mkdir -p /app/lib && \
+    find build -name "*.so" -exec cp {} /app/lib \;

 FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime

 RUN apt-get update && \
     apt-get install -y libcurl4-openssl-dev libgomp1 curl

-COPY --from=build /app/llama-server /llama-server
+COPY --from=build /app/lib/ /
+COPY --from=build /app/build/bin/llama-server /llama-server
+
+# Must be set to 0.0.0.0 so it can listen to requests from host machine
+ENV LLAMA_ARG_HOST=0.0.0.0

 HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
...

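A usage sketch for the server image (tag, port mapping, and model path are illustrative assumptions; LLAMA_ARG_HOST=0.0.0.0 makes the server reachable from outside the container, and the HEALTHCHECK polls the /health endpoint on port 8080):

    docker build -t local/llama.cpp:server-cuda -f .devops/llama-server-cuda.Dockerfile .
    docker run --gpus all -p 8080:8080 -v /path/to/models:/models local/llama.cpp:server-cuda -m /models/model.gguf
    curl -f http://localhost:8080/health
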
.devops/llama-server-intel.Dockerfile

-ARG ONEAPI_VERSION=2024.1.1-devel-ubuntu22.04
+ARG ONEAPI_VERSION=2025.0.0-0-devel-ubuntu22.04

 FROM intel/oneapi-basekit:$ONEAPI_VERSION AS build

@@ -15,7 +15,7 @@ RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \
         export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \
     fi && \
     echo "Building with dynamic libs" && \
-    cmake -B build -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON ${OPT_SYCL_F16} && \
+    cmake -B build -DGGML_NATIVE=OFF -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON ${OPT_SYCL_F16} && \
     cmake --build build --config Release --target llama-server

 FROM intel/oneapi-basekit:$ONEAPI_VERSION AS runtime

@@ -26,6 +26,8 @@ RUN apt-get update && \
 COPY --from=build /app/build/bin/llama-server /llama-server

 ENV LC_ALL=C.utf8
+
+# Must be set to 0.0.0.0 so it can listen to requests from host machine
+ENV LLAMA_ARG_HOST=0.0.0.0

 HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
...

.devops/llama-server-musa.Dockerfile (new file, mode 100644)

ARG UBUNTU_VERSION=22.04
# This needs to generally match the container host's environment.
ARG MUSA_VERSION=rc3.1.0
# Target the MUSA build image
ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
# Target the MUSA runtime image
ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}

FROM ${BASE_MUSA_DEV_CONTAINER} AS build

RUN apt-get update && \
    apt-get install -y build-essential git cmake libcurl4-openssl-dev

WORKDIR /app

COPY . .

RUN cmake -B build -DGGML_NATIVE=OFF -DGGML_MUSA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
    cmake --build build --config Release --target llama-server -j$(nproc) && \
    mkdir -p /app/lib && \
    find build -name "*.so" -exec cp {} /app/lib \;

FROM ${BASE_MUSA_RUN_CONTAINER} AS runtime

RUN apt-get update && \
    apt-get install -y libcurl4-openssl-dev libgomp1 curl

COPY --from=build /app/lib/ /
COPY --from=build /app/build/bin/llama-server /llama-server

# Must be set to 0.0.0.0 so it can listen to requests from host machine
ENV LLAMA_ARG_HOST=0.0.0.0

HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/llama-server" ]

.devops/llama-server-rocm.Dockerfile

@@ -11,7 +11,7 @@ FROM ${BASE_ROCM_DEV_CONTAINER} AS build
 # Unless otherwise specified, we make a fat build.
 # List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
 # This is mostly tied to rocBLAS supported archs.
-ARG ROCM_DOCKER_ARCH=\
+ARG ROCM_DOCKER_ARCH="\
     gfx803 \
     gfx900 \
     gfx906 \
@@ -21,7 +21,7 @@ ARG ROCM_DOCKER_ARCH=\
     gfx1030 \
     gfx1100 \
     gfx1101 \
-    gfx1102
+    gfx1102"

 COPY requirements.txt requirements.txt
 COPY requirements requirements

@@ -34,11 +34,13 @@ WORKDIR /app
 COPY . .

 # Set nvcc architecture
-ENV GPU_TARGETS=${ROCM_DOCKER_ARCH}
+ENV AMDGPU_TARGETS=${ROCM_DOCKER_ARCH}
 # Enable ROCm
 ENV GGML_HIPBLAS=1
 ENV CC=/opt/rocm/llvm/bin/clang
 ENV CXX=/opt/rocm/llvm/bin/clang++
+
+# Must be set to 0.0.0.0 so it can listen to requests from host machine
+ENV LLAMA_ARG_HOST=0.0.0.0

 # Enable cURL
 ENV LLAMA_CURL=1
...

.devops/llama-server-vulkan.Dockerfile

@@ -14,7 +14,7 @@ RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key
 # Build it
 WORKDIR /app
 COPY . .
-RUN cmake -B build -DGGML_VULKAN=1 -DLLAMA_CURL=1 && \
+RUN cmake -B build -DGGML_NATIVE=OFF -DGGML_VULKAN=1 -DLLAMA_CURL=1 && \
     cmake --build build --config Release --target llama-server

 # Clean up
@@ -23,6 +23,8 @@ RUN cp /app/build/bin/llama-server /llama-server && \
     rm -rf /app

 ENV LC_ALL=C.utf8
+
+# Must be set to 0.0.0.0 so it can listen to requests from host machine
+ENV LLAMA_ARG_HOST=0.0.0.0

 HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
...

.devops/llama-server.Dockerfile

@@ -21,6 +21,8 @@ RUN apt-get update && \
 COPY --from=build /app/llama-server /llama-server

 ENV LC_ALL=C.utf8
+
+# Must be set to 0.0.0.0 so it can listen to requests from host machine
+ENV LLAMA_ARG_HOST=0.0.0.0

 HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
...

.devops/nix/devshells.nix

+{ inputs, ... }:
 {
   perSystem =
-    { config, lib, ... }:
+    {
+      config,
+      lib,
+      system,
+      ...
+    }:
     {
       devShells =
-        lib.concatMapAttrs
-          (name: package: {
-            ${name} = package.passthru.shell;
-            ${name + "-extra"} = package.passthru.shell-extra;
-          })
-          config.packages;
+        let
+          pkgs = import inputs.nixpkgs { inherit system; };
+          stdenv = pkgs.stdenv;
+          scripts = config.packages.python-scripts;
+        in
+        lib.pipe (config.packages) [
+          (lib.concatMapAttrs (
+            name: package: {
+              ${name} = pkgs.mkShell {
+                name = "${name}";
+                inputsFrom = [ package ];
+                shellHook = ''
+                  echo "Entering ${name} devShell"
+                '';
+              };
+              "${name}-extra" =
+                if (name == "python-scripts") then
+                  null
+                else
+                  pkgs.mkShell {
+                    name = "${name}-extra";
+                    inputsFrom = [
+                      package
+                      scripts
+                    ];
+                    # Extra packages that *may* be used by some scripts
+                    packages = [ pkgs.python3Packages.tiktoken ];
+                    shellHook = ''
+                      echo "Entering ${name} devShell"
+                      addToSearchPath "LD_LIBRARY_PATH" "${lib.getLib stdenv.cc.cc}/lib"
+                    '';
+                  };
+            }
+          ))
+          (lib.filterAttrs (name: value: value != null))
+        ];
     };
 }

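A usage sketch for the reworked dev shells (the attribute names default and default-extra are assumptions about what config.packages exposes in this flake):

    nix develop .#default          # shell with the build inputs of the default package
    nix develop .#default-extra    # same, plus the Python scripts package and tiktoken
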
.devops/nix/nixpkgs-instances.nix

@@ -26,16 +26,14 @@ (both sides of this hunk contain the same expression; the change only reflows the allowUnfreePredicate definition)
       config.cudaSupport = true;
       config.allowUnfreePredicate =
         p:
         builtins.all (
           license:
           license.free
           || builtins.elem license.shortName [
             "CUDA EULA"
             "cuDNN EULA"
           ]
         ) (p.meta.licenses or [ p.meta.license ]);
     };
   # Ensure dependencies use ROCm consistently
   pkgsRocm = import inputs.nixpkgs {
...

.devops/nix/package-gguf-py.nix (new file, mode 100644)

{
  lib,
  llamaVersion,
  numpy,
  tqdm,
  sentencepiece,
  pyyaml,
  poetry-core,
  buildPythonPackage,
  pytestCheckHook,
}:

buildPythonPackage {
  pname = "gguf";
  version = llamaVersion;
  pyproject = true;
  nativeBuildInputs = [ poetry-core ];
  propagatedBuildInputs = [
    numpy
    tqdm
    sentencepiece
    pyyaml
  ];
  src = lib.cleanSource ../../gguf-py;
  pythonImportsCheck = [
    "numpy"
    "gguf"
  ];
  nativeCheckInputs = [ pytestCheckHook ];
  doCheck = true;
  meta = with lib; {
    description = "Python package for writing binary files in the GGUF format";
    license = licenses.mit;
    maintainers = [ maintainers.ditsuke ];
  };
}

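A usage sketch for the new gguf-py derivation (the flake attribute name gguf-py is an assumption about how flake.nix wires this file up):

    nix build .#gguf-py            # builds the package and runs its pytest check phase
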
.devops/nix/package.nix

@@ -3,31 +3,33 @@
   glibc,
   config,
   stdenv,
-  mkShell,
   runCommand,
   cmake,
   ninja,
   pkg-config,
   git,
-  python3,
   mpi,
   blas,
   cudaPackages,
+  autoAddDriverRunpath,
   darwin,
   rocmPackages,
   vulkan-headers,
   vulkan-loader,
   curl,
   shaderc,
   useBlas ?
     builtins.all (x: !x) [
       useCuda
       useMetalKit
       useRocm
       useVulkan
     ]
     && blas.meta.available,
   useCuda ? config.cudaSupport,
   useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin,
-  useMpi ? false, # Increases the runtime closure size by ~700M
+  # Increases the runtime closure size by ~700M
+  useMpi ? false,
   useRocm ? config.rocmSupport,
   enableCurl ? true,
   useVulkan ? false,

@@ -37,8 +39,8 @@
   # otherwise we get libstdc++ errors downstream.
   effectiveStdenv ? if useCuda then cudaPackages.backendStdenv else stdenv,
   enableStatic ? effectiveStdenv.hostPlatform.isStatic,
-  precompileMetalShaders ? false
-}@inputs:
+  precompileMetalShaders ? false,
+}:

 let
   inherit (lib)

@@ -46,7 +48,6 @@ let
     cmakeFeature
     optionals
     strings
-    versionOlder
     ;

   stdenv = throw "Use effectiveStdenv instead";

@@ -62,54 +63,11 @@ let
   pnameSuffix =
     strings.optionalString (suffices != [ ])
       "-${strings.concatMapStringsSep "-" strings.toLower suffices}";
   descriptionSuffix =
-    strings.optionalString (suffices != [ ])
-      ", accelerated with ${strings.concatStringsSep ", " suffices}";
-
-  executableSuffix = effectiveStdenv.hostPlatform.extensions.executable;
-
-  # TODO: package the Python in this repository in a Nix-like way.
-  # It'd be nice to migrate to buildPythonPackage, as well as ensure this repo
-  # is PEP 517-compatible, and ensure the correct .dist-info is generated.
-  # https://peps.python.org/pep-0517/
-  #
-  # TODO: Package up each Python script or service appropriately, by making
-  # them into "entrypoints"
-  llama-python = python3.withPackages (ps: [
-    ps.numpy
-    ps.sentencepiece
-  ]);
-
-  # TODO(Green-Sky): find a better way to opt-into the heavy ml python runtime
-  llama-python-extra = python3.withPackages (ps: [
-    ps.numpy
-    ps.sentencepiece
-    ps.tiktoken
-    ps.torchWithoutCuda
-    ps.transformers
-
-    # server bench
-    ps.matplotlib
-
-    # server tests
-    ps.openai
-    ps.behave
-    ps.prometheus-client
-
-    # for examples/pydantic-models-to-grammar-examples.py
-    ps.docstring-parser
-    ps.pydantic
-
-    # for scripts/compare-llama-bench.py
-    ps.gitpython
-    ps.tabulate
-  ]);
+    strings.optionalString (
+      suffices != [ ]
+    ) ", accelerated with ${strings.concatStringsSep ", " suffices}";

   xcrunHost = runCommand "xcrunHost" {} ''
     mkdir -p $out/bin
     ln -s /usr/bin/xcrun $out/bin
   '';

@@ -144,8 +102,7 @@
   ];
 in

 effectiveStdenv.mkDerivation (
   finalAttrs: {
     pname = "llama-cpp${pnameSuffix}";
     version = llamaVersion;

@@ -169,9 +126,9 @@ effectiveStdenv.mkDerivation (
     };

     postPatch = ''
-      substituteInPlace ./ggml/src/ggml-metal.m \
+      substituteInPlace ./ggml/src/ggml-metal/ggml-metal.m \
         --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
-      substituteInPlace ./ggml/src/ggml-metal.m \
+      substituteInPlace ./ggml/src/ggml-metal/ggml-metal.m \
         --replace '[bundle pathForResource:@"default" ofType:@"metallib"];' "@\"$out/bin/default.metallib\";"
     '';

@@ -193,15 +150,10 @@ effectiveStdenv.mkDerivation (
       ++ optionals useCuda [
         cudaPackages.cuda_nvcc
-
-        # TODO: Replace with autoAddDriverRunpath
-        # once https://github.com/NixOS/nixpkgs/pull/275241 has been merged
-        cudaPackages.autoAddOpenGLRunpathHook
+
+        autoAddDriverRunpath
       ]
-      ++ optionals (effectiveStdenv.hostPlatform.isGnu && enableStatic) [
-        glibc.static
-      ]
+      ++ optionals (effectiveStdenv.hostPlatform.isGnu && enableStatic) [ glibc.static ]
       ++ optionals (effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders) [ xcrunHost ];

     buildInputs =
       optionals effectiveStdenv.isDarwin darwinBuildInputs

@@ -221,7 +173,7 @@ effectiveStdenv.mkDerivation (
       (cmakeBool "GGML_NATIVE" false)
       (cmakeBool "GGML_BLAS" useBlas)
       (cmakeBool "GGML_CUDA" useCuda)
-      (cmakeBool "GGML_HIPBLAS" useRocm)
+      (cmakeBool "GGML_HIP" useRocm)
       (cmakeBool "GGML_METAL" useMetalKit)
       (cmakeBool "GGML_VULKAN" useVulkan)
       (cmakeBool "GGML_STATIC" enableStatic)

@@ -256,35 +208,6 @@ effectiveStdenv.mkDerivation (
       cp $src/include/llama.h $out/include/
     '';

-    # Define the shells here, but don't add in the inputsFrom to avoid recursion.
-    passthru = {
-      inherit
-        useBlas
-        useCuda
-        useMetalKit
-        useMpi
-        useRocm
-        useVulkan
-        ;
-
-      shell = mkShell {
-        name = "shell-${finalAttrs.finalPackage.name}";
-        description = "contains numpy and sentencepiece";
-        buildInputs = [ llama-python ];
-        inputsFrom = [ finalAttrs.finalPackage ];
-        shellHook = ''
-          addToSearchPath "LD_LIBRARY_PATH" "${lib.getLib effectiveStdenv.cc.cc}/lib"
-        '';
-      };
-
-      shell-extra = mkShell {
-        name = "shell-extra-${finalAttrs.finalPackage.name}";
-        description = "contains numpy, sentencepiece, torchWithoutCuda, and transformers";
-        buildInputs = [ llama-python-extra ];
-        inputsFrom = [ finalAttrs.finalPackage ];
-      };
-    };
-
     meta = {
       # Configurations we don't want even the CI to evaluate. Results in the
       # "unsupported platform" messages. This is mostly a no-op, because

@@ -320,5 +243,4 @@ effectiveStdenv.mkDerivation (
       # Extend `badPlatforms` instead
       platforms = lib.platforms.all;
     };
-  }
-)
+  })
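A build sketch for the slimmed-down package (the attribute names default and vulkan are assumptions about what the flake exposes on top of this package.nix):

    nix build .#default            # CPU build of llama.cpp from this package.nix
    nix build .#vulkan             # backend-specific variant, if the flake exposes one
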