Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
orangecat
ollama
Commits
527cc978
Unverified
Commit
527cc978
authored
Dec 10, 2024
by
Jeffrey Morgan
Committed by
GitHub
Dec 10, 2024
Browse files
llama: update vendored code to commit 40c6d79f (#7875)
parent
a37f4a86
Changes
288
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
73 additions
and
24 deletions
+73
-24
llama/unicode.cpp
llama/unicode.cpp
+27
-8
llama/unicode.h
llama/unicode.h
+1
-1
llama/vendoring
llama/vendoring
+1
-1
llm/server.go
llm/server.go
+0
-1
make/Makefile.rocm
make/Makefile.rocm
+4
-2
make/Makefile.sync
make/Makefile.sync
+37
-9
make/cuda.make
make/cuda.make
+2
-0
make/gpu.make
make/gpu.make
+1
-2
No files found.
llama/unicode.cpp
View file @
527cc978
/**
* llama.cpp - commit
3f1ae2e32cde00c39b96be6d01c2997c29bae555
- do not edit this file
* llama.cpp - commit
40c6d79fb52f995f47507fedfeaae2ac05d9b35c
- do not edit this file
*
* MIT License
*
...
...
@@ -154,11 +154,11 @@ uint32_t unicode_cpt_from_utf8(const std::string & utf8, size_t & offset) {
static
std
::
vector
<
codepoint_flags
>
unicode_cpt_flags_array
()
{
std
::
vector
<
codepoint_flags
>
cpt_flags
(
MAX_CODEPOINTS
,
codepoint_flags
::
UNDEFINED
);
assert
(
unicode_ranges_flags
.
front
()
.
first
==
0
);
assert
(
unicode_ranges_flags
.
b
ack
()
.
first
==
MAX_CODEPOINTS
);
assert
(
unicode_ranges_flags
.
begin
()[
0
]
.
first
==
0
);
assert
(
unicode_ranges_flags
.
b
egin
()[
unicode_ranges_flags
.
size
()
-
1
]
.
first
==
MAX_CODEPOINTS
);
for
(
size_t
i
=
1
;
i
<
unicode_ranges_flags
.
size
();
++
i
)
{
const
auto
range_ini
=
unicode_ranges_flags
[
i
-
1
];
// codepoint_ini, flags
const
auto
range_end
=
unicode_ranges_flags
[
i
];
// codepoint_end, flags
const
auto
range_ini
=
unicode_ranges_flags
.
begin
()
[
i
-
1
];
// codepoint_ini, flags
const
auto
range_end
=
unicode_ranges_flags
.
begin
()
[
i
];
// codepoint_end, flags
for
(
uint32_t
cpt
=
range_ini
.
first
;
cpt
<
range_end
.
first
;
++
cpt
)
{
cpt_flags
[
cpt
]
=
range_ini
.
second
;
}
...
...
@@ -247,7 +247,19 @@ static inline std::wstring unicode_wstring_from_utf8(const std::string & s) {
free
(
wbuf
);
return
ret
;
#else
#if defined(__clang__)
// disable C++17 deprecation warning for std::codecvt_utf8
# pragma clang diagnostic push
# pragma clang diagnostic ignored "-Wdeprecated-declarations"
#endif
std
::
wstring_convert
<
std
::
codecvt_utf8
<
wchar_t
>>
conv
;
#if defined(__clang__)
# pragma clang diagnostic pop
#endif
return
conv
.
from_bytes
(
s
);
#endif
}
...
...
@@ -644,7 +656,7 @@ std::vector<uint32_t> unicode_cpts_normalize_nfd(const std::vector<uint32_t> & c
std
::
vector
<
uint32_t
>
result
(
cpts
.
size
());
for
(
size_t
i
=
0
;
i
<
cpts
.
size
();
++
i
)
{
const
uint32_t
cpt
=
cpts
[
i
];
auto
it
=
std
::
upper_bound
(
unicode_ranges_nfd
.
c
begin
(),
unicode_ranges_nfd
.
c
end
(),
cpt
,
comp
)
-
1
;
auto
it
=
std
::
upper_bound
(
unicode_ranges_nfd
.
begin
(),
unicode_ranges_nfd
.
end
(),
cpt
,
comp
)
-
1
;
result
[
i
]
=
(
it
->
first
<=
cpt
&&
cpt
<=
it
->
last
)
?
it
->
nfd
:
cpt
;
}
return
result
;
...
...
@@ -686,8 +698,15 @@ uint8_t unicode_utf8_to_byte(const std::string & utf8) {
}
uint32_t
unicode_tolower
(
uint32_t
cp
)
{
auto
it
=
unicode_map_lowercase
.
find
(
cp
);
return
it
==
unicode_map_lowercase
.
end
()
?
cp
:
it
->
second
;
// binary search
auto
it
=
std
::
lower_bound
(
unicode_map_lowercase
.
begin
(),
unicode_map_lowercase
.
end
(),
cp
,
[](
const
std
::
pair
<
uint32_t
,
uint32_t
>
&
pair
,
uint32_t
value
)
{
return
pair
.
first
<
value
;
});
if
(
it
!=
unicode_map_lowercase
.
end
()
&&
it
->
first
==
cp
)
{
return
it
->
second
;
}
return
cp
;
// Return the original code point if no lowercase mapping is found
}
std
::
vector
<
std
::
string
>
unicode_regex_split
(
const
std
::
string
&
text
,
const
std
::
vector
<
std
::
string
>
&
regex_exprs
)
{
...
...
llama/unicode.h
View file @
527cc978
/**
* llama.cpp - commit
3f1ae2e32cde00c39b96be6d01c2997c29bae555
- do not edit this file
* llama.cpp - commit
40c6d79fb52f995f47507fedfeaae2ac05d9b35c
- do not edit this file
*
* MIT License
*
...
...
llama/vendoring
View file @
527cc978
LLAMACPP_BASE_COMMIT=3f1ae2e32cde00c39b96be6d01c2997c29bae555
\ No newline at end of file
LLAMACPP_BASE_COMMIT=40c6d79fb52f995f47507fedfeaae2ac05d9b35c
llm/server.go
View file @
527cc978
...
...
@@ -699,7 +699,6 @@ func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn fu
"top_k"
:
req
.
Options
.
TopK
,
"top_p"
:
req
.
Options
.
TopP
,
"min_p"
:
req
.
Options
.
MinP
,
"tfs_z"
:
req
.
Options
.
TFSZ
,
"typical_p"
:
req
.
Options
.
TypicalP
,
"repeat_last_n"
:
req
.
Options
.
RepeatLastN
,
"repeat_penalty"
:
req
.
Options
.
RepeatPenalty
,
...
...
make/Makefile.rocm
View file @
527cc978
...
...
@@ -51,7 +51,7 @@ GPU_DIST_LIB_DEPS= $(sort $(addprefix $(ROCM_DIST_DEPS_DIR)/,$(notdir $(GPU_LIBS
ROCBLAS_DIST_DEP_MANIFEST
=
$(ROCM_DIST_DEPS_DIR)
/rocblas/library/TensileManifest.txt
ifeq
($(OS),linux)
GPU_COMPILER_FPIC
:=
-fPIC
-Wno-unused-function
-std
=
gnu++1
1
GPU_COMPILER_FPIC
:=
-fPIC
-Wno-unused-function
-std
=
gnu++1
7
else
ifeq
($(OS),windows)
GPU_COMPILER_FPIC
:=
-Xclang
--dependent-lib
=
msvcrt
endif
...
...
@@ -69,11 +69,13 @@ GPU_COMPILER_CUFLAGS = \
-O3
\
-DGGML_USE_CUDA
\
-DGGML_BUILD
=
1
\
-DGGML_BACKEND_BUILD
=
1
\
-DGGML_SHARED
=
1
\
-DGGML_BACKEND_SHARED
=
1
\
-DGGML_CUDA_DMMV_X
=
32
\
-DGGML_CUDA_MMV_Y
=
1
\
-DGGML_SCHED_MAX_COPIES
=
4
\
-DGGML_USE_HIP
BLAS
\
-DGGML_USE_HIP
\
-DGGML_USE_LLAMAFILE
\
-DHIP_FAST_MATH
\
-D__HIP_PLATFORM_AMD__
=
1
\
...
...
make/Makefile.sync
View file @
527cc978
...
...
@@ -86,13 +86,14 @@ LLAMACPP_FILES=\
src/llama-sampling.cpp
\
src/llama-sampling.h
\
include/llama.h
\
ggml/src/llamafile/sgemm.cpp
\
ggml/src/llamafile/sgemm.h
ggml/include/ggml-cpu.h
\
ggml/src/ggml-cpu/llamafile/sgemm.cpp
\
ggml/src/ggml-cpu/llamafile/sgemm.h
$(foreach
name,$(LLAMACPP_FILES),$(eval
$(call
vendor_file,$(name),$(DEST_DIR))))
# llama.cpp files -> llama/llamafile
LLAMAFILE_FILES
=
\
ggml/src/llamafile/sgemm.h
ggml/src/
ggml-cpu/
llamafile/sgemm.h
$(foreach
name,$(LLAMAFILE_FILES),$(eval
$(call
vendor_file,$(name),$(DEST_DIR)llamafile/)))
# ggml files -> llama/
...
...
@@ -101,26 +102,53 @@ GGML_FILES= \
ggml/include/ggml.h
\
ggml/src/ggml-quants.c
\
ggml/src/ggml-quants.h
\
ggml/src/ggml-metal.metal
\
ggml/src/ggml-metal
/ggml-metal
.metal
\
ggml/include/ggml-metal.h
\
ggml/src/ggml-impl.h
\
ggml/src/ggml-threading.h
\
ggml/include/ggml-cuda.h
\
ggml/src/ggml-cuda.cu
\
ggml/src/ggml-backend-reg.cpp
\
ggml/src/ggml-metal/ggml-metal-impl.h
\
ggml/src/ggml-common.h
\
ggml/include/ggml-backend.h
\
ggml/src/ggml-backend.c
\
ggml/src/ggml-backend.c
pp
\
ggml/src/ggml-backend-impl.h
\
ggml/include/ggml-alloc.h
\
ggml/src/ggml-alloc.c
\
ggml/src/ggml-aarch64.h
\
ggml/src/ggml-aarch64.c
\
ggml/src/ggml-cpu-impl.h
\
ggml/include/ggml-blas.h
\
ggml/src/ggml-blas.cpp
ggml/include/ggml-cpp.h
\
ggml/src/ggml-threading.cpp
\
ggml/src/ggml-blas/ggml-blas.cpp
\
ggml/src/ggml-cpu/ggml-cpu.c
\
ggml/src/ggml-cpu/ggml-cpu-aarch64.c
\
ggml/src/ggml-cpu/ggml-cpu.cpp
\
ggml/src/ggml-cpu/ggml-cpu-aarch64.h
\
ggml/src/ggml-cpu/ggml-cpu-quants.h
\
ggml/src/ggml-cpu/ggml-cpu-quants.c
\
ggml/src/ggml-cpu/ggml-cpu-impl.h
\
ggml/src/ggml-cpu/amx/amx.h
\
ggml/src/ggml-cpu/amx/amx.cpp
\
ggml/src/ggml-cpu/amx/mmq.cpp
\
ggml/src/ggml-cpu/amx/mmq.h
$(foreach
name,$(GGML_FILES),$(eval
$(call
vendor_file,$(name),$(DEST_DIR))))
$(DEST_DIR)ggml-metal-embed.metal
:
$(DEST_DIR)ggml-common.h $(DEST_DIR)ggml-metal-impl.h
@
sed
-e
'/__embed_ggml-common.h__/r
$(DEST_DIR)
/ggml-common.h'
\
-e
'/__embed_ggml-common.h__/d'
\
<
$(DEST_DIR)
/ggml-metal.metal
\
>
$(DEST_DIR)
/ggml-metal-embed.metal.tmp
@
sed
-e
'/#include "ggml-metal-impl.h"/r
$(DEST_DIR)
/ggml-metal-impl.h'
\
-e
'/#include "ggml-metal-impl.h"/d'
\
<
$(DEST_DIR)
/ggml-metal-embed.metal.tmp
\
>
$(DEST_DIR)
/ggml-metal-embed.metal
@
rm
$(DEST_DIR)
/ggml-metal-embed.metal.tmp
VENDORED_FILES
+=
$(DEST_DIR)
ggml-metal-embed.metal
# TODO generalize renaming pattern if we have more of these
$(DEST_DIR)ggml-metal_darwin_arm64.m
:
$(LLAMACPP_REPO)ggml/src/ggml-metal.m
$(DEST_DIR)ggml-metal_darwin_arm64.m
:
$(LLAMACPP_REPO)ggml/src/ggml-metal
/ggml-metal
.m
@
echo
"vendoring
$(
subst
$(LLAMACPP_REPO)
,,
$<
)
"
;
\
mkdir
-p
$(
dir
$@
)
&&
\
echo
"/**"
>
$@
&&
\
...
...
make/cuda.make
View file @
527cc978
...
...
@@ -41,7 +41,9 @@ GPU_COMPILER_CUFLAGS = \
-DGGML_CUDA_PEER_MAX_BATCH_SIZE
=
128
\
-DGGML_USE_CUDA
=
1
\
-DGGML_SHARED
=
1
\
-DGGML_BACKEND_SHARED
=
1
\
-DGGML_BUILD
=
1
\
-DGGML_BACKEND_BUILD
=
1
\
-DGGML_USE_LLAMAFILE
\
-DK_QUANTS_PER_ITERATION
=
2
\
-DNDEBUG
\
...
...
make/gpu.make
View file @
527cc978
...
...
@@ -15,10 +15,9 @@ DIST_GPU_RUNNER_DEPS_DIR = $(DIST_LIB_DIR)
GPU_RUNNER_LIBS
=
$(
wildcard
$(
addsuffix
.
$(SHARED_EXT)
.
*
,
$(
addprefix
$(GPU_LIB_DIR)
/
$(SHARED_PREFIX)
,
$(GPU_RUNNER_LIBS_SHORT)
)))
GPU_RUNNER_SRCS
:=
\
llama/ggml-cuda.cu
\
$(
filter-out
$(
wildcard
llama/ggml-cuda/fattn
*
.cu
)
,
$(
wildcard
llama/ggml-cuda/
*
.cu
))
\
$(
wildcard
llama/ggml-cuda/template-instances/mmq
*
.cu
)
\
llama/ggml.c llama/ggml-backend.c llama/ggml-alloc.c llama/ggml-quants.c llama/sgemm.cpp llama/ggml-aarch64.c
llama/ggml.c llama/ggml-backend.c
pp
llama/ggml-alloc.c llama/ggml-quants.c llama/sgemm.cpp llama/ggml-aarch64.c
llama/ggml-threading.cpp
GPU_RUNNER_HDRS
:=
\
$(
wildcard
llama/ggml-cuda/
*
.cuh
)
...
...
Prev
1
…
11
12
13
14
15
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment