Unverified commit 527cc978, authored by Jeffrey Morgan, committed by GitHub
Browse files

llama: update vendored code to commit 40c6d79f (#7875)

parent a37f4a86
/** /**
* llama.cpp - commit 3f1ae2e32cde00c39b96be6d01c2997c29bae555 - do not edit this file * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file
* *
* MIT License * MIT License
* *
...@@ -154,11 +154,11 @@ uint32_t unicode_cpt_from_utf8(const std::string & utf8, size_t & offset) { ...@@ -154,11 +154,11 @@ uint32_t unicode_cpt_from_utf8(const std::string & utf8, size_t & offset) {
static std::vector<codepoint_flags> unicode_cpt_flags_array() { static std::vector<codepoint_flags> unicode_cpt_flags_array() {
std::vector<codepoint_flags> cpt_flags(MAX_CODEPOINTS, codepoint_flags::UNDEFINED); std::vector<codepoint_flags> cpt_flags(MAX_CODEPOINTS, codepoint_flags::UNDEFINED);
assert (unicode_ranges_flags.front().first == 0); assert (unicode_ranges_flags.begin()[0].first == 0);
assert (unicode_ranges_flags.back().first == MAX_CODEPOINTS); assert (unicode_ranges_flags.begin()[unicode_ranges_flags.size()-1].first == MAX_CODEPOINTS);
for (size_t i = 1; i < unicode_ranges_flags.size(); ++i) { for (size_t i = 1; i < unicode_ranges_flags.size(); ++i) {
const auto range_ini = unicode_ranges_flags[i-1]; // codepoint_ini, flags const auto range_ini = unicode_ranges_flags.begin()[i-1]; // codepoint_ini, flags
const auto range_end = unicode_ranges_flags[i]; // codepoint_end, flags const auto range_end = unicode_ranges_flags.begin()[i]; // codepoint_end, flags
for (uint32_t cpt = range_ini.first; cpt < range_end.first; ++cpt) { for (uint32_t cpt = range_ini.first; cpt < range_end.first; ++cpt) {
cpt_flags[cpt] = range_ini.second; cpt_flags[cpt] = range_ini.second;
} }
...@@ -247,7 +247,19 @@ static inline std::wstring unicode_wstring_from_utf8(const std::string & s) { ...@@ -247,7 +247,19 @@ static inline std::wstring unicode_wstring_from_utf8(const std::string & s) {
free(wbuf); free(wbuf);
return ret; return ret;
#else #else
#if defined(__clang__)
// disable C++17 deprecation warning for std::codecvt_utf8
# pragma clang diagnostic push
# pragma clang diagnostic ignored "-Wdeprecated-declarations"
#endif
std::wstring_convert<std::codecvt_utf8<wchar_t>> conv; std::wstring_convert<std::codecvt_utf8<wchar_t>> conv;
#if defined(__clang__)
# pragma clang diagnostic pop
#endif
return conv.from_bytes(s); return conv.from_bytes(s);
#endif #endif
} }
...@@ -644,7 +656,7 @@ std::vector<uint32_t> unicode_cpts_normalize_nfd(const std::vector<uint32_t> & c ...@@ -644,7 +656,7 @@ std::vector<uint32_t> unicode_cpts_normalize_nfd(const std::vector<uint32_t> & c
std::vector<uint32_t> result(cpts.size()); std::vector<uint32_t> result(cpts.size());
for (size_t i = 0; i < cpts.size(); ++i) { for (size_t i = 0; i < cpts.size(); ++i) {
const uint32_t cpt = cpts[i]; const uint32_t cpt = cpts[i];
auto it = std::upper_bound(unicode_ranges_nfd.cbegin(), unicode_ranges_nfd.cend(), cpt, comp) - 1; auto it = std::upper_bound(unicode_ranges_nfd.begin(), unicode_ranges_nfd.end(), cpt, comp) - 1;
result[i] = (it->first <= cpt && cpt <= it->last) ? it->nfd : cpt; result[i] = (it->first <= cpt && cpt <= it->last) ? it->nfd : cpt;
} }
return result; return result;
...@@ -686,8 +698,15 @@ uint8_t unicode_utf8_to_byte(const std::string & utf8) { ...@@ -686,8 +698,15 @@ uint8_t unicode_utf8_to_byte(const std::string & utf8) {
} }
uint32_t unicode_tolower(uint32_t cp) { uint32_t unicode_tolower(uint32_t cp) {
auto it = unicode_map_lowercase.find(cp); // binary search
return it == unicode_map_lowercase.end() ? cp : it->second; auto it = std::lower_bound(unicode_map_lowercase.begin(), unicode_map_lowercase.end(), cp,
[](const std::pair<uint32_t, uint32_t> & pair, uint32_t value) {
return pair.first < value;
});
if (it != unicode_map_lowercase.end() && it->first == cp) {
return it->second;
}
return cp; // Return the original code point if no lowercase mapping is found
} }
std::vector<std::string> unicode_regex_split(const std::string & text, const std::vector<std::string> & regex_exprs) { std::vector<std::string> unicode_regex_split(const std::string & text, const std::vector<std::string> & regex_exprs) {
......
/** /**
* llama.cpp - commit 3f1ae2e32cde00c39b96be6d01c2997c29bae555 - do not edit this file * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file
* *
* MIT License * MIT License
* *
......
LLAMACPP_BASE_COMMIT=3f1ae2e32cde00c39b96be6d01c2997c29bae555 LLAMACPP_BASE_COMMIT=40c6d79fb52f995f47507fedfeaae2ac05d9b35c
\ No newline at end of file
...@@ -699,7 +699,6 @@ func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn fu ...@@ -699,7 +699,6 @@ func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn fu
"top_k": req.Options.TopK, "top_k": req.Options.TopK,
"top_p": req.Options.TopP, "top_p": req.Options.TopP,
"min_p": req.Options.MinP, "min_p": req.Options.MinP,
"tfs_z": req.Options.TFSZ,
"typical_p": req.Options.TypicalP, "typical_p": req.Options.TypicalP,
"repeat_last_n": req.Options.RepeatLastN, "repeat_last_n": req.Options.RepeatLastN,
"repeat_penalty": req.Options.RepeatPenalty, "repeat_penalty": req.Options.RepeatPenalty,
......
...@@ -51,7 +51,7 @@ GPU_DIST_LIB_DEPS= $(sort $(addprefix $(ROCM_DIST_DEPS_DIR)/,$(notdir $(GPU_LIBS ...@@ -51,7 +51,7 @@ GPU_DIST_LIB_DEPS= $(sort $(addprefix $(ROCM_DIST_DEPS_DIR)/,$(notdir $(GPU_LIBS
ROCBLAS_DIST_DEP_MANIFEST = $(ROCM_DIST_DEPS_DIR)/rocblas/library/TensileManifest.txt ROCBLAS_DIST_DEP_MANIFEST = $(ROCM_DIST_DEPS_DIR)/rocblas/library/TensileManifest.txt
ifeq ($(OS),linux) ifeq ($(OS),linux)
GPU_COMPILER_FPIC := -fPIC -Wno-unused-function -std=gnu++11 GPU_COMPILER_FPIC := -fPIC -Wno-unused-function -std=gnu++17
else ifeq ($(OS),windows) else ifeq ($(OS),windows)
GPU_COMPILER_FPIC := -Xclang --dependent-lib=msvcrt GPU_COMPILER_FPIC := -Xclang --dependent-lib=msvcrt
endif endif
...@@ -69,11 +69,13 @@ GPU_COMPILER_CUFLAGS = \ ...@@ -69,11 +69,13 @@ GPU_COMPILER_CUFLAGS = \
-O3 \ -O3 \
-DGGML_USE_CUDA \ -DGGML_USE_CUDA \
-DGGML_BUILD=1 \ -DGGML_BUILD=1 \
-DGGML_BACKEND_BUILD=1 \
-DGGML_SHARED=1 \ -DGGML_SHARED=1 \
-DGGML_BACKEND_SHARED=1 \
-DGGML_CUDA_DMMV_X=32 \ -DGGML_CUDA_DMMV_X=32 \
-DGGML_CUDA_MMV_Y=1 \ -DGGML_CUDA_MMV_Y=1 \
-DGGML_SCHED_MAX_COPIES=4 \ -DGGML_SCHED_MAX_COPIES=4 \
-DGGML_USE_HIPBLAS \ -DGGML_USE_HIP \
-DGGML_USE_LLAMAFILE \ -DGGML_USE_LLAMAFILE \
-DHIP_FAST_MATH \ -DHIP_FAST_MATH \
-D__HIP_PLATFORM_AMD__=1 \ -D__HIP_PLATFORM_AMD__=1 \
......
...@@ -86,13 +86,14 @@ LLAMACPP_FILES=\ ...@@ -86,13 +86,14 @@ LLAMACPP_FILES=\
src/llama-sampling.cpp \ src/llama-sampling.cpp \
src/llama-sampling.h \ src/llama-sampling.h \
include/llama.h \ include/llama.h \
ggml/src/llamafile/sgemm.cpp \ ggml/include/ggml-cpu.h \
ggml/src/llamafile/sgemm.h ggml/src/ggml-cpu/llamafile/sgemm.cpp \
ggml/src/ggml-cpu/llamafile/sgemm.h
$(foreach name,$(LLAMACPP_FILES),$(eval $(call vendor_file,$(name),$(DEST_DIR)))) $(foreach name,$(LLAMACPP_FILES),$(eval $(call vendor_file,$(name),$(DEST_DIR))))
# llama.cpp files -> llama/llamafile # llama.cpp files -> llama/llamafile
LLAMAFILE_FILES= \ LLAMAFILE_FILES= \
ggml/src/llamafile/sgemm.h ggml/src/ggml-cpu/llamafile/sgemm.h
$(foreach name,$(LLAMAFILE_FILES),$(eval $(call vendor_file,$(name),$(DEST_DIR)llamafile/))) $(foreach name,$(LLAMAFILE_FILES),$(eval $(call vendor_file,$(name),$(DEST_DIR)llamafile/)))
# ggml files -> llama/ # ggml files -> llama/
...@@ -101,26 +102,53 @@ GGML_FILES= \ ...@@ -101,26 +102,53 @@ GGML_FILES= \
ggml/include/ggml.h \ ggml/include/ggml.h \
ggml/src/ggml-quants.c \ ggml/src/ggml-quants.c \
ggml/src/ggml-quants.h \ ggml/src/ggml-quants.h \
ggml/src/ggml-metal.metal \ ggml/src/ggml-metal/ggml-metal.metal \
ggml/include/ggml-metal.h \ ggml/include/ggml-metal.h \
ggml/src/ggml-impl.h \ ggml/src/ggml-impl.h \
ggml/src/ggml-threading.h \
ggml/include/ggml-cuda.h \ ggml/include/ggml-cuda.h \
ggml/src/ggml-cuda.cu \ ggml/src/ggml-backend-reg.cpp \
ggml/src/ggml-metal/ggml-metal-impl.h \
ggml/src/ggml-common.h \ ggml/src/ggml-common.h \
ggml/include/ggml-backend.h \ ggml/include/ggml-backend.h \
ggml/src/ggml-backend.c \ ggml/src/ggml-backend.cpp \
ggml/src/ggml-backend-impl.h \ ggml/src/ggml-backend-impl.h \
ggml/include/ggml-alloc.h \ ggml/include/ggml-alloc.h \
ggml/src/ggml-alloc.c \ ggml/src/ggml-alloc.c \
ggml/src/ggml-aarch64.h \ ggml/src/ggml-aarch64.h \
ggml/src/ggml-aarch64.c \ ggml/src/ggml-aarch64.c \
ggml/src/ggml-cpu-impl.h \
ggml/include/ggml-blas.h \ ggml/include/ggml-blas.h \
ggml/src/ggml-blas.cpp ggml/include/ggml-cpp.h \
ggml/src/ggml-threading.cpp \
ggml/src/ggml-blas/ggml-blas.cpp \
ggml/src/ggml-cpu/ggml-cpu.c \
ggml/src/ggml-cpu/ggml-cpu-aarch64.c \
ggml/src/ggml-cpu/ggml-cpu.cpp \
ggml/src/ggml-cpu/ggml-cpu-aarch64.h \
ggml/src/ggml-cpu/ggml-cpu-quants.h \
ggml/src/ggml-cpu/ggml-cpu-quants.c \
ggml/src/ggml-cpu/ggml-cpu-impl.h \
ggml/src/ggml-cpu/amx/amx.h \
ggml/src/ggml-cpu/amx/amx.cpp \
ggml/src/ggml-cpu/amx/mmq.cpp \
ggml/src/ggml-cpu/amx/mmq.h
$(foreach name,$(GGML_FILES),$(eval $(call vendor_file,$(name),$(DEST_DIR)))) $(foreach name,$(GGML_FILES),$(eval $(call vendor_file,$(name),$(DEST_DIR))))
$(DEST_DIR)ggml-metal-embed.metal: $(DEST_DIR)ggml-common.h $(DEST_DIR)ggml-metal-impl.h
@sed -e '/__embed_ggml-common.h__/r $(DEST_DIR)/ggml-common.h' \
-e '/__embed_ggml-common.h__/d' \
< $(DEST_DIR)/ggml-metal.metal \
> $(DEST_DIR)/ggml-metal-embed.metal.tmp
@sed -e '/#include "ggml-metal-impl.h"/r $(DEST_DIR)/ggml-metal-impl.h' \
-e '/#include "ggml-metal-impl.h"/d' \
< $(DEST_DIR)/ggml-metal-embed.metal.tmp \
> $(DEST_DIR)/ggml-metal-embed.metal
@rm $(DEST_DIR)/ggml-metal-embed.metal.tmp
VENDORED_FILES += $(DEST_DIR)ggml-metal-embed.metal
# TODO generalize renaming pattern if we have more of these # TODO generalize renaming pattern if we have more of these
$(DEST_DIR)ggml-metal_darwin_arm64.m : $(LLAMACPP_REPO)ggml/src/ggml-metal.m $(DEST_DIR)ggml-metal_darwin_arm64.m : $(LLAMACPP_REPO)ggml/src/ggml-metal/ggml-metal.m
@echo "vendoring $(subst $(LLAMACPP_REPO),,$<)"; \ @echo "vendoring $(subst $(LLAMACPP_REPO),,$<)"; \
mkdir -p $(dir $@) && \ mkdir -p $(dir $@) && \
echo "/**" > $@ && \ echo "/**" > $@ && \
......
...@@ -41,7 +41,9 @@ GPU_COMPILER_CUFLAGS = \ ...@@ -41,7 +41,9 @@ GPU_COMPILER_CUFLAGS = \
-DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 \ -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 \
-DGGML_USE_CUDA=1 \ -DGGML_USE_CUDA=1 \
-DGGML_SHARED=1 \ -DGGML_SHARED=1 \
-DGGML_BACKEND_SHARED=1 \
-DGGML_BUILD=1 \ -DGGML_BUILD=1 \
-DGGML_BACKEND_BUILD=1 \
-DGGML_USE_LLAMAFILE \ -DGGML_USE_LLAMAFILE \
-DK_QUANTS_PER_ITERATION=2 \ -DK_QUANTS_PER_ITERATION=2 \
-DNDEBUG \ -DNDEBUG \
......
...@@ -15,10 +15,9 @@ DIST_GPU_RUNNER_DEPS_DIR = $(DIST_LIB_DIR) ...@@ -15,10 +15,9 @@ DIST_GPU_RUNNER_DEPS_DIR = $(DIST_LIB_DIR)
GPU_RUNNER_LIBS = $(wildcard $(addsuffix .$(SHARED_EXT).*,$(addprefix $(GPU_LIB_DIR)/$(SHARED_PREFIX),$(GPU_RUNNER_LIBS_SHORT)))) GPU_RUNNER_LIBS = $(wildcard $(addsuffix .$(SHARED_EXT).*,$(addprefix $(GPU_LIB_DIR)/$(SHARED_PREFIX),$(GPU_RUNNER_LIBS_SHORT))))
GPU_RUNNER_SRCS := \ GPU_RUNNER_SRCS := \
llama/ggml-cuda.cu \
$(filter-out $(wildcard llama/ggml-cuda/fattn*.cu),$(wildcard llama/ggml-cuda/*.cu)) \ $(filter-out $(wildcard llama/ggml-cuda/fattn*.cu),$(wildcard llama/ggml-cuda/*.cu)) \
$(wildcard llama/ggml-cuda/template-instances/mmq*.cu) \ $(wildcard llama/ggml-cuda/template-instances/mmq*.cu) \
llama/ggml.c llama/ggml-backend.c llama/ggml-alloc.c llama/ggml-quants.c llama/sgemm.cpp llama/ggml-aarch64.c llama/ggml.c llama/ggml-backend.cpp llama/ggml-alloc.c llama/ggml-quants.c llama/sgemm.cpp llama/ggml-aarch64.c llama/ggml-threading.cpp
GPU_RUNNER_HDRS := \ GPU_RUNNER_HDRS := \
$(wildcard llama/ggml-cuda/*.cuh) $(wildcard llama/ggml-cuda/*.cuh)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment