Unverified Commit c68f367e authored by Daniel Hiltgen's avatar Daniel Hiltgen Committed by GitHub
Browse files

Update GGML to b6646 (#12245)

Notable EOLs with this change:
- MacOS v12 and v13 are no longer supported (v14+ required)
- AMD gfx900 and gfx906 are no longer supported
parent fdb10946
...@@ -273,7 +273,7 @@ struct gguf_reader { ...@@ -273,7 +273,7 @@ struct gguf_reader {
} }
bool read(std::string & dst) const { bool read(std::string & dst) const {
uint64_t size = -1; uint64_t size = 0;
if (!read(size)) { if (!read(size)) {
return false; return false;
} }
...@@ -523,7 +523,7 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par ...@@ -523,7 +523,7 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
// tensor shape // tensor shape
{ {
uint32_t n_dims = -1; uint32_t n_dims = 0;
ok = ok && gr.read(n_dims); ok = ok && gr.read(n_dims);
if (n_dims > GGML_MAX_DIMS) { if (n_dims > GGML_MAX_DIMS) {
GGML_LOG_ERROR("%s: tensor '%s' has invalid number of dimensions: %" PRIu32 " > %" PRIu32 "\n", GGML_LOG_ERROR("%s: tensor '%s' has invalid number of dimensions: %" PRIu32 " > %" PRIu32 "\n",
...@@ -1169,50 +1169,51 @@ void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const vo ...@@ -1169,50 +1169,51 @@ void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const vo
ctx->info[tensor_id].t.data = (void *)(uintptr_t)data; // double cast suppresses warning about casting away const ctx->info[tensor_id].t.data = (void *)(uintptr_t)data; // double cast suppresses warning about casting away const
} }
struct gguf_writer { struct gguf_writer_base {
std::vector<int8_t> & buf; size_t written_bytes {0u};
// Polymorphic base (has pure-virtual write() members): the destructor must be
// virtual so that deleting a derived writer through a base pointer is defined.
virtual ~gguf_writer_base(void) = default;
gguf_writer(std::vector<int8_t> & buf) : buf(buf) {} // we bet on devirtualization
virtual void write(int8_t val) = 0;
virtual void write(const std::vector<int8_t> & val) = 0;
virtual void write_tensor_data(const struct gguf_tensor_info & info, size_t offset_data, size_t alignment) = 0;
template <typename T> template <typename T>
void write(const T & val) const { void write(const T & val) {
for (size_t i = 0; i < sizeof(val); ++i) { for (size_t i = 0; i < sizeof(val); ++i) {
buf.push_back(reinterpret_cast<const int8_t *>(&val)[i]); write(reinterpret_cast<const int8_t *>(&val)[i]);
} }
} }
void write(const std::vector<int8_t> & val) const { void write(const bool & val) {
buf.insert(buf.end(), val.begin(), val.end());
}
void write(const bool & val) const {
const int8_t val8 = val ? 1 : 0; const int8_t val8 = val ? 1 : 0;
write(val8); write(val8);
} }
void write(const std::string & val) const { void write(const std::string & val) {
{ {
const uint64_t n = val.length(); const uint64_t n = val.length();
write(n); write(n);
} }
for (size_t i = 0; i < val.length(); ++i) { for (size_t i = 0; i < val.length(); ++i) {
buf.push_back(reinterpret_cast<const int8_t *>(val.data())[i]); write((val.data())[i]);
} }
} }
void write(const char * val) const { void write(const char * val) {
write(std::string(val)); write(std::string(val));
} }
void write(const enum ggml_type & val) const { void write(const enum ggml_type & val) {
write(int32_t(val)); write(int32_t(val));
} }
void write(const enum gguf_type & val) const { void write(const enum gguf_type & val) {
write(int32_t(val)); write(int32_t(val));
} }
void write(const struct gguf_kv & kv) const { void write(const struct gguf_kv & kv) {
const uint64_t ne = kv.get_ne(); const uint64_t ne = kv.get_ne();
write(kv.get_key()); write(kv.get_key());
...@@ -1253,7 +1254,7 @@ struct gguf_writer { ...@@ -1253,7 +1254,7 @@ struct gguf_writer {
} }
} }
void write_tensor_meta(const struct gguf_tensor_info & info) const { void write_tensor_meta(const struct gguf_tensor_info & info) {
write(info.t.name); write(info.t.name);
const uint32_t n_dims = ggml_n_dims(&info.t); const uint32_t n_dims = ggml_n_dims(&info.t);
...@@ -1266,14 +1267,33 @@ struct gguf_writer { ...@@ -1266,14 +1267,33 @@ struct gguf_writer {
write(info.offset); write(info.offset);
} }
void pad(const size_t alignment) const { void pad(const size_t alignment) {
while (buf.size() % alignment != 0) { while (written_bytes % alignment != 0) {
const int8_t zero = 0; const int8_t zero = 0;
write(zero); write(zero);
} }
} }
};
// vector buffer based writer
struct gguf_writer_buf final : public gguf_writer_base {
std::vector<int8_t> & buf;
gguf_writer_buf(std::vector<int8_t> & buf) : buf(buf) {}
using gguf_writer_base::write;
// Append one byte to the backing vector, keeping the running byte count in sync.
void write(const int8_t val) override {
    ++written_bytes;
    buf.push_back(val);
}
void write_tensor_data(const struct gguf_tensor_info & info, const size_t offset_data, const size_t alignment) const { void write(const std::vector<int8_t> & val) override {
buf.insert(buf.end(), val.begin(), val.end());
written_bytes += val.size();
}
void write_tensor_data(const struct gguf_tensor_info & info, const size_t offset_data, const size_t alignment) override {
GGML_ASSERT(buf.size() - offset_data == info.offset); GGML_ASSERT(buf.size() - offset_data == info.offset);
GGML_ASSERT(ggml_is_contiguous(&info.t)); GGML_ASSERT(ggml_is_contiguous(&info.t));
...@@ -1287,14 +1307,58 @@ struct gguf_writer { ...@@ -1287,14 +1307,58 @@ struct gguf_writer {
GGML_ASSERT(info.t.data); GGML_ASSERT(info.t.data);
memcpy(buf.data() + offset, info.t.data, nbytes); memcpy(buf.data() + offset, info.t.data, nbytes);
} }
written_bytes += nbytes;
pad(alignment); pad(alignment);
} }
}; };
void gguf_write_to_buf(const struct gguf_context * ctx, std::vector<int8_t> & buf, bool only_meta) { // file based writer
const struct gguf_writer gw(buf); struct gguf_writer_file final : public gguf_writer_base {
FILE * file;
gguf_writer_file(FILE* file) : file(file) {}
using gguf_writer_base::write;
// Write one byte to the file; throws std::runtime_error on failure.
void write(const int8_t val) override {
    // fputc takes/returns the byte as unsigned char widened to int (EOF on
    // error), so compare against the unsigned view of the value.
    const auto real_val = static_cast<uint8_t>(val);
    const auto ret = fputc(real_val, file);
    if (ret != real_val) {
        throw std::runtime_error("unexpected fputc result '" + std::to_string(ret) + "' instead of '" + std::to_string((int)real_val) + "'");
    }
    // count the byte only after the write is known to have succeeded,
    // so written_bytes never overstates what reached the file
    written_bytes++;
}
// Write a block of bytes to the file; throws std::runtime_error on a short write.
void write(const std::vector<int8_t> & val) override {
    if (val.empty()) {
        // avoid passing a possibly-null data() pointer to fwrite
        return;
    }
    const auto ret = fwrite(val.data(), 1, val.size(), file);
    // account only the bytes fwrite reports as written, so written_bytes
    // stays accurate even when the write is cut short
    written_bytes += ret;
    if (ret != val.size()) {
        throw std::runtime_error("unexpected fwrite number of bytes written, '" + std::to_string(ret) + "' instead of '" + std::to_string(val.size()) + "'");
    }
}
// Stream one tensor's payload to the file: stage the bytes in memory,
// emit them in a single write, then pad up to the GGUF alignment.
void write_tensor_data(const struct gguf_tensor_info & info, const size_t offset_data, const size_t alignment) override {
    GGML_ASSERT(written_bytes - offset_data == info.offset);
    GGML_ASSERT(ggml_is_contiguous(&info.t));

    const size_t nbytes = ggml_nbytes(&info.t);
    std::vector<int8_t> staging(nbytes);
    if (info.t.buffer) {
        // tensor lives in a backend buffer — copy it out via the backend API
        ggml_backend_tensor_get(&info.t, staging.data(), 0, nbytes);
    } else {
        GGML_ASSERT(info.t.data);
        memcpy(staging.data(), info.t.data, nbytes);
    }
    write(staging);
    pad(alignment);
}
};
template <typename writer_t>
static void gguf_write_out(const struct gguf_context * ctx, writer_t & gw, bool only_meta) {
const int64_t n_kv = gguf_get_n_kv(ctx); const int64_t n_kv = gguf_get_n_kv(ctx);
const int64_t n_tensors = gguf_get_n_tensors(ctx); const int64_t n_tensors = gguf_get_n_tensors(ctx);
...@@ -1324,7 +1388,7 @@ void gguf_write_to_buf(const struct gguf_context * ctx, std::vector<int8_t> & bu ...@@ -1324,7 +1388,7 @@ void gguf_write_to_buf(const struct gguf_context * ctx, std::vector<int8_t> & bu
return; return;
} }
const size_t offset_data = gw.buf.size(); const size_t offset_data = gw.written_bytes;
// write tensor data // write tensor data
for (int64_t i = 0; i < n_tensors; ++i) { for (int64_t i = 0; i < n_tensors; ++i) {
...@@ -1332,6 +1396,11 @@ void gguf_write_to_buf(const struct gguf_context * ctx, std::vector<int8_t> & bu ...@@ -1332,6 +1396,11 @@ void gguf_write_to_buf(const struct gguf_context * ctx, std::vector<int8_t> & bu
} }
} }
void gguf_write_to_buf(const struct gguf_context * ctx, std::vector<int8_t> & buf, bool only_meta) {
gguf_writer_buf gw(buf);
gguf_write_out(ctx, gw, only_meta);
}
bool gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta) { bool gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta) {
FILE * file = ggml_fopen(fname, "wb"); FILE * file = ggml_fopen(fname, "wb");
...@@ -1340,11 +1409,17 @@ bool gguf_write_to_file(const struct gguf_context * ctx, const char * fname, boo ...@@ -1340,11 +1409,17 @@ bool gguf_write_to_file(const struct gguf_context * ctx, const char * fname, boo
return false; return false;
} }
std::vector<int8_t> buf; try {
gguf_write_to_buf(ctx, buf, only_meta); gguf_writer_file gw(file);
const bool ok = fwrite(buf.data(), 1, buf.size(), file) == buf.size(); gguf_write_out(ctx, gw, only_meta);
} catch (const std::runtime_error& ex) {
GGML_LOG_ERROR("%s: failed to write GGUF data into '%s': %s\n", __func__, fname, ex.what());
fclose(file);
return false;
}
fclose(file); fclose(file);
return ok; return true;
} }
size_t gguf_get_meta_size(const struct gguf_context * ctx) { size_t gguf_get_meta_size(const struct gguf_context * ctx) {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment