Unverified commit 2f3f4076, authored by Ceng and committed by GitHub
Browse files

issue/634: InfiniCore 支持InfiniLM Llama模型适配 (#668)



* issue/634: InfiniCore 支持InfiniLM Llama模型适配
Signed-off-by: Ceng23333 <441651826@qq.com>

* .
Signed-off-by: Ceng23333 <441651826@qq.com>

---------
Signed-off-by: Ceng23333 <441651826@qq.com>
parent 1bafd1a6
import contextlib import contextlib
import infinicore.context as context
import infinicore.nn as nn import infinicore.nn as nn
# Import context functions # Import context functions
...@@ -60,6 +61,7 @@ from infinicore.tensor import ( ...@@ -60,6 +61,7 @@ from infinicore.tensor import (
__all__ = [ __all__ = [
# Modules. # Modules.
"context",
"nn", "nn",
# Classes. # Classes.
"device", "device",
......
...@@ -5,8 +5,8 @@ from infinicore.tensor import Tensor ...@@ -5,8 +5,8 @@ from infinicore.tensor import Tensor
class RopeAlgo: class RopeAlgo:
r"""Different types of RoPE algorithms.""" r"""Different types of RoPE algorithms."""
GPT_J = _infinicore.Algo.GPT_J GPT_J = _infinicore.RoPEAlgo.GPT_J
GPT_NEOX = _infinicore.Algo.GPT_NEOX GPT_NEOX = _infinicore.RoPEAlgo.GPT_NEOX
def rope( def rope(
......
...@@ -36,9 +36,9 @@ Embedding::Embedding(size_t num_embeddings, ...@@ -36,9 +36,9 @@ Embedding::Embedding(size_t num_embeddings,
// This would require a slice operation // This would require a slice operation
} }
spdlog::debug("Created Embedding module: num_embeddings={}, embedding_dim={}, dtype={}, padding_idx={}", SPDLOG_DEBUG("Created Embedding module: num_embeddings={}, embedding_dim={}, dtype={}, padding_idx={}",
num_embeddings, embedding_dim, static_cast<int>(dtype_), num_embeddings, embedding_dim, static_cast<int>(dtype_),
padding_idx_.has_value() ? std::to_string(padding_idx_.value()) : "None"); padding_idx_.has_value() ? std::to_string(padding_idx_.value()) : "None");
} }
Tensor Embedding::forward(const Tensor &indices) const { Tensor Embedding::forward(const Tensor &indices) const {
......
...@@ -22,8 +22,8 @@ Linear::Linear(size_t in_features, size_t out_features, bool bias, const DataTyp ...@@ -22,8 +22,8 @@ Linear::Linear(size_t in_features, size_t out_features, bool bias, const DataTyp
bias_ = Parameter(); // Default constructed empty parameter bias_ = Parameter(); // Default constructed empty parameter
} }
spdlog::debug("Created Linear module: in_features={}, out_features={}, bias={}, dtype={}", SPDLOG_DEBUG("Created Linear module: in_features={}, out_features={}, bias={}, dtype={}",
in_features, out_features, bias, static_cast<int>(dtype_)); in_features, out_features, bias, static_cast<int>(dtype_));
} }
Tensor Linear::compute_linear(Tensor &input) const { Tensor Linear::compute_linear(Tensor &input) const {
......
...@@ -19,7 +19,13 @@ Parameter::Parameter( ...@@ -19,7 +19,13 @@ Parameter::Parameter(
void Parameter::load_blob(const void *data) { void Parameter::load_blob(const void *data) {
auto buffer = Tensor::empty(impl_->shape(), impl_->dtype(), Device(Device::Type::CPU, 0), true); auto buffer = Tensor::empty(impl_->shape(), impl_->dtype(), Device(Device::Type::CPU, 0), true);
std::memcpy(buffer->data(), data, buffer->nbytes()); std::memcpy(buffer->data(), data, buffer->nbytes());
infinicore::context::memcpyH2D(impl_->data(), buffer->data(), buffer->nbytes());
infinicore::context::syncStream(); // If parameter is on CPU, use direct memcpy; otherwise use H2D
if (impl_->device().getType() == Device::Type::CPU) {
infinicore::context::memcpyH2H(impl_->data(), buffer->data(), buffer->nbytes());
} else {
infinicore::context::memcpyH2D(impl_->data(), buffer->data(), buffer->nbytes());
infinicore::context::syncStream();
}
} }
} // namespace infinicore::nn } // namespace infinicore::nn
#include "infinicore/nn/rmsnorm.hpp" #include "infinicore/nn/rmsnorm.hpp"
#include "infinicore/ops.hpp" #include "infinicore/ops.hpp"
#include <cmath> #include <cmath>
#include <spdlog/spdlog.h>
#include <stdexcept> #include <stdexcept>
namespace infinicore::nn { namespace infinicore::nn {
...@@ -19,9 +18,6 @@ RMSNorm::RMSNorm(size_t normalized_shape, double eps, const DataType &dtype, con ...@@ -19,9 +18,6 @@ RMSNorm::RMSNorm(size_t normalized_shape, double eps, const DataType &dtype, con
// Initialize weight to ones (standard practice for RMSNorm) // Initialize weight to ones (standard practice for RMSNorm)
auto ones_tensor = Tensor::ones({normalized_shape}, dtype_, device); auto ones_tensor = Tensor::ones({normalized_shape}, dtype_, device);
weight_->copy_from(ones_tensor); weight_->copy_from(ones_tensor);
spdlog::debug("Created RMSNorm module: normalized_shape={}, eps={}, dtype={}",
normalized_shape, eps, static_cast<int>(dtype_));
} }
Tensor RMSNorm::forward(const Tensor &x) const { Tensor RMSNorm::forward(const Tensor &x) const {
......
...@@ -4,7 +4,6 @@ ...@@ -4,7 +4,6 @@
#include <algorithm> #include <algorithm>
#include <cmath> #include <cmath>
#include <functional> #include <functional>
#include <spdlog/spdlog.h>
#include <stdexcept> #include <stdexcept>
namespace infinicore::nn { namespace infinicore::nn {
...@@ -20,7 +19,6 @@ RoPE::RoPE(size_t head_dim, ...@@ -20,7 +19,6 @@ RoPE::RoPE(size_t head_dim,
theta_(theta), theta_(theta),
algo_(algo), algo_(algo),
dtype_(dtype) { dtype_(dtype) {
if (head_dim % 2 != 0) { if (head_dim % 2 != 0) {
throw std::invalid_argument("head_dim must be even for RoPE, got " + std::to_string(head_dim)); throw std::invalid_argument("head_dim must be even for RoPE, got " + std::to_string(head_dim));
} }
...@@ -29,9 +27,6 @@ RoPE::RoPE(size_t head_dim, ...@@ -29,9 +27,6 @@ RoPE::RoPE(size_t head_dim,
// Initialize cache tables // Initialize cache tables
initialize_cache(); initialize_cache();
spdlog::debug("Created RoPE module: head_dim={}, max_seq_len={}, theta={}, algo={}, dtype={}",
head_dim, max_seq_len, theta, static_cast<int>(algo), static_cast<int>(dtype_));
} }
void RoPE::initialize_cache() { void RoPE::initialize_cache() {
...@@ -42,9 +37,8 @@ void RoPE::initialize_cache() { ...@@ -42,9 +37,8 @@ void RoPE::initialize_cache() {
INFINICORE_NN_BUFFER_INIT(cos_cache, ({max_seq_len_, cache_dim}, dtype_, device_)); INFINICORE_NN_BUFFER_INIT(cos_cache, ({max_seq_len_, cache_dim}, dtype_, device_));
// Pre-compute sin and cos values // Pre-compute sin and cos values
// The frequency calculation differs based on algorithm: // Frequency generation always uses GPT-J style (theta^(-2j/head_dim)).
// - GPT_J: pairs are (2j, 2j+1) for cache entry j, frequency for dimension 2j is theta^(-2j/head_dim) // The rotation algorithm (algo_) controls how dimensions are paired in the kernel.
// - GPT_NEOX: pairs are (j, j+head_dim/2) for cache entry j, frequency for dimension j is theta^(-j/head_dim)
// Compute on CPU first, then copy to device // Compute on CPU first, then copy to device
auto cpu_device = Device(Device::Type::CPU, 0); auto cpu_device = Device(Device::Type::CPU, 0);
...@@ -55,20 +49,8 @@ void RoPE::initialize_cache() { ...@@ -55,20 +49,8 @@ void RoPE::initialize_cache() {
for (size_t pos = 0; pos < max_seq_len_; pos++) { for (size_t pos = 0; pos < max_seq_len_; pos++) {
for (size_t j = 0; j < cache_dim; j++) { for (size_t j = 0; j < cache_dim; j++) {
// Compute inverse frequency based on algorithm // GPT-J style inverse frequency: theta^(-2j/head_dim)
double inv_freq; double inv_freq = 1.0 / std::pow(theta_, 2.0 * static_cast<double>(j) / static_cast<double>(head_dim_));
if (algo_ == Algo::GPT_J) {
// GPT_J: pairs are (2j, 2j+1) for cache entry j
// Frequency for pair j: theta^(-2j/head_dim)
inv_freq = 1.0 / std::pow(theta_, 2.0 * static_cast<double>(j) / static_cast<double>(head_dim_));
} else if (algo_ == Algo::GPT_NEOX) {
// GPT_NEOX: pairs are (j, j+head_dim/2) for cache entry j
// Frequency for pair j (corresponding to dimension j): theta^(-j/head_dim)
inv_freq = 1.0 / std::pow(theta_, static_cast<double>(j) / static_cast<double>(head_dim_));
} else {
throw std::runtime_error("Unsupported RoPE algorithm: " + std::to_string(static_cast<int>(algo_)));
}
// Compute angle: position * inverse_frequency // Compute angle: position * inverse_frequency
double angle = static_cast<double>(pos) * inv_freq; double angle = static_cast<double>(pos) * inv_freq;
......
...@@ -26,4 +26,4 @@ inline void bind(py::module &m) { ...@@ -26,4 +26,4 @@ inline void bind(py::module &m) {
m.def("sync_device", &syncDevice, "Synchronize the current device"); m.def("sync_device", &syncDevice, "Synchronize the current device");
} }
} // namespace infinicore::context } // namespace infinicore::context
\ No newline at end of file
#include <pybind11/pybind11.h> #include <pybind11/pybind11.h>
#include <pybind11/stl.h> #include <pybind11/stl.h>
#include "../utils.hpp"
#include "context.hpp" #include "context.hpp"
#include "device.hpp" #include "device.hpp"
#include "device_event.hpp" #include "device_event.hpp"
#include "dtype.hpp" #include "dtype.hpp"
#include "nn.hpp"
#include "ops.hpp" #include "ops.hpp"
#include "tensor.hpp" #include "tensor.hpp"
...@@ -17,6 +19,7 @@ PYBIND11_MODULE(_infinicore, m) { ...@@ -17,6 +19,7 @@ PYBIND11_MODULE(_infinicore, m) {
dtype::bind(m); dtype::bind(m);
ops::bind(m); ops::bind(m);
tensor::bind(m); tensor::bind(m);
pybind11_nn::bind(m);
} }
} // namespace infinicore } // namespace infinicore
#pragma once
#include <pybind11/pybind11.h>
#include "nn/rope.hpp"
namespace py = pybind11;
// Aggregation point for the Python bindings of the nn layer.
namespace infinicore::pybind11_nn {
/// @brief Registers the nn-related bindings on the given pybind11 module.
/// @param m Module object that receives the bindings.
/// Currently this only forwards to bind_rope (declared in "nn/rope.hpp").
inline void bind(py::module &m) {
bind_rope(m);
}
} // namespace infinicore::pybind11_nn
...@@ -9,11 +9,6 @@ namespace py = pybind11; ...@@ -9,11 +9,6 @@ namespace py = pybind11;
namespace infinicore::ops { namespace infinicore::ops {
inline void bind_rope(py::module &m) { inline void bind_rope(py::module &m) {
py::enum_<infinicore::nn::RoPE::Algo>(m, "Algo")
.value("GPT_J", infinicore::nn::RoPE::Algo::GPT_J)
.value("GPT_NEOX", infinicore::nn::RoPE::Algo::GPT_NEOX);
m.def("rope", m.def("rope",
&op::rope, &op::rope,
py::arg("x"), py::arg("x"),
......
...@@ -3,14 +3,15 @@ ...@@ -3,14 +3,15 @@
#include "infinicore/ops.hpp" #include "infinicore/ops.hpp"
#include "infinicore/tensor.hpp" #include "infinicore/tensor.hpp"
#include <spdlog/spdlog.h> #include <algorithm>
#include <cstring>
#include <iostream>
namespace infinicore { namespace infinicore {
Tensor TensorImpl::to(Device device) const { Tensor TensorImpl::to(Device device) const {
if (device == data_.memory->device()) { if (device == data_.memory->device()) {
return Tensor(const_cast<TensorImpl *>(this)->shared_from_this()); return Tensor(const_cast<TensorImpl *>(this)->shared_from_this());
} else { } else {
std::shared_ptr<TensorImpl> _t = empty(meta_.shape, meta_.dtype, device, true); std::shared_ptr<TensorImpl> _t = empty(meta_.shape, meta_.dtype, device);
_t->copy_from(Tensor(const_cast<TensorImpl *>(this)->shared_from_this())); _t->copy_from(Tensor(const_cast<TensorImpl *>(this)->shared_from_this()));
return Tensor(_t); return Tensor(_t);
} }
...@@ -20,26 +21,44 @@ void TensorImpl::copy_from(Tensor src) { ...@@ -20,26 +21,44 @@ void TensorImpl::copy_from(Tensor src) {
if (src->shape() != this->shape()) { if (src->shape() != this->shape()) {
throw std::runtime_error("Cannot copy from tensor with different shape"); throw std::runtime_error("Cannot copy from tensor with different shape");
} }
if (this->device().getType() == src->device().getType()) { if (this->device() == src->device()) {
op::rearrange_(Tensor(const_cast<TensorImpl *>(this)->shared_from_this()), src);
// If both tensors are contiguous, use direct memcpy (much faster and avoids rearrange issues)
if (this->is_contiguous() && src->is_contiguous()) {
// Use nbytes() to get the actual tensor size
size_t copy_size = std::min(this->nbytes(), src->nbytes());
// For CPU-to-CPU copies, use regular memcpy. For device-to-device, use D2D memcpy
if (this->device().getType() == Device::Type::CPU) {
context::memcpyH2H(this->data(), src->data(), copy_size);
} else {
context::memcpyD2D(this->data(), src->data(), copy_size);
}
} else {
op::rearrange_(Tensor(const_cast<TensorImpl *>(this)->shared_from_this()), src);
}
} else { } else {
if (!src->is_contiguous()) { if (!src->is_contiguous()) {
src = src->contiguous(); src = src->contiguous();
} }
// Use nbytes() to get the actual tensor size, not the full memory size
size_t copy_size = std::min(this->nbytes(), src->nbytes());
if (this->device().getType() == Device::Type::CPU) { if (this->device().getType() == Device::Type::CPU) {
if (this->is_contiguous()) { if (this->is_contiguous()) {
context::memcpyD2H(this->data(), src->data(), this->data_.memory->size()); context::memcpyD2H(this->data(), src->data(), copy_size);
} else { } else {
auto local_src = Tensor::empty(this->shape(), this->dtype(), this->device()); auto local_src = Tensor::empty(this->shape(), this->dtype(), this->device());
context::memcpyD2H(local_src->data(), src->data(), this->data_.memory->size()); context::memcpyD2H(local_src->data(), src->data(), this->data_.memory->size());
op::rearrange_(Tensor(const_cast<TensorImpl *>(this)->shared_from_this()), local_src); op::rearrange_(Tensor(const_cast<TensorImpl *>(this)->shared_from_this()), local_src);
} }
} else if (src->device().getType() == Device::Type::CPU) { } else if (src->device().getType() == Device::Type::CPU) {
if (this->is_contiguous()) { if (this->is_contiguous()) {
context::memcpyH2D(this->data(), src->data(), this->data_.memory->size()); context::memcpyH2D(this->data(), src->data(), copy_size);
} else { } else {
auto local_src = Tensor::empty(this->shape(), this->dtype(), this->device()); auto local_src = Tensor::empty(this->shape(), this->dtype(), this->device());
context::memcpyH2D(local_src->data(), src->data(), this->data_.memory->size()); context::memcpyH2D(local_src->data(), src->data(), copy_size);
op::rearrange_(Tensor(const_cast<TensorImpl *>(this)->shared_from_this()), local_src); op::rearrange_(Tensor(const_cast<TensorImpl *>(this)->shared_from_this()), local_src);
} }
} }
......
...@@ -13,6 +13,10 @@ inline struct SpdlogInitializer { ...@@ -13,6 +13,10 @@ inline struct SpdlogInitializer {
} else { } else {
spdlog::cfg::load_env_levels("INFINICORE_LOG_LEVEL"); spdlog::cfg::load_env_levels("INFINICORE_LOG_LEVEL");
} }
// Set pattern for logging
// Using SPDLOG_* macros enables source location support (%s and %#)
// Format: [timestamp] [level] [file:line] message
spdlog::set_pattern("[%Y-%m-%d %H:%M:%S.%e] [%^%l%$] [%s:%#] %v");
} }
} spdlog_initializer; } spdlog_initializer;
...@@ -21,9 +25,9 @@ inline struct SpdlogInitializer { ...@@ -21,9 +25,9 @@ inline struct SpdlogInitializer {
#define INFINICORE_CHECK_ERROR(call) \ #define INFINICORE_CHECK_ERROR(call) \
do { \ do { \
spdlog::debug("Entering `" #call "` at `" __FILE__ ":" STRINGIZE(__LINE__) "`."); \ SPDLOG_DEBUG("Entering `" #call "` at `" __FILE__ ":" STRINGIZE(__LINE__) "`."); \
infiniStatus_t ret = (call); \ infiniStatus_t ret = (call); \
spdlog::debug("Exiting `" #call "` at `" __FILE__ ":" STRINGIZE(__LINE__) "`."); \ SPDLOG_DEBUG("Exiting `" #call "` at `" __FILE__ ":" STRINGIZE(__LINE__) "`."); \
if (ret != INFINI_STATUS_SUCCESS) { \ if (ret != INFINI_STATUS_SUCCESS) { \
throw std::runtime_error(#call " failed with error: " + std::string(infini_status_string(ret))); \ throw std::runtime_error(#call " failed with error: " + std::string(infini_status_string(ret))); \
} \ } \
......
...@@ -348,12 +348,45 @@ target("infiniccl") ...@@ -348,12 +348,45 @@ target("infiniccl")
set_installdir(os.getenv("INFINI_ROOT") or (os.getenv(is_host("windows") and "HOMEPATH" or "HOME") .. "/.infini")) set_installdir(os.getenv("INFINI_ROOT") or (os.getenv(is_host("windows") and "HOMEPATH" or "HOME") .. "/.infini"))
target_end() target_end()
target("infinicore_c_api")
target("infinicore_c_api") target("infinicore_c_api")
set_kind("phony") set_kind("phony")
add_deps("infiniop", "infinirt", "infiniccl") add_deps("infiniop", "infinirt", "infiniccl")
after_build(function (target) print(YELLOW .. "[Congratulations!] Now you can install the libraries with \"xmake install\"" .. NC) end) after_build(function (target) print(YELLOW .. "[Congratulations!] Now you can install the libraries with \"xmake install\"" .. NC) end)
target_end() target_end()
-- Shared-library target that builds the InfiniCore C++ sources
-- (core, context, tensor, nn, ops) and installs the public headers.
target("infinicore_cpp_api")
set_kind("shared")
-- Depends on the three lower-level libraries declared as separate targets.
add_deps("infiniop", "infinirt", "infiniccl")
set_languages("cxx17")
-- Install/lookup root: $INFINI_ROOT if set, otherwise <home>/.infini
-- (HOMEPATH on Windows hosts, HOME elsewhere).
local INFINI_ROOT = os.getenv("INFINI_ROOT") or (os.getenv(is_host("windows") and "HOMEPATH" or "HOME") .. "/.infini")
add_includedirs("include")
-- The installed include directory is marked public = true.
add_includedirs(INFINI_ROOT.."/include", { public = true })
add_linkdirs(INFINI_ROOT.."/lib")
add_links("infiniop", "infinirt", "infiniccl")
-- Add InfiniCore C++ source files (needed for RoPE and other nn modules)
add_files("src/infinicore/*.cc")
add_files("src/infinicore/context/*.cc")
add_files("src/infinicore/context/*/*.cc")
add_files("src/infinicore/tensor/*.cc")
add_files("src/infinicore/nn/*.cc")
add_files("src/infinicore/ops/*/*.cc")
set_installdir(INFINI_ROOT)
-- Install the public headers under include/infinicore, plus the two
-- top-level umbrella headers under include/.
-- NOTE(review): the "**" and "**/*" patterns below look like they overlap — confirm whether both are needed.
add_installfiles("include/infinicore/(**.h)", {prefixdir = "include/infinicore"})
add_installfiles("include/infinicore/(**.hpp)", {prefixdir = "include/infinicore"})
add_installfiles("include/infinicore/(**/*.h)", {prefixdir = "include/infinicore"})
add_installfiles("include/infinicore/(**/*.hpp)",{prefixdir = "include/infinicore"})
add_installfiles("include/infinicore.h", {prefixdir = "include"})
add_installfiles("include/infinicore.hpp", {prefixdir = "include"})
-- Print a hint after a successful build pointing at "xmake install".
after_build(function (target) print(YELLOW .. "[Congratulations!] Now you can install the libraries with \"xmake install\"" .. NC) end)
target_end()
target("_infinicore") target("_infinicore")
add_packages("boost") add_packages("boost")
if is_mode("debug") then if is_mode("debug") then
...@@ -379,6 +412,7 @@ target("_infinicore") ...@@ -379,6 +412,7 @@ target("_infinicore")
add_files("src/infinicore/context/*.cc") add_files("src/infinicore/context/*.cc")
add_files("src/infinicore/context/*/*.cc") add_files("src/infinicore/context/*/*.cc")
add_files("src/infinicore/tensor/*.cc") add_files("src/infinicore/tensor/*.cc")
add_files("src/infinicore/nn/*.cc")
add_files("src/infinicore/ops/*/*.cc") add_files("src/infinicore/ops/*/*.cc")
add_files("src/infinicore/pybind11/**.cc") add_files("src/infinicore/pybind11/**.cc")
......
...@@ -89,6 +89,7 @@ target("infinicore-test") ...@@ -89,6 +89,7 @@ target("infinicore-test")
add_files(os.projectdir().."/src/infinicore/nn/*.cc") add_files(os.projectdir().."/src/infinicore/nn/*.cc")
add_files(os.projectdir().."/src/infinicore-test/*.cc") add_files(os.projectdir().."/src/infinicore-test/*.cc")
add_files(os.projectdir().."/src/infinicore-test/*/*.cc")
set_installdir(INFINI_ROOT) set_installdir(INFINI_ROOT)
target_end() target_end()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment