Unverified commit 2f3f4076, authored by Ceng and committed by GitHub
Browse files

issue/634: InfiniCore 支持InfiniLM Llama模型适配 (#668)



* issue/634: InfiniCore 支持InfiniLM Llama模型适配
Signed-off-by: Ceng23333 <441651826@qq.com>

* .
Signed-off-by: default avatarCeng23333 <441651826@qq.com>

---------
Signed-off-by: default avatarCeng23333 <441651826@qq.com>
parent 1bafd1a6
import contextlib
import infinicore.context as context
import infinicore.nn as nn
# Import context functions
......@@ -60,6 +61,7 @@ from infinicore.tensor import (
__all__ = [
# Modules.
"context",
"nn",
# Classes.
"device",
......
......@@ -5,8 +5,8 @@ from infinicore.tensor import Tensor
class RopeAlgo:
    r"""Different types of RoPE algorithms.

    Mirrors the C++ ``infinicore::nn::RoPE::Algo`` enum exposed by the
    ``_infinicore`` pybind11 module as ``RoPEAlgo``:

    * ``GPT_J``    -- interleaved dimension pairing.
    * ``GPT_NEOX`` -- half-split dimension pairing.
    """

    # NOTE: this commit renames the bound enum from ``Algo`` to ``RoPEAlgo``;
    # the stale ``_infinicore.Algo.*`` assignments (diff residue) are removed
    # here, since ``_infinicore.Algo`` no longer exists and referencing it
    # would raise AttributeError at import time.
    GPT_J = _infinicore.RoPEAlgo.GPT_J
    GPT_NEOX = _infinicore.RoPEAlgo.GPT_NEOX
def rope(
......
......@@ -36,7 +36,7 @@ Embedding::Embedding(size_t num_embeddings,
// This would require a slice operation
}
spdlog::debug("Created Embedding module: num_embeddings={}, embedding_dim={}, dtype={}, padding_idx={}",
SPDLOG_DEBUG("Created Embedding module: num_embeddings={}, embedding_dim={}, dtype={}, padding_idx={}",
num_embeddings, embedding_dim, static_cast<int>(dtype_),
padding_idx_.has_value() ? std::to_string(padding_idx_.value()) : "None");
}
......
......@@ -22,7 +22,7 @@ Linear::Linear(size_t in_features, size_t out_features, bool bias, const DataTyp
bias_ = Parameter(); // Default constructed empty parameter
}
spdlog::debug("Created Linear module: in_features={}, out_features={}, bias={}, dtype={}",
SPDLOG_DEBUG("Created Linear module: in_features={}, out_features={}, bias={}, dtype={}",
in_features, out_features, bias, static_cast<int>(dtype_));
}
......
......@@ -19,7 +19,13 @@ Parameter::Parameter(
// Loads raw bytes from `data` into this parameter's storage.
// The blob is first staged into a contiguous CPU tensor, then copied to
// wherever the parameter actually lives (host or device).
// NOTE(review): `data` is assumed to hold at least nbytes() bytes laid out
// contiguously in the parameter's shape/dtype — TODO confirm against callers.
void Parameter::load_blob(const void *data) {
// CPU staging buffer; the trailing `true` presumably requests pinned/host
// memory — verify against Tensor::empty's signature.
auto buffer = Tensor::empty(impl_->shape(), impl_->dtype(), Device(Device::Type::CPU, 0), true);
std::memcpy(buffer->data(), data, buffer->nbytes());
// If parameter is on CPU, use direct memcpy; otherwise use H2D
if (impl_->device().getType() == Device::Type::CPU) {
infinicore::context::memcpyH2H(impl_->data(), buffer->data(), buffer->nbytes());
} else {
infinicore::context::memcpyH2D(impl_->data(), buffer->data(), buffer->nbytes());
// The H2D copy may be asynchronous; block here so the staging buffer (a
// local about to be destroyed) is not freed while the copy is in flight.
infinicore::context::syncStream();
}
}
} // namespace infinicore::nn
#include "infinicore/nn/rmsnorm.hpp"
#include "infinicore/ops.hpp"
#include <cmath>
#include <spdlog/spdlog.h>
#include <stdexcept>
namespace infinicore::nn {
......@@ -19,9 +18,6 @@ RMSNorm::RMSNorm(size_t normalized_shape, double eps, const DataType &dtype, con
// Initialize weight to ones (standard practice for RMSNorm)
auto ones_tensor = Tensor::ones({normalized_shape}, dtype_, device);
weight_->copy_from(ones_tensor);
spdlog::debug("Created RMSNorm module: normalized_shape={}, eps={}, dtype={}",
normalized_shape, eps, static_cast<int>(dtype_));
}
Tensor RMSNorm::forward(const Tensor &x) const {
......
......@@ -4,7 +4,6 @@
#include <algorithm>
#include <cmath>
#include <functional>
#include <spdlog/spdlog.h>
#include <stdexcept>
namespace infinicore::nn {
......@@ -20,7 +19,6 @@ RoPE::RoPE(size_t head_dim,
theta_(theta),
algo_(algo),
dtype_(dtype) {
if (head_dim % 2 != 0) {
throw std::invalid_argument("head_dim must be even for RoPE, got " + std::to_string(head_dim));
}
......@@ -29,9 +27,6 @@ RoPE::RoPE(size_t head_dim,
// Initialize cache tables
initialize_cache();
spdlog::debug("Created RoPE module: head_dim={}, max_seq_len={}, theta={}, algo={}, dtype={}",
head_dim, max_seq_len, theta, static_cast<int>(algo), static_cast<int>(dtype_));
}
void RoPE::initialize_cache() {
......@@ -42,9 +37,8 @@ void RoPE::initialize_cache() {
INFINICORE_NN_BUFFER_INIT(cos_cache, ({max_seq_len_, cache_dim}, dtype_, device_));
// Pre-compute sin and cos values
// The frequency calculation differs based on algorithm:
// - GPT_J: pairs are (2j, 2j+1) for cache entry j, frequency for dimension 2j is theta^(-2j/head_dim)
// - GPT_NEOX: pairs are (j, j+head_dim/2) for cache entry j, frequency for dimension j is theta^(-j/head_dim)
// Frequency generation always uses GPT-J style (theta^(-2j/head_dim)).
// The rotation algorithm (algo_) controls how dimensions are paired in the kernel.
// Compute on CPU first, then copy to device
auto cpu_device = Device(Device::Type::CPU, 0);
......@@ -55,20 +49,8 @@ void RoPE::initialize_cache() {
for (size_t pos = 0; pos < max_seq_len_; pos++) {
for (size_t j = 0; j < cache_dim; j++) {
// Compute inverse frequency based on algorithm
double inv_freq;
if (algo_ == Algo::GPT_J) {
// GPT_J: pairs are (2j, 2j+1) for cache entry j
// Frequency for pair j: theta^(-2j/head_dim)
inv_freq = 1.0 / std::pow(theta_, 2.0 * static_cast<double>(j) / static_cast<double>(head_dim_));
} else if (algo_ == Algo::GPT_NEOX) {
// GPT_NEOX: pairs are (j, j+head_dim/2) for cache entry j
// Frequency for pair j (corresponding to dimension j): theta^(-j/head_dim)
inv_freq = 1.0 / std::pow(theta_, static_cast<double>(j) / static_cast<double>(head_dim_));
} else {
throw std::runtime_error("Unsupported RoPE algorithm: " + std::to_string(static_cast<int>(algo_)));
}
// GPT-J style inverse frequency: theta^(-2j/head_dim)
double inv_freq = 1.0 / std::pow(theta_, 2.0 * static_cast<double>(j) / static_cast<double>(head_dim_));
// Compute angle: position * inverse_frequency
double angle = static_cast<double>(pos) * inv_freq;
......
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include "../utils.hpp"
#include "context.hpp"
#include "device.hpp"
#include "device_event.hpp"
#include "dtype.hpp"
#include "nn.hpp"
#include "ops.hpp"
#include "tensor.hpp"
......@@ -17,6 +19,7 @@ PYBIND11_MODULE(_infinicore, m) {
dtype::bind(m);
ops::bind(m);
tensor::bind(m);
pybind11_nn::bind(m);
}
} // namespace infinicore
#pragma once
#include <pybind11/pybind11.h>
#include "nn/rope.hpp"
namespace py = pybind11;
namespace infinicore::pybind11_nn {
// Registers all infinicore.nn pybind11 bindings on module `m`.
// Currently delegates only to bind_rope(); add further nn-module binders here.
inline void bind(py::module &m) {
bind_rope(m);
}
} // namespace infinicore::pybind11_nn
......@@ -9,11 +9,6 @@ namespace py = pybind11;
namespace infinicore::ops {
inline void bind_rope(py::module &m) {
py::enum_<infinicore::nn::RoPE::Algo>(m, "Algo")
.value("GPT_J", infinicore::nn::RoPE::Algo::GPT_J)
.value("GPT_NEOX", infinicore::nn::RoPE::Algo::GPT_NEOX);
m.def("rope",
&op::rope,
py::arg("x"),
......
......@@ -3,14 +3,15 @@
#include "infinicore/ops.hpp"
#include "infinicore/tensor.hpp"
#include <spdlog/spdlog.h>
#include <algorithm>
#include <cstring>
#include <iostream>
namespace infinicore {
Tensor TensorImpl::to(Device device) const {
if (device == data_.memory->device()) {
return Tensor(const_cast<TensorImpl *>(this)->shared_from_this());
} else {
std::shared_ptr<TensorImpl> _t = empty(meta_.shape, meta_.dtype, device, true);
std::shared_ptr<TensorImpl> _t = empty(meta_.shape, meta_.dtype, device);
_t->copy_from(Tensor(const_cast<TensorImpl *>(this)->shared_from_this()));
return Tensor(_t);
}
......@@ -20,26 +21,44 @@ void TensorImpl::copy_from(Tensor src) {
if (src->shape() != this->shape()) {
throw std::runtime_error("Cannot copy from tensor with different shape");
}
if (this->device().getType() == src->device().getType()) {
if (this->device() == src->device()) {
// If both tensors are contiguous, use direct memcpy (much faster and avoids rearrange issues)
if (this->is_contiguous() && src->is_contiguous()) {
// Use nbytes() to get the actual tensor size
size_t copy_size = std::min(this->nbytes(), src->nbytes());
// For CPU-to-CPU copies, use regular memcpy. For device-to-device, use D2D memcpy
if (this->device().getType() == Device::Type::CPU) {
context::memcpyH2H(this->data(), src->data(), copy_size);
} else {
context::memcpyD2D(this->data(), src->data(), copy_size);
}
} else {
op::rearrange_(Tensor(const_cast<TensorImpl *>(this)->shared_from_this()), src);
}
} else {
if (!src->is_contiguous()) {
src = src->contiguous();
}
// Use nbytes() to get the actual tensor size, not the full memory size
size_t copy_size = std::min(this->nbytes(), src->nbytes());
if (this->device().getType() == Device::Type::CPU) {
if (this->is_contiguous()) {
context::memcpyD2H(this->data(), src->data(), this->data_.memory->size());
context::memcpyD2H(this->data(), src->data(), copy_size);
} else {
auto local_src = Tensor::empty(this->shape(), this->dtype(), this->device());
context::memcpyD2H(local_src->data(), src->data(), this->data_.memory->size());
op::rearrange_(Tensor(const_cast<TensorImpl *>(this)->shared_from_this()), local_src);
}
} else if (src->device().getType() == Device::Type::CPU) {
if (this->is_contiguous()) {
context::memcpyH2D(this->data(), src->data(), this->data_.memory->size());
context::memcpyH2D(this->data(), src->data(), copy_size);
} else {
auto local_src = Tensor::empty(this->shape(), this->dtype(), this->device());
context::memcpyH2D(local_src->data(), src->data(), this->data_.memory->size());
context::memcpyH2D(local_src->data(), src->data(), copy_size);
op::rearrange_(Tensor(const_cast<TensorImpl *>(this)->shared_from_this()), local_src);
}
}
......
......@@ -13,6 +13,10 @@ inline struct SpdlogInitializer {
} else {
spdlog::cfg::load_env_levels("INFINICORE_LOG_LEVEL");
}
// Set pattern for logging
// Using SPDLOG_* macros enables source location support (%s and %#)
// Format: [timestamp] [level] [file:line] message
spdlog::set_pattern("[%Y-%m-%d %H:%M:%S.%e] [%^%l%$] [%s:%#] %v");
}
} spdlog_initializer;
......@@ -21,9 +25,9 @@ inline struct SpdlogInitializer {
#define INFINICORE_CHECK_ERROR(call) \
do { \
spdlog::debug("Entering `" #call "` at `" __FILE__ ":" STRINGIZE(__LINE__) "`."); \
SPDLOG_DEBUG("Entering `" #call "` at `" __FILE__ ":" STRINGIZE(__LINE__) "`."); \
infiniStatus_t ret = (call); \
spdlog::debug("Exiting `" #call "` at `" __FILE__ ":" STRINGIZE(__LINE__) "`."); \
SPDLOG_DEBUG("Exiting `" #call "` at `" __FILE__ ":" STRINGIZE(__LINE__) "`."); \
if (ret != INFINI_STATUS_SUCCESS) { \
throw std::runtime_error(#call " failed with error: " + std::string(infini_status_string(ret))); \
} \
......
......@@ -348,12 +348,45 @@ target("infiniccl")
set_installdir(os.getenv("INFINI_ROOT") or (os.getenv(is_host("windows") and "HOMEPATH" or "HOME") .. "/.infini"))
target_end()
-- Phony aggregate target: building it builds the three C API libraries
-- (infiniop, infinirt, infiniccl) without producing an artifact of its own.
-- NOTE: the duplicated `target("infinicore_c_api")` line (a diff artifact)
-- is removed — declaring the same target twice back-to-back is at best
-- redundant and at worst reopens/overwrites the target definition in xmake.
target("infinicore_c_api")
    set_kind("phony")
    add_deps("infiniop", "infinirt", "infiniccl")
    after_build(function (target) print(YELLOW .. "[Congratulations!] Now you can install the libraries with \"xmake install\"" .. NC) end)
target_end()
-- Shared library exposing the InfiniCore C++ API (tensor, context, nn, ops).
target("infinicore_cpp_api")
set_kind("shared")
-- Link against the C API libraries built by this project.
add_deps("infiniop", "infinirt", "infiniccl")
set_languages("cxx17")
-- Install root: $INFINI_ROOT if set, otherwise ~/.infini (HOMEPATH on Windows).
local INFINI_ROOT = os.getenv("INFINI_ROOT") or (os.getenv(is_host("windows") and "HOMEPATH" or "HOME") .. "/.infini")
add_includedirs("include")
-- Public include dir so downstream targets inherit the installed headers.
add_includedirs(INFINI_ROOT.."/include", { public = true })
add_linkdirs(INFINI_ROOT.."/lib")
add_links("infiniop", "infinirt", "infiniccl")
-- Add InfiniCore C++ source files (needed for RoPE and other nn modules)
add_files("src/infinicore/*.cc")
add_files("src/infinicore/context/*.cc")
add_files("src/infinicore/context/*/*.cc")
add_files("src/infinicore/tensor/*.cc")
add_files("src/infinicore/nn/*.cc")
add_files("src/infinicore/ops/*/*.cc")
set_installdir(INFINI_ROOT)
-- Install public headers, preserving the include/infinicore/ directory layout.
add_installfiles("include/infinicore/(**.h)", {prefixdir = "include/infinicore"})
add_installfiles("include/infinicore/(**.hpp)", {prefixdir = "include/infinicore"})
add_installfiles("include/infinicore/(**/*.h)", {prefixdir = "include/infinicore"})
add_installfiles("include/infinicore/(**/*.hpp)",{prefixdir = "include/infinicore"})
add_installfiles("include/infinicore.h", {prefixdir = "include"})
add_installfiles("include/infinicore.hpp", {prefixdir = "include"})
after_build(function (target) print(YELLOW .. "[Congratulations!] Now you can install the libraries with \"xmake install\"" .. NC) end)
target_end()
target("_infinicore")
add_packages("boost")
if is_mode("debug") then
......@@ -379,6 +412,7 @@ target("_infinicore")
add_files("src/infinicore/context/*.cc")
add_files("src/infinicore/context/*/*.cc")
add_files("src/infinicore/tensor/*.cc")
add_files("src/infinicore/nn/*.cc")
add_files("src/infinicore/ops/*/*.cc")
add_files("src/infinicore/pybind11/**.cc")
......
......@@ -89,6 +89,7 @@ target("infinicore-test")
add_files(os.projectdir().."/src/infinicore/nn/*.cc")
add_files(os.projectdir().."/src/infinicore-test/*.cc")
add_files(os.projectdir().."/src/infinicore-test/*/*.cc")
set_installdir(INFINI_ROOT)
target_end()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment.