Commit 144ba492 authored by PanZezhong's avatar PanZezhong Committed by wooway777
Browse files

issue/143 fix bench script, worker cleanup, compiler initial input

parent 69f18760
#include "paged_compiler.hpp"
namespace {
// TODO: replace with Tensor::zeros once infinicore provides it.
// Zero-fills a device tensor by staging a value-initialized host buffer
// and copying it over with a host-to-device memcpy.
inline void set_zeros(infinicore::Tensor &tensor) {
    const size_t byte_count = tensor->nbytes();
    const std::vector<uint8_t> host_zeros(byte_count); // value-initialized to 0
    infinicore::context::memcpyH2D(tensor->data(), host_zeros.data(), byte_count, false);
}
} // namespace
namespace infinilm::engine {
PagedCompiler::PagedCompiler(const std::shared_ptr<InfinilmModel> &model, RankBarrier *barrier)
: GraphCompiler(model, barrier) {
......@@ -27,15 +35,20 @@ void PagedCompiler::compile() {
compiled_map_decode_.clear();
block_tables_holder_ = infinicore::Tensor::empty(
{nblocks}, infinicore::DataType::I64, infinicore::context::getDevice());
set_zeros(block_tables_holder_);
for (size_t b : decode_batch_sizes_) {
size_t block_per_req = nblocks / b;
InfinilmModel::Input input;
input.input_ids = infinicore::Tensor::empty({1, b}, infinicore::DataType::I64, infinicore::context::getDevice());
input.position_ids = infinicore::Tensor::empty({b}, infinicore::DataType::I64, infinicore::context::getDevice());
input.total_sequence_lengths = infinicore::Tensor::empty({b}, infinicore::DataType::I64, infinicore::context::getDevice());
set_zeros(input.input_ids.value());
set_zeros(input.position_ids.value());
set_zeros(input.total_sequence_lengths.value());
std::vector<int64_t> total_sequence_lengths_vec(b, 1);
infinicore::context::memcpyH2D(input.total_sequence_lengths.value()->data(), total_sequence_lengths_vec.data(), b * sizeof(int64_t), false);
input.input_offsets = infinicore::Tensor::empty({b + 1}, infinicore::DataType::I64, infinicore::context::getDevice());
set_zeros(input.input_offsets.value());
std::vector<int64_t> input_offsets_vec(b + 1, 0);
for (size_t i = 0; i <= b; i++) {
input_offsets_vec[i] = i;
......@@ -43,6 +56,7 @@ void PagedCompiler::compile() {
infinicore::context::memcpyH2D(input.input_offsets.value()->data(), input_offsets_vec.data(), (b + 1) * sizeof(int64_t), false);
input.block_tables = block_tables_holder_->as_strided({b, block_per_req}, {(ptrdiff_t)block_per_req, 1});
input.slot_mapping = infinicore::Tensor::empty({b}, infinicore::DataType::I64, infinicore::context::getDevice());
set_zeros(input.slot_mapping.value());
barrier_->wait();
infinicore::context::startGraphRecording();
......
......@@ -246,12 +246,12 @@ void RankWorker::thread_loop() {
try {
model_->load_parameter(local_param_name, local_param);
} catch (const std::exception &e) {
// convert exceptions to a safe behavior: set should_exit_ and notify caller
{
std::lock_guard<std::mutex> lk(mutex_);
should_exit_ = true;
job_done_ = true;
}
cv_.notify_all();
                        // No rethrow: the error is logged below and the loop exits via break;
                        // callers observe the failure through should_exit_ / job_done_.
spdlog::error("[{}] exception during load_parameter_: {}\n", info(), e.what());
break;
}
......@@ -321,9 +321,11 @@ void RankWorker::thread_loop() {
cv_.notify_all();
} catch (const std::exception &e) {
{
std::lock_guard<std::mutex> lk(mutex_);
should_exit_ = true;
job_done_ = true;
}
cv_.notify_all();
spdlog::error("[{}] exception during forward: {}\n", info(), e.what());
break;
......@@ -338,9 +340,11 @@ void RankWorker::thread_loop() {
cv_.notify_all();
} catch (const std::exception &e) {
{
std::lock_guard<std::mutex> lk(mutex_);
should_exit_ = true;
job_done_ = true;
}
cv_.notify_all();
spdlog::error("[{}] exception during reset_cache: {}\n", info(), e.what());
break;
......@@ -357,9 +361,11 @@ void RankWorker::thread_loop() {
cv_.notify_all();
} catch (const std::exception &e) {
{
std::lock_guard<std::mutex> lk(mutex_);
should_exit_ = true;
job_done_ = true;
}
cv_.notify_all();
spdlog::error("[{}] exception during compile: {}\n", info(), e.what());
break;
......@@ -369,6 +375,9 @@ void RankWorker::thread_loop() {
// Shouldn't reach here (no-op)
}
} // while
// Some clean up should be done before exiting the thread
compiler_.reset();
} catch (const std::exception &e) {
// Top-level exception: ensure any waiters are woken and the thread exits cleanly.
{
......
......@@ -137,6 +137,21 @@ def get_args():
action="store_true",
help="Run nvidia test",
)
parser.add_argument(
"--metax",
action="store_true",
help="Run metax test",
)
parser.add_argument(
"--moore",
action="store_true",
help="Run moore test",
)
parser.add_argument(
"--iluvatar",
action="store_true",
help="Run iluvatar test",
)
parser.add_argument(
"--cambricon",
action="store_true",
......@@ -328,6 +343,12 @@ if __name__ == "__main__":
device_str = "cpu"
elif args.nvidia:
device_str = "cuda"
elif args.metax:
device_str = "cuda"
elif args.moore:
device_str = "musa"
elif args.iluvatar:
device_str = "cuda"
elif args.cambricon:
device_str = "mlu"
else:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment