/*!
 * Copyright (c) 2019 by Contributors
 * \file array/cpu/array_op_impl.cc
 * \brief Array operator CPU implementation
 */
#include <dgl/array.h>
#include <algorithm>
#include <numeric>
#include <unordered_map>
#include <vector>
#include "../arith.h"

namespace dgl {
using runtime::NDArray;
namespace aten {
namespace impl {

///////////////////////////// AsNumBits /////////////////////////////

template <DLDeviceType XPU, typename IdType>
IdArray AsNumBits(IdArray arr, uint8_t bits) {
  CHECK(bits == 32 || bits == 64) << "invalid number of integer bits";
  if (sizeof(IdType) * 8 == bits) {
    return arr;
  }
  const int64_t len = arr->shape[0];
  IdArray ret = NewIdArray(len, arr->ctx, bits);
  const IdType* arr_data = static_cast<IdType*>(arr->data);
  if (bits == 32) {
    int32_t* ret_data = static_cast<int32_t*>(ret->data);
    for (int64_t i = 0; i < len; ++i) {
      ret_data[i] = arr_data[i];
    }
  } else {
    int64_t* ret_data = static_cast<int64_t*>(ret->data);
    for (int64_t i = 0; i < len; ++i) {
      ret_data[i] = arr_data[i];
    }
  }
  return ret;
}

template IdArray AsNumBits<kDLCPU, int32_t>(IdArray arr, uint8_t bits);
template IdArray AsNumBits<kDLCPU, int64_t>(IdArray arr, uint8_t bits);
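
// Usage sketch (illustrative only; `ids64` is a hypothetical 64-bit IdArray supplied by
// the caller). AsNumBits returns the input unchanged when the requested width already
// matches, and otherwise copies the ids into a freshly allocated array of the new width:
//
//   IdArray ids32 = AsNumBits<kDLCPU, int64_t>(ids64, 32);  // copies into an int32 array
//   IdArray same  = AsNumBits<kDLCPU, int64_t>(ids64, 64);  // no copy, returns ids64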

///////////////////////////// BinaryElewise /////////////////////////////

template <DLDeviceType XPU, typename IdType, typename Op>
IdArray BinaryElewise(IdArray lhs, IdArray rhs) {
  IdArray ret = NewIdArray(lhs->shape[0], lhs->ctx, lhs->dtype.bits);
  const IdType* lhs_data = static_cast<IdType*>(lhs->data);
  const IdType* rhs_data = static_cast<IdType*>(rhs->data);
  IdType* ret_data = static_cast<IdType*>(ret->data);
  // TODO(BarclayII): this usually incurs lots of overhead in thread spawning, scheduling,
  // etc., especially since the workload is very light. Need to replace with parallel_for.
  // #pragma omp parallel for
  for (int64_t i = 0; i < lhs->shape[0]; ++i) {
    ret_data[i] = Op::Call(lhs_data[i], rhs_data[i]);
  }
  return ret;
}

template IdArray BinaryElewise<kDLCPU, int32_t, arith::Add>(IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDLCPU, int64_t, arith::Add>(IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDLCPU, int32_t, arith::Sub>(IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDLCPU, int64_t, arith::Sub>(IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDLCPU, int32_t, arith::Mul>(IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDLCPU, int64_t, arith::Mul>(IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDLCPU, int32_t, arith::Div>(IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDLCPU, int64_t, arith::Div>(IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDLCPU, int32_t, arith::Mod>(IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDLCPU, int64_t, arith::Mod>(IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDLCPU, int32_t, arith::GT>(IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDLCPU, int64_t, arith::GT>(IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDLCPU, int32_t, arith::LT>(IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDLCPU, int64_t, arith::LT>(IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDLCPU, int32_t, arith::GE>(IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDLCPU, int64_t, arith::GE>(IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDLCPU, int32_t, arith::LE>(IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDLCPU, int64_t, arith::LE>(IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDLCPU, int32_t, arith::EQ>(IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDLCPU, int64_t, arith::EQ>(IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDLCPU, int32_t, arith::NE>(IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDLCPU, int64_t, arith::NE>(IdArray lhs, IdArray rhs);

template <DLDeviceType XPU, typename IdType, typename Op>
IdArray BinaryElewise(IdArray lhs, IdType rhs) {
  IdArray ret = NewIdArray(lhs->shape[0], lhs->ctx, lhs->dtype.bits);
  const IdType* lhs_data = static_cast<IdType*>(lhs->data);
  IdType* ret_data = static_cast<IdType*>(ret->data);
  // TODO(BarclayII): this usually incurs lots of overhead in thread spawning, scheduling,
  // etc., especially since the workload is very light. Need to replace with parallel_for.
  // #pragma omp parallel for
  for (int64_t i = 0; i < lhs->shape[0]; ++i) {
    ret_data[i] = Op::Call(lhs_data[i], rhs);
  }
  return ret;
}

template IdArray BinaryElewise<kDLCPU, int32_t, arith::Add>(IdArray lhs, int32_t rhs);
template IdArray BinaryElewise<kDLCPU, int32_t, arith::Sub>(IdArray lhs, int32_t rhs);
template IdArray BinaryElewise<kDLCPU, int32_t, arith::Mul>(IdArray lhs, int32_t rhs);
template IdArray BinaryElewise<kDLCPU, int32_t, arith::Div>(IdArray lhs, int32_t rhs);
template IdArray BinaryElewise<kDLCPU, int32_t, arith::Mod>(IdArray lhs, int32_t rhs);
template IdArray BinaryElewise<kDLCPU, int32_t, arith::GT>(IdArray lhs, int32_t rhs);
template IdArray BinaryElewise<kDLCPU, int32_t, arith::LT>(IdArray lhs, int32_t rhs);
template IdArray BinaryElewise<kDLCPU, int32_t, arith::GE>(IdArray lhs, int32_t rhs);
template IdArray BinaryElewise<kDLCPU, int32_t, arith::LE>(IdArray lhs, int32_t rhs);
template IdArray BinaryElewise<kDLCPU, int32_t, arith::EQ>(IdArray lhs, int32_t rhs);
template IdArray BinaryElewise<kDLCPU, int32_t, arith::NE>(IdArray lhs, int32_t rhs);
template IdArray BinaryElewise<kDLCPU, int64_t, arith::Add>(IdArray lhs, int64_t rhs);
template IdArray BinaryElewise<kDLCPU, int64_t, arith::Sub>(IdArray lhs, int64_t rhs);
template IdArray BinaryElewise<kDLCPU, int64_t, arith::Mul>(IdArray lhs, int64_t rhs);
template IdArray BinaryElewise<kDLCPU, int64_t, arith::Div>(IdArray lhs, int64_t rhs);
template IdArray BinaryElewise<kDLCPU, int64_t, arith::Mod>(IdArray lhs, int64_t rhs);
template IdArray BinaryElewise<kDLCPU, int64_t, arith::GT>(IdArray lhs, int64_t rhs);
template IdArray BinaryElewise<kDLCPU, int64_t, arith::LT>(IdArray lhs, int64_t rhs);
template IdArray BinaryElewise<kDLCPU, int64_t, arith::GE>(IdArray lhs, int64_t rhs);
template IdArray BinaryElewise<kDLCPU, int64_t, arith::LE>(IdArray lhs, int64_t rhs);
template IdArray BinaryElewise<kDLCPU, int64_t, arith::EQ>(IdArray lhs, int64_t rhs);
template IdArray BinaryElewise<kDLCPU, int64_t, arith::NE>(IdArray lhs, int64_t rhs);

template <DLDeviceType XPU, typename IdType, typename Op>
IdArray BinaryElewise(IdType lhs, IdArray rhs) {
  IdArray ret = NewIdArray(rhs->shape[0], rhs->ctx, rhs->dtype.bits);
  const IdType* rhs_data = static_cast<IdType*>(rhs->data);
  IdType* ret_data = static_cast<IdType*>(ret->data);
  // TODO(BarclayII): this usually incurs lots of overhead in thread spawning, scheduling,
  // etc., especially since the workload is very light. Need to replace with parallel_for.
  // #pragma omp parallel for
  for (int64_t i = 0; i < rhs->shape[0]; ++i) {
    ret_data[i] = Op::Call(lhs, rhs_data[i]);
  }
  return ret;
}

template IdArray BinaryElewise<kDLCPU, int32_t, arith::Add>(int32_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDLCPU, int32_t, arith::Sub>(int32_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDLCPU, int32_t, arith::Mul>(int32_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDLCPU, int32_t, arith::Div>(int32_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDLCPU, int32_t, arith::Mod>(int32_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDLCPU, int32_t, arith::GT>(int32_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDLCPU, int32_t, arith::LT>(int32_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDLCPU, int32_t, arith::GE>(int32_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDLCPU, int32_t, arith::LE>(int32_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDLCPU, int32_t, arith::EQ>(int32_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDLCPU, int32_t, arith::NE>(int32_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDLCPU, int64_t, arith::Add>(int64_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDLCPU, int64_t, arith::Sub>(int64_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDLCPU, int64_t, arith::Mul>(int64_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDLCPU, int64_t, arith::Div>(int64_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDLCPU, int64_t, arith::Mod>(int64_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDLCPU, int64_t, arith::GT>(int64_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDLCPU, int64_t, arith::LT>(int64_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDLCPU, int64_t, arith::GE>(int64_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDLCPU, int64_t, arith::LE>(int64_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDLCPU, int64_t, arith::EQ>(int64_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDLCPU, int64_t, arith::NE>(int64_t lhs, IdArray rhs);

template <DLDeviceType XPU, typename IdType, typename Op>
IdArray UnaryElewise(IdArray lhs) {
  IdArray ret = NewIdArray(lhs->shape[0], lhs->ctx, lhs->dtype.bits);
  const IdType* lhs_data = static_cast<IdType*>(lhs->data);
  IdType* ret_data = static_cast<IdType*>(ret->data);
  // TODO(BarclayII): this usually incurs lots of overhead in thread spawning, scheduling,
  // etc., especially since the workload is very light. Need to replace with parallel_for.
  // #pragma omp parallel for
  for (int64_t i = 0; i < lhs->shape[0]; ++i) {
    ret_data[i] = Op::Call(lhs_data[i]);
  }
  return ret;
}

template IdArray UnaryElewise<kDLCPU, int32_t, arith::Neg>(IdArray lhs);
template IdArray UnaryElewise<kDLCPU, int64_t, arith::Neg>(IdArray lhs);
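
// Usage sketch (illustrative only; `a` and `b` are hypothetical int64 IdArrays of equal
// length, and the functor names are assumed to match those declared in ../arith.h).
// The array/array overload applies Op element-wise, the array/scalar and scalar/array
// overloads broadcast a single value, and UnaryElewise applies a unary functor:
//
//   IdArray sum  = BinaryElewise<kDLCPU, int64_t, arith::Add>(a, b);           // a[i] + b[i]
//   IdArray bump = BinaryElewise<kDLCPU, int64_t, arith::Add>(a, int64_t{1});  // a[i] + 1
//   IdArray neg  = UnaryElewise<kDLCPU, int64_t, arith::Neg>(a);               // -a[i]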

///////////////////////////// Full /////////////////////////////

template <DLDeviceType XPU, typename IdType>
IdArray Full(IdType val, int64_t length, DLContext ctx) {
  IdArray ret = NewIdArray(length, ctx, sizeof(IdType) * 8);
  IdType* ret_data = static_cast<IdType*>(ret->data);
  std::fill(ret_data, ret_data + length, val);
  return ret;
}

template IdArray Full<kDLCPU, int32_t>(int32_t val, int64_t length, DLContext ctx);
template IdArray Full<kDLCPU, int64_t>(int64_t val, int64_t length, DLContext ctx);

///////////////////////////// Range /////////////////////////////

template <DLDeviceType XPU, typename IdType>
IdArray Range(IdType low, IdType high, DLContext ctx) {
  CHECK(high >= low) << "high must be greater than or equal to low";
  IdArray ret = NewIdArray(high - low, ctx, sizeof(IdType) * 8);
  IdType* ret_data = static_cast<IdType*>(ret->data);
  std::iota(ret_data, ret_data + high - low, low);
  return ret;
}

template IdArray Range<kDLCPU, int32_t>(int32_t, int32_t, DLContext);
template IdArray Range<kDLCPU, int64_t>(int64_t, int64_t, DLContext);

///////////////////////////// Relabel_ /////////////////////////////

template <DLDeviceType XPU, typename IdType>
IdArray Relabel_(const std::vector<IdArray>& arrays) {
  // build map & relabel
  IdType newid = 0;
  std::unordered_map<IdType, IdType> oldv2newv;
  for (IdArray arr : arrays) {
    for (int64_t i = 0; i < arr->shape[0]; ++i) {
      const IdType id = static_cast<IdType*>(arr->data)[i];
      if (!oldv2newv.count(id)) {
        oldv2newv[id] = newid++;
      }
      static_cast<IdType*>(arr->data)[i] = oldv2newv[id];
    }
  }
  // map array
  IdArray maparr = NewIdArray(newid, DLContext{kDLCPU, 0}, sizeof(IdType) * 8);
  IdType* maparr_data = static_cast<IdType*>(maparr->data);
  for (const auto& kv : oldv2newv) {
    maparr_data[kv.second] = kv.first;
  }
  return maparr;
}

template IdArray Relabel_<kDLCPU, int32_t>(const std::vector<IdArray>& arrays);
template IdArray Relabel_<kDLCPU, int64_t>(const std::vector<IdArray>& arrays);
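
// Usage sketch (illustrative only; values are made up). Note that Relabel_ mutates its
// input arrays in place, assigning consecutive new ids in order of first appearance, and
// returns the mapping from new ids back to the original ids:
//
//   DLContext cpu{kDLCPU, 0};
//   IdArray ones = Full<kDLCPU, int64_t>(1, 5, cpu);   // [1, 1, 1, 1, 1]
//   IdArray seq  = Range<kDLCPU, int64_t>(3, 7, cpu);  // [3, 4, 5, 6]
//   // Given a hypothetical int64 IdArray `ids` holding {10, 30, 10}:
//   std::vector<IdArray> vec = {ids};
//   IdArray map = Relabel_<kDLCPU, int64_t>(vec);      // ids becomes {0, 1, 0}, map = {10, 30}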

}  // namespace impl
}  // namespace aten
}  // namespace dgl