Unverified commit fe46dac2 authored by AllentDan, committed by GitHub

Add lint action (#32)

* temp

* fix lint

* csrc->src

* remove clang-format

* skip .rst

* skip doc

* clang-format

version

version

* mat_B
parent e8ab4ba3
@@ -25,8 +25,7 @@ struct Request {
     using Callback = std::function<void(std::unordered_map<std::string, Tensor>*)>;
     Callback stream_cb;
-    enum
-    {
+    enum {
         kInvalid = 1,
         kConflict = 2,
         kBusy = 3,
......
@@ -10,7 +10,7 @@
 #include <cutlass/half.h>
 #include <cutlass/platform/platform.h>
-// modifiy from:
+// modified from:
 // https://github.com/NVIDIA/cutlass/blob/main/examples/41_fused_multi_head_attention/kernel_forward.h
 namespace fastertransformer {
......
@@ -11,7 +11,8 @@ namespace cg = cooperative_groups;
 namespace fastertransformer {
 template<typename T>
-struct res_norm_ops_t {};
+struct res_norm_ops_t {
+};
 template<typename T>
 struct res_norm_t {
@@ -144,7 +145,7 @@ __global__ void fusedAddBiasResidualNorm(T* __restrict__ r_data,
 template<typename T>
 void invokeFusedAddBiasResidualRMSNorm(
-    T* residual, T* inout, const T* bias, const T* scale, float eps, int batch_size, int n_dims, cudaStream_t stream)
+    T* residual, T* in_out, const T* bias, const T* scale, float eps, int batch_size, int n_dims, cudaStream_t stream)
 {
     constexpr int PACK_DIM = sizeof(uint4) / sizeof(T);
     FT_CHECK(n_dims % PACK_DIM == 0);
@@ -154,7 +155,7 @@ void invokeFusedAddBiasResidualRMSNorm(
     n_threads = (n_threads + 31) / 32 * 32;  // round up to the nearest multiple of warp size
     fusedAddBiasResidualNorm<<<batch_size, n_threads, 0, stream>>>(
-        residual, inout, bias, scale, eps, batch_size, n_dims);
+        residual, in_out, bias, scale, eps, batch_size, n_dims);
 }
 template void
......
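As a side note, here is a minimal host-side sketch (not part of the diff) of the thread-count arithmetic the launcher above performs, assuming T = half and an illustrative n_dims of 4096:

// Each thread handles PACK_DIM = sizeof(uint4) / sizeof(T) elements (one 16-byte load),
// and the thread count is rounded up to a whole number of 32-thread warps.
#include <cstdio>

int main()
{
    const int n_dims   = 4096;           // assumed hidden dimension
    const int sizeof_T = 2;              // half precision
    const int pack_dim = 16 / sizeof_T;  // sizeof(uint4) / sizeof(T) = 8
    int n_threads      = n_dims / pack_dim;           // 512 threads
    n_threads          = (n_threads + 31) / 32 * 32;  // already a multiple of the warp size here
    std::printf("threads per block: %d\n", n_threads);
    return 0;
}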
@@ -6,6 +6,6 @@ namespace fastertransformer {
 template<typename T>
 void invokeFusedAddBiasResidualRMSNorm(
-    T* residual, T* inout, const T* bias, const T* scale, float eps, int batch_size, int n_dims, cudaStream_t stream);
+    T* residual, T* in_out, const T* bias, const T* scale, float eps, int batch_size, int n_dims, cudaStream_t stream);
 }  // namespace fastertransformer
 // Copyright (c) OpenMMLab. All rights reserved.
+#include "src/fastertransformer/kernels/decoder_masked_multihead_attention_utils.h"
 #include "src/fastertransformer/kernels/reduce_kernel_utils.cuh"
 #include "src/fastertransformer/models/llama/llama_kernels.h"
 #include "src/fastertransformer/models/llama/llama_utils.h"
 #include "src/fastertransformer/utils/cuda_type_utils.cuh"
-#include "src/fastertransformer/kernels/decoder_masked_multihead_attention_utils.h"
 namespace fastertransformer {
@@ -293,7 +293,8 @@ inline __device__ float2 float2div(float a, float2 b)
     return c;
 }
-static inline __device__ half4 char4_scale_to_half4(char4 value, const float scale) {
+static inline __device__ half4 char4_scale_to_half4(char4 value, const float scale)
+{
     half4 dst;
     dst.x = __float2half(value.x * scale);
     dst.y = __float2half(value.y * scale);
@@ -302,16 +303,18 @@ static inline __device__ half4 char4_scale_to_half4(char4 value, const float sca
     return dst;
 }
-static inline __device__ uint32_t float4_to_char4(float x,
-                                                  float y,
-                                                  float z,
-                                                  float w) {
+static inline __device__ uint32_t float4_to_char4(float x, float y, float z, float w)
+{
     uint32_t dst;
 #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 720
-    uint32_t a; asm volatile("cvt.rni.sat.s32.f32 %0, %1;\n" : "=r"(a) : "f"(x));
-    uint32_t b; asm volatile("cvt.rni.sat.s32.f32 %0, %1;\n" : "=r"(b) : "f"(y));
-    uint32_t c; asm volatile("cvt.rni.sat.s32.f32 %0, %1;\n" : "=r"(c) : "f"(z));
-    uint32_t d; asm volatile("cvt.rni.sat.s32.f32 %0, %1;\n" : "=r"(d) : "f"(w));
+    uint32_t a;
+    asm volatile("cvt.rni.sat.s32.f32 %0, %1;\n" : "=r"(a) : "f"(x));
+    uint32_t b;
+    asm volatile("cvt.rni.sat.s32.f32 %0, %1;\n" : "=r"(b) : "f"(y));
+    uint32_t c;
+    asm volatile("cvt.rni.sat.s32.f32 %0, %1;\n" : "=r"(c) : "f"(z));
+    uint32_t d;
+    asm volatile("cvt.rni.sat.s32.f32 %0, %1;\n" : "=r"(d) : "f"(w));
     asm volatile("cvt.pack.sat.s8.s32.b32 %0, %1, %2, 0;\n" : "=r"(dst) : "r"(d), "r"(c));
     asm volatile("cvt.pack.sat.s8.s32.b32 %0, %1, %2, %0;\n" : "+r"(dst) : "r"(b), "r"(a));
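For readers unfamiliar with the PTX above, the following plain-CUDA sketch (illustrative only, not the kernel's actual code path) shows what the two cvt instruction kinds are meant to compute: round each float to the nearest integer, saturate to the signed 8-bit range, and pack the four bytes with x in the lowest byte:

// Reference version of the saturating float4 -> packed char4 conversion.
// Assumes little-endian byte order, matching how the result is later read as a char4.
#include <cstdint>

static inline __device__ uint32_t float4_to_char4_ref(float x, float y, float z, float w)
{
    auto sat = [](float v) -> uint32_t {
        int i = __float2int_rn(v);       // round to nearest even, like cvt.rni
        i     = max(-128, min(127, i));  // saturate to the int8 range, like .sat.s8
        return static_cast<uint32_t>(i) & 0xffu;
    };
    return sat(x) | (sat(y) << 8) | (sat(z) << 16) | (sat(w) << 24);
}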
@@ -380,7 +383,6 @@ __global__ void extend_value_cache_int8(int8_t** v_dst,
 }
 }
-
 template<typename T>
 void invokeExtendKVCache(T** k_dst,
                          T** v_dst,
@@ -404,18 +406,48 @@ void invokeExtendKVCache(T** k_dst,
     dim3 grid((max_q_len * size_per_head / x + block_sz - 1) / block_sz, local_batch_size, local_head_num);
     if (quant & QuantPolicy::kCacheKVInt8) {
-        extend_value_cache_int8<<<grid, block_sz, 0, stream>>>(
-            reinterpret_cast<int8_t**>(k_dst), dst_offset, k_src, local_head_num, size_per_head, query_length, history_length, max_q_len, max_seq_len, kv_scale[0]);
-
-        extend_value_cache_int8<<<grid, block_sz, 0, stream>>>(
-            reinterpret_cast<int8_t**>(v_dst), dst_offset, v_src, local_head_num, size_per_head, query_length, history_length, max_q_len, max_seq_len, kv_scale[1]);
-
-    } else {
-        extend_value_cache<<<grid, block_sz, 0, stream>>>(
-            k_dst, dst_offset, k_src, local_head_num, size_per_head, query_length, history_length, max_q_len, max_seq_len);
-
-        extend_value_cache<<<grid, block_sz, 0, stream>>>(
-            v_dst, dst_offset, v_src, local_head_num, size_per_head, query_length, history_length, max_q_len, max_seq_len);
+        extend_value_cache_int8<<<grid, block_sz, 0, stream>>>(reinterpret_cast<int8_t**>(k_dst),
+                                                               dst_offset,
+                                                               k_src,
+                                                               local_head_num,
+                                                               size_per_head,
+                                                               query_length,
+                                                               history_length,
+                                                               max_q_len,
+                                                               max_seq_len,
+                                                               kv_scale[0]);
+
+        extend_value_cache_int8<<<grid, block_sz, 0, stream>>>(reinterpret_cast<int8_t**>(v_dst),
+                                                               dst_offset,
+                                                               v_src,
+                                                               local_head_num,
+                                                               size_per_head,
+                                                               query_length,
+                                                               history_length,
+                                                               max_q_len,
+                                                               max_seq_len,
+                                                               kv_scale[1]);
+    }
+    else {
+        extend_value_cache<<<grid, block_sz, 0, stream>>>(k_dst,
+                                                          dst_offset,
+                                                          k_src,
+                                                          local_head_num,
+                                                          size_per_head,
+                                                          query_length,
+                                                          history_length,
+                                                          max_q_len,
+                                                          max_seq_len);
+        extend_value_cache<<<grid, block_sz, 0, stream>>>(v_dst,
+                                                          dst_offset,
+                                                          v_src,
+                                                          local_head_num,
+                                                          size_per_head,
+                                                          query_length,
+                                                          history_length,
+                                                          max_q_len,
+                                                          max_seq_len);
     }
 }
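A minimal sketch of the int8 KV-cache round trip implied by these kernels, assuming kv_scale[i] acts as a dequantization factor (int8 value times scale approximates the original, as char4_scale_to_half4 above suggests); the helper names below are illustrative:

#include <cmath>
#include <cstdint>
#include <cstdio>

static int8_t quantize_kv(float v, float scale)
{
    int i = static_cast<int>(std::nearbyint(v / scale));  // divide by scale, round to nearest
    if (i > 127) { i = 127; }                              // saturate to the int8 range
    if (i < -128) { i = -128; }
    return static_cast<int8_t>(i);
}

static float dequantize_kv(int8_t q, float scale)
{
    return q * scale;  // mirrors char4_scale_to_half4: quantized value * scale
}

int main()
{
    const float kv_scale = 0.05f;  // assumed per-tensor scale
    float  v = 1.234f;
    int8_t q = quantize_kv(v, kv_scale);
    std::printf("%f -> %d -> %f\n", v, q, dequantize_kv(q, kv_scale));  // 1.234 -> 25 -> 1.25
    return 0;
}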
@@ -492,7 +524,6 @@ __global__ void transpose_value_cache(T* v_dst, //
 }
 }
-
 template<typename T>
 __global__ void transpose_value_cache_int8(T* v_dst,  //
                                            const int8_t** v_src,
@@ -562,13 +593,27 @@ void invokeTransposeKVCache(T* key_cache_trans,
     dim3 grid((max_kv_len * size_per_head / x + block_sz - 1) / block_sz, batch_size, head_num);
     if (quant & QuantPolicy::kCacheKVInt8) {
-        transpose_value_cache_int8<<<grid, block_sz, 0, stream>>>(
-            key_cache_trans, reinterpret_cast<const int8_t**>(key_cache), src_offset, head_num, size_per_head, key_length, max_kv_len, max_seq_len, kv_scale[0]);
-
-        transpose_value_cache_int8<<<grid, block_sz, 0, stream>>>(
-            val_cache_trans, reinterpret_cast<const int8_t**>(val_cache), src_offset, head_num, size_per_head, key_length, max_kv_len, max_seq_len, kv_scale[1]);
-
-    } else {
+        transpose_value_cache_int8<<<grid, block_sz, 0, stream>>>(key_cache_trans,
+                                                                  reinterpret_cast<const int8_t**>(key_cache),
+                                                                  src_offset,
+                                                                  head_num,
+                                                                  size_per_head,
+                                                                  key_length,
+                                                                  max_kv_len,
+                                                                  max_seq_len,
+                                                                  kv_scale[0]);
+        transpose_value_cache_int8<<<grid, block_sz, 0, stream>>>(val_cache_trans,
+                                                                  reinterpret_cast<const int8_t**>(val_cache),
+                                                                  src_offset,
+                                                                  head_num,
+                                                                  size_per_head,
+                                                                  key_length,
+                                                                  max_kv_len,
+                                                                  max_seq_len,
+                                                                  kv_scale[1]);
+    }
+    else {
         transpose_value_cache<<<grid, block_sz, 0, stream>>>(
             key_cache_trans, key_cache, src_offset, head_num, size_per_head, key_length, max_kv_len, max_seq_len);
@@ -577,10 +622,34 @@ void invokeTransposeKVCache(T* key_cache_trans,
 }
 }
-template void invokeTransposeKVCache(
-    float*, float*, const float**, const float**, size_t, int, const int*, int, int, int, int, cudaStream_t stream, int, const float*);
-template void invokeTransposeKVCache(
-    half*, half*, const half**, const half**, size_t, int, const int*, int, int, int, int, cudaStream_t stream, int, const float*);
+template void invokeTransposeKVCache(float*,
+                                     float*,
+                                     const float**,
+                                     const float**,
+                                     size_t,
+                                     int,
+                                     const int*,
+                                     int,
+                                     int,
+                                     int,
+                                     int,
+                                     cudaStream_t stream,
+                                     int,
+                                     const float*);
+template void invokeTransposeKVCache(half*,
+                                     half*,
+                                     const half**,
+                                     const half**,
+                                     size_t,
+                                     int,
+                                     const int*,
+                                     int,
+                                     int,
+                                     int,
+                                     int,
+                                     cudaStream_t stream,
+                                     int,
+                                     const float*);
 __global__ void gatherOutput(int* output_ids,
                              const int* ids,
......
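For context, the `template void invokeTransposeKVCache(...)` lines above are explicit template instantiations, and C++ allows the parameter names to be omitted there. A small self-contained example of the same feature (the function below is illustrative, not from this repository):

// Explicit instantiation definitions compile concrete versions of a template
// into this translation unit so other .cc/.cu files can link against them.
template<typename T>
void scale_buffer(T* data, int n, T factor)
{
    for (int i = 0; i < n; ++i) {
        data[i] *= factor;
    }
}

template void scale_buffer(float*, int, float);    // parameter names are optional here
template void scale_buffer(double*, int, double);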
@@ -18,8 +18,7 @@ enum QuantPolicy {
     kCacheKVInt8 = 0x04,
 };
-enum CmpMode
-{
+enum CmpMode {
     kCmpNone,
     kCmpRead,
     kCmpWrite,
......
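QuantPolicy values such as kCacheKVInt8 = 0x04 are bit flags: they combine with bitwise OR and are tested with bitwise AND, which is why the KV-cache launchers above check `quant & QuantPolicy::kCacheKVInt8`. A minimal sketch (the kWeightInt8 flag here is an assumed example, not the repository's enum):

#include <cstdio>

enum QuantPolicyDemo {      // illustrative copy, not the real enum
    kNone        = 0x00,
    kWeightInt8  = 0x01,    // assumed example flag
    kCacheKVInt8 = 0x04,
};

int main()
{
    int quant = kWeightInt8 | kCacheKVInt8;  // enable two independent policies
    if (quant & kCacheKVInt8) {
        std::printf("int8 KV cache path selected\n");
    }
    return 0;
}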
@@ -25,7 +25,8 @@
 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-// Modified from https://github.com/triton-inference-server/fastertransformer_backend/blob/main/src/libfastertransformer.cc
+// Modified from
+// https://github.com/triton-inference-server/fastertransformer_backend/blob/main/src/libfastertransformer.cc
 #include <stdint.h>
@@ -1399,7 +1400,7 @@ void ModelInstanceState::SetInputTensors(
     auto batch_input_name = batch_input.TargetNames()[0];
     // we only take care of the ragged input_ids
-    // Assume the first dimention (length) are different and others are the
+    // Assume the first dimension (length) are different and others are the
     // same BATCH_ITEM_SHAPE [num_requests (batches), num_dims (excluding
     // batch dimension)]
     if (batch_input_kind == BatchInput::Kind::BATCH_ITEM_SHAPE
@@ -1464,7 +1465,7 @@ void ModelInstanceState::SetInputTensors(
         param.batch_input_ptr + param.batch_intput_size,
         [&](int x) { return x != param.batch_input_ptr[0]; });
-    // calculate statics of elements
+    // calculate statistics of elements
     if (param.is_input_ragged) {
         param.max_elements_per_seq =
             *std::max_element(param.batch_input_ptr, param.batch_input_ptr + param.batch_intput_size);
......
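For context, a small sketch of the ragged-batch bookkeeping the code above performs: one length per request is inspected, the batch counts as ragged if any length differs from the first, and the maximum per-sequence element count is recorded. Names here are illustrative (`per_request_len` stands in for `param.batch_input_ptr`):

#include <algorithm>
#include <cstdio>
#include <vector>

int main()
{
    std::vector<int> per_request_len = {7, 12, 9};  // assumed per-request input_ids lengths

    // Ragged if any request's length differs from the first one.
    bool is_ragged = std::any_of(per_request_len.begin(), per_request_len.end(),
                                 [&](int x) { return x != per_request_len[0]; });

    int max_elements_per_seq = 0;
    if (is_ragged) {
        max_elements_per_seq = *std::max_element(per_request_len.begin(), per_request_len.end());
    }
    std::printf("ragged=%d, max=%d\n", is_ragged, max_elements_per_seq);  // ragged=1, max=12
    return 0;
}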
-# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-# * Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution.
-# * Neither the name of NVIDIA CORPORATION nor the names of its
-# contributors may be used to endorse or promote products derived
-# from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
-# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
-# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
-# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#Copyright(c) 2021 - 2022, NVIDIA CORPORATION.All rights reserved.
+#
+#Redistribution and use in source and binary forms, with or without
+#modification, are permitted provided that the following conditions
+#are met:
+#* Redistributions of source code must retain the above copyright
+#notice, this list of conditions and the following disclaimer.
+#* Redistributions in binary form must reproduce the above copyright
+#notice, this list of conditions and the following disclaimer in the
+#documentation and / or other materials provided with the distribution.
+#* Neither the name of NVIDIA CORPORATION nor the names of its
+#contributors may be used to endorse or promote products derived
+#from this software without specific prior written permission.
+#
+#THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+#EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+#IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+#PURPOSE ARE DISCLAIMED.IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+#CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+#EXEMPLARY, OR CONSEQUENTIAL DAMAGES(INCLUDING, BUT NOT LIMITED TO,
+#PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+#PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+#OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 {
 global:
     TRITONBACKEND_*;
-local: *;
+local:
+    *;
 };
@@ -309,7 +309,8 @@ std::string LlamaTritonModel<T>::toString()
        << "\nuse_context_fmha: " << use_context_fmha_ << "\nstart_id: " << start_id_
        << "\ntensor_para_size: " << tensor_para_size_ << "\npipeline_para_size: " << pipeline_para_size_
        << "\nenable_custom_all_reduce: " << enable_custom_all_reduce_ << "\nmodel_name: " << model_name_
-       << "\nprefix_cache_len: " << prefix_cache_len_ << "\nmodel_dir: " << model_dir_ << "\nquant_policy: " << quant_policy_ << std::endl;
+       << "\nprefix_cache_len: " << prefix_cache_len_ << "\nmodel_dir: " << model_dir_
+       << "\nquant_policy: " << quant_policy_ << std::endl;
     return ss.str();
 }
......
@@ -15,7 +15,8 @@
  * limitations under the License.
  */
-// Modified from https://github.com/NVIDIA/FasterTransformer/blob/main/src/fastertransformer/triton_backend/multi_gpu_gpt/ParallelGptTritonModel.h
+// Modified from
+// https://github.com/NVIDIA/FasterTransformer/blob/main/src/fastertransformer/triton_backend/multi_gpu_gpt/ParallelGptTritonModel.h
 #include "src/fastertransformer/triton_backend/llama/LlamaTritonModelInstance.h"
 #include "src/fastertransformer/triton_backend/transformer_triton_backend.hpp"
......
@@ -15,7 +15,8 @@
  * limitations under the License.
  */
-// Modified from https://github.com/NVIDIA/FasterTransformer/blob/main/src/fastertransformer/triton_backend/multi_gpu_gpt/ParallelGptTritonModel.h
+// Modified from
+// https://github.com/NVIDIA/FasterTransformer/blob/main/src/fastertransformer/triton_backend/multi_gpu_gpt/ParallelGptTritonModel.h
 #pragma once
......
@@ -15,7 +15,8 @@
  * limitations under the License.
  */
-// Modified from https://github.com/NVIDIA/FasterTransformer/blob/main/src/fastertransformer/triton_backend/transformer_triton_backend.cpp
+// Modified from
+// https://github.com/NVIDIA/FasterTransformer/blob/main/src/fastertransformer/triton_backend/transformer_triton_backend.cpp
 #include "src/fastertransformer/triton_backend/transformer_triton_backend.hpp"
 #include "src/fastertransformer/utils/nccl_utils.h"
......
@@ -15,7 +15,8 @@
  * limitations under the License.
  */
-// Modified from https://github.com/NVIDIA/FasterTransformer/blob/main/src/fastertransformer/triton_backend/transformer_triton_backend.hpp
+// Modified from
+// https://github.com/NVIDIA/FasterTransformer/blob/main/src/fastertransformer/triton_backend/transformer_triton_backend.hpp
 #pragma once
......
@@ -35,8 +35,7 @@
 namespace fastertransformer {
-typedef enum datatype_enum
-{
+typedef enum datatype_enum {
     TYPE_INVALID,
     TYPE_BOOL,
     TYPE_UINT8,
@@ -99,8 +98,7 @@ DataType getTensorType()
 }
 }
-typedef enum memorytype_enum
-{
+typedef enum memorytype_enum {
     MEMORY_CPU,
     MEMORY_CPU_PINNED,
     MEMORY_GPU
......