添加下载的代码

b30f3cdb · xiabo · e38ee081 · b30f3cdb · b30f3cdb · b30f3cdb
Commit b30f3cdb authored Nov 14, 2023 by xiabo
20 changed files
--- a/3rdparty/backend-r22.12/include/triton/backend/backend_output_responder.h
+++ b/3rdparty/backend-r22.12/include/triton/backend/backend_output_responder.h
--- a/3rdparty/backend-r22.12/src/backend_common.cc
+++ b/3rdparty/backend-r22.12/src/backend_common.cc
--- a/3rdparty/backend-r22.12/src/backend_input_collector.cc
+++ b/3rdparty/backend-r22.12/src/backend_input_collector.cc
--- a/3rdparty/backend-r22.12/src/backend_memory.cc
+++ b/3rdparty/backend-r22.12/src/backend_memory.cc
--- a/3rdparty/backend-r22.12/src/backend_model.cc
+++ b/3rdparty/backend-r22.12/src/backend_model.cc
--- a/3rdparty/backend-r22.12/src/backend_model_instance.cc
+++ b/3rdparty/backend-r22.12/src/backend_model_instance.cc
--- a/3rdparty/backend-r22.12/src/backend_output_responder.cc
+++ b/3rdparty/backend-r22.12/src/backend_output_responder.cc
--- a/3rdparty/backend-r22.12/src/kernel.cu
+++ b/3rdparty/backend-r22.12/src/kernel.cu
+// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#include "kernel.h"
+#include <cuda.h>
+#define THREADBLOCK_SIZE 512
+// Copies the input buffer of each request into its slot of a single output
+// buffer.
+//
+// Layout: the thread block with index blockIdx.x handles request blockIdx.x;
+// its threads cooperatively copy that request's bytes, each thread starting
+// at its threadIdx.x and advancing by THREADBLOCK_SIZE. The loops therefore
+// cover every element only when the kernel is launched with exactly
+// THREADBLOCK_SIZE threads per block. No shared memory is used.
+//
+// Parameters (all pointers must be dereferenceable from device code):
+//   input_ptr_buffer        - per-request pointer to the bytes to copy.
+//   byte_size_buffer        - per-request number of bytes to copy.
+//   byte_size_offset_buffer - per-request byte offset into 'output_buffer'
+//                             at which that request's bytes are written.
+//   output_buffer           - destination buffer.
+__launch_bounds__(THREADBLOCK_SIZE) __global__ void TritonGatherKernel(
+    const int8_t** __restrict input_ptr_buffer,
+    const size_t* __restrict byte_size_buffer,
+    const size_t* __restrict byte_size_offset_buffer,
+    int8_t* __restrict output_buffer)
+{
+  const int request_idx = blockIdx.x;
+  const size_t lane_id = threadIdx.x;
+  const int8_t* request_input_buffer = input_ptr_buffer[request_idx];
+  // Sizes and offsets stay size_t: narrowing them to int would truncate any
+  // request size or output offset of 2^31 bytes or more and make the copy
+  // target the wrong address (or copy nothing at all).
+  const size_t byte_size = byte_size_buffer[request_idx];
+  const size_t byte_size_offset = byte_size_offset_buffer[request_idx];
+  int8_t* output_buffer_with_offset = output_buffer + byte_size_offset;
+  if (((byte_size % 4) == 0) && (((uint64_t)request_input_buffer % 4) == 0) &&
+      (((uint64_t)output_buffer_with_offset % 4) == 0)) {
+    // Size and both addresses are multiples of 4, so the copy can move one
+    // 32-bit word per iteration instead of one byte.
+    const int32_t* input_4 = (const int32_t*)request_input_buffer;
+    int32_t* output_4 = (int32_t*)output_buffer_with_offset;
+    const size_t element_count = byte_size / 4;
+    for (size_t elem_id = lane_id; elem_id < element_count;
+         elem_id += THREADBLOCK_SIZE) {
+      output_4[elem_id] = input_4[elem_id];
+    }
+  } else {
+    // Fallback for unaligned pointers or sizes: copy byte by byte.
+    for (size_t elem_id = lane_id; elem_id < byte_size;
+         elem_id += THREADBLOCK_SIZE) {
+      output_buffer_with_offset[elem_id] =
+          __ldg(request_input_buffer + elem_id);
+    }
+  }
+}
+#ifdef __cplusplus
+extern "C" {
+#endif
+// Launches TritonGatherKernel on 'stream' with one thread block per request
+// and THREADBLOCK_SIZE threads per block. The launch is not synchronized
+// here, so the returned status only reflects what cudaGetLastError() reports
+// right after the launch (e.g. an invalid configuration), not faults that
+// occur while the kernel executes.
+//
+// Parameters:
+//   input_ptr_buffer, byte_size_buffer, byte_size_offset_buffer,
+//   output_buffer - forwarded unchanged to TritonGatherKernel.
+//   request_count - number of requests; used as the grid size.
+//   stream        - CUDA stream on which the kernel is enqueued.
+//
+// Returns cudaSuccess when 'request_count' is 0 (nothing is launched),
+// cudaErrorInvalidValue when 'request_count' exceeds the supported grid
+// size, otherwise the result of cudaGetLastError() after the launch.
+cudaError_t
+RunGatherKernel(
+    const int8_t** input_ptr_buffer, const size_t* byte_size_buffer,
+    const size_t* byte_size_offset_buffer, int8_t* output_buffer,
+    size_t request_count, cudaStream_t stream)
+{
+  // Nothing to gather. A zero-sized grid would be rejected as an invalid
+  // launch configuration even though there is no work to fail at.
+  if (request_count == 0) {
+    return cudaSuccess;
+  }
+  // The grid x-dimension is a 32-bit value limited to 2^31-1 blocks; refuse
+  // larger counts instead of letting the size_t silently wrap.
+  if (request_count > static_cast<size_t>(0x7FFFFFFF)) {
+    return cudaErrorInvalidValue;
+  }
+  const unsigned int block_count = static_cast<unsigned int>(request_count);
+  TritonGatherKernel<<<block_count, THREADBLOCK_SIZE, 0, stream>>>(
+      input_ptr_buffer, byte_size_buffer, byte_size_offset_buffer,
+      output_buffer);
+  return cudaGetLastError();
+}
+#ifdef __cplusplus
+}
+#endif
--- a/3rdparty/backend-r22.12/src/kernel.h
+++ b/3rdparty/backend-r22.12/src/kernel.h
--- a/3rdparty/common-r22.12/.clang-format
+++ b/3rdparty/common-r22.12/.clang-format
--- a/3rdparty/common-r22.12/.gitignore
+++ b/3rdparty/common-r22.12/.gitignore
+/build
+/.vscode
+*.so
--- a/3rdparty/common-r22.12/LICENSE
+++ b/3rdparty/common-r22.12/LICENSE
--- a/3rdparty/common-r22.12/README.md
+++ b/3rdparty/common-r22.12/README.md
--- a/3rdparty/common-r22.12/cmake/TritonCommonConfig.cmake.in
+++ b/3rdparty/common-r22.12/cmake/TritonCommonConfig.cmake.in
--- a/3rdparty/common-r22.12/include/triton/common/async_work_queue.h
+++ b/3rdparty/common-r22.12/include/triton/common/async_work_queue.h
--- a/3rdparty/common-r22.12/include/triton/common/error.h
+++ b/3rdparty/common-r22.12/include/triton/common/error.h
--- a/3rdparty/common-r22.12/include/triton/common/logging.h
+++ b/3rdparty/common-r22.12/include/triton/common/logging.h
--- a/3rdparty/common-r22.12/include/triton/common/model_config.h
+++ b/3rdparty/common-r22.12/include/triton/common/model_config.h
--- a/3rdparty/common-r22.12/include/triton/common/nvtx.h
+++ b/3rdparty/common-r22.12/include/triton/common/nvtx.h
--- a/3rdparty/common-r22.12/include/triton/common/sync_queue.h
+++ b/3rdparty/common-r22.12/include/triton/common/sync_queue.h