"doc/git@developer.sourcefind.cn:OpenDAS/ktransformers.git" did not exist on "25cee5810e8da6c2ce4611b413b0fb14c853b4a8"
Unverified Commit 5ea40abf authored by AllentDan, committed by GitHub

use format-11.1 (#38)

* format-11.1

* md-link-config
parent 9bbd39b7
{
  "ignorePatterns": [
    {
      "pattern": "^https://developer.nvidia.com/"
    },
    {
      "pattern": "^https://docs.openvino.ai/"
    },
    {
      "pattern": "^https://developer.android.com/"
    },
    {
      "pattern": "^https://developer.qualcomm.com/"
    },
    {
      "pattern": "^http://localhost"
    }
  ],
  "httpHeaders": [
    {
      "urls": ["https://github.com/", "https://guides.github.com/", "https://help.github.com/", "https://docs.github.com/"],
      "headers": {
        "Accept-Encoding": "zstd, br, gzip, deflate"
      }
    }
  ],
  "timeout": "20s",
  "retryOn429": true,
  "retryCount": 5,
  "fallbackRetryDelay": "30s",
  "aliveStatusCodes": [200, 206, 429]
}
......@@ -18,11 +18,11 @@ jobs:
       - name: Linting
         run: pre-commit run --all-files
       - name: Format c/cuda codes with clang-format
-        uses: DoozyX/clang-format-lint-action@v0.14
+        uses: DoozyX/clang-format-lint-action@v0.13
         with:
           source: src
           extensions: h,c,cpp,hpp,cu,cuh
-          clangFormatVersion: 14
+          clangFormatVersion: 11
           style: file
       - name: Check markdown link
         uses: gaurav-nelson/github-action-markdown-link-check@v1
......
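The JSON file above is a markdown-link-check configuration (the "md-link-config" part of the commit message): it skips link patterns that tend to block CI checkers, adds request headers for GitHub URLs, and retries on HTTP 429. A minimal sketch of how such a file is typically passed to the "Check markdown link" step in this hunk follows; the config-file input value and the .github/md-link-config.json path are assumptions for illustration, not taken from this diff.

      - name: Check markdown link
        uses: gaurav-nelson/github-action-markdown-link-check@v1
        with:
          # assumed filename/location; the actual path is not visible in this diff
          config-file: .github/md-link-config.json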
......@@ -398,9 +398,8 @@ template void invokeBuildRelativeAttentionBias(__nv_bfloat16* relat
template<typename T_OUT, typename T_IN>
__global__ void getLastTokenDequantize(getLastTokenDequantizeParam<T_OUT, T_IN> param)
{
param.output[blockIdx.x * param.d_model + threadIdx.x] =
(T_OUT)((float)param.input[blockIdx.x * param.max_seq_len * param.d_model + threadIdx.x]
* __ldg(param.input_scale));
param.output[blockIdx.x * param.d_model + threadIdx.x] = (T_OUT)(
(float)param.input[blockIdx.x * param.max_seq_len * param.d_model + threadIdx.x] * __ldg(param.input_scale));
}
template<typename T_OUT, typename T_IN>
......
......@@ -24,7 +24,8 @@
namespace turbomind {
enum class PositionEmbeddingType {
enum class PositionEmbeddingType
{
relative,
absolute,
};
......
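The remaining hunks appear to be mechanical reformatting from pinning clang-format 11: enum opening braces move onto their own line (as in the hunk above) and continuation-line alignment shifts slightly. For illustration only, here is a brace-wrapping fragment of the kind that yields this enum brace placement; it is a sketch, not this repository's verified .clang-format settings.

# illustrative .clang-format fragment (assumption; not taken from this repository)
# BraceWrapping.AfterEnum places an enum's opening brace on its own line,
# matching the enum reformatting in the surrounding hunks
BreakBeforeBraces: Custom
BraceWrapping:
  AfterEnum: true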
......@@ -23,7 +23,8 @@
namespace turbomind {
enum class RepetitionPenaltyType {
enum class RepetitionPenaltyType
{
Additive, // the presence penalty
Multiplicative, // the repetition penalty
None // No repetition penalty.
......
......@@ -82,7 +82,8 @@ void invokeAddBiasSoftMax(T* logits,
cudaStream_t stream);
namespace segmented_topp_impl {
enum DType_t {
enum DType_t
{
kFLOAT,
kHALF,
kINT8
......@@ -95,14 +96,17 @@ template<typename Key_Data_Type_ = float,
struct Segmented_topk_kernel_params {
typedef Key_Data_Type_ Key_Data_Type;
typedef Value_Data_Type_ Value_Data_Type;
enum {
enum
{
BLOCK_THREADS = BLOCK_THREADS_
};
enum {
enum
{
ITEMS_INCREMENT = 32
};
// enum { KEYS_PER_LDG = 2 * 4 / sizeof(Key_Data_Type_) };
enum {
enum
{
KEYS_PER_LDG = KEYS_PER_LDG_
};
};
......
......@@ -40,7 +40,7 @@ public:
virtual void setup(const size_t batch_size, const size_t beam_width, TensorMap* runtime_args) = 0;
virtual void forward(std::vector<turbomind::Tensor>* output_tensors,
const std::vector<turbomind::Tensor>* input_tensors) = 0;
const std::vector<turbomind::Tensor>* input_tensors) = 0;
virtual void forward(std::unordered_map<std::string, Tensor>* output_tensors,
const std::unordered_map<std::string, Tensor>* input_tensors) = 0;
virtual void forward(TensorMap* output_tensors, TensorMap* input_tensors) = 0;
......
......@@ -23,7 +23,7 @@ namespace turbomind {
template<typename T>
void FfnLayer<T>::forward(std::vector<turbomind::Tensor>* output_tensors,
const std::vector<turbomind::Tensor>* input_tensors,
const FfnWeight<T>* ffn_weights)
const FfnWeight<T>* ffn_weights)
{
TensorMap input_tensor({{"ffn_input", input_tensors->at(0)}});
TensorMap output_tensor({{"ffn_output", output_tensors->at(0)}});
......
......@@ -124,7 +124,7 @@ public:
virtual void forward(std::vector<turbomind::Tensor>* output_tensors,
const std::vector<turbomind::Tensor>* input_tensors,
const FfnWeight<T>* ffn_weights);
const FfnWeight<T>* ffn_weights);
virtual void forward(TensorMap* output_tensors, TensorMap* input_tensors, const FfnWeight<T>* ffn_weights);
};
......
......@@ -22,7 +22,7 @@ namespace turbomind {
template<typename T>
void FfnLayerINT8<T>::forward(std::vector<turbomind::Tensor>* output_tensors,
const std::vector<turbomind::Tensor>* input_tensors,
const FfnWeight<T>* ffn_weights)
const FfnWeight<T>* ffn_weights)
{
// input_tensors: [input (token_num, hidden_dimension)]
// output_tensors: [output (token_num, hidden_dimension)]
......
......@@ -79,7 +79,7 @@ public:
void forward(std::vector<turbomind::Tensor>* output_tensors,
const std::vector<turbomind::Tensor>* input_tensors,
const FfnWeight<T>* ffn_weights);
const FfnWeight<T>* ffn_weights);
friend GeluFfnLayerINT8<T>;
friend ReluFfnLayerINT8<T>;
......
......@@ -30,7 +30,8 @@
namespace turbomind {
enum class AttentionType {
enum class AttentionType
{
UNFUSED_MHA,
UNFUSED_PADDED_MHA,
FUSED_MHA,
......
......@@ -15,9 +15,9 @@ public:
pthread_barrier_init(&barrier_, nullptr, count);
}
Barrier(const Barrier&) = delete;
Barrier& operator=(const Barrier&) = delete;
Barrier(Barrier&&) noexcept = delete;
Barrier(const Barrier&) = delete;
Barrier& operator=(const Barrier&) = delete;
Barrier(Barrier&&) noexcept = delete;
Barrier& operator=(Barrier&&) noexcept = delete;
void wait()
......
......@@ -35,7 +35,7 @@ public:
size_t tensor_para_size,
size_t tensor_para_rank);
~LlamaDecoderLayerWeight();
LlamaDecoderLayerWeight(const LlamaDecoderLayerWeight& other) = delete;
LlamaDecoderLayerWeight(const LlamaDecoderLayerWeight& other) = delete;
LlamaDecoderLayerWeight& operator=(const LlamaDecoderLayerWeight& other) = delete;
void loadModel(std::string dir_path, FtCudaDataType model_file_type);
......
......@@ -25,7 +25,8 @@
namespace turbomind {
enum class WeightType : int {
enum class WeightType : int
{
kFP32,
kFP16,
kFP8, // not supported yet
......
......@@ -40,7 +40,7 @@ struct LlamaWeight {
~LlamaWeight();
LlamaWeight(const LlamaWeight& other) = delete;
LlamaWeight(const LlamaWeight& other) = delete;
LlamaWeight& operator=(const LlamaWeight& other) = delete;
void loadModel(std::string dir_path);
......
......@@ -25,7 +25,8 @@ struct Request {
using Callback = std::function<void(std::unordered_map<std::string, Tensor>*)>;
Callback stream_cb;
enum {
enum
{
kInvalid = 1,
kConflict = 2,
kBusy = 3,
......
......@@ -9,7 +9,8 @@
namespace turbomind {
enum QuantPolicy {
enum QuantPolicy
{
kNone = 0x00,
// reserve 0x01 and 0x02 for backward compatibility
kReserve1 = 0x01,
......@@ -18,7 +19,8 @@ enum QuantPolicy {
kCacheKVInt8 = 0x04,
};
enum CmpMode {
enum CmpMode
{
kCmpNone,
kCmpRead,
kCmpWrite,
......
......@@ -1159,8 +1159,7 @@ void streaming_callback(std::shared_ptr<std::unordered_map<std::string, Tensor>>
for (auto& response : *responses) {
if (response != nullptr) {
LOG_MESSAGE(TRITONSERVER_LOG_VERBOSE, (std::string("start to send streaming response")).c_str());
LOG_IF_ERROR(TRITONBACKEND_ResponseSend(response, 0, nullptr),
"failed to send TurboMind backend response");
LOG_IF_ERROR(TRITONBACKEND_ResponseSend(response, 0, nullptr), "failed to send TurboMind backend response");
LOG_MESSAGE(TRITONSERVER_LOG_VERBOSE, (std::string("streaming response is sent")).c_str());
}
else {
......@@ -1354,11 +1353,10 @@ ModelInstanceState::Execute(std::vector<TRITONBACKEND_Response*>*
}
}
catch (std::exception& ex) {
SendErrorForResponses(
responses,
response_count,
TRITONSERVER_ErrorNew(TRITONSERVER_ERROR_INTERNAL,
("TurboMind execute failure: " + std::string(ex.what())).c_str()));
SendErrorForResponses(responses,
response_count,
TRITONSERVER_ErrorNew(TRITONSERVER_ERROR_INTERNAL,
("TurboMind execute failure: " + std::string(ex.what())).c_str()));
}
auto output_tensors = output_tensors_list[0];
return output_tensors;
......
#Copyright(c) 2021 - 2022, NVIDIA CORPORATION.All rights reserved.
# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
#
#Redistribution and use in source and binary forms, with or without
#modification, are permitted provided that the following conditions
#are met:
#* Redistributions of source code must retain the above copyright
#notice, this list of conditions and the following disclaimer.
#* Redistributions in binary form must reproduce the above copyright
#notice, this list of conditions and the following disclaimer in the
#documentation and / or other materials provided with the distribution.
#* Neither the name of NVIDIA CORPORATION nor the names of its
#contributors may be used to endorse or promote products derived
#from this software without specific prior written permission.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
#THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
#EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
#IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
#PURPOSE ARE DISCLAIMED.IN NO EVENT SHALL THE COPYRIGHT OWNER OR
#CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
#EXEMPLARY, OR CONSEQUENTIAL DAMAGES(INCLUDING, BUT NOT LIMITED TO,
#PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
#PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
#OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
#(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
#OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
{
global:
global:
TRITONBACKEND_*;
local:
*;
local: *;
};