Unverified commit 9efcac38, authored by Li Zhang and committed by GitHub

check-in fastertransformer (#7)

* add ft code

* gitignore

* fix lint

* revert fmha
parent 720fc533
@@ -3,3 +3,5 @@
__pycache__/
*.egg-info/
workspace/
.cache
*build*/
// Read an INI file into easy-to-access name/value pairs.
// inih and INIReader are released under the New BSD license.
// Go to the project home page for more info:
//
// https://github.com/benhoyt/inih (Initial repo)
// https://github.com/jtilly/inih (The reference of this header file)
/* inih -- simple .INI file parser
inih is released under the New BSD license (see LICENSE.txt). Go to the project
home page for more info:
https://github.com/benhoyt/inih
https://github.com/jtilly/inih
*/
#ifndef __INI_H__
#define __INI_H__
/* Make this header file easier to include in C++ code */
#ifdef __cplusplus
extern "C" {
#endif
#include <stdio.h>
/* Typedef for prototype of handler function. */
typedef int (*ini_handler)(void* user, const char* section,
const char* name, const char* value);
/* Typedef for prototype of fgets-style reader function. */
typedef char* (*ini_reader)(char* str, int num, void* stream);
/* Parse given INI-style file. May have [section]s, name=value pairs
(whitespace stripped), and comments starting with ';' (semicolon). Section
is "" if name=value pair parsed before any section heading. name:value
pairs are also supported as a concession to Python's configparser.
For each name=value pair parsed, call handler function with given user
pointer as well as section, name, and value (data only valid for duration
of handler call). Handler should return nonzero on success, zero on error.
Returns 0 on success, line number of first error on parse error (doesn't
stop on first error), -1 on file open error, or -2 on memory allocation
error (only when INI_USE_STACK is zero).
*/
int ini_parse(const char* filename, ini_handler handler, void* user);
/* Same as ini_parse(), but takes a FILE* instead of filename. This doesn't
close the file when it's finished -- the caller must do that. */
int ini_parse_file(FILE* file, ini_handler handler, void* user);
/* Same as ini_parse(), but takes an ini_reader function pointer instead of
filename. Used for implementing custom or string-based I/O. */
int ini_parse_stream(ini_reader reader, void* stream, ini_handler handler,
void* user);
/* Nonzero to allow multi-line value parsing, in the style of Python's
configparser. If allowed, ini_parse() will call the handler with the same
name for each subsequent line parsed. */
#ifndef INI_ALLOW_MULTILINE
#define INI_ALLOW_MULTILINE 1
#endif
/* Nonzero to allow a UTF-8 BOM sequence (0xEF 0xBB 0xBF) at the start of
the file. See http://code.google.com/p/inih/issues/detail?id=21 */
#ifndef INI_ALLOW_BOM
#define INI_ALLOW_BOM 1
#endif
/* Nonzero to allow inline comments (with valid inline comment characters
specified by INI_INLINE_COMMENT_PREFIXES). Set to 0 to turn off and match
Python 3.2+ configparser behaviour. */
#ifndef INI_ALLOW_INLINE_COMMENTS
#define INI_ALLOW_INLINE_COMMENTS 1
#endif
#ifndef INI_INLINE_COMMENT_PREFIXES
#define INI_INLINE_COMMENT_PREFIXES ";"
#endif
/* Nonzero to use stack, zero to use heap (malloc/free). */
#ifndef INI_USE_STACK
#define INI_USE_STACK 1
#endif
/* Stop parsing on first error (default is to keep parsing). */
#ifndef INI_STOP_ON_FIRST_ERROR
#define INI_STOP_ON_FIRST_ERROR 0
#endif
/* Maximum line length for any line in INI file. */
#ifndef INI_MAX_LINE
#define INI_MAX_LINE 200
#endif
#ifdef __cplusplus
}
#endif
/* inih -- simple .INI file parser
inih is released under the New BSD license (see LICENSE.txt). Go to the project
home page for more info:
https://github.com/benhoyt/inih
*/
#if defined(_MSC_VER) && !defined(_CRT_SECURE_NO_WARNINGS)
#define _CRT_SECURE_NO_WARNINGS
#endif
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#if !INI_USE_STACK
#include <stdlib.h>
#endif
#define MAX_SECTION 50
#define MAX_NAME 50
/* Strip whitespace chars off end of given string, in place. Return s. */
inline static char* rstrip(char* s)
{
char* p = s + strlen(s);
while (p > s && isspace((unsigned char)(*--p)))
*p = '\0';
return s;
}
/* Return pointer to first non-whitespace char in given string. */
inline static char* lskip(const char* s)
{
while (*s && isspace((unsigned char)(*s)))
s++;
return (char*)s;
}
/* Return pointer to first char (of chars) or inline comment in given string,
or pointer to null at end of string if neither found. Inline comment must
be prefixed by a whitespace character to register as a comment. */
inline static char* find_chars_or_comment(const char* s, const char* chars)
{
#if INI_ALLOW_INLINE_COMMENTS
int was_space = 0;
while (*s && (!chars || !strchr(chars, *s)) &&
!(was_space && strchr(INI_INLINE_COMMENT_PREFIXES, *s))) {
was_space = isspace((unsigned char)(*s));
s++;
}
#else
while (*s && (!chars || !strchr(chars, *s))) {
s++;
}
#endif
return (char*)s;
}
/* Version of strncpy that ensures dest (size bytes) is null-terminated. */
inline static char* strncpy0(char* dest, const char* src, size_t size)
{
strncpy(dest, src, size);
dest[size - 1] = '\0';
return dest;
}
/* See documentation in header file. */
inline int ini_parse_stream(ini_reader reader, void* stream, ini_handler handler,
void* user)
{
/* Uses a fair bit of stack (use heap instead if you need to) */
#if INI_USE_STACK
char line[INI_MAX_LINE];
#else
char* line;
#endif
char section[MAX_SECTION] = "";
char prev_name[MAX_NAME] = "";
char* start;
char* end;
char* name;
char* value;
int lineno = 0;
int error = 0;
#if !INI_USE_STACK
line = (char*)malloc(INI_MAX_LINE);
if (!line) {
return -2;
}
#endif
/* Scan through stream line by line */
while (reader(line, INI_MAX_LINE, stream) != NULL) {
lineno++;
start = line;
#if INI_ALLOW_BOM
if (lineno == 1 && (unsigned char)start[0] == 0xEF &&
(unsigned char)start[1] == 0xBB &&
(unsigned char)start[2] == 0xBF) {
start += 3;
}
#endif
start = lskip(rstrip(start));
if (*start == ';' || *start == '#') {
/* Per Python configparser, allow both ; and # comments at the
start of a line */
}
#if INI_ALLOW_MULTILINE
else if (*prev_name && *start && start > line) {
#if INI_ALLOW_INLINE_COMMENTS
end = find_chars_or_comment(start, NULL);
if (*end)
*end = '\0';
rstrip(start);
#endif
/* Non-blank line with leading whitespace, treat as continuation
of previous name's value (as per Python configparser). */
if (!handler(user, section, prev_name, start) && !error)
error = lineno;
}
#endif
else if (*start == '[') {
/* A "[section]" line */
end = find_chars_or_comment(start + 1, "]");
if (*end == ']') {
*end = '\0';
strncpy0(section, start + 1, sizeof(section));
*prev_name = '\0';
}
else if (!error) {
/* No ']' found on section line */
error = lineno;
}
}
else if (*start) {
/* Not a comment, must be a name[=:]value pair */
end = find_chars_or_comment(start, "=:");
if (*end == '=' || *end == ':') {
*end = '\0';
name = rstrip(start);
value = lskip(end + 1);
#if INI_ALLOW_INLINE_COMMENTS
end = find_chars_or_comment(value, NULL);
if (*end)
*end = '\0';
#endif
rstrip(value);
/* Valid name[=:]value pair found, call handler */
strncpy0(prev_name, name, sizeof(prev_name));
if (!handler(user, section, name, value) && !error)
error = lineno;
}
else if (!error) {
/* No '=' or ':' found on name[=:]value line */
error = lineno;
}
}
#if INI_STOP_ON_FIRST_ERROR
if (error)
break;
#endif
}
#if !INI_USE_STACK
free(line);
#endif
return error;
}
/* See documentation in header file. */
inline int ini_parse_file(FILE* file, ini_handler handler, void* user)
{
return ini_parse_stream((ini_reader)fgets, file, handler, user);
}
/* See documentation in header file. */
inline int ini_parse(const char* filename, ini_handler handler, void* user)
{
FILE* file;
int error;
file = fopen(filename, "r");
if (!file)
return -1;
error = ini_parse_file(file, handler, user);
fclose(file);
return error;
}
#endif /* __INI_H__ */
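/* Usage sketch (illustrative, not part of inih): a minimal handler that prints
   every name=value pair. Compiled only when INIH_USAGE_EXAMPLE is defined, so
   the header stays header-only; "config.ini" is a placeholder path and the
   error codes follow the ini_parse() documentation above. */
#ifdef INIH_USAGE_EXAMPLE
static int dump_handler(void* user, const char* section, const char* name,
                        const char* value)
{
    (void)user; /* user pointer unused in this sketch */
    printf("[%s] %s = %s\n", section, name, value);
    return 1; /* nonzero tells ini_parse() the pair was handled successfully */
}
int main(void)
{
    int err = ini_parse("config.ini", dump_handler, NULL);
    if (err == -1)
        printf("could not open config.ini\n");
    else if (err > 0)
        printf("parse error on line %d\n", err);
    return err;
}
#endif /* INIH_USAGE_EXAMPLE */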
#ifndef __INIREADER_H__
#define __INIREADER_H__
#include <map>
#include <set>
#include <string>
// Read an INI file into easy-to-access name/value pairs. (Note that I've gone
// for simplicity here rather than speed, but it should be pretty decent.)
class INIReader
{
public:
    // Default constructor: creates an empty reader with no parsed values
    INIReader() {}
// Construct INIReader and parse given filename. See ini.h for more info
// about the parsing.
INIReader(std::string filename);
// Construct INIReader and parse given file. See ini.h for more info
// about the parsing.
INIReader(FILE *file);
~INIReader();
// Return the result of ini_parse(), i.e., 0 on success, line number of
// first error on parse error, or -1 on file open error.
int ParseError() const;
// Return the list of sections found in ini file
const std::set<std::string>& Sections() const;
// Get a string value from INI file, returning default_value if not found.
std::string Get(std::string section, std::string name,
std::string default_value) const;
std::string Get(std::string section, std::string name) const;
// Get an integer (long) value from INI file, returning default_value if
// not found or not a valid integer (decimal "1234", "-1234", or hex "0x4d2").
long GetInteger(std::string section, std::string name, long default_value) const;
long GetInteger(std::string section, std::string name) const;
// Get a real (floating point double) value from INI file, returning
// default_value if not found or not a valid floating point value
// according to strtod().
double GetReal(std::string section, std::string name, double default_value) const;
// Get a single precision floating point number value from INI file, returning
// default_value if not found or not a valid floating point value
// according to strtof().
float GetFloat(std::string section, std::string name, float default_value) const;
float GetFloat(std::string section, std::string name) const;
// Get a boolean value from INI file, returning default_value if not found or if
// not a valid true/false value. Valid true values are "true", "yes", "on", "1",
// and valid false values are "false", "no", "off", "0" (not case sensitive).
bool GetBoolean(std::string section, std::string name, bool default_value) const;
protected:
int _error;
std::map<std::string, std::string> _values;
std::set<std::string> _sections;
static std::string MakeKey(std::string section, std::string name);
static int ValueHandler(void* user, const char* section, const char* name,
const char* value);
};
#endif // __INIREADER_H__
#ifndef __INIREADER__
#define __INIREADER__
#include <algorithm>
#include <cctype>
#include <cstdlib>
inline INIReader::INIReader(std::string filename)
{
_error = ini_parse(filename.c_str(), ValueHandler, this);
}
inline INIReader::INIReader(FILE *file)
{
_error = ini_parse_file(file, ValueHandler, this);
}
inline int INIReader::ParseError() const
{
return _error;
}
inline INIReader::~INIReader() { }
inline const std::set<std::string>& INIReader::Sections() const
{
return _sections;
}
inline std::string INIReader::Get(std::string section, std::string name, std::string default_value) const
{
std::string key = MakeKey(section, name);
return _values.count(key) ? _values.at(key) : default_value;
}
inline std::string INIReader::Get(std::string section, std::string name) const
{
std::string key = MakeKey(section, name);
if(_values.count(key)) return _values.at(key);
else
{
printf("[ERROR] Does not find the section %s with name %s. \n", section.c_str(), name.c_str());
exit(-1);
}
}
inline long INIReader::GetInteger(std::string section, std::string name, long default_value) const
{
std::string valstr = Get(section, name, "");
const char* value = valstr.c_str();
char* end;
// This parses "1234" (decimal) and also "0x4D2" (hex)
long n = strtol(value, &end, 0);
return end > value ? n : default_value;
}
inline long INIReader::GetInteger(std::string section, std::string name) const
{
std::string valstr = Get(section, name, "");
const char* value = valstr.c_str();
char* end;
// This parses "1234" (decimal) and also "0x4D2" (hex)
long n = strtol(value, &end, 0);
if(end <= value)
{
printf("[ERROR] Does not find the section %s with name %s. \n", section.c_str(), name.c_str());
exit(-1);
}
return n;
}
inline double INIReader::GetReal(std::string section, std::string name, double default_value) const
{
std::string valstr = Get(section, name, "");
const char* value = valstr.c_str();
char* end;
double n = strtod(value, &end);
return end > value ? n : default_value;
}
inline float INIReader::GetFloat(std::string section, std::string name, float default_value) const
{
std::string valstr = Get(section, name, "");
const char* value = valstr.c_str();
char* end;
float n = strtof(value, &end);
return end > value ? n : default_value;
}
inline float INIReader::GetFloat(std::string section, std::string name) const
{
std::string valstr = Get(section, name, "");
const char* value = valstr.c_str();
char* end;
float n = strtof(value, &end);
if(end <= value)
{
printf("[ERROR] Does not find the section %s with name %s. \n", section.c_str(), name.c_str());
exit(-1);
}
return n;
}
inline bool INIReader::GetBoolean(std::string section, std::string name, bool default_value) const
{
std::string valstr = Get(section, name, "");
// Convert to lower case to make string comparisons case-insensitive
std::transform(valstr.begin(), valstr.end(), valstr.begin(), ::tolower);
if (valstr == "true" || valstr == "yes" || valstr == "on" || valstr == "1")
return true;
else if (valstr == "false" || valstr == "no" || valstr == "off" || valstr == "0")
return false;
else
return default_value;
}
inline std::string INIReader::MakeKey(std::string section, std::string name)
{
std::string key = section + "=" + name;
// Convert to lower case to make section/name lookups case-insensitive
std::transform(key.begin(), key.end(), key.begin(), ::tolower);
return key;
}
inline int INIReader::ValueHandler(void* user, const char* section, const char* name,
const char* value)
{
INIReader* reader = (INIReader*)user;
std::string key = MakeKey(section, name);
if (reader->_values[key].size() > 0)
reader->_values[key] += "\n";
reader->_values[key] += value;
reader->_sections.insert(section);
return 1;
}
#endif // __INIREADER__
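// Usage sketch (illustrative): reading the [request] section of the llama INI
// config shipped in this commit; "llama_config.ini" is a placeholder path.
// Guarded by a macro so the header stays header-only.
#ifdef INIREADER_USAGE_EXAMPLE
#include <cstdio>
int main()
{
    INIReader reader("llama_config.ini");
    if (reader.ParseError() != 0) {
        std::printf("parse failed (code %d)\n", reader.ParseError());
        return 1;
    }
    long batch = reader.GetInteger("request", "request_batch_size", 1);
    std::string dtype = reader.Get("ft_instance_hyperparameter", "data_type", "fp16");
    std::printf("batch=%ld data_type=%s\n", batch, dtype.c_str());
    return 0;
}
#endif // INIREADER_USAGE_EXAMPLE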
# Copyright (c) 2019-2023, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
cmake_minimum_required(VERSION 3.11 FATAL_ERROR) # note: PyTorch extensions require a CMake version greater than 3.13
project(FasterTransformer LANGUAGES CXX CUDA)
find_package(CUDA 10.2 REQUIRED)
if(${CUDA_VERSION_MAJOR} VERSION_GREATER_EQUAL "11")
    add_definitions("-DENABLE_BF16")
    message("CUDA_VERSION ${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR} is greater than or equal to 11.0, enabling -DENABLE_BF16 flag")
endif()
if((${CUDA_VERSION_MAJOR} VERSION_GREATER_EQUAL "11" AND ${CUDA_VERSION_MINOR} VERSION_GREATER_EQUAL "8") OR (${CUDA_VERSION_MAJOR} VERSION_GREATER_EQUAL "12"))
    option(ENABLE_FP8 "ENABLE_FP8" OFF)
    if(ENABLE_FP8)
        add_definitions("-DENABLE_FP8")
        message("CUDA_VERSION ${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR} is greater than or equal to 11.8, enabling -DENABLE_FP8 flag")
    endif()
endif()
set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/Modules)
option(BUILD_PYT "Build in PyTorch TorchScript class mode" OFF)
if(NOT BUILD_MULTI_GPU)
option(BUILD_MULTI_GPU "Build project about multi-GPU" OFF)
endif()
if(NOT USE_TRITONSERVER_DATATYPE)
option(USE_TRITONSERVER_DATATYPE "Build triton backend for triton server" OFF)
endif()
include(FetchContent)
FetchContent_Declare(
repo-cutlass
GIT_REPOSITORY https://github.com/NVIDIA/cutlass.git
GIT_TAG cc85b64cf676c45f98a17e3a47c0aafcf817f088
)
set(CUTLASS_ENABLE_HEADERS_ONLY ON CACHE BOOL "Enable only the header library")
FetchContent_MakeAvailable(repo-cutlass)
set(CUTLASS_HEADER_DIR ${PROJECT_SOURCE_DIR}/3rdparty/cutlass/include)
set(CUTLASS_EXTENSIONS_DIR ${PROJECT_SOURCE_DIR}/src/fastertransformer/cutlass_extensions/include)
option(SPARSITY_SUPPORT "Build project with Ampere sparsity feature support" OFF)
option(BUILD_FAST_MATH "Build in fast math mode" ON)
if(BUILD_MULTI_GPU)
message(STATUS "Add DBUILD_MULTI_GPU, requires MPI and NCCL")
add_definitions("-DBUILD_MULTI_GPU")
set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/Modules)
find_package(MPI REQUIRED)
find_package(NCCL REQUIRED)
set(CMAKE_MODULE_PATH "") # prevent the bugs for pytorch building
endif()
if(BUILD_PYT)
if(DEFINED ENV{NVIDIA_PYTORCH_VERSION})
if($ENV{NVIDIA_PYTORCH_VERSION} VERSION_LESS "20.03")
message(FATAL_ERROR "NVIDIA PyTorch image is too old for TorchScript mode.")
endif()
if($ENV{NVIDIA_PYTORCH_VERSION} VERSION_EQUAL "20.03")
add_definitions(-DLEGACY_THS=1)
endif()
endif()
endif()
if(USE_TRITONSERVER_DATATYPE)
message("-- USE_TRITONSERVER_DATATYPE")
add_definitions("-DUSE_TRITONSERVER_DATATYPE")
endif()
set(CXX_STD "14" CACHE STRING "C++ standard")
set(CUDA_PATH ${CUDA_TOOLKIT_ROOT_DIR})
set(TF_PATH "" CACHE STRING "TensorFlow path")
set(CUSPARSELT_PATH "" CACHE STRING "cuSPARSELt path")
if((BUILD_TF OR BUILD_TF2) AND NOT TF_PATH)
message(FATAL_ERROR "TF_PATH must be set if BUILD_TF or BUILD_TF2 (=TensorFlow mode) is on.")
endif()
list(APPEND CMAKE_MODULE_PATH ${CUDA_PATH}/lib64)
# profiling
option(USE_NVTX "Whether or not to use nvtx" ON)
if(USE_NVTX)
message(STATUS "NVTX is enabled.")
add_definitions("-DUSE_NVTX")
endif()
# setting compiler flags
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler -Wall -ldl") # -Xptxas -v
set(SM_SETS 52 60 61 70 75 80 86 89 90)
set(USING_WMMA False)
set(FIND_SM False)
foreach(SM_NUM IN LISTS SM_SETS)
string(FIND "${SM}" "${SM_NUM}" SM_POS)
if(SM_POS GREATER -1)
if(FIND_SM STREQUAL False)
set(ENV{TORCH_CUDA_ARCH_LIST} "")
endif()
set(FIND_SM True)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_${SM_NUM},code=\\\"sm_${SM_NUM},compute_${SM_NUM}\\\"")
if (SM_NUM STREQUAL 70 OR SM_NUM STREQUAL 75 OR SM_NUM STREQUAL 80 OR SM_NUM STREQUAL 86 OR SM_NUM STREQUAL 89 OR SM_NUM STREQUAL 90)
set(USING_WMMA True)
endif()
if(BUILD_PYT)
string(SUBSTRING ${SM_NUM} 0 1 SM_MAJOR)
string(SUBSTRING ${SM_NUM} 1 1 SM_MINOR)
set(ENV{TORCH_CUDA_ARCH_LIST} "$ENV{TORCH_CUDA_ARCH_LIST}\;${SM_MAJOR}.${SM_MINOR}")
endif()
list(APPEND CMAKE_CUDA_ARCHITECTURES ${SM_NUM})
message("-- Assign GPU architecture (sm=${SM_NUM})")
endif()
endforeach()
if(USING_WMMA STREQUAL True)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DWMMA")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DWMMA")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -DWMMA")
message("-- Use WMMA")
endif()
if(NOT (FIND_SM STREQUAL True))
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} \
-gencode=arch=compute_70,code=\\\"sm_70,compute_70\\\" \
-gencode=arch=compute_75,code=\\\"sm_75,compute_75\\\" \
-gencode=arch=compute_80,code=\\\"sm_80,compute_80\\\" \
-gencode=arch=compute_86,code=\\\"sm_86,compute_86\\\" \
")
# -rdc=true")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DWMMA")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DWMMA")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -DWMMA")
if(BUILD_PYT)
set(ENV{TORCH_CUDA_ARCH_LIST} "7.0;7.5;8.0;8.6")
endif()
set(CMAKE_CUDA_ARCHITECTURES 70 75 80 86)
message("-- Assign GPU architecture (sm=70,75,80,86)")
endif()
if(BUILD_PYT)
set(TORCH_CUDA_ARCH_LIST $ENV{TORCH_CUDA_ARCH_LIST})
endif()
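# Example (illustrative): configuring with `cmake -DSM=80 ..` makes the loop
# above emit only -gencode=arch=compute_80,code="sm_80,compute_80" and append
# 80 to CMAKE_CUDA_ARCHITECTURES; leaving SM unset falls through to the default
# 70/75/80/86 set (and TORCH_CUDA_ARCH_LIST "7.0;7.5;8.0;8.6" when BUILD_PYT is on).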
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -Wall -O0")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Wall -O0")
# set(CMAKE_CUDA_FLAGS_DEBUG "${CMAKE_CUDA_FLAGS_DEBUG} -O0 -G -Xcompiler -Wall --ptxas-options=-v --resource-usage")
set(CMAKE_CUDA_FLAGS_DEBUG "${CMAKE_CUDA_FLAGS_DEBUG} -O0 -G -Xcompiler -Wall -DCUDA_PTX_FP8_F2FP_ENABLED")
set(CMAKE_CXX_STANDARD "${CXX_STD}")
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-extended-lambda")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --std=c++${CXX_STD} -DCUDA_PTX_FP8_F2FP_ENABLED")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3")
# set(CMAKE_CUDA_FLAGS_RELEASE "${CMAKE_CUDA_FLAGS_RELEASE} -Xcompiler -O3 --ptxas-options=--verbose")
set(CMAKE_CUDA_FLAGS_RELEASE "${CMAKE_CUDA_FLAGS_RELEASE} -Xcompiler -O3 -DCUDA_PTX_FP8_F2FP_ENABLED")
if(BUILD_FAST_MATH)
set(CMAKE_CUDA_FLAGS_RELEASE "${CMAKE_CUDA_FLAGS_RELEASE} --use_fast_math")
message("CMAKE_CUDA_FLAGS_RELEASE: ${CMAKE_CUDA_FLAGS_RELEASE}")
endif()
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
set(COMMON_HEADER_DIRS
${PROJECT_SOURCE_DIR}
${CUDA_PATH}/include
${CUTLASS_HEADER_DIR}
)
message("-- COMMON_HEADER_DIRS: ${COMMON_HEADER_DIRS}")
set(COMMON_LIB_DIRS
${CUDA_PATH}/lib64
)
if (SPARSITY_SUPPORT)
list(APPEND COMMON_HEADER_DIRS ${CUSPARSELT_PATH}/include)
list(APPEND COMMON_LIB_DIRS ${CUSPARSELT_PATH}/lib64)
add_definitions(-DSPARSITY_ENABLED=1)
endif()
if(BUILD_TF)
list(APPEND COMMON_HEADER_DIRS ${TF_PATH}/include)
list(APPEND COMMON_LIB_DIRS ${TF_PATH})
add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0)
endif()
if(BUILD_TF2)
list(APPEND COMMON_HEADER_DIRS ${TF_PATH}/include)
list(APPEND COMMON_LIB_DIRS ${TF_PATH})
add_definitions(-D_GLIBCXX_USE_CXX11_ABI=1)
endif()
set(PYTHON_PATH "python" CACHE STRING "Python path")
if(BUILD_PYT)
execute_process(COMMAND ${PYTHON_PATH} "-c" "from __future__ import print_function; import torch; print(torch.__version__,end='');"
RESULT_VARIABLE _PYTHON_SUCCESS
OUTPUT_VARIABLE TORCH_VERSION)
if (TORCH_VERSION VERSION_LESS "1.5.0")
message(FATAL_ERROR "PyTorch >= 1.5.0 is needed for TorchScript mode.")
endif()
execute_process(COMMAND ${PYTHON_PATH} "-c" "from __future__ import print_function; import os; import torch;
print(os.path.dirname(torch.__file__),end='');"
RESULT_VARIABLE _PYTHON_SUCCESS
OUTPUT_VARIABLE TORCH_DIR)
if (NOT _PYTHON_SUCCESS MATCHES 0)
message(FATAL_ERROR "Torch config Error.")
endif()
list(APPEND CMAKE_PREFIX_PATH ${TORCH_DIR})
find_package(Torch REQUIRED)
execute_process(COMMAND ${PYTHON_PATH} "-c" "from __future__ import print_function; from distutils import sysconfig;
print(sysconfig.get_python_inc());"
RESULT_VARIABLE _PYTHON_SUCCESS
OUTPUT_VARIABLE PY_INCLUDE_DIR)
if (NOT _PYTHON_SUCCESS MATCHES 0)
message(FATAL_ERROR "Python config Error.")
endif()
list(APPEND COMMON_HEADER_DIRS ${PY_INCLUDE_DIR})
execute_process(COMMAND ${PYTHON_PATH} "-c" "from __future__ import print_function; import torch;
print(torch._C._GLIBCXX_USE_CXX11_ABI,end='');"
RESULT_VARIABLE _PYTHON_SUCCESS
OUTPUT_VARIABLE USE_CXX11_ABI)
message("-- USE_CXX11_ABI=${USE_CXX11_ABI}")
if (USE_CXX11_ABI)
set(CMAKE_CUDA_FLAGS_RELEASE "${CMAKE_CUDA_FLAGS_RELEASE} -D_GLIBCXX_USE_CXX11_ABI=1")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -D_GLIBCXX_USE_CXX11_ABI=1")
set(CMAKE_CUDA_FLAGS_DEBUG "${CMAKE_CUDA_FLAGS_DEBUG} -D_GLIBCXX_USE_CXX11_ABI=1")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -D_GLIBCXX_USE_CXX11_ABI=1")
else()
set(CMAKE_CUDA_FLAGS_RELEASE "${CMAKE_CUDA_FLAGS_RELEASE} -D_GLIBCXX_USE_CXX11_ABI=0")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -D_GLIBCXX_USE_CXX11_ABI=0")
set(CMAKE_CUDA_FLAGS_DEBUG "${CMAKE_CUDA_FLAGS_DEBUG} -D_GLIBCXX_USE_CXX11_ABI=0")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -D_GLIBCXX_USE_CXX11_ABI=0")
endif()
endif()
if (BUILD_MULTI_GPU)
list(APPEND COMMON_HEADER_DIRS ${MPI_INCLUDE_PATH})
list(APPEND COMMON_LIB_DIRS /usr/local/mpi/lib)
endif()
if(USE_TRITONSERVER_DATATYPE)
list(APPEND COMMON_HEADER_DIRS ${PROJECT_SOURCE_DIR}/../repo-core-src/include)
endif()
include_directories(
${COMMON_HEADER_DIRS}
)
link_directories(
${COMMON_LIB_DIRS}
)
# add_subdirectory(3rdparty)
add_subdirectory(src)
add_subdirectory(examples)
add_subdirectory(tests)
# Measure the compile time
option(MEASURE_BUILD_TIME "Measure the build time of each module" OFF)
if (MEASURE_BUILD_TIME)
set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${CMAKE_COMMAND} -E time")
set_property(GLOBAL PROPERTY RULE_LAUNCH_CUSTOM "${CMAKE_COMMAND} -E time")
set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK "${CMAKE_COMMAND} -E time")
endif()
########################################
add_library(transformer-shared SHARED
$<TARGET_OBJECTS:BaseBeamSearchLayer>
$<TARGET_OBJECTS:BaseSamplingLayer>
$<TARGET_OBJECTS:BeamSearchLayer>
$<TARGET_OBJECTS:DynamicDecodeLayer>
$<TARGET_OBJECTS:llama_fmha>
$<TARGET_OBJECTS:Llama>
$<TARGET_OBJECTS:LlamaTritonBackend>
$<TARGET_OBJECTS:OnlineBeamSearchLayer>
$<TARGET_OBJECTS:TopKSamplingLayer>
$<TARGET_OBJECTS:TopPSamplingLayer>
$<TARGET_OBJECTS:TransformerTritonBackend>
$<TARGET_OBJECTS:activation_kernels>
$<TARGET_OBJECTS:ban_bad_words>
$<TARGET_OBJECTS:beam_search_penalty_kernels>
$<TARGET_OBJECTS:beam_search_topk_kernels>
$<TARGET_OBJECTS:bert_preprocess_kernels>
$<TARGET_OBJECTS:cublasAlgoMap>
$<TARGET_OBJECTS:cublasMMWrapper>
$<TARGET_OBJECTS:cuda_utils>
$<TARGET_OBJECTS:custom_ar_comm>
$<TARGET_OBJECTS:custom_ar_kernels>
$<TARGET_OBJECTS:decoder_masked_multihead_attention>
$<TARGET_OBJECTS:decoding_kernels>
$<TARGET_OBJECTS:gpt_kernels>
$<TARGET_OBJECTS:logprob_kernels>
$<TARGET_OBJECTS:logger>
$<TARGET_OBJECTS:memory_utils>
$<TARGET_OBJECTS:mpi_utils>
$<TARGET_OBJECTS:nccl_utils>
$<TARGET_OBJECTS:nvtx_utils>
$<TARGET_OBJECTS:online_softmax_beamsearch_kernels>
$<TARGET_OBJECTS:sampling_penalty_kernels>
$<TARGET_OBJECTS:sampling_topk_kernels>
$<TARGET_OBJECTS:sampling_topp_kernels>
$<TARGET_OBJECTS:stop_criteria>
$<TARGET_OBJECTS:tensor>
$<TARGET_OBJECTS:unfused_attention_kernels>
$<TARGET_OBJECTS:word_list>
)
if (BUILD_MULTI_GPU)
target_link_libraries(transformer-shared PUBLIC
-lmpi
${NCCL_LIBRARIES}
)
endif()
if(USE_NVTX)
target_link_libraries(transformer-shared PUBLIC
-lnvToolsExt
)
endif()
set_target_properties(transformer-shared PROPERTIES POSITION_INDEPENDENT_CODE ON)
set_target_properties(transformer-shared PROPERTIES CUDA_RESOLVE_DEVICE_SYMBOLS ON)
set_target_properties(transformer-shared PROPERTIES LINKER_LANGUAGE CXX)
target_link_libraries(transformer-shared PUBLIC -lcudart -lcublas -lcublasLt -lcurand)
include(GNUInstallDirs)
set(INSTALL_CONFIGDIR ${CMAKE_INSTALL_LIBDIR}/cmake/FasterTransformer)
include(CMakePackageConfigHelpers)
configure_package_config_file(
${CMAKE_CURRENT_LIST_DIR}/cmake/FasterTransformerConfig.cmake.in
${CMAKE_CURRENT_BINARY_DIR}/FasterTransformerConfig.cmake
INSTALL_DESTINATION ${INSTALL_CONFIGDIR}
)
install(
FILES
${CMAKE_CURRENT_BINARY_DIR}/FasterTransformerConfig.cmake
DESTINATION ${INSTALL_CONFIGDIR}
)
install(
TARGETS
transformer-shared
EXPORT
transformer-shared-targets
LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/backends/fastertransformer
ARCHIVE DESTINATION ${CMAKE_INSTALL_PREFIX}/backends/fastertransformer
)
install(
EXPORT
transformer-shared-targets
FILE
FasterTransformerTargets.cmake
DESTINATION
${INSTALL_CONFIGDIR}
)
export(
EXPORT
transformer-shared-targets
FILE
${CMAKE_CURRENT_BINARY_DIR}/FasterTransformerTargets.cmake
NAMESPACE
TritonCore::
)
export(PACKAGE FasterTransformer)
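# Consumption sketch (illustrative): a downstream project can locate the
# package exported above via the generated config file; the PATHS hint is a
# placeholder install prefix.
#   find_package(FasterTransformer REQUIRED CONFIG
#                PATHS /opt/fastertransformer/lib/cmake/FasterTransformer)
#   target_link_libraries(my_app PRIVATE ${FASTERTRANSFORMER_LIBRARIES})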
# Copyright (c) 2021-2023, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
include(CMakeFindDependencyMacro)
get_filename_component(
FASTERTRANSFORMER_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH
)
list(APPEND CMAKE_MODULE_PATH ${FASTERTRANSFORMER_CMAKE_DIR})
if(NOT TARGET transformer-shared)
include("${FASTERTRANSFORMER_CMAKE_DIR}/FasterTransformerTargets.cmake")
endif()
set(FASTERTRANSFORMER_LIBRARIES transformer-shared)
# taken from https://github.com/pytorch/pytorch/blob/master/cmake/Modules_CUDA_fix/FindCUDNN.cmake
# Find the CUDNN libraries
#
# The following variables are optionally searched for defaults
# CUDNN_ROOT: Base directory where CUDNN is found
# CUDNN_INCLUDE_DIR: Directory where CUDNN header is searched for
# CUDNN_LIBRARY: Directory where CUDNN library is searched for
# CUDNN_STATIC: Are we looking for a static library? (default: no)
#
# The following are set after configuration is done:
# CUDNN_FOUND
# CUDNN_INCLUDE_PATH
# CUDNN_LIBRARY_PATH
#
include(FindPackageHandleStandardArgs)
set(CUDNN_ROOT $ENV{CUDNN_ROOT_DIR} CACHE PATH "Folder containing NVIDIA cuDNN")
if (DEFINED ENV{CUDNN_ROOT_DIR})
    message(WARNING "CUDNN_ROOT_DIR is deprecated. Please set CUDNN_ROOT instead.")
endif()
list(APPEND CUDNN_ROOT $ENV{CUDNN_ROOT_DIR} ${CUDA_TOOLKIT_ROOT_DIR})
# Compatible layer for CMake <3.12. CUDNN_ROOT will be accounted in for searching paths and libraries for CMake >=3.12.
list(APPEND CMAKE_PREFIX_PATH ${CUDNN_ROOT})
set(CUDNN_INCLUDE_DIR $ENV{CUDNN_INCLUDE_DIR} CACHE PATH "Folder containing NVIDIA cuDNN header files")
find_path(CUDNN_INCLUDE_PATH cudnn.h
HINTS ${CUDNN_INCLUDE_DIR}
PATH_SUFFIXES cuda/include cuda include)
option(CUDNN_STATIC "Look for static CUDNN" OFF)
if (CUDNN_STATIC)
set(CUDNN_LIBNAME "libcudnn_static.a")
else()
set(CUDNN_LIBNAME "cudnn")
endif()
set(CUDNN_LIBRARY $ENV{CUDNN_LIBRARY} CACHE PATH "Path to the cudnn library file (e.g., libcudnn.so)")
if (CUDNN_LIBRARY MATCHES ".*cudnn_static.a" AND NOT CUDNN_STATIC)
message(WARNING "CUDNN_LIBRARY points to a static library (${CUDNN_LIBRARY}) but CUDNN_STATIC is OFF.")
endif()
find_library(CUDNN_LIBRARY_PATH ${CUDNN_LIBNAME}
PATHS ${CUDNN_LIBRARY}
PATH_SUFFIXES lib lib64 cuda/lib cuda/lib64 lib/x64)
find_package_handle_standard_args(CUDNN DEFAULT_MSG CUDNN_LIBRARY_PATH CUDNN_INCLUDE_PATH)
mark_as_advanced(CUDNN_ROOT CUDNN_INCLUDE_DIR CUDNN_LIBRARY)
# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
#
# From PyTorch:
#
# Copyright (c) 2016- Facebook, Inc (Adam Paszke)
# Copyright (c) 2014- Facebook, Inc (Soumith Chintala)
# Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert)
# Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu)
# Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu)
# Copyright (c) 2011-2013 NYU (Clement Farabet)
# Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou, Iain Melvin, Jason Weston)
# Copyright (c) 2006 Idiap Research Institute (Samy Bengio)
# Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert, Samy Bengio, Johnny Mariethoz)
#
# From Caffe2:
#
# Copyright (c) 2016-present, Facebook Inc. All rights reserved.
#
# All contributions by Facebook:
# Copyright (c) 2016 Facebook Inc.
#
# All contributions by Google:
# Copyright (c) 2015 Google Inc.
# All rights reserved.
#
# All contributions by Yangqing Jia:
# Copyright (c) 2015 Yangqing Jia
# All rights reserved.
#
# All contributions by Kakao Brain:
# Copyright 2019-2020 Kakao Brain
#
# All contributions from Caffe:
# Copyright(c) 2013, 2014, 2015, the respective contributors
# All rights reserved.
#
# All other contributions:
# Copyright(c) 2015, 2016 the respective contributors
# All rights reserved.
#
# Caffe2 uses a copyright model similar to Caffe: each contributor holds
# copyright over their contributions to Caffe2. The project versioning records
# all such contribution and copyright details. If a contributor wants to further
# mark their specific copyright on a particular contribution, they should
# indicate their copyright solely in the commit message of the change when it is
# committed.
#
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# 3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories America
# and IDIAP Research Institute nor the names of its contributors may be
# used to endorse or promote products derived from this software without
# specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# Find the nccl libraries
#
# The following variables are optionally searched for defaults
# NCCL_ROOT: Base directory where all NCCL components are found
# NCCL_INCLUDE_DIR: Directory where NCCL header is found
# NCCL_LIB_DIR: Directory where NCCL library is found
#
# The following are set after configuration is done:
# NCCL_FOUND
# NCCL_INCLUDE_DIRS
# NCCL_LIBRARIES
#
# The path hints include CUDA_TOOLKIT_ROOT_DIR seeing as some folks
# install NCCL in the same location as the CUDA toolkit.
# See https://github.com/caffe2/caffe2/issues/1601
set(NCCL_INCLUDE_DIR $ENV{NCCL_INCLUDE_DIR} CACHE PATH "Folder containing NVIDIA NCCL headers")
set(NCCL_LIB_DIR $ENV{NCCL_LIB_DIR} CACHE PATH "Folder containing NVIDIA NCCL libraries")
set(NCCL_VERSION $ENV{NCCL_VERSION} CACHE STRING "Version of NCCL to build with")
if (DEFINED ENV{NCCL_ROOT_DIR})
    message(WARNING "NCCL_ROOT_DIR is deprecated. Please set NCCL_ROOT instead.")
endif()
list(APPEND NCCL_ROOT $ENV{NCCL_ROOT_DIR} ${CUDA_TOOLKIT_ROOT_DIR})
# Compatible layer for CMake <3.12. NCCL_ROOT will be accounted in for searching paths and libraries for CMake >=3.12.
list(APPEND CMAKE_PREFIX_PATH ${NCCL_ROOT})
find_path(NCCL_INCLUDE_DIRS
NAMES nccl.h
HINTS ${NCCL_INCLUDE_DIR})
if (USE_STATIC_NCCL)
MESSAGE(STATUS "USE_STATIC_NCCL is set. Linking with static NCCL library.")
SET(NCCL_LIBNAME "nccl_static")
if (NCCL_VERSION) # Prefer the versioned library if a specific NCCL version is specified
set(CMAKE_FIND_LIBRARY_SUFFIXES ".a.${NCCL_VERSION}" ${CMAKE_FIND_LIBRARY_SUFFIXES})
endif()
else()
SET(NCCL_LIBNAME "nccl")
if (NCCL_VERSION) # Prefer the versioned library if a specific NCCL version is specified
set(CMAKE_FIND_LIBRARY_SUFFIXES ".so.${NCCL_VERSION}" ${CMAKE_FIND_LIBRARY_SUFFIXES})
endif()
endif()
find_library(NCCL_LIBRARIES
NAMES ${NCCL_LIBNAME}
HINTS ${NCCL_LIB_DIR})
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(NCCL DEFAULT_MSG NCCL_INCLUDE_DIRS NCCL_LIBRARIES)
if(NCCL_FOUND) # obtaining NCCL version and some sanity checks
set (NCCL_HEADER_FILE "${NCCL_INCLUDE_DIRS}/nccl.h")
message (STATUS "Determining NCCL version from ${NCCL_HEADER_FILE}...")
set (OLD_CMAKE_REQUIRED_INCLUDES ${CMAKE_REQUIRED_INCLUDES})
list (APPEND CMAKE_REQUIRED_INCLUDES ${NCCL_INCLUDE_DIRS})
include(CheckCXXSymbolExists)
check_cxx_symbol_exists(NCCL_VERSION_CODE nccl.h NCCL_VERSION_DEFINED)
if (NCCL_VERSION_DEFINED)
set(file "${PROJECT_BINARY_DIR}/detect_nccl_version.cc")
file(WRITE ${file} "
#include <iostream>
#include <nccl.h>
int main()
{
std::cout << NCCL_MAJOR << '.' << NCCL_MINOR << '.' << NCCL_PATCH << std::endl;
int x;
ncclGetVersion(&x);
return x == NCCL_VERSION_CODE;
}
")
try_run(NCCL_VERSION_MATCHED compile_result ${PROJECT_BINARY_DIR} ${file}
RUN_OUTPUT_VARIABLE NCCL_VERSION_FROM_HEADER
CMAKE_FLAGS "-DINCLUDE_DIRECTORIES=${NCCL_INCLUDE_DIRS}"
LINK_LIBRARIES ${NCCL_LIBRARIES})
if (NOT NCCL_VERSION_MATCHED)
message(FATAL_ERROR "Found NCCL header version and library version do not match! \
(include: ${NCCL_INCLUDE_DIRS}, library: ${NCCL_LIBRARIES}) Please set NCCL_INCLUDE_DIR and NCCL_LIB_DIR manually.")
endif()
message(STATUS "NCCL version: ${NCCL_VERSION_FROM_HEADER}")
else()
# message(STATUS "NCCL version < 2.3.5-5")
endif ()
set (CMAKE_REQUIRED_INCLUDES ${OLD_CMAKE_REQUIRED_INCLUDES})
message(STATUS "Found NCCL (include: ${NCCL_INCLUDE_DIRS}, library: ${NCCL_LIBRARIES})")
mark_as_advanced(NCCL_ROOT_DIR NCCL_INCLUDE_DIRS NCCL_LIBRARIES)
endif()
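# Usage note (illustrative): the top-level CMakeLists.txt reaches this module
# through find_package(NCCL REQUIRED) once cmake/Modules is on CMAKE_MODULE_PATH;
# exporting NCCL_INCLUDE_DIR/NCCL_LIB_DIR in the environment (paths below are
# hypothetical) overrides the search:
#   NCCL_INCLUDE_DIR=/usr/include NCCL_LIB_DIR=/usr/lib/x86_64-linux-gnu cmake ..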
# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
include(CMakeFindDependencyMacro)
get_filename_component(
TRITONPYTORCHBACKEND_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH
)
list(APPEND CMAKE_MODULE_PATH ${TRITONPYTORCHBACKEND_CMAKE_DIR})
if(NOT TARGET TritonPyTorchBackend::triton-pytorch-backend)
include("${TRITONPYTORCHBACKEND_CMAKE_DIR}/TritonPyTorchBackendTargets.cmake")
endif()
set(TRITONPYTORCHBACKEND_LIBRARIES TritonPyTorchBackend::triton-pytorch-backend)
# Copyright (c) 2019-2023, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
add_subdirectory(cpp)
# Copyright (c) 2019-2023, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
add_subdirectory(llama)
# Copyright (c) OpenMMLab. All rights reserved.
add_executable(llama_triton_example llama_triton_example.cc)
target_link_libraries(llama_triton_example PUBLIC -lcublas -lcublasLt -lcudart
LlamaTritonBackend TransformerTritonBackend mpi_utils nccl_utils
nvtx_utils word_list glog)
# Copyright (c) OpenMMLab. All rights reserved.
import subprocess
import fire
def main(head_num: int = 80,
size_per_head: int = 128,
vocab_size: int = 65632,
inter_size: int = 27392,
tensor_para_size: int = 8,
max_batch_size: int = 64):
for bsz in range(1, max_batch_size + 1):
subprocess.call(
f'bin/gpt_gemm {bsz} 1 1 {head_num} {size_per_head} {inter_size} {vocab_size} 1 {tensor_para_size} {0 if bsz == 1 else 1}',
shell=True)
if __name__ == '__main__':
fire.Fire(main)
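# Usage sketch (the script name is a placeholder; assumes the gpt_gemm binary
# built by this repo sits under ./bin, i.e. the script is run from the build
# directory). For example, LLaMA-7B without tensor parallelism:
#   python generate_gemm_config.py --head_num 32 --size_per_head 128 \
#       --vocab_size 32000 --inter_size 11008 --tensor_para_size 1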
# Copyright (c) OpenMMLab. All rights reserved.
import torch
import fire
import os.path as osp
from os import makedirs
from pathlib import Path
import safetensors
from tqdm import tqdm
def import_fb(ckpt_dir: str):
checkpoints = []
for pattern in ['*.pth', '*.pt']:
checkpoints += sorted(Path(ckpt_dir).glob(pattern))
print(checkpoints)
n_ckpt = len(checkpoints)
model_params = {}
def get_param(name, size):
print(name, size)
if name not in model_params:
model_params[name] = torch.zeros(
size, dtype=torch.float16, device='cpu')
return model_params[name]
for i, ckpt_path in enumerate(checkpoints):
ckpt = torch.load(ckpt_path, map_location='cpu')
for param_name, param_data in ckpt.items():
key = param_name.split('.')[-2]
if key in ['w1', 'w3', 'wq', 'wk', 'wv', 'output']: # column-parallel
size = param_data.size(0)
param = get_param(
param_name, [size * n_ckpt, param_data.size(1)])
param.data[size * i: size * (i + 1), :] = param_data
elif key in ['w2', 'wo', 'tok_embeddings']: # row-parallel
size = param_data.size(-1)
param = get_param(
param_name, [param_data.size(0), size * n_ckpt])
param.data[:, size * i: size * (i + 1)] = param_data
elif i == 0:
param = get_param(param_name, param_data.size())
param.data = param_data
del ckpt
for name, param in model_params.items():
# transpose all weights as FasterTransformer is expecting column-major weights
# (output_dims, input_dims) -> (input_dims, output_dims)
key = name.split('.')[-2]
if key in ['w1', 'w3', 'wq', 'wk', 'wv', 'w2', 'wo']:
param.data = param.data.t()
# concat qkv projection
    for i in range(1000):  # iterate over layers until the qkv weights run out
_qkv = [f'layers.{i}.attention.{k}.weight' for k in ['wq', 'wk', 'wv']]
try:
qkv = tuple(map(model_params.pop, _qkv))
except KeyError:
break
qkv = torch.stack(qkv, dim=1)
model_params[f'layers.{i}.attention.w_qkv.weight'] = qkv
print(qkv.shape, qkv.dtype)
return model_params
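# Illustrative shard shapes (hypothetical 2-way FB checkpoint of the 7B config,
# hidden dim 4096): a column-parallel 'wq' shard of [2048, 4096] is stitched
# along dim 0 into [4096, 4096], while a row-parallel 'wo' shard of [4096, 2048]
# is stitched along dim 1, matching the two branches above.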
def permute(x: torch.Tensor):
SIZE_PER_HEAD = 128
if x.shape[-1] > 1: # qweights
dim = x.shape[-1]
n_heads = dim // SIZE_PER_HEAD
return x.view(-1, n_heads, 2, dim // n_heads // 2).transpose(2, 3).reshape(-1, dim)
else: # scales, zeros
dim = x.shape[0]
n_heads = dim // SIZE_PER_HEAD
return x.view(n_heads, 2, dim // n_heads // 2, 1).transpose(1, 2).reshape(dim, 1)
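# Layout note (illustrative): permute() converts the HF rotary layout, where
# each 128-wide head keeps its two rotary halves contiguous, into FB's
# interleaved layout (see the "different layout for fb & hf" comment in
# import_gptq below); shapes are preserved, only the per-head elements are
# regrouped.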
def check_zero(x: torch.Tensor):
    total = x.flatten().sum().item()
    assert total == 0, str(total)
def import_gptq(path: str):
model_params = {}
_qweight = 'weight'
_suffixes = [_qweight]
n_split = 3
    if True:  # load sharded torch .bin files (a safetensors alternative is sketched below)
_params = {}
for i in tqdm(range(0, n_split)):
filename = "pytorch_model-{:05d}-of-{:05d}.bin".format(i + 1, n_split)
_tmp = torch.load(osp.join(path, filename), map_location='cpu')
_params.update(_tmp)
# print('\n'.join(_params.keys()))
def get_tensor(name):
return _params[name]
def get_tensor_transposed(name):
return _params[name].t()
# _qweight = 'qweight'
# _suffixes = [_qweight, 'bias', 'scales', 'zeros']
# with safetensors.safe_open(path, framework='pt') as f:
# get_tensor = f.get_tensor
# # quantized weights are already in column major, no need to transpose
# get_tensor_transposed = get_tensor
for i in range(1000):
try:
# attention weights
_qkvo = [f'model.layers.{i}.self_attn.{t}_proj' for t in 'qkvo']
for suffix in _suffixes:
q, k, v, o = map(get_tensor_transposed, map(('{}.' + suffix).format, _qkvo))
if suffix == 'bias':
check_zero(q), check_zero(k), check_zero(v), check_zero(o)
else:
# q, k has different layout for fb & hf, convert to fb's layout
q = permute(q)
k = permute(k)
if suffix == _qweight: # weight, qweight
# insert a dimension for splitting heads later
# qkv = torch.cat([q[:, None, :], k[:, None, :], v[:, None, :]], dim=1)
qkv = torch.stack((q, k, v), dim=1)
else: # scales, zeros
# qkv = torch.cat([q[None, :], k[None, :], v[None, :]], dim=0).squeeze(dim=-1)
qkv = torch.stack((q, k, v), dim=0).squeeze(dim=-1)
                for name, tensor in [('w_qkv', qkv), ('wo', o)]:
                    model_params[f'layers.{i}.attention.{name}.{suffix}'] = tensor
# ffn weights
_w123 = [f'model.layers.{i}.mlp.{t}_proj' for t in ['gate', 'down', 'up']]
for suffix in _suffixes:
w1, w2, w3 = map(get_tensor_transposed, map(('{}.' + suffix).format, _w123))
if suffix == 'bias':
check_zero(w1), check_zero(w2), check_zero(w3)
else:
if suffix in ['scales', 'zeros']:
w1, w2, w3 = map(lambda x: x.squeeze(dim=-1), [w1, w2, w3])
                for name, tensor in [('w1', w1), ('w2', w2), ('w3', w3)]:
                    model_params[f'layers.{i}.feed_forward.{name}.{suffix}'] = tensor
other = [('attention_norm.weight', 'input_layernorm.weight'),
('ffn_norm.weight', 'post_attention_layernorm.weight')]
for ours, theirs in other:
model_params[f'layers.{i}.' + ours] = get_tensor(f'model.layers.{i}.' + theirs)
except safetensors.SafetensorError:
break
except KeyError:
break
print(i)
other = [('tok_embeddings.weight', 'model.embed_tokens.weight'),
('norm.weight', 'model.norm.weight'),
('output.weight', 'lm_head.weight')]
for ours, theirs in other:
model_params[ours] = get_tensor(theirs)
return model_params
def export(model_params: dict, out_dir: str, n_inference: int):
makedirs(out_dir, exist_ok=True)
def save_bin(param: torch.Tensor, name):
print(name, param.shape)
if param.dtype in [torch.float, torch.bfloat16]:
param = param.half()
param.contiguous().numpy().tofile(osp.join(out_dir, name))
    # reverse the splitting axes since the weights were transposed above
for param_name, param_data in model_params.items():
split_dim = None
key, ext = param_name.split('.')[-2:]
copy = False
if key in ['w1', 'w3', 'w_qkv']:
split_dim = -1
elif key in ['w2', 'wo']:
if ext in ['scales', 'zeros']:
copy = True
else:
split_dim = 0
if split_dim is not None:
            print(f'*** splitting {param_name}, shape={param_data.shape}, split_dim={split_dim}')
assert param_data.shape[split_dim] % n_inference == 0
split_size = param_data.shape[split_dim] // n_inference
splits = torch.split(param_data, split_size, dim=split_dim)
for i, split in enumerate(splits):
prefix, ext = osp.splitext(param_name)
save_bin(split, f'{prefix}.{i}{ext}')
elif copy:
print(f'### copying {param_name}, shape={param_data.shape}')
copies = [param_data] * n_inference
for i, copy in enumerate(copies):
prefix, ext = osp.splitext(param_name)
save_bin(copy, f'{prefix}.{i}{ext}')
else:
save_bin(param_data, param_name)
def main(kind: str, input_path: str, out_dir: str, n_inference: int = 1):
if kind == 'fb':
model_params = import_fb(input_path)
elif kind == 'gptq':
model_params = import_gptq(input_path)
else:
raise RuntimeError(f'Unsupported kind: {kind}')
export(model_params, out_dir, n_inference)
if __name__ == '__main__':
fire.Fire(main)
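# Usage sketch (the script name and paths are placeholders):
#   # original FB checkpoint -> FT weights for tensor_para_size=1
#   python llama_convert.py fb /data/llama/7B /data/llama/7B-ft --n_inference 1
#   # GPTQ/HF-style .bin shards, split for 2 inference ranks
#   python llama_convert.py gptq /data/llama-gptq /data/llama-ft2 --n_inference 2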
[ft_instance_hyperparameter]
data_type=fp16
enable_custom_all_reduce=0
pipeline_para_size=1
tensor_para_size=8
model_dir=/shared_data/chatpjlm-0/v0.2.3/fastertransformer/weights/
[request]
request_batch_size=8
request_output_len=2048
beam_width=1 ; beam width for beam search
top_k=1 ; k value for top k sampling
top_p=0.0 ; p value for top p sampling
temperature=1.0 ; Use for sampling
repetition_penalty=1.00 ; Use for sampling
presence_penalty=0.0 ; Only one of repetition_penalty and presence_penalty is allowed.
len_penalty=0.0
beam_search_diversity_rate=0.0
; PJLM start/end ids
start_id=0
end_id=1
; --------------------- legacy params -------------------------
; LLaMA start/end ids
; start_id=1
; end_id=2
[4999_llama]
head_num=80
size_per_head=128
vocab_size=65632
num_layer=82
rotary_embedding=128
norm_eps=1e-5
start_id=0
end_id=1
inter_size=27392
[llama_7B]
head_num=32
size_per_head=128
vocab_size=32000
num_layer=32
rotary_embedding=128
start_id=1
end_id=2
inter_size=11008
[llama_13B]
head_num=40
size_per_head=128
vocab_size=32000
num_layer=40
rotary_embedding=128
start_id=1
end_id=2
inter_size=13824
[llama_30B]
head_num=52
size_per_head=128
vocab_size=32000
num_layer=60
rotary_embedding=128
start_id=1
end_id=2
inter_size=17920
[llama_65B]
head_num=64
size_per_head=128
vocab_size=32000
num_layer=80
rotary_embedding=128
start_id=1
end_id=2
inter_size=22016
0,18396,22305,13,4662,561,399,326,44875,29913,6938,1198,345,3134,39407,320,47997,45778,45121,61969,47371,492,13,44872,65616,47997,45778,45121,61969,47371,345,263,13820,1558,5515,2404,409,345,12643,521,41109,34993,326,44875,24488,10677,320,45691,45926,45513,46641,47641,46285,6456,492,824,345,12314,307,377,11951,44863,23391,44863,329,5420,935,421,44858,13,44872,65616,47997,45778,45121,61969,47371,541,2914,329,34352,30302,3530,299,278,5515,14966,521,278,1711,1591,425,5716,329,65616,45452,45545,44858,13,570,996,372,13,44975,45004,44950,11111,45004,35597,44883,2282,32901,4220,46323,13,44975,45004,11130,32843,45004,35597
0,18396,22305,13,4662,561,399,326,44875,29913,6938,1198,345,3134,39407,320,47997,45778,45121,61969,47371,492,13,44872,65616,47997,45778,45121,61969,47371,345,263,13820,1558,5515,2404,409,345,12643,521,41109,34993,326,44875,24488,10677,320,45691,45926,45513,46641,47641,46285,6456,492,824,345,12314,307,377,11951,44863,23391,44863,329,5420,935,421,44858,13,44872,65616,47997,45778,45121,61969,47371,541,2914,329,34352,30302,3530,299,278,5515,14966,521,278,1711,1591,425,5716,329,65616,45452,45545,44858,13,570,996,372,13,44975,45004,44950,11111,45004,35597,46088,46064,625,19880,46323,13,44975,45004,11130,32843,45004,35597
0,18396,22305,13,4662,561,399,326,44875,29913,6938,1198,345,3134,39407,320,47997,45778,45121,61969,47371,492,13,44872,65616,47997,45778,45121,61969,47371,345,263,13820,1558,5515,2404,409,345,12643,521,41109,34993,326,44875,24488,10677,320,45691,45926,45513,46641,47641,46285,6456,492,824,345,12314,307,377,11951,44863,23391,44863,329,5420,935,421,44858,13,44872,65616,47997,45778,45121,61969,47371,541,2914,329,34352,30302,3530,299,278,5515,14966,521,278,1711,1591,425,5716,329,65616,45452,45545,44858,13,570,996,372,13,44975,45004,44950,11111,45004,35597,47335,56437,60468,46323,13,44975,45004,11130,32843,45004,35597
0,18396,22305,13,4662,561,399,326,44875,29913,6938,1198,345,3134,39407,320,47997,45778,45121,61969,47371,492,13,44872,65616,47997,45778,45121,61969,47371,345,263,13820,1558,5515,2404,409,345,12643,521,41109,34993,326,44875,24488,10677,320,45691,45926,45513,46641,47641,46285,6456,492,824,345,12314,307,377,11951,44863,23391,44863,329,5420,935,421,44858,13,44872,65616,47997,45778,45121,61969,47371,541,2914,329,34352,30302,3530,299,278,5515,14966,521,278,1711,1591,425,5716,329,65616,45452,45545,44858,13,570,996,372,13,44975,45004,44950,11111,45004,35597,44883,2282,6828,3467,46323,13,44975,45004,11130,32843,45004,35597
0,18396,22305,13,4662,561,399,326,44875,29913,6938,1198,345,3134,39407,320,47997,45778,45121,61969,47371,492,13,44872,65616,47997,45778,45121,61969,47371,345,263,13820,1558,5515,2404,409,345,12643,521,41109,34993,326,44875,24488,10677,320,45691,45926,45513,46641,47641,46285,6456,492,824,345,12314,307,377,11951,44863,23391,44863,329,5420,935,421,44858,13,44872,65616,47997,45778,45121,61969,47371,541,2914,329,34352,30302,3530,299,278,5515,14966,521,278,1711,1591,425,5716,329,65616,45452,45545,44858,13,570,996,372,13,44975,45004,44950,11111,45004,35597,36589,3467,7849,299,7032,46323,13,44975,45004,11130,32843,45004,35597
0,18396,22305,13,4662,561,399,326,44875,29913,6938,1198,345,3134,39407,320,47997,45778,45121,61969,47371,492,13,44872,65616,47997,45778,45121,61969,47371,345,263,13820,1558,5515,2404,409,345,12643,521,41109,34993,326,44875,24488,10677,320,45691,45926,45513,46641,47641,46285,6456,492,824,345,12314,307,377,11951,44863,23391,44863,329,5420,935,421,44858,13,44872,65616,47997,45778,45121,61969,47371,541,2914,329,34352,30302,3530,299,278,5515,14966,521,278,1711,1591,425,5716,329,65616,45452,45545,44858,13,570,996,372,13,44975,45004,44950,11111,45004,35597,44976,39798,6828,3467,46323,13,44975,45004,11130,32843,45004,35597
0,18396,22305,13,4662,561,399,326,44875,29913,6938,1198,345,3134,39407,320,47997,45778,45121,61969,47371,492,13,44872,65616,47997,45778,45121,61969,47371,345,263,13820,1558,5515,2404,409,345,12643,521,41109,34993,326,44875,24488,10677,320,45691,45926,45513,46641,47641,46285,6456,492,824,345,12314,307,377,11951,44863,23391,44863,329,5420,935,421,44858,13,44872,65616,47997,45778,45121,61969,47371,541,2914,329,34352,30302,3530,299,278,5515,14966,521,278,1711,1591,425,5716,329,65616,45452,45545,44858,13,570,996,372,13,44975,45004,44950,11111,45004,35597,2795,977,9193,299,405,537,46323,13,44975,45004,11130,32843,45004,35597
0,18396,22305,13,4662,561,399,326,44875,29913,6938,1198,345,3134,39407,320,47997,45778,45121,61969,47371,492,13,44872,65616,47997,45778,45121,61969,47371,345,263,13820,1558,5515,2404,409,345,12643,521,41109,34993,326,44875,24488,10677,320,45691,45926,45513,46641,47641,46285,6456,492,824,345,12314,307,377,11951,44863,23391,44863,329,5420,935,421,44858,13,44872,65616,47997,45778,45121,61969,47371,541,2914,329,34352,30302,3530,299,278,5515,14966,521,278,1711,1591,425,5716,329,65616,45452,45545,44858,13,570,996,372,13,44975,45004,44950,11111,45004,35597,45691,45926,45513,46641,47641,46285,6456,46323,13,44975,45004,11130,32843,45004,35597
from sentencepiece import SentencePieceProcessor
from typing import List
import fire
class Tokenizer:
def __init__(self, model_file: str):
self.model = SentencePieceProcessor(model_file=model_file)
self.vocab_size = self.model.vocab_size()
self.start_id = self.model.bos_id()
self.end_id = self.model.eos_id()
self.pad_id = self.model.pad_id()
print(f'vocab_size = {self.vocab_size}')
print(f'start_id = {self.start_id}')
print(f'end_id = {self.end_id}')
print(f'pad_id = {self.pad_id}')
def encode(self, s: str):
return self.model.Encode(s, add_bos=True)
def decode(self, t: List[int]):
return self.model.Decode(t)
def main(model_file: str = '/data/llama/model/tokenizer.model',
encode_file: str = None, decode_file: str = None):
tokenizer = Tokenizer(model_file)
if encode_file:
with open(encode_file, 'r') as f:
xs = tokenizer.encode(f.read())
print(','.join(map(str, xs)))
elif decode_file:
with open(decode_file, 'r') as f:
            # decode_file is expected to hold comma-separated token ids (the encode output format)
            ys = tokenizer.decode([int(t) for t in f.read().strip().split(',')])
print(ys)
else:
first = True
while True:
try:
s = input()
except EOFError:
break
if not first:
print('---------------------------------------------')
first = False
try:
xs = map(int, s.strip().split(' '))
s = tokenizer.decode(list(xs))
print(s)
except ValueError:
xs = tokenizer.encode(s)
print(' '.join(map(str, xs)))
if __name__ == '__main__':
fire.Fire(main)
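# Usage sketch (script name is a placeholder; model_file defaults to the path
# in main()). With no --encode_file/--decode_file it runs an interactive
# round-trip: typed text is encoded to ids, and a line of space-separated ids
# is decoded back to text.
#   python llama_tokenizer.py --model_file /data/llama/model/tokenizer.model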
# Copyright (c) 2019-2023, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
add_subdirectory(fastertransformer)
# Copyright (c) 2019-2023, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
add_subdirectory(utils)
add_subdirectory(kernels)
add_subdirectory(layers)
add_subdirectory(models)
if(BUILD_PYT)
add_subdirectory(th_op)
endif()
add_subdirectory(triton_backend)
# Copyright (c) 2019-2023, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
cmake_minimum_required(VERSION 3.8)
add_library(ban_bad_words STATIC ban_bad_words.cu)
set_property(TARGET ban_bad_words PROPERTY POSITION_INDEPENDENT_CODE ON)
set_property(TARGET ban_bad_words PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS ON)
add_library(stop_criteria STATIC stop_criteria_kernels.cu)
set_property(TARGET stop_criteria PROPERTY POSITION_INDEPENDENT_CODE ON)
set_property(TARGET stop_criteria PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS ON)
add_library(activation_kernels STATIC activation_kernels.cu)
set_property(TARGET activation_kernels PROPERTY POSITION_INDEPENDENT_CODE ON)
set_property(TARGET activation_kernels PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS ON)
add_library(gen_relative_pos_bias STATIC gen_relative_pos_bias.cu)
set_property(TARGET gen_relative_pos_bias PROPERTY POSITION_INDEPENDENT_CODE ON)
set_property(TARGET gen_relative_pos_bias PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS ON)
target_link_libraries(gen_relative_pos_bias PUBLIC activation_kernels)
add_library(logprob_kernels STATIC logprob_kernels.cu)
set_property(TARGET logprob_kernels PROPERTY POSITION_INDEPENDENT_CODE ON)
set_property(TARGET logprob_kernels PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS ON)
add_library(unfused_attention_kernels STATIC unfused_attention_kernels.cu)
set_property(TARGET unfused_attention_kernels PROPERTY POSITION_INDEPENDENT_CODE ON)
set_property(TARGET unfused_attention_kernels PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS ON)
add_library(bert_preprocess_kernels STATIC bert_preprocess_kernels.cu)
set_property(TARGET bert_preprocess_kernels PROPERTY POSITION_INDEPENDENT_CODE ON)
set_property(TARGET bert_preprocess_kernels PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS ON)
set(decoder_masked_multihead_attention_files
decoder_masked_multihead_attention.cu
)
file(GLOB decoder_masked_multihead_attention_files ${decoder_masked_multihead_attention_files} ./decoder_masked_multihead_attention/*.cu)
add_library(decoder_masked_multihead_attention STATIC ${decoder_masked_multihead_attention_files})
set_property(TARGET decoder_masked_multihead_attention PROPERTY POSITION_INDEPENDENT_CODE ON)
set_property(TARGET decoder_masked_multihead_attention PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS ON)
add_library(online_softmax_beamsearch_kernels STATIC online_softmax_beamsearch_kernels.cu)
set_property(TARGET online_softmax_beamsearch_kernels PROPERTY POSITION_INDEPENDENT_CODE ON)
set_property(TARGET online_softmax_beamsearch_kernels PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS ON)
add_library(decoding_kernels STATIC decoding_kernels.cu)
set_property(TARGET decoding_kernels PROPERTY POSITION_INDEPENDENT_CODE ON)
set_property(TARGET decoding_kernels PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS ON)
add_library(gpt_kernels STATIC gpt_kernels.cu)
set_property(TARGET gpt_kernels PROPERTY POSITION_INDEPENDENT_CODE ON)
set_property(TARGET gpt_kernels PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS ON)
add_library(beam_search_penalty_kernels STATIC beam_search_penalty_kernels.cu)
set_property(TARGET beam_search_penalty_kernels PROPERTY POSITION_INDEPENDENT_CODE ON)
set_property(TARGET beam_search_penalty_kernels PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS ON)
target_link_libraries(beam_search_penalty_kernels PRIVATE cuda_utils)
add_library(beam_search_topk_kernels STATIC beam_search_topk_kernels.cu)
set_property(TARGET beam_search_topk_kernels PROPERTY POSITION_INDEPENDENT_CODE ON)
set_property(TARGET beam_search_topk_kernels PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS ON)
add_library(sampling_topk_kernels STATIC sampling_topk_kernels.cu)
set_property(TARGET sampling_topk_kernels PROPERTY POSITION_INDEPENDENT_CODE ON)
set_property(TARGET sampling_topk_kernels PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS ON)
add_library(sampling_topp_kernels STATIC sampling_topp_kernels.cu)
set_property(TARGET sampling_topp_kernels PROPERTY POSITION_INDEPENDENT_CODE ON)
set_property(TARGET sampling_topp_kernels PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS ON)
add_library(sampling_penalty_kernels STATIC sampling_penalty_kernels.cu)
set_property(TARGET sampling_penalty_kernels PROPERTY POSITION_INDEPENDENT_CODE ON)
set_property(TARGET sampling_penalty_kernels PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS ON)
add_library(custom_ar_kernels STATIC custom_ar_kernels.cu)
set_property(TARGET custom_ar_kernels PROPERTY POSITION_INDEPENDENT_CODE ON)
set_property(TARGET custom_ar_kernels PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS ON)
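# Sketch (illustrative, not used above): the repeated add_library/set_property
# trio could be folded into a helper so new kernels stay consistent, e.g.
#   function(ft_add_kernel_lib name)
#     add_library(${name} STATIC ${ARGN})
#     set_property(TARGET ${name} PROPERTY POSITION_INDEPENDENT_CODE ON)
#     set_property(TARGET ${name} PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS ON)
#   endfunction()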