// // Created by huangyuyang on 6/14/23. // #ifndef FASTLLM_CUDADEVICE_H #define FASTLLM_CUDADEVICE_H #include "device.h" namespace fastllm { class CudaDevice : BaseDevice { public: CudaDevice (); bool Malloc (void **ret, size_t size); // 分配尺寸为size的空间 bool Free(void *ret); // 释放ret bool CopyDataToCPU(void *dst, void *src, size_t size); bool CopyDataFromCPU(void *dst, void *src, size_t size); }; class CudaAttention : BaseOperator { void Reshape(const std::string &opType, const DataDict &datas, const FloatDict &floatParams, const IntDict &intParams); void Run(const std::string &opType, const DataDict &datas, const FloatDict &floatParams, const IntDict &intParams); }; class CudaCopyKVCacheOp : BaseOperator { void Reshape(const std::string &opType, const DataDict &datas, const FloatDict &floatParams, const IntDict &intParams); void Run(const std::string &opType, const DataDict &datas, const FloatDict &floatParams, const IntDict &intParams); }; class CudaLayerNormOp : BaseOperator { bool CanRun(const std::string &opType, const DataDict &datas, const FloatDict &floatParams, const IntDict &intParams); void Run(const std::string &opType, const DataDict &datas, const FloatDict &floatParams, const IntDict &intParams); }; class CudaRMSNormOp : BaseOperator { bool CanRun(const std::string &opType, const DataDict &datas, const FloatDict &floatParams, const IntDict &intParams); void Run(const std::string &opType, const DataDict &datas, const FloatDict &floatParams, const IntDict &intParams); }; class CudaLinearOp : BaseOperator { void Reshape(const std::string &opType, const DataDict &datas, const FloatDict &floatParams, const IntDict &intParams); bool CanRun(const std::string &opType, const DataDict &datas, const FloatDict &floatParams, const IntDict &intParams); void Run(const std::string &opType, const DataDict &datas, const FloatDict &floatParams, const IntDict &intParams); }; class CudaSplitOp : BaseOperator { void Reshape(const std::string &opType, const DataDict &datas, const FloatDict &floatParams, const IntDict &intParams); void Run(const std::string &opType, const DataDict &datas, const FloatDict &floatParams, const IntDict &intParams); }; class CudaCatDirectOp : BaseOperator { void Run(const std::string &opType, const DataDict &datas, const FloatDict &floatParams, const IntDict &intParams); }; class CudaMatMulOp : BaseOperator { void Reshape(const std::string &opType, const DataDict &datas, const FloatDict &floatParams, const IntDict &intParams); void Run(const std::string &opType, const DataDict &datas, const FloatDict &floatParams, const IntDict &intParams); }; class CudaMatMulTransBOp : BaseOperator { void Reshape(const std::string &opType, const DataDict &datas, const FloatDict &floatParams, const IntDict &intParams); void Run(const std::string &opType, const DataDict &datas, const FloatDict &floatParams, const IntDict &intParams); }; class CudaSoftMaxOp : BaseOperator { bool CanRun(const std::string &opType, const DataDict &datas, const FloatDict &floatParams, const IntDict &intParams); void Run(const std::string &opType, const DataDict &datas, const FloatDict &floatParams, const IntDict &intParams); }; class CudaGeluNewOp : BaseOperator { void Run(const std::string &opType, const DataDict &datas, const FloatDict &floatParams, const IntDict &intParams); }; class CudaSiluOp : BaseOperator { void Run(const std::string &opType, const DataDict &datas, const FloatDict &floatParams, const IntDict &intParams); }; class CudaSwigluOp : BaseOperator { void Reshape(const std::string &opType, const DataDict &datas, const FloatDict &floatParams, const IntDict &intParams); void Run(const std::string &opType, const DataDict &datas, const FloatDict &floatParams, const IntDict &intParams); }; class CudaMulOp : BaseOperator { void Run(const std::string &opType, const DataDict &datas, const FloatDict &floatParams, const IntDict &intParams); }; class CudaAddToOp : BaseOperator { void Run(const std::string &opType, const DataDict &datas, const FloatDict &floatParams, const IntDict &intParams); }; class CudaMulToOp : BaseOperator { void Run(const std::string &opType, const DataDict &datas, const FloatDict &floatParams, const IntDict &intParams); }; class CudaAttentionMaskOp : BaseOperator { void Run(const std::string &opType, const DataDict &datas, const FloatDict &floatParams, const IntDict &intParams); }; class CudaAlibiMaskOp : BaseOperator { void Run(const std::string &opType, const DataDict &datas, const FloatDict &floatParams, const IntDict &intParams); }; class CudaTopKOp : BaseOperator { void Reshape(const std::string &opType, const DataDict &datas, const FloatDict &floatParams, const IntDict &intParams); bool CanRun(const std::string &opType, const DataDict &datas, const FloatDict &floatParams, const IntDict &intParams); void Run(const std::string &opType, const DataDict &datas, const FloatDict &floatParams, const IntDict &intParams); }; class CudaPermuteSelfOp : BaseOperator { void Run(const std::string &opType, const DataDict &datas, const FloatDict &floatParams, const IntDict &intParams); }; class CudaRotatePosition2DOp : BaseOperator { void Run(const std::string &opType, const DataDict &datas, const FloatDict &floatParams, const IntDict &intParams); }; class CudaLlamaRotatePosition2DOp : BaseOperator { void Run(const std::string &opType, const DataDict &datas, const FloatDict &floatParams, const IntDict &intParams); }; class CudaNearlyRotatePosition2DOp : BaseOperator { void Run(const std::string &opType, const DataDict &datas, const FloatDict &floatParams, const IntDict &intParams); }; class CudaApplyLognAttnOp : BaseOperator { void Run(const std::string &opType, const DataDict &datas, const FloatDict &floatParams, const IntDict &intParams); }; class CudaSplitBatchOp : BaseBatchOperator { void Reshape(const std::string &opType, const DataDict &datas, const FloatDict &floatParams, const IntDict &intParams); void Run(const std::string &opType, const DataDict &datas, const FloatDict &floatParams, const IntDict &intParams); }; class CudaCatBatchOp : BaseBatchOperator { void Reshape(const std::string &opType, const DataDict &datas, const FloatDict &floatParams, const IntDict &intParams); void Run(const std::string &opType, const DataDict &datas, const FloatDict &floatParams, const IntDict &intParams); }; class CudaMulBatchOp : BaseBatchOperator { void Run(const std::string &opType, const DataDict &datas, const FloatDict &floatParams, const IntDict &intParams); }; class CudaMatMulBatchOp : BaseBatchOperator { void Reshape(const std::string &opType, const DataDict &datas, const FloatDict &floatParams, const IntDict &intParams); void Run(const std::string &opType, const DataDict &datas, const FloatDict &floatParams, const IntDict &intParams); }; class CudaMatMulTransBBatchOp : BaseBatchOperator { void Reshape(const std::string &opType, const DataDict &datas, const FloatDict &floatParams, const IntDict &intParams); void Run(const std::string &opType, const DataDict &datas, const FloatDict &floatParams, const IntDict &intParams); }; class CudaSoftmaxBatchOp : BaseBatchOperator { void Run(const std::string &opType, const DataDict &datas, const FloatDict &floatParams, const IntDict &intParams); }; class CudaCatDirectBatchOp : BaseBatchOperator { void Run(const std::string &opType, const DataDict &datas, const FloatDict &floatParams, const IntDict &intParams); }; class CudaAttentionBatchOp : BaseBatchOperator { void Reshape(const std::string &opType, const DataDict &datas, const FloatDict &floatParams, const IntDict &intParams); void Run(const std::string &opType, const DataDict &datas, const FloatDict &floatParams, const IntDict &intParams); }; } #endif //FASTLLM_CUDADEVICE_H