#pragma once #include #include #include #include #include #include #include #include #include #include #include #include "debug.h" #define SCCL_MAJOR 1 #define SCCL_MINOR 0 #define SCCL_PATCH 0 #define SCCL_SUFFIX "" #define SCCL_VERSION(X, Y, Z) ((X) * 1000 + (Y) * 100 + (Z)) namespace sccl { /** * @brief 对选中的代码进行简要功能说明 * @note 根据代码作用域(如公开API或内部实现)编写适当的文档注释 */ typedef enum { scclSuccess = 0, /*!< No error */ scclUnhandledHipError = 1, /*!< Unhandled HIP error */ scclSystemError = 2, /*!< Unhandled system error */ scclInternalError = 3, /*!< Internal Error - Please report to RCCL developers */ scclInvalidArgument = 4, /*!< Invalid argument */ scclInvalidUsage = 5, /*!< Invalid usage */ scclRemoteError = 6, /*!< Remote process exited or there was a network error */ scclInProgress = 7, /*!< RCCL operation in progress */ scclNumResults = 8 /*!< Number of result types */ } scclResult_t; typedef enum { testSuccess = 0, testInternalError = 1, testHipError = 2, testScclError = 3, testTimeout = 4, testNumResults = 5 } testResult_t; static const char* scclGetErrorString(scclResult_t code) { switch(code) { case scclSuccess: return "success"; case scclUnhandledHipError: return "unhandled hip error (run with SCCL_DEBUG=INFO for details)"; case scclSystemError: return "unhandled system error (run with SCCL_DEBUG=INFO for details)"; case scclInternalError: return "internal error - please report this issue to the SCCL developers"; case scclInvalidArgument: return "invalid argument (run with SCCL_DEBUG=WARN for details)"; case scclInvalidUsage: return "invalid usage (run with SCCL_DEBUG=WARN for details)"; case scclRemoteError: return "remote process exited or there was a network error"; case scclInProgress: return "SCCL operation in progress"; default: return "unknown result code"; } } ////////////////////////////// SCCL和HIP ////////////////////////////// // Propagate errors up #define SCCLCHECK(call) \ do { \ scclResult_t RES = call; \ if(RES != scclSuccess && RES != scclInProgress) { \ /* Print the back trace*/ \ INFO(SCCL_LOG_CODEALL, "check fail: %s", scclGetErrorString(RES)); \ return RES; \ } \ } while(0); #define SCCLCHECKGOTO(call, RES, label) \ do { \ RES = call; \ if(RES != scclSuccess && RES != scclInProgress) { \ INFO(SCCL_LOG_CODEALL, "%s:%d -> %d", __FILE__, __LINE__, RES); \ goto label; \ } \ } while(0); #define HIPCHECK(cmd) \ do { \ hipError_t err = cmd; \ if(err != hipSuccess) { \ char hostname[1024]; \ gethostname(hostname, 1024); \ printf("%s: Test HIP failure %s:%d '%s'\n", hostname, __FILE__, __LINE__, hipGetErrorString(err)); \ return scclUnhandledHipError; \ } \ } while(0) #define HIPCHECKGOTO(cmd, RES, label) \ do { \ hipError_t err = cmd; \ if(err != hipSuccess) { \ WARN("HIP failure '%s'", hipGetErrorString(err)); \ RES = scclUnhandledHipError; \ goto label; \ } \ } while(false) ////////////////////////////// Value检查 ////////////////////////////// #define EQCHECK(statement, value) \ do { \ if((statement) == value) { \ /* Print the back trace*/ \ INFO(SCCL_LOG_CODEALL, "%s:%d -> %d (%s)", __FILE__, __LINE__, scclSystemError, strerror(errno)); \ return scclSystemError; \ } \ } while(0); #define EQCHECKGOTO(statement, value, RES, label) \ do { \ if((statement) == value) { \ /* Print the back trace*/ \ RES = scclSystemError; \ INFO(SCCL_LOG_CODEALL, "%s:%d -> %d (%s)", __FILE__, __LINE__, RES, strerror(errno)); \ goto label; \ } \ } while(0); ////////////////////////////// SYS ////////////////////////////// // Check system calls #define SYSCHECK(call, name) \ do { \ int retval; \ SYSCHECKVAL(call, name, retval); \ } while(false) #define SYSCHECKVAL(call, name, retval) \ do { \ SYSCHECKSYNC(call, name, retval); \ if(retval == -1) { \ WARN("Call to " name " failed : %s", strerror(errno)); \ return scclSystemError; \ } \ } while(false) #define SYSCHECKSYNC(call, name, retval) \ do { \ retval = call; \ if(retval == -1 && (errno == EINTR || errno == EWOULDBLOCK || errno == EAGAIN)) { \ INFO(SCCL_LOG_CODEALL, "Call to " name " returned %s, retrying", strerror(errno)); \ } else { \ break; \ } \ } while(true) #define SYSCHECKGOTO(statement, RES, label) \ do { \ if((statement) == -1) { \ /* Print the back trace*/ \ RES = scclSystemError; \ INFO(SCCL_LOG_CODEALL, "%s:%d -> %d (%s)", __FILE__, __LINE__, RES, strerror(errno)); \ goto label; \ } \ } while(0); } // namespace sccl