// disable CUDA graphs for batch size > 1 for now.
// Changes in batch size or context size can cause changes to the grid size of some kernels.
use_cuda_graph=false;
#ifndef NDEBUG
// Debug-only trace naming the node (and its 4D extents ne[0..3]) that forced CUDA graph capture off.
// NOTE(review): "%ld" assumes int64_t == long; on LLP64 targets (e.g. Windows/MSVC) PRId64 would be the
// portable format for ne[] — confirm which toolchains this file is built with.
GGML_LOG_DEBUG("%s: disabling CUDA graphs due to batch size > 1 [%s] [%ld %ld %ld %ld]\n",__func__,node->name,node->ne[0],node->ne[1],node->ne[2],node->ne[3]);
#endif
}
// NOTE(review): the first comment below says this branch stores the per-token CPY op parameter, but the
// visible body only repeats the batch-size graph-disable logic from above (including its comments and the
// same debug message). This looks like duplicated or mis-merged code — verify against the upstream version
// of this function; the CPY branch is expected to capture copy-op parameters, not re-disable graphs.
if(node->op==GGML_OP_CPY){
// store the copy op parameter which changes with each token.
// disable CUDA graphs for batch size > 1 for now.
// Changes in batch size or context size can cause changes to the grid size of some kernels.
use_cuda_graph=false;
#ifndef NDEBUG
GGML_LOG_DEBUG("%s: disabling CUDA graphs due to batch size > 1 [%s] [%ld %ld %ld %ld]\n",__func__,node->name,node->ne[0],node->ne[1],node->ne[2],node->ne[3]);
// NOTE(review): the two `if` headers below are back-to-back with no body between them and express
// conflicting device-capability gates (cc < CDNA || cc == RDNA1 vs. cc < RDNA2) — this reads like
// residue of a merge/rebase where both the old and new condition survived. Only one should remain;
// check upstream history to determine which gate is current. Both statements also run past the end
// of this visible chunk, so their bodies cannot be reviewed here.
if(ggml_cuda_info().devices[id].cc<GGML_CUDA_CC_CDNA||ggml_cuda_info().devices[id].cc==GGML_CUDA_CC_RDNA1){// NVIDIA and AMD older than RDNA2 but not CDNA
if(ggml_cuda_info().devices[id].cc<GGML_CUDA_CC_RDNA2){// NVIDIA and AMD older than RDNA2