// Disable CUDA graphs for batch size > 1 for now:
// changes in batch size or context size can change the grid size of some kernels.
use_cuda_graph=false;
#ifndef NDEBUG
GGML_CUDA_LOG_WARN("%s: disabling CUDA graphs due to batch size > 1 [%s] [%ld %ld %ld %ld]\n",__func__,node->name,node->ne[0],node->ne[1],node->ne[2],node->ne[3]);
#endif
}
if(node->op==GGML_OP_CPY){
// Store the copy op parameter, which changes with each token.