#include "hip/hip_runtime.h"
/*******************************************************************************************
 * This file contains the implementation of the GPU functions used to compute the Jacobian.
 ******************************************************************************************/

#include "species.h"
#include "thermoFluid.h"
#include "reactions.h"
#include "jacobian_kernel.h"
#include "reactions_kernel.h"

/**
 * Scales a running rate factor by the derivative of c^v with respect to c.
 *
 * Returns (*ki) * ((*vf_mi) * pow(*c, *vf_mi - 1)). The arithmetic is the
 * same as in modify_kf_kr_vfgt1_d; computeJ0_d calls this variant only when
 * the exponent is below 1 and the value read from its `c` array exceeds SMALL.
 *
 * @param vf_mi  exponent v
 * @param ki     running factor to be scaled
 * @param c      base value the power is taken of
 * @return       the scaled factor; none of the inputs is modified
 */
__device__ inline REAL modify_kf_kr_vflt1_d(REAL *vf_mi, REAL *ki, REAL *c) {
    const REAL &order = *vf_mi;
    const REAL &base  = *c;

    return (*ki) * (order * pow(base, order - 1.));
}

/**
 * Scales a running rate factor by the derivative of c^v with respect to c.
 *
 * Returns (*ki) * ((*vf_mi) * pow(*c, *vf_mi - 1)). computeJ0_d uses this
 * variant when the exponent is not below 1, without any guard on *c.
 *
 * @param vf_mi  exponent v
 * @param ki     running factor to be scaled
 * @param c      base value the power is taken of
 * @return       the scaled factor; none of the inputs is modified
 */
__device__ inline REAL modify_kf_kr_vfgt1_d(REAL *vf_mi, REAL *ki, REAL *c) {
    const REAL &exponent = *vf_mi;
    const REAL &scale    = *ki;

    return scale * (exponent * pow(*c, exponent - 1.));
}

/**
 * Scales a running rate factor by c^v (no differentiation).
 *
 * Returns (*ki) * pow(*c, *vf_mi). computeJ0_d uses it for every species
 * other than the one the derivative is taken with respect to.
 *
 * @param vf_mi  exponent v
 * @param ki     running factor to be scaled
 * @param c      base value the power is taken of
 * @return       the scaled factor; none of the inputs is modified
 */
__device__ inline REAL modify_kf_kr_knqm_d(REAL *vf_mi, REAL *ki, REAL *c) {
    const REAL &scale = *ki;

    return scale * pow(*c, *vf_mi);
}

/**
 * Adds to J the contribution of one reaction direction, differentiated with
 * respect to species k.
 *
 * Nothing is done unless the exponent of species k in `vf` for reaction w is
 * positive. Otherwise the function starts from *kfi_tmp and, for every species
 * m with a positive exponent in `vf`, multiplies in
 *   - c_m^v                 when m != k,
 *   - v * c_m^(v-1)         when m == k and v >= 1,
 *   - v * c_m^(v-1)         when m == k, v < 1 and c_m > SMALL,
 *   - 0 (the term is reset) when m == k, v < 1 and c_m <= SMALL.
 * The result, times (vr[w,m] - vf[w,m]), is then added to J(k, m, s) through
 * access_J_atomic for every species m.
 *
 * The caller swaps `vf` and `vr` to process the opposite direction, which
 * also flips the sign of the (vr - vf) factor.
 *
 * @param w        reaction index
 * @param s        index passed on to access_vars_data / access_J_atomic
 * @param k        species the derivative is taken with respect to
 * @param vf       exponents of the direction being processed, read at [w + m*react_num_d]
 * @param vr       exponents of the opposite direction, same layout
 * @param c        array read through access_vars_data(c, m, s)
 * @param kfi_tmp  starting factor for this direction (read only)
 * @param J        Jacobian storage updated through access_J_atomic
 */
__device__ inline void computeJ0_d(unsigned int w, unsigned int s, int k, REAL *vf, REAL *vr, REAL *c, REAL *kfi_tmp, REAL *J) {

    // Species k must take part in this direction, otherwise there is no term.
    if (!(*(vf + w + k*react_num_d) > 0)) {
        return;
    }

    REAL dterm = *kfi_tmp;

    for (int m = 0; m < sp_num_d; m++) {

        REAL order = *(vf + w + m*react_num_d);

        if (!(order > 0)) {
            continue;
        }

        REAL conc = access_vars_data(c, m, s);

        if (k != m) {
            dterm = modify_kf_kr_knqm_d(&order, &dterm, &conc);
        } else if (!(order < 1)) {
            dterm = modify_kf_kr_vfgt1_d(&order, &dterm, &conc);
        } else if (conc > SMALL) {
            dterm = modify_kf_kr_vflt1_d(&order, &dterm, &conc);
        } else {
            // v < 1 with a vanishing base: c^(v-1) would blow up, so drop the term.
            dterm = 0.;
        }
    }

    #pragma unroll
    for (int m = 0; m < sp_num_d; m++) {

        const REAL opp_m = *(vr + w + m*react_num_d);
        const REAL own_m = *(vf + w + m*react_num_d);

        REAL contrib = (opp_m - own_m) * dterm;

        access_J_atomic(J, k, m, s, contrib);
    }
}

/**
 * Kernel: accumulates the forward and reverse rate-of-progress derivatives
 * of every reaction into J.
 *
 * Thread mapping: s comes from the x launch dimension, the reaction index w
 * from the y dimension. A thread does nothing when w >= react_num_d,
 * s >= size_d, or its dt_sum_d entry is greater than t_end_h.
 *
 * For its (w, s) pair the thread forms kf*react_c and kr*react_c and, for
 * every species k, calls computeJ0_d once per direction (vf/vr swapped for
 * the reverse one).
 *
 * @param kf, kr    rate arrays read through access_react_num_data
 * @param vf, vr    exponent arrays forwarded to computeJ0_d
 * @param react_c   per-(w, s) multiplier applied to both kf and kr
 * @param c         array forwarded to computeJ0_d
 * @param J         Jacobian storage, updated atomically inside computeJ0_d
 * @param dt_sum_d  per-s value compared against t_end_h
 * @param t_end_h   upper bound for dt_sum_d entries that are still processed
 */
__global__ void computeJ0_g(REAL *kf, REAL *kr, REAL *vf, REAL *vr, REAL *react_c, REAL *c, REAL *J, REAL *dt_sum_d, REAL t_end_h) {

    const unsigned int s = blockDim.x * blockIdx.x + threadIdx.x;
    const unsigned int w = blockDim.y * blockIdx.y + threadIdx.y;

    if (!(w< react_num_d && s < size_d && access_data(dt_sum_d, s) <= t_end_h)) {
        return;
    }

    const REAL multiplier = access_react_num_data(react_c, w, s);

    REAL fwd_factor = access_react_num_data(kf, w, s) * multiplier;
    REAL rev_factor = access_react_num_data(kr, w, s) * multiplier;

    for (int k = 0; k < sp_num_d; k++) {
        // Forward direction, then the reverse one with the exponent arrays swapped.
        computeJ0_d(w, s, k, vf, vr, c, &fwd_factor, J);
        computeJ0_d(w, s, k, vr, vf, c, &rev_factor, J);
    }
}

/**
 * Host launcher for computeJ0_g.
 *
 * Sizes a 2D launch from thermoFluid_d_ptr->thermo_ptr_d.size and
 * reactions_d_ptr->reactions_ptr_d.react_num via set_block_grid2d and
 * enqueues the kernel on Stream_opencc[0]; the launch is not synchronised
 * here. dt_sum_d and t_end_h are taken from file/global scope.
 *
 * If thermoFluid_d_ptr is null the message is printed and the function
 * returns without launching, instead of dereferencing the null pointer.
 */
__host__ void computeJ0(REAL *kf, REAL *kr, REAL *vf, REAL *vr, REAL *react_c, REAL *c, REAL *J) {
    if (thermoFluid_d_ptr == nullptr) {
        MPI_PRINTF("\033[31mBEFORE SETTING UP THE REACTION CLASS, PLEASE CREATE THE THERMO SCOPE.\033[0m\n");
        // The size below cannot be read without the thermo object.
        return;
    }

    size_t size = thermoFluid_d_ptr->thermo_ptr_d.size;
    size_t react_num = reactions_d_ptr->reactions_ptr_d.react_num;

    dim3 griddim, blockdim;

    set_block_grid2d(size, react_num, block_set_J_0, griddim, blockdim);

    computeJ0_g<<<griddim, blockdim, 0, Stream_opencc[0]>>>(kf, kr, vf, vr, react_c, c, J, dt_sum_d, t_end_h);
}

/**
 * Derivative contribution of the broadening factor F, for a given derivative
 * of the reduced pressure.
 *
 * With x = log10(pr) + c, c = -0.4 - 0.67*log10(F_cent) and
 * n = 0.75 - 1.27*log10(F_cent), the function differentiates
 * (x / (n - 0.14*x))^2 using *dprdc / log_ten as the derivative of log10(pr),
 * and returns
 *   log_ten * (-log10(F_cent) * d[(x/(n-0.14x))^2]) / (1 + (x/(n-0.14x))^2)^2.
 * F_cent is treated as constant here.
 *
 * NOTE(review): log10f is the single-precision routine; if REAL is double the
 * logarithms are evaluated in float - confirm this is intended.
 *
 * @param pr      reduced pressure (clamped below by SMALL before the log)
 * @param F       unused
 * @param F_cent  centering factor (clamped below by SMALL before the log)
 * @param dprdc   derivative input, divided by log_ten inside
 * @param T       unused
 */
__device__  inline REAL dFdc2(REAL *pr, REAL *F, REAL *F_cent, REAL *dprdc, REAL *T) {
    const REAL lg_pr    = log10f(fmax(*pr, SMALL));
    const REAL lg_fcent = log10f(fmax(*F_cent, SMALL));

    const REAL c_shift = -0.4 - 0.67*lg_fcent;
    const REAL n_width = 0.75 - 1.27*lg_fcent;

    // x = log10(pr) + c appears in every term below.
    const REAL x = lg_pr + c_shift;

    const REAL dlg_pr = *dprdc/(log_ten);
    const REAL denom  = (n_width - 0.14*x);

    // d/d(.) of (x / (n - 0.14 x))^2
    const REAL dratio_sq = (2.*x/(denom*denom) *
                (dlg_pr - x*(- 0.14*(dlg_pr))/(n_width - 0.14*x)));

    REAL ratio_sq = x/(n_width - 0.14*x);
    ratio_sq = ratio_sq*ratio_sq;

    const REAL one_plus = 1 + ratio_sq;

    return (log_ten * (-lg_fcent*dratio_sq)/(one_plus*one_plus));
}

/**
 * Evaluates the centering factor and the broadening factor of a Troe-type
 * fall-off expression.
 *
 *   F_cent = (1 - a)*exp(-T/T3) + a*exp(-T/T1) + exp(-T2/T)
 *   A      = log10(pr) - 0.67*log10(F_cent) - 0.4
 *   B      = 0.806 - 1.1762*log10(F_cent) - 0.14*log10(pr)
 *   Fi     = F_cent ^ (1 / (1 + (A/B)^2))
 *
 * Both logarithm arguments are clamped below by 1e-40.
 *
 * NOTE(review): log10f is the single-precision routine; if REAL is double the
 * logarithms are evaluated in float - confirm this is intended.
 *
 * @param T        temperature
 * @param pri      reduced pressure
 * @param a_troe   coefficient a
 * @param T1_troe  coefficient T1
 * @param T2_troe  coefficient T2
 * @param T3_troe  coefficient T3
 * @param F_cent   [out] centering factor
 * @param Fi       [out] broadening factor
 */
__device__ inline void compute_Fcent_Fi_j(REAL *T, REAL *pri, REAL *a_troe, REAL *T1_troe, REAL *T2_troe, REAL *T3_troe, REAL *F_cent, REAL *Fi) {

    const REAL f_center = (1.0 - *a_troe) * exp(- *T / *T3_troe) + *a_troe * exp(- *T / *T1_troe) + exp(- *T2_troe / *T);

    *F_cent = f_center;

    // Each logarithm is needed twice; evaluate it once.
    const auto lg_pr = log10f(fmax(*pri, 1e-40));
    const auto lg_fc = log10f(fmax(f_center, 1e-40));

    const REAL a_term = lg_pr - 0.67*lg_fc - 0.4;
    const REAL b_term = 0.806 - 1.1762*lg_fc-0.14*lg_pr;

    REAL expo = a_term/b_term;

    expo = 1./(expo*expo + 1.);

    *Fi = pow(f_center, expo);
}

/**
 * Kernel: adds the third-body and fall-off derivative terms to J.
 *
 * Thread mapping: s comes from the x launch dimension, the reaction index w
 * from the y dimension. A thread does nothing when w >= react_num_d,
 * s >= size_d, or its dt_sum_d entry is greater than t_end_h.
 *
 * Per (w, s):
 *  - react_type[w] == 1: adds v_net[w,m] * q[w,s] * tb_coeffs[k,w] to J(k,m,s).
 *  - react_type[w] == 2: with sum_c = sum_k tb_coeffs[k,w] * c[k,s] and only
 *    when sum_c > SMALL,
 *      fall_type[w] == 1 adds
 *        v_net[w,m] * q * (-tb_coeffs[k,w]) / (max(sum_c,1e-6) * (1 + kf_low*sum_c));
 *      fall_type[w] == 2 adds
 *        v_net[w,m] * q * (d * 1/(1 + kf_low*sum_c) + dFdc2(...)),
 *        where d = -tb_coeffs[k,w] / max(sum_c,1e-6).
 *
 * Every update goes straight to J through access_J_atomic, like the other
 * Jacobian kernels in this file. No shared memory and no block barrier are
 * used: threads of one block may map to different s, so a block-wide staging
 * buffer cannot hold their contributions, and the launcher passes 0 bytes of
 * dynamic shared memory.
 *
 * @param react_type   per-reaction type code (1 or 2 are handled)
 * @param fall_type    per-reaction fall-off code (1 or 2 are handled)
 * @param q            per-(w, s) factor read through access_react_num_data
 * @param T            per-s temperature read through access_data
 * @param tb_coeffs    coefficients read at [k + sp_num_d*w]
 * @param fall_coeffs  5 entries per reaction; entries 0..3 go to compute_Fcent_Fi_j
 * @param c            array read through access_vars_data
 * @param J            Jacobian storage updated through access_J_atomic
 * @param v_net        coefficients read at [w + react_num_d*m]
 * @param kf_low       per-(w, s) factor read through access_react_num_data
 * @param kf           unused
 * @param dt_sum_d     per-s value compared against t_end_h
 * @param t_end_h      upper bound for dt_sum_d entries that are still processed
 */
__global__ void computeJ1_g(REAL *react_type, REAL *fall_type, REAL *q, REAL *T, REAL *tb_coeffs, REAL *fall_coeffs, REAL *c, REAL *J, REAL *v_net, REAL *kf_low, REAL *kf, REAL *dt_sum_d, REAL t_end_h) {

    unsigned int s = blockDim.x * blockIdx.x + threadIdx.x;
    unsigned int w = blockDim.y * blockIdx.y + threadIdx.y;

    if (w< react_num_d && s < size_d && access_data(dt_sum_d, s) <= t_end_h) {

        REAL q_tmp = access_react_num_data(q, w, s);

        if (*(react_type + w) == 1) {
            #pragma unroll
            for (int m = 0; m <sp_num_d; m++) {
                // v_net * q does not depend on k; form it once per m.
                const REAL vq = *(v_net + w + react_num_d*m) * q_tmp;

                for (int k = 0; k <sp_num_d; k++) {
                    REAL contrib = vq * *(tb_coeffs + k + sp_num_d*w);

                    access_J_atomic(J, k, m, s, contrib);
                }
            }
        }

        if (*(react_type + w) == 2) {
            REAL tmp, tmp1, tmp2, tmp3, tmp4;
            REAL sum_c = 0;

            for (int k = 0; k < sp_num_d; k++) {
                sum_c += *(tb_coeffs + k + sp_num_d*w) * access_vars_data(c, k, s);
            }

            if (*(fall_type + w) == 1) {

                if (sum_c > SMALL) {

                    tmp1 = access_react_num_data(kf_low, w, s); //pri

                    tmp2 = 1./(fmax(sum_c, 1e-6)*(1 + tmp1*sum_c));
                    #pragma unroll
                    for (int m = 0; m <sp_num_d; m++) {
                        for (int k = 0; k <sp_num_d; k++) {
                            tmp3 = - *(tb_coeffs + k + sp_num_d*w);

                            tmp4 = *(v_net + w + react_num_d*m) * q_tmp * tmp2 * tmp3;
                            access_J_atomic(J, k, m, s, tmp4);
                        }
                    }
                }
            }

            if (*(fall_type + w) == 2) {

                if (sum_c > SMALL) {

                    tmp1 = access_react_num_data(kf_low, w, s);

                    tmp2 = tmp1*sum_c;  //pri
                    tmp3 = 1/(1 + tmp2);

                    REAL T_tmp = access_data(T, s);
                    REAL F_cent, Fi;

                    compute_Fcent_Fi_j(&T_tmp, &tmp2,
                                (fall_coeffs + 5*w),
                                (fall_coeffs + 5*w + 1),
                                (fall_coeffs + 5*w + 2),
                                (fall_coeffs + 5*w + 3),
                                    &F_cent, &Fi);

                    for (int m = 0; m <sp_num_d; m++) {
                        for (int k = 0; k <sp_num_d; k++) {
                            tmp4 = - *(tb_coeffs + k + sp_num_d*w)/fmax(sum_c, 1e-6);

                            tmp = dFdc2(&tmp2, &Fi, &F_cent, &tmp4, &T_tmp);

                            tmp = *(v_net + w + react_num_d*m) * q_tmp * (tmp4 * tmp3 + tmp);
                            access_J_atomic(J, k, m, s, tmp);
                        }
                    }
                }
            }
        }
    }
}

/**
 * Host launcher for computeJ1_g.
 *
 * Sizes a 2D launch from thermoFluid_d_ptr->thermo_ptr_d.size and
 * reactions_d_ptr->reactions_ptr_d.react_num via set_block_grid2d and
 * enqueues the kernel on Stream_opencc[0] with 0 bytes of dynamic shared
 * memory; the launch is not synchronised here. dt_sum_d and t_end_h are
 * taken from file/global scope.
 *
 * If thermoFluid_d_ptr is null the message is printed and the function
 * returns without launching, instead of dereferencing the null pointer.
 */
__host__ void computeJ1(REAL *react_type, REAL *fall_type, REAL *q, REAL *T, REAL *tb_coeffs, REAL *fall_coeffs, REAL *c, REAL *J, REAL *v_net, REAL *kf_low, REAL *kf) {
    if (thermoFluid_d_ptr == nullptr) {
        MPI_PRINTF("\033[31mBEFORE SETTING UP THE REACTION CLASS, PLEASE CREATE THE THERMO SCOPE.\033[0m\n");
        // The size below cannot be read without the thermo object.
        return;
    }

    size_t size = thermoFluid_d_ptr->thermo_ptr_d.size;
    size_t react_num = reactions_d_ptr->reactions_ptr_d.react_num;

    dim3 griddim, blockdim;

    set_block_grid2d(size, react_num, block_set_J_1, griddim, blockdim);

    computeJ1_g<<<griddim, blockdim, 0, Stream_opencc[0]>>>(react_type, fall_type, q, T, tb_coeffs, fall_coeffs, c, J, v_net, kf_low, kf, dt_sum_d, t_end_h);
}

/**
 * Evaluates a six-coefficient temperature polynomial:
 *
 *   (n0 - 1)/T + n1/2 + n2*T/3 + n3*T^2/4 + n4*T^3/5 + n5/T^2
 *
 * @param nasa  six coefficients n0..n5
 * @param T     temperature
 * @return      the polynomial value
 */
__device__  inline REAL compute_dBdT_d(REAL *nasa, REAL *T) {

    const REAL &temp = *T;
    const REAL temp_sq = temp * temp;

    return (nasa[0] - 1.) / temp + nasa[1]*0.5 + nasa[2] * temp/3. + nasa[3] * temp_sq*0.25 +
           nasa[4] * temp_sq * temp *0.2 + nasa[5]/temp_sq;
}

/**
 * Kernel: fills dBdT(w, s) with compute_dBdT_d evaluated for species w at
 * temperature T(s).
 *
 * Thread mapping: s comes from the x launch dimension, the species index w
 * from the y dimension. A thread does nothing when w >= sp_num_d,
 * s >= size_d, or its dt_sum_d entry is greater than t_end_h.
 *
 * Each species owns 14 entries in sp_nasa and 3 in T_range. When
 * T >= T_range[1 + 3*w] the six coefficients at offsets 7..12 are used,
 * otherwise those at offsets 0..5.
 *
 * @param T         per-s temperature read through access_data
 * @param T_range   3 entries per species; entry 1 is the switch-over temperature
 * @param sp_nasa   14 coefficients per species
 * @param dBdT      [out] written through access_sp_num_data
 * @param dt_sum_d  per-s value compared against t_end_h
 * @param t_end_h   upper bound for dt_sum_d entries that are still processed
 */
__global__ void compute_dBdT_g(REAL *T, REAL *T_range, REAL *sp_nasa, REAL *dBdT, REAL *dt_sum_d, REAL t_end_h) {
    const unsigned int s = blockDim.x * blockIdx.x + threadIdx.x;
    const unsigned int w = blockDim.y * blockIdx.y + threadIdx.y;

    if (!(w< sp_num_d && s < size_d && access_data(dt_sum_d, s) <= t_end_h)) {
        return;
    }

    REAL T_tmp = access_data(T, s);
    const REAL T_switch = *(T_range + 1 + w*3);

    // First coefficient of the set that applies at this temperature.
    const int first = (T_tmp >= T_switch) ? 7 : 0;

    REAL coeffs[6];
    for (int i = 0; i < 6; i++) {
        coeffs[i] = *(sp_nasa + first + i + w*14);
    }

    access_sp_num_data(dBdT, w, s) = compute_dBdT_d(&coeffs[0], &T_tmp);
}

/**
 * Host launcher for compute_dBdT_g.
 *
 * Sizes a 2D launch from thermoFluid_d_ptr->thermo_ptr_d.size and
 * species_d_ptr->species_const_d.sp_num via set_block_grid2d and enqueues
 * the kernel on Stream_opencc[0]; the launch is not synchronised here.
 * Neither global pointer is checked for null.
 */
__host__ void compute_dBdT_h(REAL *T, REAL *T_range, REAL *sp_nasa, REAL *dBdT) {

    dim3 griddim, blockdim;

    size_t extent_s = thermoFluid_d_ptr->thermo_ptr_d.size;
    size_t extent_w = species_d_ptr->species_const_d.sp_num;

    set_block_grid2d(extent_s, extent_w, block_set_J02d, griddim, blockdim);

    compute_dBdT_g<<<griddim, blockdim, 0, Stream_opencc[0]>>>(T, T_range, sp_nasa, dBdT, dt_sum_d, t_end_h);
}

/**
 * Multiplies two rate terms by the product of c_k^v over every species whose
 * exponent v in `vf` for reaction w is positive, then turns *kfi into a
 * term proportional to -(sum of those exponents) / T.
 *
 * On return:
 *   *dkfdTi = *dkfdTi_in * prod(c_k^v)
 *   *kfi    = *kfi_in    * prod(c_k^v) * (-(sum v) * (*T_1))
 *
 * @param w       reaction index
 * @param s       index passed on to access_vars_data
 * @param vf      exponents read at [w + k*react_num_d]
 * @param dkfdTi  [in,out] first term
 * @param kfi     [in,out] second term
 * @param c       array read through access_vars_data(c, k, s)
 * @param T_1     multiplier applied with the exponent sum (callers pass 1/T)
 */
__device__ inline void modify_kfi_d(int w, int s, REAL *vf, REAL *dkfdTi, REAL *kfi, REAL *c, REAL *T_1) {

    REAL order_sum = 0;

    for (int k = 0; k < sp_num_d; k++) {
        const REAL order = *(vf + w + k*react_num_d);

        if (!(order > 0)) {
            continue;
        }

        const REAL factor = pow(access_vars_data(c, k, s), order);

        *dkfdTi *= factor;
        *kfi *= factor;
        order_sum += order;
    }

    *kfi *= - order_sum * *T_1;
}

/**
 * Temperature-derivative contribution of the broadening factor F.
 *
 * Same construction as dFdc2, but F_cent is differentiated too:
 *   dFcent/dT = (a - 1)*exp(-T/T3)/T3 - a*exp(-T/T1)/T1 + T2*exp(-T2/T)/T^2
 * with a, T1, T2, T3 = fall_coeffs[0..3]. That derivative is divided by
 * F_cent*log_ten and feeds the derivatives of c = -0.4 - 0.67*log10(F_cent)
 * and n = 0.75 - 1.27*log10(F_cent). With x = log10(pr) + c the result is
 *   log_ten * ( dlogFcent/(1 + r) - log10(F_cent) * dr / (1 + r)^2 ),
 *   r = (x / (n - 0.14*x))^2.
 *
 * NOTE(review): log10f is the single-precision routine; if REAL is double the
 * logarithms are evaluated in float - confirm this is intended.
 *
 * @param pr           reduced pressure (clamped below by SMALL before the log)
 * @param F            unused
 * @param F_cent       centering factor
 * @param dprdc        derivative input, divided by log_ten inside
 * @param T            temperature
 * @param fall_coeffs  coefficients a, T1, T2, T3 at indices 0..3
 */
__device__ inline REAL dFdT2(REAL *pr, REAL *F, REAL *F_cent, REAL *dprdc, REAL *T, REAL *fall_coeffs) {
    const REAL lg_pr    = log10f(fmax(*pr, SMALL));
    const REAL lg_fcent = log10f(fmax(*F_cent, SMALL));

    const REAL dFc_raw = ((fall_coeffs[0] - 1)*exp(-*T/fall_coeffs[3])/fall_coeffs[3]
                     - fall_coeffs[0]*exp(-*T/fall_coeffs[1])/fall_coeffs[1]
                     + fall_coeffs[2]*exp(-fall_coeffs[2]/ *T)/(*T * *T)
                        );

    // d log10(F_cent) / dT
    const REAL dlg_fc = dFc_raw/(*F_cent*log_ten);

    const REAL dc_dT = -0.67*dlg_fc;
    const REAL dn_dT = -1.27*dlg_fc;

    const REAL dlg_pr  = *dprdc/(log_ten);
    const REAL c_shift = -0.4 - 0.67*lg_fcent;
    const REAL n_width = 0.75 - 1.27*lg_fcent;

    // x = log10(pr) + c and its derivative appear in several terms.
    const REAL x  = lg_pr + c_shift;
    const REAL dx = dlg_pr + dc_dT;

    const REAL denom = (n_width - 0.14*x);

    const REAL dratio_sq = (2.*x/(denom*denom) *
                (dx - x*(dn_dT - 0.14*dx)/(n_width - 0.14*x)));

    REAL ratio_sq = x/(n_width - 0.14*x);
    ratio_sq = ratio_sq*ratio_sq;

    const REAL one_plus = 1 + ratio_sq;

    return (log_ten * (dlg_fc/(1. + ratio_sq)
        -lg_fcent*dratio_sq/(one_plus*one_plus)));
}

/**
 * Kernel: adds the temperature-derivative terms of every reaction to the
 * entries J(sp_num_d, k, s), through access_J_atomic.
 *
 * Thread mapping: s comes from the x launch dimension, the reaction index w
 * from the y dimension. A thread does nothing when w >= react_num_d,
 * s >= size_d, or its dt_sum_d entry is greater than t_end_h.
 *
 * Per (w, s) the kernel builds
 *   dqidT - derivative of the forward/reverse rate terms, and
 *   dcidT - derivative of the third-body / fall-off factor, scaled by q(w, s),
 * and adds (vr[w,k] - vf[w,k]) * (dqidT + dcidT) for every species k.
 *
 * NOTE(review): abe is read as 6 entries per reaction, of which entries 1, 2
 * (and 4, 5 for fall-off reactions) are used - presumably the temperature
 * exponent and activation term of the high- and low-pressure rate fits;
 * confirm against the code that fills abe.
 */
__global__ void compute_J2_g(REAL *dBdT, REAL *q, REAL *v_net, REAL *kf_low, REAL *kf, REAL *kr, REAL *T, REAL *abe,
    REAL *react_type, REAL *tb_coeffs, REAL *c, REAL *fall_type, REAL *fall_coeffs, REAL *vr, REAL *vf, REAL *J, REAL *dt_sum_d, REAL t_end_h) {
    unsigned int s = blockDim.x * blockIdx.x + threadIdx.x;
    unsigned int w = blockDim.y * blockIdx.y + threadIdx.y; // reaction index

    if (w< react_num_d && s < size_d && access_data(dt_sum_d, s) <= t_end_h) {

        // sum_dBdT: v_net-weighted sum of dBdT over species.
        // sum_c:    tb_coeffs-weighted sum of c over species.
        REAL sum_dBdT = 0;
        REAL sum_c = 0;
        
        for (int k = 0; k < sp_num_d; k++) {
            sum_dBdT += access_sp_num_data(dBdT, k, s) * *(v_net + w + react_num_d*k);
            sum_c += *(tb_coeffs + k + sp_num_d*w) * access_vars_data(c, k, s);
        }

        REAL kfi = access_react_num_data(kf, w, s);
        REAL kri = access_react_num_data(kr, w, s);

        REAL T_tmp = access_data(T, s);
        REAL T_1 = 1. / T_tmp;

        REAL abe_tmp[4];

        // Entries 1, 2, 4 and 5 of the 6-wide abe row of reaction w.
        abe_tmp[0] = *(abe + 1 + 6*w); 
        abe_tmp[1] = *(abe + 2 + 6*w); 
        abe_tmp[2] = *(abe + 4 + 6*w); 
        abe_tmp[3] = *(abe + 5 + 6*w);

        // (abe[1] + abe[2]/T)/T: relative temperature derivative applied to kf below.
        REAL tmp = (abe_tmp[0] + abe_tmp[1] * T_1) * T_1;

        REAL dkfdTi = kfi * tmp;

        // The reverse rate additionally carries the -sum_dBdT term.
        REAL dkrdTi = (tmp - sum_dBdT) * (kri);

        // Type 1: all four terms are scaled by sum_c.
        if (*(react_type + w) == 1) {
            dkfdTi *= sum_c;
            dkrdTi *= sum_c;
            kfi *= sum_c;
            kri *= sum_c;
        }

        // Only assigned (and only read afterwards) for react_type == 2.
        REAL pri0, pri1, pri2;
        REAL F_cent, Fi;
        REAL fc_tmp[4];

        if (*(react_type + w) == 2) {
            //pri0 = sum_c*access_react_num_data(kf_low, w, s)/access_react_num_data(kf, w, s);
            pri0 = sum_c*access_react_num_data(kf_low, w, s);

            pri1 = pri0/(1 + pri0);
            pri2 = 1./((1 + pri0));

            // Fall-off type 1: scale by pri0/(1 + pri0).
            if (*(fall_type + w) == 1) {
                dkfdTi *= pri1;
                dkrdTi *= pri1;
                kfi *= pri1;
                kri *= pri1;
            }

            // Fall-off type 2: additionally scale by the broadening factor Fi.
            if(*(fall_type + w) == 2) {

                fc_tmp[0] = *(fall_coeffs + 5*w);
                fc_tmp[1] = *(fall_coeffs + 5*w + 1);
                fc_tmp[2] = *(fall_coeffs + 5*w + 2);
                fc_tmp[3] = *(fall_coeffs + 5*w + 3);

                compute_Fcent_Fi_j(&T_tmp, &pri0, 
                                 &fc_tmp[0], 
                                 &fc_tmp[1], 
                                 &fc_tmp[2], 
                                 &fc_tmp[3], 
                                &F_cent, &Fi);

                dkfdTi *= pri1*Fi;
                dkrdTi *= pri1*Fi;
                kfi *= pri1*Fi;
                kri *= pri1*Fi;
            }
        }

        // Multiply in the c^v products; kfi/kri come back additionally scaled
        // by -(sum of exponents)/T (see modify_kfi_d).
        modify_kfi_d(w, s, vf, &dkfdTi, &kfi, c, &T_1);

        modify_kfi_d(w, s, vr, &dkrdTi, &kri, c, &T_1);

        REAL dqidT = (dkfdTi - dkrdTi) + (kfi - kri);

        // dcidT: temperature derivative of the third-body / fall-off factor.
        REAL dcidT = 0.0;
        if (*(react_type + w) == 1) {
            dcidT = T_1;
        }

        if (*(react_type + w) == 2) {
            REAL tmp1, tmp2, tmp3;
            tmp1 = (abe_tmp[0] + abe_tmp[1]*T_1);
            tmp2 = (abe_tmp[2] + abe_tmp[3]*T_1);
            tmp3 = (tmp2 - tmp1 - 1)*T_1;

            if (*(fall_type + w) == 1) {
                if (sum_c > SMALL) {

                    dcidT = tmp3*pri2;
                }
            }

            if (*(fall_type + w) == 2) {
                if (sum_c > SMALL) {
                    // NOTE(review): tmp4 is computed but never used.
                    REAL tmp4 = tmp3*pri0;

                    tmp1 = dFdT2(&pri0, &Fi, &F_cent, &tmp3, &T_tmp, &fc_tmp[0]);
                    dcidT = tmp3*pri2 + tmp1;
                }
            }
        }

        dcidT *= access_react_num_data(q, w, s);
        // sum_c is reused here as scratch for the per-species contribution.
        #pragma unroll
        for (int k = 0; k < sp_num_d; k++) {
            sum_c = (*(vr + w + k*react_num_d) - *(vf + w + k*react_num_d)) * (dqidT + dcidT);
            access_J_atomic(J, sp_num_d, k, s, sum_c); 
        }
    }
}


/**
 * Host launcher for compute_J2_g.
 *
 * Sizes a 2D launch from thermoFluid_d_ptr->thermo_ptr_d.size and
 * reactions_d_ptr->reactions_ptr_d.react_num via set_block_grid2d and
 * enqueues the kernel on Stream_opencc[0]; the launch is not synchronised
 * here. Neither global pointer is checked for null.
 */
__host__ void compute_J2_h(REAL *dBdT, REAL *q, REAL *v_net, REAL *kf_low, REAL *kf, REAL *kr, REAL *T, REAL *abe,
    REAL *react_type, REAL *tb_coeffs, REAL *c, REAL *fall_type, REAL *fall_coeffs, REAL *vr, REAL *vf, REAL *J) {

    dim3 griddim, blockdim;

    size_t extent_s = thermoFluid_d_ptr->thermo_ptr_d.size;
    size_t extent_w = reactions_d_ptr->reactions_ptr_d.react_num;

    set_block_grid2d(extent_s, extent_w, block_set_J_2, griddim, blockdim);

    compute_J2_g<<<griddim, blockdim, 0, Stream_opencc[0]>>>(
        dBdT, q, v_net, kf_low, kf, kr, T, abe,
        react_type, tb_coeffs, c, fall_type, fall_coeffs, vr, vf, J,
        dt_sum_d, t_end_h);
}

/**
 * Evaluates 8313.8462 * (n0 + 2*n1*T + 3*n2*T^2 + 4*n3*T^3).
 *
 * NOTE(review): 8313.8462 is presumably the universal gas constant in
 * J/(kmol K), which would make this the temperature derivative of a
 * polynomial heat capacity - confirm the constant and its units.
 *
 * @param nasa  four coefficients n0..n3
 * @param T     temperature
 */
__device__ inline REAL compute_dcpkdT(REAL *nasa, REAL *T) {
    const REAL &temp = *T;
    const REAL temp_sq = temp * temp;

    return 8313.8462*(nasa[0] + 2*nasa[1]* temp + 3*nasa[2]*temp_sq
    + 4*nasa[3]*temp_sq* temp);
}

/**
 * Kernel: fills the J entries that involve index sp_num_d, using the entries
 * already stored in J.
 *
 * One thread per s (x launch dimension); a thread does nothing when
 * s >= size_d or its dt_sum_d entry is greater than t_end_h.
 *
 * Steps for one s:
 *  1. cp_mix  = sum_k c_k * sp_cp_mole_k, and
 *     dcp_mix = sum_k c_k * compute_dcpkdT(coefficients of species k).
 *     The four coefficients come from sp_nasa offsets 8..11 when
 *     T >= T_range[1 + 3*k], otherwise from offsets 1..4 (14 per species).
 *  2. dTdt = -(sum_k sp_ha_mole_k * dcdt_k) / cp_mix.
 *  3. For every k:
 *     J(k, sp_num_d) = -(sum_m sp_ha_mole_m * J(k, m) + sp_cp_mole_k * dTdt) / cp_mix.
 *  4. J(sp_num_d, sp_num_d) =
 *       -(sum_k [sp_cp_mole_k * dcdt_k + sp_ha_mole_k * J(sp_num_d, k)]
 *         + dTdt * dcp_mix) / cp_mix + dTdt / T.
 *
 * J is read and written in place with plain (non-atomic) access_J.
 *
 * @param T           per-s temperature read through access_data
 * @param T_range     3 entries per species; entry 1 is the switch-over temperature
 * @param sp_nasa     14 coefficients per species
 * @param c           array read through access_vars_data
 * @param sp_cp_mole  read through access_sp_num_data
 * @param sp_ha_mole  read through access_sp_num_data
 * @param dcdt        array read through access_vars_data
 * @param J           [in,out] Jacobian storage
 * @param dt_sum_d    per-s value compared against t_end_h
 * @param t_end_h     upper bound for dt_sum_d entries that are still processed
 */
__global__ void compute_J3_g(REAL *T, REAL *T_range, REAL *sp_nasa, REAL *c, REAL *sp_cp_mole, REAL *sp_ha_mole, 
            REAL *dcdt, REAL *J, REAL *dt_sum_d, REAL t_end_h) {
    const unsigned int s = blockDim.x * blockIdx.x + threadIdx.x;

    if (!(s < size_d && access_data(dt_sum_d, s) <= t_end_h)) {
        return;
    }

    REAL T_tmp = access_data(T, s);

    // Step 1: mixture sums weighted by c.
    REAL cp_mix = 0;
    REAL dcp_mix = 0;

    for (int k = 0; k < sp_num_d; k++) {
        const REAL T_switch = *(T_range + 1 + k*3);
        const int first = (T_tmp >= T_switch) ? 8 : 1;

        REAL coeffs[4];
        for (int i = 0; i < 4; i++) {
            coeffs[i] = *(sp_nasa + first + i + k*14);
        }

        const REAL dcp_k = compute_dcpkdT(&coeffs[0], &T_tmp);
        const REAL c_k = access_vars_data(c, k, s);

        dcp_mix += c_k*dcp_k;

        cp_mix += c_k*access_sp_num_data(sp_cp_mole, k, s);
    }

    // Step 2: temperature rate.
    REAL dTdt = 0;

    for (int k = 0; k < sp_num_d; k++) {
        dTdt += access_sp_num_data(sp_ha_mole, k, s)*access_vars_data(dcdt, k, s);
    }

    const REAL neg_inv_cp = -1./cp_mix;

    dTdt *= neg_inv_cp;

    // Step 3: entries J(k, sp_num_d).
    #pragma unroll
    for (int k = 0; k < sp_num_d; k++) {
        REAL acc = 0.0;

        for (int m = 0; m < sp_num_d; m++) {
            acc += access_sp_num_data(sp_ha_mole, m, s)*access_J(J, k, m, s);
        }

        acc += access_sp_num_data(sp_cp_mole, k, s)*dTdt;

        access_J(J, k, sp_num_d, s) = acc*neg_inv_cp;
    }

    // Step 4: entry J(sp_num_d, sp_num_d).
    REAL corner = 0.0;
    #pragma unroll
    for (int k = 0; k < sp_num_d; k++) {
        corner += access_sp_num_data(sp_cp_mole, k, s)*access_vars_data(dcdt, k, s) +
        access_sp_num_data(sp_ha_mole, k, s)*access_J(J, sp_num_d, k, s);
    }

    corner += dTdt*dcp_mix;
    corner *= neg_inv_cp;
    corner += dTdt/T_tmp;

    access_J(J, sp_num_d, sp_num_d, s) = corner;
}

/**
 * Host launcher for compute_J3_g.
 *
 * Sizes a 1D launch from thermoFluid_d_ptr->thermo_ptr_d.size via
 * set_block_grid and enqueues the kernel on Stream_opencc[0]; the launch is
 * not synchronised here. thermoFluid_d_ptr is not checked for null.
 *
 * The kernel has no reaction dimension, so the reaction count is not read.
 */
__host__ void compute_J3_h(REAL *T, REAL *T_range, REAL *sp_nasa, REAL *c, REAL *sp_cp_mole, REAL *sp_ha_mole, 
            REAL *dcdt, REAL *J) {

    size_t size = thermoFluid_d_ptr->thermo_ptr_d.size;

    dim3 griddim, blockdim;

    set_block_grid(size, block_set_J_3, griddim, blockdim);

    compute_J3_g<<<griddim, blockdim, 0, Stream_opencc[0]>>>(T, T_range, sp_nasa, c, sp_cp_mole, sp_ha_mole, dcdt, J, dt_sum_d, t_end_h);

}


