/*---------------------------------------------------------------------------*\
  =========                 |
  \\      /  F ield         | OpenFOAM: The Open Source CFD Toolbox
   \\    /   O peration     |
    \\  /    A nd           | www.openfoam.com
     \\/     M anipulation  |
-------------------------------------------------------------------------------
    Copyright (C) 2011-2017 OpenFOAM Foundation
    Copyright (C) 2020 OpenCFD Ltd.
-------------------------------------------------------------------------------
License
    This file is part of OpenFOAM.

    OpenFOAM is free software: you can redistribute it and/or modify it
    under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    OpenFOAM is distributed in the hope that it will be useful, but WITHOUT
    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    for more details.

    You should have received a copy of the GNU General Public License
    along with OpenFOAM.  If not, see <http://www.gnu.org/licenses/>.

\*---------------------------------------------------------------------------*/

#include "gpuMULES.H"
#include "gpuupwind.H"
#include "gpufvcSurfaceIntegrate.H"
#include "gpulocalEulerDdtScheme.H"
#include "slicedSurfacegpuFields.H"
#include "wedgegpuFvPatch.H"
#include "syncTools.H"

// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
namespace Foam
{
	//- Per-cell gather over the internal faces of one cell.
	//
	//  Intended for thrust::for_each over a zip of
	//      (psiMaxn, psiMinn, sumPhip, mSumPhim, sumPhiBD, cell index).
	//  Faces owned by the cell are the contiguous range
	//  [ownStart[id], ownStart[id+1]); faces for which the cell is the
	//  neighbour are reached through losort over
	//  [neiStart[id], neiStart[id+1]).
	//  Each invocation updates only the five accumulators of its own cell.
	struct limiterFunctor{
		const label* owner;      // owner cell of each internal face
		const label* neighbour;  // neighbour cell of each internal face
		const label* ownStart;   // per-cell start offsets of owned faces
		const label* neiStart;   // per-cell start offsets into losort
		const label* losort;     // face indices grouped by neighbour cell
		const scalar* phiCorrIf; // correction flux on internal faces
		const scalar* psiIf;     // cell values of psi
		const scalar* phiBDIf;   // bounded flux on internal faces

		limiterFunctor
		(
			const label* _owner,
			const label* _neighbour,
			const label* _ownStart,
			const label* _neiStart,
			const label* _losort,
			const scalar* _phiCorrIf,
			const scalar* _psiIf,
			const scalar* _phiBDIf
		):
			owner(_owner),
			neighbour(_neighbour),
			ownStart(_ownStart),
			neiStart(_neiStart),
			losort(_losort),
			phiCorrIf(_phiCorrIf),
			psiIf(_psiIf),
			phiBDIf(_phiBDIf)
		{}

		template <typename Tuple>
		__host__ __device__
		void operator()(Tuple t)
		{
			// Work on local copies, results are stored back at the end
			scalar psiMaxn = thrust::get<0>(t);
			scalar psiMinn = thrust::get<1>(t);
			scalar sumPhip = thrust::get<2>(t);
			scalar mSumPhim = thrust::get<3>(t);
			scalar sumPhiBD = thrust::get<4>(t);
			const label id = thrust::get<5>(t);

			const label oStart = ownStart[id];
			const label oSize = ownStart[id+1] - oStart;

			const label nStart = neiStart[id];
			const label nSize = neiStart[id+1] - nStart;

			// Faces for which this cell is the owner
			for(label i = 0; i<oSize; i++)
			{
				const label face = oStart + i;

				psiMaxn = max(psiMaxn, psiIf[neighbour[face]]);
				psiMinn = min(psiMinn, psiIf[neighbour[face]]);

				sumPhiBD += phiBDIf[face];

				const scalar phiCorrf = phiCorrIf[face];

				if (phiCorrf > 0)
				{
					sumPhip += phiCorrf;
				}
				else
				{
					mSumPhim -= phiCorrf;
				}
			}

			// Faces for which this cell is the neighbour: the flux sign is
			// reversed as seen from this cell
			for(label i = 0; i<nSize; i++)
			{
				const label face = losort[nStart + i];

				psiMaxn = max(psiMaxn, psiIf[owner[face]]);
				psiMinn = min(psiMinn, psiIf[owner[face]]);

				sumPhiBD -= phiBDIf[face];

				const scalar phiCorrf = phiCorrIf[face];

				if (phiCorrf > 0)
				{
					mSumPhim += phiCorrf;
				}
				else
				{
					sumPhip -= phiCorrf;
				}
			}

			// Store the results through the tuple's element references.
			// Without this the accumulated values are discarded.
			thrust::get<0>(t) = psiMaxn;
			thrust::get<1>(t) = psiMinn;
			thrust::get<2>(t) = sumPhip;
			thrust::get<3>(t) = mSumPhim;
			thrust::get<4>(t) = sumPhiBD;
		}
	};

	
	//- Per-cell gather of the local extrema over the patch faces of a cell.
	//
	//  Intended for thrust::for_each over a zip of
	//      (psiMaxn[cell], psiMinn[cell], index into the patch cell list).
	//  The patch faces attached to entry id are losort[k] for k in
	//  [neiStart[id], neiStart[id+1]).
	//  NOTE(review): presumably each cell occurs only once in the patch cell
	//  list, so that no two invocations write the same cell - confirm.
	struct limiterCorrCalPatchFunctor{
		const label* neiStart;  // per-entry start offsets into losort
		const label* losort;    // patch-face indices grouped by cell
		const label* pcells;    // patch cell list (not read by this functor)
		const scalar* psiPf;    // patch-face values of psi

		limiterCorrCalPatchFunctor
		(
			const label* _neiStart,
			const label* _losort,
			const label* _pcells,
			const scalar* _psiPf
		):
			neiStart(_neiStart),
			losort(_losort),
			pcells(_pcells),
			psiPf(_psiPf)
		{}

		template <typename Tuple>
		__host__ __device__
		void operator()(Tuple t)
		{
			scalar psiMaxn = thrust::get<0>(t);
			scalar psiMinn = thrust::get<1>(t);
			const label id = thrust::get<2>(t);

			const label nStart = neiStart[id];
			const label nSize = neiStart[id+1] - nStart;

			for(label i = 0; i<nSize; i++)
			{
				const label face = losort[nStart + i];

				psiMaxn = max(psiMaxn, psiPf[face]);
				psiMinn = min(psiMinn, psiPf[face]);
			}

			// Store the results through the tuple's element references.
			// Without this the updated extrema are discarded.
			thrust::get<0>(t) = psiMaxn;
			thrust::get<1>(t) = psiMinn;
		}
	};

	//- Widens the local extrema of cells on a patch by the additional
	//  boundary extrema allowance.
	//
	//  Intended for thrust::for_each over a zip of
	//      (psiMaxn[cell], psiMinn[cell], index into the patch cell list).
	//  The allowance bDECoeff*(psiMax[cell] - psiMin[cell]) is applied once
	//  per patch face attached to the cell, i.e.
	//  neiStart[id+1] - neiStart[id] times.
	struct limiterCorrPatchFunctor{
		const label* neiStart;  // per-entry start offsets of patch faces
		const label* losort;    // patch-face indices (not read here)
		const label* pcells;    // cell index of each patch cell-list entry
		const scalar* psiMax;   // per-cell upper bound
		const scalar* psiMin;   // per-cell lower bound
		const scalar bDECoeff;  // boundary delta extrema coefficient

		limiterCorrPatchFunctor
		(
			const label* _neiStart,
			const label* _losort,
			const label* _pcells,
			const scalar* _psiMax,
			const scalar* _psiMin,
			const scalar _bDECoeff
		):
			neiStart(_neiStart),
			losort(_losort),
			pcells(_pcells),
			psiMax(_psiMax),
			psiMin(_psiMin),
			bDECoeff(_bDECoeff)
		{}

		template <typename Tuple>
		__host__ __device__
		void operator()(Tuple t)
		{
			scalar psiMaxn = thrust::get<0>(t);
			scalar psiMinn = thrust::get<1>(t);
			const label id = thrust::get<2>(t);

			const label nStart = neiStart[id];
			const label nSize = neiStart[id+1] - nStart;
			const label cellI = pcells[id];

			const scalar extrema =
				bDECoeff
			   *(psiMax[cellI] - psiMin[cellI]);

			// One application per patch face of this cell
			for(label i = 0; i<nSize; i++)
			{
				psiMaxn += extrema;
				psiMinn -= extrema;
			}

			// Store the results through the tuple's element references.
			// Without this the widened extrema are discarded.
			thrust::get<0>(t) = psiMaxn;
			thrust::get<1>(t) = psiMinn;
		}
	};

	//- Per-cell accumulation of the patch-face fluxes of a cell.
	//
	//  Intended for thrust::for_each over a zip of
	//      (sumPhip[cell], mSumPhim[cell], sumPhiBD[cell],
	//       index into the patch cell list).
	//  The patch faces attached to entry id are losort[k] for k in
	//  [neiStart[id], neiStart[id+1]).
	struct limiterSumPatchFunctor{
		const label* neiStart;  // per-entry start offsets into losort
		const label* losort;    // patch-face indices grouped by cell
		const label* pcells;    // patch cell list (not read by this functor)
		const scalar* phiPf;    // correction flux on the patch faces
		const scalar* phiBDPf;  // bounded flux on the patch faces

		limiterSumPatchFunctor
		(
			const label* _neiStart,
			const label* _losort,
			const label* _pcells,
			const scalar* _phiPf,
			const scalar* _phiBDPf
		):
			neiStart(_neiStart),
			losort(_losort),
			pcells(_pcells),
			phiPf(_phiPf),
			phiBDPf(_phiBDPf)
		{}

		template <typename Tuple>
		__host__ __device__
		void operator()(Tuple t)
		{
			scalar sumPhip = thrust::get<0>(t);
			scalar mSumPhim = thrust::get<1>(t);
			scalar sumPhiBD = thrust::get<2>(t);
			const label id = thrust::get<3>(t);

			const label nStart = neiStart[id];
			const label nSize = neiStart[id+1] - nStart;

			for(label i = 0; i<nSize; i++)
			{
				const label face = losort[nStart + i];

				sumPhiBD += phiBDPf[face];

				const scalar phiCorrf = phiPf[face];

				// Positive fluxes go to sumPhip, the magnitude of the
				// others to mSumPhim
				if (phiCorrf > 0)
				{
					sumPhip += phiCorrf;
				}
				else
				{
					mSumPhim -= phiCorrf;
				}
			}

			// Store the results through the tuple's element references.
			// Without this the accumulated sums are discarded.
			thrust::get<0>(t) = sumPhip;
			thrust::get<1>(t) = mSumPhim;
			thrust::get<2>(t) = sumPhiBD;
		}
	};
	
	//- Per-cell accumulation of a patch-face flux into positive and
	//  negative-magnitude sums.
	//
	//  Intended for thrust::for_each over a zip of
	//      (sumPhip[cell], mSumPhim[cell], index into the patch cell list).
	//  The patch faces attached to entry id are losort[k] for k in
	//  [neiStart[id], neiStart[id+1]).
	struct limiterCorrSumPatchFunctor{
		const label* neiStart;  // per-entry start offsets into losort
		const label* losort;    // patch-face indices grouped by cell
		const label* pcells;    // patch cell list (not read by this functor)
		const scalar* phiPf;    // flux on the patch faces

		limiterCorrSumPatchFunctor
		(
			const label* _neiStart,
			const label* _losort,
			const label* _pcells,
			const scalar* _phiPf
		):
			neiStart(_neiStart),
			losort(_losort),
			pcells(_pcells),
			phiPf(_phiPf)
		{}

		template <typename Tuple>
		__host__ __device__
		void operator()(Tuple t)
		{
			scalar sumPhip = thrust::get<0>(t);
			scalar mSumPhim = thrust::get<1>(t);
			const label id = thrust::get<2>(t);

			const label nStart = neiStart[id];
			const label nSize = neiStart[id+1] - nStart;

			for(label i = 0; i<nSize; i++)
			{
				const label face = losort[nStart + i];
				const scalar phiCorrf = phiPf[face];

				if (phiCorrf > 0)
				{
					sumPhip += phiCorrf;
				}
				else
				{
					mSumPhim -= phiCorrf;
				}
			}

			// Store the results through the tuple's element references.
			// Without this the accumulated sums are discarded.
			thrust::get<0>(t) = sumPhip;
			thrust::get<1>(t) = mSumPhim;
		}
	};
	
	//- Per-cell accumulation of the limited internal-face correction flux,
	//  followed by evaluation of the two per-cell limiter coefficients.
	//
	//  Intended for thrust::for_each over a zip of
	//      (psiMaxn, psiMinn, sumPhip, mSumPhim, sumlPhip, mSumlPhim,
	//       cell index).
	//  Slots 0-3 are only read. Slots 4 and 5 are read as the running sums
	//  and overwritten with coefficients clamped to [0, 1].
	struct limiterCorrSumFunctor{
		const label* ownStart;   // per-cell start offsets of owned faces
		const label* neiStart;   // per-cell start offsets into losort
		const label* losort;     // face indices grouped by neighbour cell
		const scalar* phiCorrIf; // flux summed over the internal faces

		limiterCorrSumFunctor
		(
			const label* _ownStart,
			const label* _neiStart,
			const label* _losort,
			const scalar* _phiCorrIf
		):
			ownStart(_ownStart),
			neiStart(_neiStart),
			losort(_losort),
			phiCorrIf(_phiCorrIf)
		{}

		template <typename Tuple>
		__host__ __device__
		void operator()(Tuple t)
		{
			const scalar psiMaxn = thrust::get<0>(t);
			const scalar psiMinn = thrust::get<1>(t);
			const scalar sumPhip = thrust::get<2>(t);
			const scalar mSumPhim = thrust::get<3>(t);
			scalar sumlPhip = thrust::get<4>(t);
			scalar mSumlPhim = thrust::get<5>(t);
			const label id = thrust::get<6>(t);

			const label oStart = ownStart[id];
			const label oSize = ownStart[id+1] - oStart;

			const label nStart = neiStart[id];
			const label nSize = neiStart[id+1] - nStart;

			// Faces for which this cell is the owner
			for(label i = 0; i<oSize; i++)
			{
				const label face = oStart + i;

				const scalar phiCorrf = phiCorrIf[face];

				if (phiCorrf > 0)
				{
					sumlPhip += phiCorrf;
				}
				else
				{
					mSumlPhim -= phiCorrf;
				}
			}

			// Faces for which this cell is the neighbour: the flux sign is
			// reversed as seen from this cell
			for(label i = 0; i<nSize; i++)
			{
				const label face = losort[nStart + i];

				const scalar phiCorrf = phiCorrIf[face];

				if (phiCorrf > 0)
				{
					mSumlPhim += phiCorrf;
				}
				else
				{
					sumlPhip -= phiCorrf;
				}
			}

			// ROOTVSMALL guards the division when the denominator sum is 0
			sumlPhip =
				max(min
				(
					(sumlPhip + psiMaxn)
				   /(mSumPhim + ROOTVSMALL),
					1.0), 0.0
				);

			mSumlPhim =
				max(min
				(
					(mSumlPhim + psiMinn)
				   /(sumPhip + ROOTVSMALL),
					1.0), 0.0
				);

			// Store the coefficients through the tuple's element references.
			// Without this the computed values are discarded.
			thrust::get<4>(t) = sumlPhip;
			thrust::get<5>(t) = mSumlPhim;
		}
	};
	
	//- Face limiter from the per-cell coefficients of the owner and
	//  neighbour cells, selected by the sign of the correction flux.
	//
	//  Transform functor over (phiCorr, owner cell, neighbour cell) that
	//  returns the limiter value for the face.
	struct limiterCorrlambdaFunctor{
		const scalar* lambdam;  // per-cell coefficient, indexed by cell
		const scalar* lambdap;  // per-cell coefficient, indexed by cell

		limiterCorrlambdaFunctor
		(
			const scalar* _lambdam,
			const scalar* _lambdap
		):
			lambdam(_lambdam),
			lambdap(_lambdap)
		{}

		__host__ __device__
		scalar operator()(const thrust::tuple<scalar,label,label>& t)
		{
			const scalar phiCorrf = thrust::get<0>(t);
			const label owner = thrust::get<1>(t);
			const label neighb = thrust::get<2>(t);

			// Start from the upper bound of the limiter instead of reading
			// an uninitialised value.
			// NOTE(review): the face's current lambda is not among the
			// inputs, so a running minimum over successive limiter
			// iterations cannot be formed here; the caller would have to
			// supply it - confirm what is required.
			scalar lambdaIf = 1.0;

			if (phiCorrf > 0)
			{
				lambdaIf = min
				(
					lambdaIf,
					min(lambdap[owner], lambdam[neighb])
				);
			}
			else
			{
				lambdaIf = min
				(
					lambdaIf,
					min(lambdam[owner], lambdap[neighb])
				);
			}

			return lambdaIf;
		}
	};
	
	//- Patch-face limiter from the per-cell coefficients of the face cell,
	//  selected by the sign of the correction flux.
	//
	//  Transform functor over (phiCorr, face cell) that returns the limiter
	//  value for the patch face.
	struct limiterCorrlambdaCoupledPatchFunctor{
		const scalar* lambdam;  // per-cell coefficient, indexed by cell
		const scalar* lambdap;  // per-cell coefficient, indexed by cell

		limiterCorrlambdaCoupledPatchFunctor
		(
			const scalar* _lambdam,
			const scalar* _lambdap
		):
			lambdam(_lambdam),
			lambdap(_lambdap)
		{}

		__host__ __device__
		scalar operator()(const thrust::tuple<scalar,label>& t)
		{
			const scalar phiCorrf = thrust::get<0>(t);
			const label pcell = thrust::get<1>(t);

			// Start from the upper bound of the limiter instead of reading
			// an uninitialised value.
			// NOTE(review): the face's current lambda is not among the
			// inputs, so a running minimum over successive limiter
			// iterations cannot be formed here; the caller would have to
			// supply it - confirm what is required.
			scalar lambdaIf = 1.0;

			if (phiCorrf > 0)
			{
				lambdaIf = min(lambdaIf, lambdap[pcell]);
			}
			else
			{
				lambdaIf = min(lambdaIf, lambdam[pcell]);
			}

			return lambdaIf;
		}
	};
	
	//- Patch-face limiter applied only where phif + phiCorrf exceeds
	//  SMALL*SMALL; other faces get the unlimited value 1.
	//
	//  Transform functor over (phiCorr, phi, face cell) that returns the
	//  limiter value for the patch face.
	struct limiterCorrlambdaPatchFunctor{
		const scalar* lambdam;  // per-cell coefficient, indexed by cell
		const scalar* lambdap;  // per-cell coefficient, indexed by cell

		limiterCorrlambdaPatchFunctor
		(
			const scalar* _lambdam,
			const scalar* _lambdap
		):
			lambdam(_lambdam),
			lambdap(_lambdap)
		{}

		__host__ __device__
		scalar operator()(const thrust::tuple<scalar,scalar,label>& t)
		{
			const scalar phiCorrf = thrust::get<0>(t);
			const scalar phif = thrust::get<1>(t);
			const label pcell = thrust::get<2>(t);

			// Start from the upper bound of the limiter instead of reading
			// an uninitialised value; this is also what is returned when
			// the flux test below fails.
			// NOTE(review): the face's current lambda is not among the
			// inputs, so a running minimum over successive limiter
			// iterations cannot be formed here; the caller would have to
			// supply it - confirm what is required.
			scalar lambdaIf = 1.0;

			if (phif + phiCorrf > SMALL*SMALL)
			{
				if (phiCorrf > 0)
				{
					lambdaIf = min(lambdaIf, lambdap[pcell]);
				}
				else
				{
					lambdaIf = min(lambdaIf, lambdam[pcell]);
				}
			}

			return lambdaIf;
		}
	};
}


template<class RdeltaTType, class RhoType, class SpType, class SuType>
void Foam::MULES::explicitSolve
(
    const RdeltaTType& rDeltaT,
    const RhoType& rho,
    volScalargpuField& psi,
    const surfaceScalargpuField& phiPsi,
    const SpType& Sp,
    const SuType& Su
)
{
    Info<< "MULES: Solving for " << psi.name() << endl;

    const gpufvMesh& mesh = psi.mesh();

    scalargpuField& psiIf = psi;
    const scalargpuField& psi0 = psi.oldTime();

    psiIf = 0.0;
    fvc::surfaceIntegrate(psiIf, phiPsi);

    if (mesh.hostmesh().moving())
    {
        psiIf =
        (
            mesh.Vsc0()().field()*rho.oldTime().field()
           *psi0*rDeltaT/mesh.Vsc()().field()
          + Su.field()
          - psiIf
        )/(rho.field()*rDeltaT - Sp.field());
    }
    else
    {
        psiIf =
        (
            rho.oldTime().field()*psi0*rDeltaT
          + Su.field()
          - psiIf
        )/(rho.field()*rDeltaT - Sp.field());
    }

    psi.correctBoundaryConditions();
}


template<class RhoType>
void Foam::MULES::explicitSolve
(
    const RhoType& rho,
    volScalargpuField& psi,
    const surfaceScalargpuField& phiPsi
)
{
    explicitSolve(rho, psi, phiPsi, zeroField(), zeroField());
}


template<class RhoType, class SpType, class SuType>
void Foam::MULES::explicitSolve
(
    const RhoType& rho,
    volScalargpuField& psi,
    const surfaceScalargpuField& phiPsi,
    const SpType& Sp,
    const SuType& Su
)
{
    const gpufvMesh& mesh = psi.mesh();

    if (fv::gpulocalEulerDdt::enabled(mesh))
    {
        const volScalargpuField& rDeltaT = fv::gpulocalEulerDdt::localRDeltaT(mesh);
        explicitSolve(rDeltaT, rho, psi, phiPsi, Sp, Su);
    }
    else
    {
        const scalar rDeltaT = 1.0/mesh.time().deltaTValue();
        explicitSolve(rDeltaT, rho, psi, phiPsi, Sp, Su);
    }
}


template<class RhoType, class PsiMaxType, class PsiMinType>
void Foam::MULES::explicitSolve
(
    const RhoType& rho,
    volScalargpuField& psi,
    const surfaceScalargpuField& phiBD,
    surfaceScalargpuField& phiPsi,
    const PsiMaxType& psiMax,
    const PsiMinType& psiMin
)
{
    explicitSolve
    (
        rho,
        psi,
        phiBD,
        phiPsi,
        zeroField(),
        zeroField(),
        psiMax,
        psiMin
    );
}


template
<
    class RhoType,
    class SpType,
    class SuType,
    class PsiMaxType,
    class PsiMinType
>
void Foam::MULES::explicitSolve
(
    const RhoType& rho,
    volScalargpuField& psi,
    const surfaceScalargpuField& phi,
    surfaceScalargpuField& phiPsi,
    const SpType& Sp,
    const SuType& Su,
    const PsiMaxType& psiMax,
    const PsiMinType& psiMin
)
{
    const gpufvMesh& mesh = psi.mesh();

    psi.correctBoundaryConditions();

    if (fv::gpulocalEulerDdt::enabled(mesh))
    {
        const volScalargpuField& rDeltaT = fv::gpulocalEulerDdt::localRDeltaT(mesh);
        limit(rDeltaT, rho, psi, phi, phiPsi, Sp, Su, psiMax, psiMin, false);
        explicitSolve(rDeltaT, rho, psi, phiPsi, Sp, Su);
    }
    else
    {
        const scalar rDeltaT = 1.0/mesh.time().deltaTValue();
        limit(rDeltaT, rho, psi, phi, phiPsi, Sp, Su, psiMax, psiMin, false);
        explicitSolve(rDeltaT, rho, psi, phiPsi, Sp, Su);
    }
}


//- Compute the face limiter coefficients for the correction flux phiCorr
//  and store them in allLambda (viewed through a sliced surface field).
//
//  Controls read from the solver dictionary of psi.name():
//      nLimiterIter (default 3), smoothLimiter (default 0),
//      extremaCoeff (default 0), boundaryExtremaCoeff (default extremaCoeff).
//
//  The commented-out forAll loops are the reference face/cell loops that
//  the thrust calls following them are meant to replace.
template
<
    class RdeltaTType,
    class RhoType,
    class SpType,
    class SuType,
    class PsiMaxType,
    class PsiMinType
>
void Foam::MULES::limiter
(
    scalargpuField& allLambda,
    const RdeltaTType& rDeltaT,
    const RhoType& rho,
    const volScalargpuField& psi,
    const surfaceScalargpuField& phiBD,
    const surfaceScalargpuField& phiCorr,
    const SpType& Sp,
    const SuType& Su,
    const PsiMaxType& psiMax,
    const PsiMinType& psiMin
)
{
    const scalargpuField& psiIf = psi;
    const volScalargpuField::Boundary& psiBf = psi.boundaryField();

    const gpufvMesh& mesh = psi.mesh();

    const dictionary& MULEScontrols = mesh.hostmesh().solverDict(psi.name());

    const label nLimiterIter
    (
        MULEScontrols.getOrDefault<label>("nLimiterIter", 3)
    );

    const scalar smoothLimiter
    (
        MULEScontrols.getOrDefault<scalar>("smoothLimiter", 0)
    );

    const scalar extremaCoeff
    (
        MULEScontrols.getOrDefault<scalar>("extremaCoeff", 0)
    );

    const scalar boundaryExtremaCoeff
    (
        MULEScontrols.getOrDefault<scalar>
        (
            "boundaryExtremaCoeff",
            extremaCoeff
        )
    );

    // Extra allowance on boundaries beyond extremaCoeff, never negative
    const scalar boundaryDeltaExtremaCoeff
    (
        max(boundaryExtremaCoeff - extremaCoeff, 0)
    );

    const scalargpuField& psi0 = psi.oldTime();

    // Face addressing: owner/neighbour per face, plus per-cell start
    // offsets for owned faces and for the losort-ordered faces
    const labelgpuList& owner = mesh.owner();
    const labelgpuList& neighb = mesh.neighbour();
	const labelgpuList& ownStart = mesh.lduAddr().ownerStartAddr();
	const labelgpuList& neiStart = mesh.lduAddr().losortStartAddr();
	const labelgpuList& losort = mesh.lduAddr().losortAddr();
	
    tmp<volScalargpuField::Internal> tVsc = mesh.Vsc();
    const scalargpuField& V = tVsc();

    const scalargpuField& phiBDIf = phiBD;
    const surfaceScalargpuField::Boundary& phiBDBf =
        phiBD.boundaryField();

    const scalargpuField& phiCorrIf = phiCorr;
    const surfaceScalargpuField::Boundary& phiCorrBf =
        phiCorr.boundaryField();

    // Surface-field view onto allLambda
    slicedSurfaceScalargpuField lambda
    (
        IOobject
        (
            "lambda",
            mesh.time().timeName(),
            mesh.hostmesh(),
            IOobject::NO_READ,
            IOobject::NO_WRITE,
            false
        ),
        mesh,
        dimless,
        allLambda,
        false   // Use slices for the couples
    );

    scalargpuField& lambdaIf = lambda;
    surfaceScalargpuField::Boundary& lambdaBf = lambda.boundaryFieldRef();

    scalargpuField psiMaxn(psiIf.size());
    scalargpuField psiMinn(psiIf.size());

    // Seed the local maximum with the lower bound and the local minimum
    // with the upper bound before gathering the neighbourhood extrema
    psiMaxn = psiMin;
    psiMinn = psiMax;

    scalargpuField sumPhiBD(psiIf.size(), Zero);

    scalargpuField sumPhip(psiIf.size(), Zero);
    scalargpuField mSumPhim(psiIf.size(), Zero);

//    forAll(phiCorrIf, facei)
//    {
//        const label own = owner[facei];
//        const label nei = neighb[facei];
//
//        psiMaxn[own] = max(psiMaxn[own], psiIf[nei]);
//        psiMinn[own] = min(psiMinn[own], psiIf[nei]);
//
//        psiMaxn[nei] = max(psiMaxn[nei], psiIf[own]);
//        psiMinn[nei] = min(psiMinn[nei], psiIf[own]);
//
//        sumPhiBD[own] += phiBDIf[facei];
//        sumPhiBD[nei] -= phiBDIf[facei];
//
//        const scalar phiCorrf = phiCorrIf[facei];
//
//        if (phiCorrf > 0)
//        {
//            sumPhip[own] += phiCorrf;
//            mSumPhim[nei] += phiCorrf;
//        }
//        else
//        {
//            mSumPhim[own] -= phiCorrf;
//            sumPhip[nei] -= phiCorrf;
//        }
//    }
	// Internal faces: one limiterFunctor call per cell over the zipped
	// per-cell accumulators and the cell index
	thrust::for_each
	(
		thrust::make_zip_iterator(thrust::make_tuple(
		psiMaxn.begin(),
		psiMinn.begin(),
		sumPhip.begin(),
		mSumPhim.begin(),
		sumPhiBD.begin(),
		thrust::make_counting_iterator(0)
		)),
		thrust::make_zip_iterator(thrust::make_tuple(
		psiMaxn.end(),
		psiMinn.end(),
		sumPhip.end(),
		mSumPhim.end(),
		sumPhiBD.end(),
		thrust::make_counting_iterator(0)+psiMaxn.size()
		)),
		limiterFunctor
		(
			owner.data(),
			neighb.data(),
			ownStart.data(),
			neiStart.data(),
			losort.data(),
			phiCorrIf.data(), 
			psiIf.data(),
			phiBDIf.data()
		)
	);

    // Boundary faces, patch by patch
    forAll(phiCorrBf, patchi)
    {
        const fvPatchScalargpuField& psiPf = psiBf[patchi];
        const scalargpuField& phiBDPf = phiBDBf[patchi];
        const scalargpuField& phiCorrPf = phiCorrBf[patchi];

		// Per-patch addressing: cell list, faces grouped by cell and the
		// start offset of each group
		const labelgpuList& pcells = mesh.lduAddr().gpuPatchSortCells(patchi);
		const labelgpuList& plosort = mesh.lduAddr().gpuPatchSortAddr(patchi);
		const labelgpuList& plosortStart = mesh.lduAddr().gpuPatchSortStartAddr(patchi);

        if (psiPf.coupled())
        {
            // Coupled patch: use the values from the other side
            const scalargpuField psiPNf(psiPf.patchNeighbourField());

//            forAll(phiCorrPf, pFacei)
//            {
//                const label pfCelli = pFaceCells[pFacei];
//
//                psiMaxn[pfCelli] = max(psiMaxn[pfCelli], psiPNf[pFacei]);
//                psiMinn[pfCelli] = min(psiMinn[pfCelli], psiPNf[pFacei]);
//            }
			
			thrust::for_each
			(
				thrust::make_zip_iterator(thrust::make_tuple(
				thrust::make_permutation_iterator(psiMaxn.begin(),pcells.begin()),
				thrust::make_permutation_iterator(psiMinn.begin(),pcells.begin()),
				thrust::make_counting_iterator(0)
				)),
				thrust::make_zip_iterator(thrust::make_tuple(
				thrust::make_permutation_iterator(psiMaxn.begin(),pcells.end()),
				thrust::make_permutation_iterator(psiMinn.begin(),pcells.end()),
				thrust::make_counting_iterator(0)+pcells.size()
				)),
				limiterCorrCalPatchFunctor
				(
					plosortStart.data(),
					plosort.data(),
					pcells.data(),
					psiPNf.data()
				)
			);
        }
        else if (psiPf.fixesValue())
        {
            // Fixed-value patch: use the patch values themselves
//            forAll(phiCorrPf, pFacei)
//            {
//                const label pfCelli = pFaceCells[pFacei];
//
//                psiMaxn[pfCelli] = max(psiMaxn[pfCelli], psiPf[pFacei]);
//                psiMinn[pfCelli] = min(psiMinn[pfCelli], psiPf[pFacei]);
//            }
			
			thrust::for_each
			(
				thrust::make_zip_iterator(thrust::make_tuple(
				thrust::make_permutation_iterator(psiMaxn.begin(),pcells.begin()),
				thrust::make_permutation_iterator(psiMinn.begin(),pcells.begin()),
				thrust::make_counting_iterator(0)
				)),
				thrust::make_zip_iterator(thrust::make_tuple(
				thrust::make_permutation_iterator(psiMaxn.begin(),pcells.end()),
				thrust::make_permutation_iterator(psiMinn.begin(),pcells.end()),
				thrust::make_counting_iterator(0)+pcells.size()
				)),
				limiterCorrCalPatchFunctor
				(
					plosortStart.data(),
					plosort.data(),
					pcells.data(),
					psiPf.data()
				)
			);
        }
        else
        {
            // Add the optional additional allowed boundary extrema
            if (boundaryDeltaExtremaCoeff > 0)
            {
//                forAll(phiCorrPf, pFacei)
//                {
//                    const label pfCelli = pFaceCells[pFacei];
//
//                     const scalar extrema =
//                        boundaryDeltaExtremaCoeff
//                       *(psiMax[pfCelli] - psiMin[pfCelli]);
//
//                    psiMaxn[pfCelli] += extrema;
//                    psiMinn[pfCelli] -= extrema;
//                }
				
				// NOTE(review): psiMax.data()/psiMin.data() require
				// PsiMaxType/PsiMinType to expose data() - confirm for all
				// instantiations
				thrust::for_each
				(
					thrust::make_zip_iterator(thrust::make_tuple(
					thrust::make_permutation_iterator(psiMaxn.begin(),pcells.begin()),
					thrust::make_permutation_iterator(psiMinn.begin(),pcells.begin()),
					thrust::make_counting_iterator(0)
					)),
					thrust::make_zip_iterator(thrust::make_tuple(
					thrust::make_permutation_iterator(psiMaxn.begin(),pcells.end()),
					thrust::make_permutation_iterator(psiMinn.begin(),pcells.end()),
					thrust::make_counting_iterator(0)+pcells.size()
					)),
					limiterCorrPatchFunctor
					(
						plosortStart.data(),
						plosort.data(),
						pcells.data(),
						psiMax.data(),
						psiMin.data(),
						boundaryDeltaExtremaCoeff
					)
				);
            }
        }

//        forAll(phiCorrPf, pFacei)
//        {
//            const label pfCelli = pFaceCells[pFacei];
//
//            sumPhiBD[pfCelli] += phiBDPf[pFacei];
//
//            const scalar phiCorrf = phiCorrPf[pFacei];
//
//            if (phiCorrf > 0)
//            {
//                sumPhip[pfCelli] += phiCorrf;
//            }
//            else
//            {
//                mSumPhim[pfCelli] -= phiCorrf;
//            }
//        }
		
		// Flux sums of this patch, for every patch type
		thrust::for_each
		(
			thrust::make_zip_iterator(thrust::make_tuple(
			thrust::make_permutation_iterator(sumPhip.begin(),pcells.begin()),
			thrust::make_permutation_iterator(mSumPhim.begin(),pcells.begin()),
			thrust::make_permutation_iterator(sumPhiBD.begin(),pcells.begin()),
			thrust::make_counting_iterator(0)
			)),
			thrust::make_zip_iterator(thrust::make_tuple(
			thrust::make_permutation_iterator(sumPhip.begin(),pcells.end()),
			thrust::make_permutation_iterator(mSumPhim.begin(),pcells.end()),
			thrust::make_permutation_iterator(sumPhiBD.begin(),pcells.end()),
			thrust::make_counting_iterator(0)+pcells.size()
			)),
			limiterSumPatchFunctor
			(
				plosortStart.data(),
				plosort.data(),
				pcells.data(),
				phiCorrPf.data(),
				phiBDPf.data()
			)
		);
    }

    // Relax the local extrema by extremaCoeff, clipped to the global bounds
    psiMaxn = min(psiMaxn + extremaCoeff*(psiMax - psiMin), psiMax);
    psiMinn = max(psiMinn - extremaCoeff*(psiMax - psiMin), psiMin);

    // Optionally blend the local extrema towards the cell value
    if (smoothLimiter > SMALL)
    {
        psiMaxn =
            min(smoothLimiter*psiIf + (1.0 - smoothLimiter)*psiMaxn, psiMax);
        psiMinn =
            max(smoothLimiter*psiIf + (1.0 - smoothLimiter)*psiMinn, psiMin);
    }

    // From here on psiMaxn/psiMinn no longer hold extrema of psi: they are
    // overwritten with the expressions below, which are the terms added to
    // the limited flux sums when the per-cell coefficients are evaluated
    if (mesh.hostmesh().moving())
    {
        tmp<volScalargpuField::Internal> V0 = mesh.Vsc0();

        psiMaxn =
            V
           *(
               (rho.field()*rDeltaT - Sp.field())*psiMaxn
             - Su.field()
            )
          - (V0().field()*rDeltaT)*rho.oldTime().field()*psi0
          + sumPhiBD;

        psiMinn =
            V
           *(
               Su.field()
             - (rho.field()*rDeltaT - Sp.field())*psiMinn
            )
          + (V0().field()*rDeltaT)*rho.oldTime().field()*psi0
          - sumPhiBD;
    }
    else
    {
        psiMaxn =
            V
           *(
               (rho.field()*rDeltaT - Sp.field())*psiMaxn
             - Su.field()
             - (rho.oldTime().field()*rDeltaT)*psi0
            )
          + sumPhiBD;

        psiMinn =
            V
           *(
               Su.field()
             - (rho.field()*rDeltaT - Sp.field())*psiMinn
             + (rho.oldTime().field()*rDeltaT)*psi0
            )
          - sumPhiBD;
    }

    scalargpuField sumlPhip(psiIf.size());
    scalargpuField mSumlPhim(psiIf.size());

    // Limiter iterations
    for (int j=0; j<nLimiterIter; j++)
    {
        sumlPhip = 0;
        mSumlPhim = 0;

//        forAll(lambdaIf, facei)
//        {
//            const label own = owner[facei];
//            const label nei = neighb[facei];
//
//            scalar lambdaPhiCorrf = lambdaIf[facei]*phiCorrIf[facei];
//
//            if (lambdaPhiCorrf > 0)
//            {
//                sumlPhip[own] += lambdaPhiCorrf;
//                mSumlPhim[nei] += lambdaPhiCorrf;
//            }
//            else
//            {
//                mSumlPhim[own] -= lambdaPhiCorrf;
//                sumlPhip[nei] -= lambdaPhiCorrf;
//            }
//        }
		// Scratch buffer for lambda*phiCorr, resized for each patch and
		// then for the internal faces
		scalargpuField lambdaPhiCorrf;

        // Boundary contributions are summed before the internal faces
        forAll(lambdaBf, patchi)
        {
            scalargpuField& lambdaPf = lambdaBf[patchi];
            const scalargpuField& phiCorrfPf = phiCorrBf[patchi];

			const labelgpuList& pcells = mesh.lduAddr().gpuPatchSortCells(patchi);
			const labelgpuList& plosort = mesh.lduAddr().gpuPatchSortAddr(patchi);
			const labelgpuList& plosortStart = mesh.lduAddr().gpuPatchSortStartAddr(patchi);

			lambdaPhiCorrf.setSize(lambdaPf.size());
			lambdaPhiCorrf = lambdaPf * phiCorrfPf;
//            forAll(lambdaPf, pFacei)
//            {
//                const label pfCelli = pFaceCells[pFacei];
//                const scalar lambdaPhiCorrf =
//                    lambdaPf[pFacei]*phiCorrfPf[pFacei];
//
//                if (lambdaPhiCorrf > 0)
//                {
//                    sumlPhip[pfCelli] += lambdaPhiCorrf;
//                }
//                else
//                {
//                    mSumlPhim[pfCelli] -= lambdaPhiCorrf;
//                }
//            }
			thrust::for_each
			(
				thrust::make_zip_iterator(thrust::make_tuple(
				thrust::make_permutation_iterator(sumlPhip.begin(),pcells.begin()),
				thrust::make_permutation_iterator(mSumlPhim.begin(),pcells.begin()),
				thrust::make_counting_iterator(0)
				)),
				thrust::make_zip_iterator(thrust::make_tuple(
				thrust::make_permutation_iterator(sumlPhip.begin(),pcells.end()),
				thrust::make_permutation_iterator(mSumlPhim.begin(),pcells.end()),
				thrust::make_counting_iterator(0)+pcells.size()
				)),
				limiterCorrSumPatchFunctor
				(
					plosortStart.data(),
					plosort.data(),
					pcells.data(),
					lambdaPhiCorrf.data()
				)
			);
        }

		lambdaPhiCorrf.setSize(lambdaIf.size());
		lambdaPhiCorrf = lambdaIf * phiCorrIf;
		
		// Internal-face sums and evaluation of the per-cell coefficients
		// in one pass (see also the reference cell loop below)
		thrust::for_each
		(
			thrust::make_zip_iterator(thrust::make_tuple(
			psiMaxn.begin(),
			psiMinn.begin(),
			sumPhip.begin(),
			mSumPhim.begin(),
			sumlPhip.begin(),
			mSumlPhim.begin(),
			thrust::make_counting_iterator(0)
			)),
			thrust::make_zip_iterator(thrust::make_tuple(
			psiMaxn.end(),
			psiMinn.end(),
			sumPhip.end(),
			mSumPhim.end(),
			sumlPhip.end(),
			mSumlPhim.end(),
			thrust::make_counting_iterator(0)+sumlPhip.size()
			)),
			limiterCorrSumFunctor
			(
				ownStart.data(),
				neiStart.data(),
				losort.data(),
				lambdaPhiCorrf.data()
			)
		);
//        forAll(sumlPhip, celli)
//        {
//            sumlPhip[celli] =
//                max(min
//                (
//                    (sumlPhip[celli] + psiMaxn[celli])
//                   /(mSumPhim[celli] + ROOTVSMALL),
//                    1.0), 0.0
//                );
//
//            mSumlPhim[celli] =
//                max(min
//                (
//                    (mSumlPhim[celli] + psiMinn[celli])
//                   /(sumPhip[celli] + ROOTVSMALL),
//                    1.0), 0.0
//                );
//        }

        // The sum buffers are reused under the coefficient names
        const scalargpuField& lambdam = sumlPhip;
        const scalargpuField& lambdap = mSumlPhim;

//        forAll(lambdaIf, facei)
//        {
//            if (phiCorrIf[facei] > 0)
//            {
//                lambdaIf[facei] = min
//                (
//                    lambdaIf[facei],
//                    min(lambdap[owner[facei]], lambdam[neighb[facei]])
//                );
//            }
//            else
//            {
//                lambdaIf[facei] = min
//                (
//                    lambdaIf[facei],
//                    min(lambdam[owner[facei]], lambdap[neighb[facei]])
//                );
//            }
//        }
		
		// NOTE(review): the reference loop above takes the minimum with the
		// existing lambdaIf[facei], but the current lambdaIf values are not
		// among the inputs of this transform (lambdaIf is output only) -
		// confirm that the previous limiter values are not lost
		thrust::transform
		(
			thrust::make_zip_iterator(thrust::make_tuple(
			phiCorrIf.begin(),
			owner.begin(),
			neighb.begin()
			)),
			thrust::make_zip_iterator(thrust::make_tuple(
			phiCorrIf.end(),
			owner.end(),
			neighb.end()
			)),
			lambdaIf.begin(),
			limiterCorrlambdaFunctor
			(
				lambdam.data(),
				lambdap.data()
			)
		);

        forAll(lambdaBf, patchi)
        {
            fvsPatchScalargpuField& lambdaPf = lambdaBf[patchi];
            const scalargpuField& phiCorrfPf = phiCorrBf[patchi];
            const fvPatchScalargpuField& psiPf = psiBf[patchi];

            if (isA<wedgegpuFvPatch>(mesh.boundary()[patchi]))
            {
                // Wedge patches get a zero limiter
                lambdaPf = 0;
            }
            else if (psiPf.coupled())
            {
                const labelgpuList& pFaceCells =
                    mesh.boundary()[patchi].gpuFaceCells();

//                forAll(lambdaPf, pFacei)
//                {
//                    const label pfCelli = pFaceCells[pFacei];
//
//                    if (phiCorrfPf[pFacei] > 0)
//                    {
//                        lambdaPf[pFacei] =
//                            min(lambdaPf[pFacei], lambdap[pfCelli]);
//                    }
//                    else
//                    {
//                        lambdaPf[pFacei] =
//                            min(lambdaPf[pFacei], lambdam[pfCelli]);
//                    }
//                }
				
				// NOTE(review): as for the internal faces, the existing
				// lambdaPf values are not among the transform inputs -
				// confirm
				thrust::transform
				(
					thrust::make_zip_iterator(thrust::make_tuple(
					phiCorrfPf.begin(),
					pFaceCells.begin()
					)),
					thrust::make_zip_iterator(thrust::make_tuple(
					phiCorrfPf.end(),
					pFaceCells.end()
					)),
					lambdaPf.begin(),
					limiterCorrlambdaCoupledPatchFunctor
					(
						lambdam.data(),
						lambdap.data()
					)
				);
            }
        }

        // Combine allLambda with minEqOp via syncTools::syncFaceList
        syncTools::syncFaceList(mesh.hostmesh(), allLambda, minEqOp<scalar>());
    }
}


template
<
    class RdeltaTType,
    class RhoType,
    class SpType,
    class SuType,
    class PsiMaxType,
    class PsiMinType
>
void Foam::MULES::limit
(
    const RdeltaTType& rDeltaT,
    const RhoType& rho,
    const volScalargpuField& psi,
    const surfaceScalargpuField& phi,
    surfaceScalargpuField& phiPsi,
    const SpType& Sp,
    const SuType& Su,
    const PsiMaxType& psiMax,
    const PsiMinType& psiMin,
    const bool returnCorr
)
{
    const gpufvMesh& mesh = psi.mesh();

    surfaceScalargpuField phiBD(gpuupwind<scalar>(psi.mesh(), phi).flux(psi));

    surfaceScalargpuField::Boundary& phiBDBf = phiBD.boundaryFieldRef();
    const surfaceScalargpuField::Boundary& phiPsiBf = phiPsi.boundaryField();

    forAll(phiBDBf, patchi)
    {
        fvsPatchScalargpuField& phiBDPf = phiBDBf[patchi];

        if (!phiBDPf.coupled())
        {
            phiBDPf = phiPsiBf[patchi];
        }
    }

    surfaceScalargpuField& phiCorr = phiPsi;
    phiCorr -= phiBD;

    scalargpuField allLambda(mesh.hostmesh().nFaces(), 1.0);

    slicedSurfaceScalargpuField lambda
    (
        IOobject
        (
            "lambda",
            mesh.time().timeName(),
            mesh.hostmesh(),
            IOobject::NO_READ,
            IOobject::NO_WRITE,
            false
        ),
        mesh,
        dimless,
        allLambda,
        false   // Use slices for the couples
    );

    limiter
    (
        allLambda,
        rDeltaT,
        rho,
        psi,
        phiBD,
        phiCorr,
        Sp,
        Su,
        psiMax,
        psiMin
    );

    if (returnCorr)
    {
        phiCorr *= lambda;
    }
    else
    {
        phiPsi = phiBD + lambda*phiCorr;
    }
}


template
<
    class RhoType,
    class SpType,
    class SuType,
    class PsiMaxType,
    class PsiMinType
>
void Foam::MULES::limit
(
    const RhoType& rho,
    const volScalargpuField& psi,
    const surfaceScalargpuField& phi,
    surfaceScalargpuField& phiPsi,
    const SpType& Sp,
    const SuType& Su,
    const PsiMaxType& psiMax,
    const PsiMinType& psiMin,
    const bool rtnCorr
)
{
    const gpufvMesh& mesh = psi.mesh();

    if (fv::gpulocalEulerDdt::enabled(mesh))
    {
        const volScalargpuField& rDeltaT = fv::gpulocalEulerDdt::localRDeltaT(mesh);
        limit(rDeltaT, rho, psi, phi, phiPsi, Sp, Su, psiMax, psiMin, rtnCorr);
    }
    else
    {
        const scalar rDeltaT = 1.0/mesh.time().deltaTValue();
        limit(rDeltaT, rho, psi, phi, phiPsi, Sp, Su, psiMax, psiMin, rtnCorr);
    }
}


// Apply gpulimitSum(UPtrList<scalargpuField>&) to a list of surface fields.
//
// The list-of-fields overload is invoked once with every element of
// phiPsiCorrs viewed as a scalargpuField (presumably the internal-face
// values - confirm against the field class), and then once per coupled
// patch with the matching boundary patch of every element.
//
// An empty phiPsiCorrs is a no-op: the patch loop below takes its patch
// list from element 0, which must therefore exist.
template<class SurfaceScalarFieldList>
void Foam::MULES::gpulimitSum(SurfaceScalarFieldList& phiPsiCorrs)
{
    // Nothing to limit; also protects the phiPsiCorrs[0] access below
    if (phiPsiCorrs.size() == 0)
    {
        return;
    }

    {
        UPtrList<scalargpuField> phiPsiCorrsInternal(phiPsiCorrs.size());
        forAll(phiPsiCorrs, phasei)
        {
            phiPsiCorrsInternal.set(phasei, &phiPsiCorrs[phasei]);
        }

        gpulimitSum(phiPsiCorrsInternal);
    }

    // The first element supplies the patch layout used for all elements
    const surfaceScalargpuField::Boundary& bfld =
        phiPsiCorrs[0].boundaryField();

    forAll(bfld, patchi)
    {
        // Only coupled patches are processed
        if (bfld[patchi].coupled())
        {
            UPtrList<scalargpuField> phiPsiCorrsPatch(phiPsiCorrs.size());
            forAll(phiPsiCorrs, phasei)
            {
                phiPsiCorrsPatch.set
                (
                    phasei,
                    &phiPsiCorrs[phasei].boundaryFieldRef()[patchi]
                );
            }

            gpulimitSum(phiPsiCorrsPatch);
        }
    }
}


// Apply gpulimitSum(alphas, phiPsiCorrs, fixed) to lists of surface fields.
//
// The list-of-fields overload is invoked once with every element of alphas
// and phiPsiCorrs viewed as a scalargpuField (presumably the internal-face
// values - confirm against the field class), and then once per coupled
// patch with the matching boundary patches. The set 'fixed' is forwarded
// unchanged to every call.
//
// An empty phiPsiCorrs is a no-op: the patch loop below takes its patch
// list from element 0, which must therefore exist.
template<class SurfaceScalarFieldList>
void Foam::MULES::gpulimitSum
(
    const SurfaceScalarFieldList& alphas,
    SurfaceScalarFieldList& phiPsiCorrs,
    const labelHashSet& fixed
)
{
    // Nothing to limit; also protects the phiPsiCorrs[0] access below
    if (phiPsiCorrs.size() == 0)
    {
        return;
    }

    {
        UPtrList<const scalargpuField> alphasInternal(alphas.size());
        forAll(alphas, phasei)
        {
            alphasInternal.set(phasei, &alphas[phasei]);
        }
        UPtrList<scalargpuField> phiPsiCorrsInternal(phiPsiCorrs.size());
        forAll(phiPsiCorrs, phasei)
        {
            phiPsiCorrsInternal.set(phasei, &phiPsiCorrs[phasei]);
        }

        gpulimitSum(alphasInternal, phiPsiCorrsInternal, fixed);
    }

    // The first element supplies the patch layout used for all elements
    const surfaceScalargpuField::Boundary& bfld =
        phiPsiCorrs[0].boundaryField();

    forAll(bfld, patchi)
    {
        // Only coupled patches are processed
        if (bfld[patchi].coupled())
        {
            UPtrList<const scalargpuField> alphasPatch(alphas.size());
            forAll(alphas, phasei)
            {
                alphasPatch.set
                (
                    phasei,
                    &alphas[phasei].boundaryField()[patchi]
                );
            }
            UPtrList<scalargpuField> phiPsiCorrsPatch(phiPsiCorrs.size());
            forAll(phiPsiCorrs, phasei)
            {
                phiPsiCorrsPatch.set
                (
                    phasei,
                    &phiPsiCorrs[phasei].boundaryFieldRef()[patchi]
                );
            }

            gpulimitSum(alphasPatch, phiPsiCorrsPatch, fixed);
        }
    }
}
// ************************************************************************* //
