/*---------------------------------------------------------------------------*\
  =========                 |
  \\      /  F ield         | OpenFOAM: The Open Source CFD Toolbox
   \\    /   O peration     |
    \\  /    A nd           | www.openfoam.com
     \\/     M anipulation  |
-------------------------------------------------------------------------------
    Copyright (C) 2011-2017 OpenFOAM Foundation
    Copyright (C) 2016-2021 OpenCFD Ltd.
-------------------------------------------------------------------------------
License
    This file is part of OpenFOAM.

    OpenFOAM is free software: you can redistribute it and/or modify it
    under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    OpenFOAM is distributed in the hope that it will be useful, but WITHOUT
    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    for more details.

    You should have received a copy of the GNU General Public License
    along with OpenFOAM.  If not, see <http://www.gnu.org/licenses/>.

\*---------------------------------------------------------------------------*/

#include "volgpuFields.H"
#include "surfacegpuFields.H"
#include "calculatedFvPatchgpuFields.H"
#include "extrapolatedCalculatedFvPatchgpuFields.H"
#include "coupledFvPatchgpuFields.H"
#include "UIndirectList.H"
#include "UniformList.H"
#include "demandDrivenData.H"

#include "cyclicFvPatchgpuField.H"
#include "cyclicAMIFvPatchgpuField.H"
#include "cyclicACMIFvPatchgpuField.H"

#include "processorLduInterfacegpuField.H"
//#include "OFstream.H"
// * * * * * * * * * * * * Protected Member Functions  * * * * * * * * * * * //

namespace Foam
{
    //- Thrust binary functor accumulating patch-face values onto a cell.
    //
    //  Invoked with the current cell value \c d and the position \c id of
    //  that cell in the patch cell list. The faces belonging to the cell are
    //  losort[neiStart[id]] ... losort[neiStart[id+1] - 1]; the value
    //  issf[face] of each of them is added to (add == true) or subtracted
    //  from (add == false) the running result, in that order.
    template<class Type, bool add>
    struct fvMatrixPatchAddFunctor
    {
        //- Per-face values to accumulate
        const Type* issf;

        //- Segment offsets into losort (entry id+1 closes segment id)
        const label* neiStart;

        //- Face indices into issf, grouped per cell
        const label* losort;

        fvMatrixPatchAddFunctor
        (
            const Type* _issf,
            const label* _neiStart,
            const label* _losort
        )
        :
            issf(_issf),
            neiStart(_neiStart),
            losort(_losort)
        {}

        __host__ __device__
        Type operator()(const Type& d, const label& id)
        {
            Type result = d;

            const label segEnd = neiStart[id+1];

            for (label pos = neiStart[id]; pos < segEnd; ++pos)
            {
                const label facei = losort[pos];

                if (add)
                {
                    result += issf[facei];
                }
                else
                {
                    result -= issf[facei];
                }
            }

            return result;
        }
    };
}

//- Add patch-face values to the internal field.
//
//  For every position i in \c addr this performs
//      intf[addr[i]] += sum of pf[sort[k]],
//      k = sortStart[i] ... sortStart[i+1] - 1
//  using fvMatrixPatchAddFunctor<Type2, true>.
//
//  \param addr       cells of intf that receive a contribution
//  \param sort       face indices into pf, grouped per entry of addr
//  \param sortStart  segment offsets into sort (entry i+1 is read for i)
//  \param pf         patch-face values
//  \param intf       internal field, updated in place
//
//  Aborts with a FatalError when sort and pf differ in size.
//  NOTE(review): the in-place update assumes each cell occurs at most once
//  in addr - confirm against the addressing that builds it.
template<class Type>
template<class Type2>
void Foam::gpufvMatrix<Type>::addToInternalField
(
    const labelgpuList& addr,
    const labelgpuList& sort,
    const labelgpuList& sortStart,
    const gpuField<Type2>& pf,
    gpuField<Type2>& intf
) const
{
    // pf is indexed through sort, so these are the two sizes that must
    // agree; report the size that was actually compared
    if (sort.size() != pf.size())
    {
        FatalErrorInFunction
            << "addressing (" << sort.size()
            << ") and field (" << pf.size() << ") are different sizes" << endl
            << abort(FatalError);
    }

    thrust::transform
    (
        thrust::make_permutation_iterator
        (
            intf.begin(),
            addr.begin()
        ),
        thrust::make_permutation_iterator
        (
            intf.begin(),
            addr.end()
        ),
        // Position within addr, used by the functor to find its segment
        thrust::make_counting_iterator(0),
        thrust::make_permutation_iterator
        (
            intf.begin(),
            addr.begin()
        ),
        fvMatrixPatchAddFunctor<Type2,true>
        (
            pf.data(),
            sortStart.data(),
            sort.data()
        )
    );
}


//- Add patch-face values held in a tmp to the internal field.
//  Forwards to the field overload, then releases the temporary.
template<class Type>
template<class Type2>
void Foam::gpufvMatrix<Type>::addToInternalField
(
    const labelgpuList& addr,
    const labelgpuList& sort,
    const labelgpuList& sortStart,
    const tmp<gpuField<Type2>>& tpf,
    gpuField<Type2>& intf
) const
{
    const gpuField<Type2>& pf = tpf();

    addToInternalField(addr, sort, sortStart, pf, intf);

    // The temporary is no longer needed once its values are accumulated
    tpf.clear();
}


//- Subtract patch-face values from the internal field.
//
//  For every position i in \c addr this performs
//      intf[addr[i]] -= sum of pf[sort[k]],
//      k = sortStart[i] ... sortStart[i+1] - 1
//  using fvMatrixPatchAddFunctor<Type2, false>.
//
//  \param addr       cells of intf that receive a contribution
//  \param sort       face indices into pf, grouped per entry of addr
//  \param sortStart  segment offsets into sort (entry i+1 is read for i)
//  \param pf         patch-face values
//  \param intf       internal field, updated in place
//
//  Aborts with a FatalError when sort and pf differ in size.
//  NOTE(review): the in-place update assumes each cell occurs at most once
//  in addr - confirm against the addressing that builds it.
template<class Type>
template<class Type2>
void Foam::gpufvMatrix<Type>::subtractFromInternalField
(
    const labelgpuList& addr,
    const labelgpuList& sort,
    const labelgpuList& sortStart,
    const gpuField<Type2>& pf,
    gpuField<Type2>& intf
) const
{
    // pf is indexed through sort, so these are the two sizes that must
    // agree; report the size that was actually compared
    if (sort.size() != pf.size())
    {
        FatalErrorInFunction
            << "addressing (" << sort.size()
            << ") and field (" << pf.size() << ") are different sizes" << endl
            << abort(FatalError);
    }

    thrust::transform
    (
        thrust::make_permutation_iterator
        (
            intf.begin(),
            addr.begin()
        ),
        thrust::make_permutation_iterator
        (
            intf.begin(),
            addr.end()
        ),
        // Position within addr, used by the functor to find its segment
        thrust::make_counting_iterator(0),
        thrust::make_permutation_iterator
        (
            intf.begin(),
            addr.begin()
        ),
        // The functor must be instantiated on the element type of pf/intf
        // (Type2), not on the matrix type
        fvMatrixPatchAddFunctor<Type2,false>
        (
            pf.data(),
            sortStart.data(),
            sort.data()
        )
    );
}


//- Subtract patch-face values held in a tmp from the internal field.
//  Forwards to the field overload, then releases the temporary.
template<class Type>
template<class Type2>
void Foam::gpufvMatrix<Type>::subtractFromInternalField
(
    const labelgpuList& addr,
    const labelgpuList& sort,
    const labelgpuList& sortStart,
    const tmp<gpuField<Type2>>& tpf,
    gpuField<Type2>& intf
) const
{
    const gpuField<Type2>& pf = tpf();

    subtractFromInternalField(addr, sort, sortStart, pf, intf);

    // The temporary is no longer needed once its values are accumulated
    tpf.clear();
}


//- Add one component of the patch internal coefficients to the diagonal.
//
//  Loops over all matrices and their boundary patches; patches whose
//  global patch index is -1 are skipped.
//
//  \param diag       diagonal to update in place
//  \param solveCmpt  component of internalCoeffs_ to add
template<class Type>
void Foam::gpufvMatrix<Type>::addBoundaryDiag
(
    scalargpuField& diag,
    const direction solveCmpt
) const
{
    for (label fieldi = 0; fieldi < nMatrices(); ++fieldi)
    {
        const auto& bpsi = this->psi(fieldi).boundaryField();

        forAll(bpsi, ptfi)
        {
            const label patchi = globalPatchID(fieldi, ptfi);

            if (patchi == -1)
            {
                continue;
            }

            const auto& ldu = lduAddr();

            addToInternalField
            (
                ldu.gpuPatchSortCells(patchi),
                ldu.gpuPatchSortAddr(patchi),
                ldu.gpuPatchSortStartAddr(patchi),
                internalCoeffs_[patchi].component(solveCmpt),
                diag
            );
        }
    }
}


//- Add the component average (cmptAv) of the patch internal coefficients
//  to the diagonal.
//
//  Loops over all matrices and their boundary patches; patches whose
//  global patch index is -1 are skipped.
//
//  \param diag  diagonal to update in place
template<class Type>
void Foam::gpufvMatrix<Type>::addCmptAvBoundaryDiag(scalargpuField& diag) const
{
    for (label fieldi = 0; fieldi < nMatrices(); ++fieldi)
    {
        const auto& bpsi = this->psi(fieldi).boundaryField();

        forAll(bpsi, ptfi)
        {
            const label patchi = globalPatchID(fieldi, ptfi);

            if (patchi == -1)
            {
                continue;
            }

            const auto& ldu = lduAddr();

            addToInternalField
            (
                ldu.gpuPatchSortCells(patchi),
                ldu.gpuPatchSortAddr(patchi),
                ldu.gpuPatchSortStartAddr(patchi),
                cmptAv(internalCoeffs_[patchi]),
                diag
            );
        }
    }
}

namespace Foam
{
    //- Thrust binary functor adding coupled-patch source contributions.
    //
    //  Invoked with the current cell value \c d and the position \c id of
    //  that cell in the patch cell list. For each face in
    //  losort[neiStart[id]] ... losort[neiStart[id+1] - 1] the product
    //  cmptMultiply(pbc[face], pnf[face]) is added to the running result.
    template<class Type>
    struct fvMatrixAddBoundarySourceFunctor
    {
        //- Per-face boundary coefficients
        const Type* pbc;

        //- Per-face neighbour field values
        const Type* pnf;

        //- Segment offsets into losort (entry id+1 closes segment id)
        const label* neiStart;

        //- Face indices into pbc/pnf, grouped per cell
        const label* losort;

        fvMatrixAddBoundarySourceFunctor
        (
            const Type* _pbc,
            const Type* _pnf,
            const label* _neiStart,
            const label* _losort
        )
        :
            pbc(_pbc),
            pnf(_pnf),
            neiStart(_neiStart),
            losort(_losort)
        {}

        __host__ __device__
        Type operator()(const Type& d, const label& id)
        {
            Type result = d;

            const label segEnd = neiStart[id+1];

            for (label pos = neiStart[id]; pos < segEnd; ++pos)
            {
                const label facei = losort[pos];

                result += cmptMultiply(pbc[facei], pnf[facei]);
            }

            return result;
        }
    };
}

//- Add the boundary contributions to the source.
//
//  For every patch with a valid (!= -1) global patch index:
//  - non-coupled patch: boundaryCoeffs_ are added directly;
//  - coupled patch and \c couples set: the component-wise product of
//    boundaryCoeffs_ and the patch neighbour field is added;
//  - coupled patch and \c couples not set: nothing is added.
//
//  \param source   source field, updated in place
//  \param couples  include the contributions of coupled patches
template<class Type>
void Foam::gpufvMatrix<Type>::addBoundarySource
(
    gpuField<Type>& source,
    const bool couples
) const
{
    for (label fieldi = 0; fieldi < nMatrices(); ++fieldi)
    {
        const auto& bpsi = this->psi(fieldi).boundaryField();

        forAll(bpsi, ptfi)
        {
            const fvPatchgpuField<Type>& ptf = bpsi[ptfi];

            const label patchi = globalPatchID(fieldi, ptfi);

            if (patchi == -1)
            {
                continue;
            }

            const gpuField<Type>& pbc = boundaryCoeffs_[patchi];

            if (ptf.coupled())
            {
                if (!couples)
                {
                    continue;
                }

                // Keep the tmp alive for as long as pnf is in use
                const tmp<gpuField<Type>> tpnf = ptf.patchNeighbourField();
                const gpuField<Type>& pnf = tpnf();

                const labelgpuList& cells =
                    lduAddr().gpuPatchSortCells(patchi);
                const labelgpuList& faceOrder =
                    lduAddr().gpuPatchSortAddr(patchi);
                const labelgpuList& faceStart =
                    lduAddr().gpuPatchSortStartAddr(patchi);

                const fvMatrixAddBoundarySourceFunctor<Type> accumulate
                (
                    pbc.data(),
                    pnf.data(),
                    faceStart.data(),
                    faceOrder.data()
                );

                // In-place update of source at the patch cells
                thrust::transform
                (
                    thrust::make_permutation_iterator
                    (
                        source.begin(),
                        cells.begin()
                    ),
                    thrust::make_permutation_iterator
                    (
                        source.begin(),
                        cells.end()
                    ),
                    thrust::make_counting_iterator(0),
                    thrust::make_permutation_iterator
                    (
                        source.begin(),
                        cells.begin()
                    ),
                    accumulate
                );
            }
            else
            {
                addToInternalField
                (
                    lduAddr().gpuPatchSortCells(patchi),
                    lduAddr().gpuPatchSortAddr(patchi),
                    lduAddr().gpuPatchSortStartAddr(patchi),
                    pbc,
                    source
                );
            }
        }
    }
}

namespace Foam
{
template<class Type>
struct fvMatrixSetValuesSourceFunctor : public std::binary_function<scalar,label,scalar>
{
    const bool* ownMask;
    const bool* neiMask;
    const Type* value;
    const scalar* upper;
    const scalar* lower;
    const label* ownStart;
    const label* neiStart;
    const label* own;
    const label* nei;
    const label* losort;

    fvMatrixSetValuesSourceFunctor
    (
        const bool* _ownMask,
        const bool* _neiMask,
        const Type* _value,
        const scalar* _upper,
        const scalar* _lower,
        const label* _ownStart,
        const label* _neiStart,
        const label* _own,
        const label* _nei,
        const label* _losort
    ):
        ownMask(_ownMask),
        neiMask(_neiMask),
        value(_value),
        upper(_upper),
        lower(_lower),
        ownStart(_ownStart),
        neiStart(_neiStart),
        own(_own),
        nei(_nei),
        losort(_losort)
    {}

    __host__ __device__
    Type operator()(const Type& source,const thrust::tuple<label,bool>& t)
    {
        Type out = source;
        label id = thrust::get<0>(t);
        bool cellSet = thrust::get<1>(t);
		
        if( ! cellSet)
        {
            label oStart = ownStart[id];
            label oSize = ownStart[id+1] - oStart;

            for(label i = 0; i<oSize; i++)
            {
                label face = oStart + i;
                bool neiSet = neiMask[face];
				
                if(neiSet)
                {
                    out -= lower[face]*value[nei[face]];
                }
            }

            label nStart = neiStart[id];
            label nSize = neiStart[id+1] - nStart;

            for(label i = 0; i<nSize; i++)
            {
                label face = losort[nStart + i];
                bool ownSet = ownMask[face];
				
                if(ownSet)
                {
                    out -= upper[face]*value[own[face]];
                }
            }
        }

        return out;
    }
};

template<class Type>
struct fvMatrixSetValuesClearFacesFunctor
{
    const Type zero;

    fvMatrixSetValuesClearFacesFunctor(Type _zero): zero(_zero) {}

    __host__ __device__
    Type operator()(const Type& val,const bool& set)
    {
        if(set)
            return zero;
        else
            return val;
    }
};

}

//- Constrain the solution in the given cells to the given values.
//
//  Steps, in order:
//  1. write \c values into psi at \c cellLabels;
//  2. set source_ at \c cellLabels from \c values and the diagonal
//     (via multiplyOperatorFunctor - presumably value*Diag; confirm
//     against its definition);
//  3. if the matrix has off-diagonal coefficients: correct the source of
//     the cells adjacent to constrained cells, zero the off-diagonal
//     coefficients of every face touching a constrained cell and zero the
//     patch internal/boundary coefficients of faces whose face cell is
//     constrained.
//
//  \param cellLabels  cells to constrain
//  \param values      value for each entry of cellLabels
//
//  NOTE(review): values and cellLabels are assumed to be of equal length;
//  nothing here checks it.
template<class Type>
template<template<class> class ListType>
void Foam::gpufvMatrix<Type>::setValuesFromList
(
    const labelgpuList& cellLabels,
    const ListType<Type>& values
)
{
    const gpufvMesh& mesh = psi_.mesh();

    const labelgpuList& own = mesh.owner();
    const labelgpuList& nei = mesh.neighbour();

    scalargpuField& Diag = gpuDiag();

    // psi_ is held const; the constrained values are written into it
    gpuField<Type>& psi =
        const_cast
        <
            GeometricgpuField<Type, fvPatchgpuField, gpuvolMesh>&
        >(psi_).primitiveFieldRef();

    // psi[cellLabels[i]] = values[i]
    thrust::copy
    (
        values.begin(),
        values.end(),
        thrust::make_permutation_iterator
        (
            psi.begin(),
            cellLabels.begin()
        )
    );

    // source_[cellLabels[i]] = op(values[i], Diag[cellLabels[i]])
    thrust::transform
    (
        values.begin(),
        values.end(),
        thrust::make_permutation_iterator
        (
            Diag.begin(),
            cellLabels.begin()
        ),
        thrust::make_permutation_iterator
        (
            source_.begin(),
            cellLabels.begin()
        ),
        multiplyOperatorFunctor<Type,scalar,Type>()
    );

    if (symmetric() || asymmetric())
    {
        //TODO make it somehow more comprehensible

        // cellMask: per cell, true when the cell is constrained
        // ownMask/neiMask: per internal face, true when its owner/neighbour
        // cell is constrained
        gpuList<bool> cellMask(Diag.size(),false);
        gpuList<bool> ownMask(own.size(),false);
        gpuList<bool> neiMask(nei.size(),false);

        // Flag the constrained cells
        thrust::fill
        (
            thrust::make_permutation_iterator
            (
                cellMask.begin(),
                cellLabels.begin()
            ),
            thrust::make_permutation_iterator
            (
                cellMask.begin(),
                cellLabels.end()
            ),
            true
        );

        // ownMask[face] = cellMask[own[face]]
        thrust::copy
        (
            thrust::make_permutation_iterator
            (
                cellMask.begin(),
                own.begin()
            ),
            thrust::make_permutation_iterator
            (
                cellMask.begin(),
                own.end()
            ),
            ownMask.begin()
        );

        // neiMask[face] = cellMask[nei[face]]
        thrust::copy
        (
            thrust::make_permutation_iterator
            (
                cellMask.begin(),
                nei.begin()
            ),
            thrust::make_permutation_iterator
            (
                cellMask.begin(),
                nei.end()
            ),
            neiMask.begin()
        );

        // Constrained values scattered into a cell-sized list (zero in all
        // other cells) so the source functor can index them by cell
        gpuList<Type> cellValuesTmp(Diag.size(),pTraits<Type>::zero);

        thrust::copy
        (
            values.begin(),
            values.end(),
            thrust::make_permutation_iterator
            (
                cellValuesTmp.begin(),
                cellLabels.begin()
            )
        );

        const labelgpuList& l = lduAddr().lowerAddr();
        const labelgpuList& u = lduAddr().upperAddr();
        const labelgpuList& losort = lduAddr().losortAddr();

        const labelgpuList& ownStart = lduAddr().ownerStartAddr();
        const labelgpuList& losortStart = lduAddr().losortStartAddr();

        // NOTE(review): this member function is non-const, so these calls
        // presumably bind to the non-const accessors. If non-const
        // gpuLower() allocates the lower coefficients on demand, a
        // symmetric matrix would become asymmetric here - confirm against
        // gpulduMatrix.
        const scalargpuField& Lower = gpuLower();
        const scalargpuField& Upper = gpuUpper();

        // Move the off-diagonal contribution of constrained cells into the
        // source of their unconstrained neighbours. Must run before the
        // coefficients are zeroed below.
        thrust::transform
        (
            source_.begin(),
            source_.end(),
            thrust::make_zip_iterator(thrust::make_tuple
            (
                thrust::make_counting_iterator(0),
                cellMask.begin()
            )),
            source_.begin(),
            fvMatrixSetValuesSourceFunctor<Type>
            (
                ownMask.data(),
                neiMask.data(),
                cellValuesTmp.data(),
                Upper.data(),
                Lower.data(),
                ownStart.data(),
                losortStart.data(),
                l.data(),
                u.data(),
                losort.data()
            )
        );


        // Zero upper on faces whose owner is constrained ...
        thrust::transform
        (
            gpuUpper().begin(),
            gpuUpper().end(),
            ownMask.begin(),
            gpuUpper().begin(),
            fvMatrixSetValuesClearFacesFunctor<scalar>(0.0)
        );

        // ... and on faces whose neighbour is constrained
        thrust::transform
        (
            gpuUpper().begin(),
            gpuUpper().end(),
            neiMask.begin(),
            gpuUpper().begin(),
            fvMatrixSetValuesClearFacesFunctor<scalar>(0.0)
        );

        // Same treatment for lower when the matrix reports asymmetric()
        if (asymmetric())
        {
            thrust::transform
            (
                gpuLower().begin(),
                gpuLower().end(),
                ownMask.begin(),
                gpuLower().begin(),
                fvMatrixSetValuesClearFacesFunctor<scalar>(0.0)
            );

            thrust::transform
            (
                gpuLower().begin(),
                gpuLower().end(),
                neiMask.begin(),
                gpuLower().begin(),
                fvMatrixSetValuesClearFacesFunctor<scalar>(0.0)
            );
        }

        // Zero the patch coefficients of boundary faces whose face cell is
        // constrained. internalCoeffs_/boundaryCoeffs_ are indexed with the
        // mesh patch index here.
        forAll(mesh.boundary(),patchi)
        {
            const gpuField<Type>& internalCoeffs = internalCoeffs_[patchi];
            const gpuField<Type>& boundaryCoeffs = boundaryCoeffs_[patchi];
            const labelgpuList& pcells = mesh.boundary()[patchi].gpuFaceCells();

            // In-place update: the const reference is cast away for the
            // output iterator
            thrust::transform
            (
                internalCoeffs.begin(),
                internalCoeffs.end(),
                thrust::make_permutation_iterator
                (
                    cellMask.begin(),
                    pcells.begin()
                ),
                const_cast<gpuField<Type>&>(internalCoeffs).begin(),
                fvMatrixSetValuesClearFacesFunctor<Type>(pTraits<Type>::zero)
            );

            thrust::transform
            (
                boundaryCoeffs.begin(),
                boundaryCoeffs.end(),
                thrust::make_permutation_iterator
                (
                    cellMask.begin(),
                    pcells.begin()
                ),
                const_cast<gpuField<Type>&>(boundaryCoeffs).begin(),
                fvMatrixSetValuesClearFacesFunctor<Type>(pTraits<Type>::zero)
            );
        }
    }
}


//- Detect boundary patches of field \c fieldI that request implicit
//  treatment.
//
//  Starting from -1, (2 << patchI) is added for every patch whose
//  useImplicit() is true. When the result is non-negative the assembly
//  name is set to "lduAssembly" followed by that number and useImplicit_
//  is switched on.
//
//  \return the accumulated id (-1 when no patch is implicit)
template<class Type>
Foam::label Foam::gpufvMatrix<Type>::checkImplicit(const label fieldI)
{
    const auto& bpsi = this->psi(fieldI).boundaryField();

    label id = -1;

    forAll (bpsi, patchI)
    {
        if (!bpsi[patchI].useImplicit())
        {
            continue;
        }

        if (debug)
        {
            Pout<< "fvMatrix<Type>::checkImplicit "
                << " fieldi:" << fieldI
                << " field:" << this->psi(fieldI).name()
                << " on mesh:"
                << this->psi(fieldI).mesh().name()
                << " patch:" << bpsi[patchI].patch().name()
                << endl;
        }

        id += (label(2) << patchI);
    }

    const bool anyImplicit = (id >= 0);

    if (anyImplicit)
    {
        lduAssemblyName_ = word("lduAssembly") + name(id);
        useImplicit_ = true;
    }

    return id;
}


// * * * * * * * * * * * * * * * * Constructors  * * * * * * * * * * * * * * //


//- Construct for the given field and dimensions.
//
//  The source is zero-initialised and one zero-valued internal and
//  boundary coefficient field is allocated per mesh boundary patch. The
//  boundary conditions of psi are then updated (updateCoeffs) with the
//  field's event number saved beforehand and written back afterwards.
template<class Type>
Foam::gpufvMatrix<Type>::gpufvMatrix
(
    const GeometricgpuField<Type, fvPatchgpuField, gpuvolMesh>& psi,
    const dimensionSet& ds
)
:
    gpulduMatrix(psi.mesh()),
    psi_(psi),
    useImplicit_(false),
    lduAssemblyName_(word::null),
    nMatrix_(0),
    dimensions_(ds),
    source_(psi.size(), Zero),
    internalCoeffs_(psi.mesh().boundary().size()),
    boundaryCoeffs_(psi.mesh().boundary().size()),
    faceFluxCorrectionPtr_(nullptr)
{
    DebugInFunction
        << "Constructing fvMatrix<Type> for field " << psi_.name() << endl;

    checkImplicit();

    // Zero coupling coefficients, one pair of fields per patch
    const auto& patches = psi.mesh().boundary();

    forAll(patches, patchi)
    {
        const auto nPatchFaces = patches[patchi].size();

        internalCoeffs_.set(patchi, new gpuField<Type>(nPatchFaces, Zero));
        boundaryCoeffs_.set(patchi, new gpuField<Type>(nPatchFaces, Zero));
    }

    // Update the boundary coefficients, then put back the event number
    // the field had before the update
    auto& field = this->psi(0);
    const label savedEventNo = field.eventNo();

    field.boundaryFieldRef().updateCoeffs();

    field.eventNo() = savedEventNo;
}


//- Copy construct.
//  All members are copied; the face-flux correction field, when the
//  original has one, is duplicated into a newly allocated field.
template<class Type>
Foam::gpufvMatrix<Type>::gpufvMatrix(const gpufvMatrix<Type>& fvm)
:
    refCount(),
    gpulduMatrix(fvm),
    psi_(fvm.psi_),
    useImplicit_(fvm.useImplicit_),
    lduAssemblyName_(fvm.lduAssemblyName_),
    nMatrix_(fvm.nMatrix_),
    dimensions_(fvm.dimensions_),
    source_(fvm.source_),
    internalCoeffs_(fvm.internalCoeffs_),
    boundaryCoeffs_(fvm.boundaryCoeffs_),
    faceFluxCorrectionPtr_(nullptr)
{
    DebugInFunction
        << "Copying fvMatrix<Type> for field " << psi_.name() << endl;

    if (fvm.faceFluxCorrectionPtr_)
    {
        typedef
            GeometricgpuField<Type, fvsPatchgpuField, gpusurfaceMesh>
            fluxFieldType;

        faceFluxCorrectionPtr_ =
            new fluxFieldType(*(fvm.faceFluxCorrectionPtr_));
    }
}


//- Construct from a tmp matrix.
//
//  The base matrix, source and coefficient fields are constructed with
//  tfvm.isTmp() as their reuse flag. The face-flux correction pointer is
//  taken over from a true temporary and deep-copied otherwise. The tmp is
//  cleared at the end.
template<class Type>
Foam::gpufvMatrix<Type>::gpufvMatrix(const tmp<gpufvMatrix<Type>>& tfvm)
:
    gpulduMatrix
    (
        const_cast<gpufvMatrix<Type>&>(tfvm()),
        tfvm.isTmp()
    ),
    psi_(tfvm().psi_),
    useImplicit_(tfvm().useImplicit_),
    lduAssemblyName_(tfvm().lduAssemblyName_),
    nMatrix_(tfvm().nMatrix_),
    dimensions_(tfvm().dimensions_),
    source_
    (
        const_cast<gpufvMatrix<Type>&>(tfvm()).source_,
        tfvm.isTmp()
    ),
    internalCoeffs_
    (
        const_cast<gpufvMatrix<Type>&>(tfvm()).internalCoeffs_,
        tfvm.isTmp()
    ),
    boundaryCoeffs_
    (
        const_cast<gpufvMatrix<Type>&>(tfvm()).boundaryCoeffs_,
        tfvm.isTmp()
    ),
    faceFluxCorrectionPtr_(nullptr)
{
    DebugInFunction
        << "Copying fvMatrix<Type> for field " << psi_.name() << endl;

    if (tfvm().faceFluxCorrectionPtr_)
    {
        if (!tfvm.isTmp())
        {
            // The original stays alive: duplicate its correction field
            typedef
                GeometricgpuField<Type, fvsPatchgpuField, gpusurfaceMesh>
                fluxFieldType;

            faceFluxCorrectionPtr_ =
                new fluxFieldType(*(tfvm().faceFluxCorrectionPtr_));
        }
        else
        {
            // Take over the pointer and detach it from the temporary
            faceFluxCorrectionPtr_ = tfvm().faceFluxCorrectionPtr_;
            tfvm().faceFluxCorrectionPtr_ = nullptr;
        }
    }

    tfvm.clear();
}


//- Construct for the given field, reading dimensions and source from a
//  stream (in that order).
//
//  One zero-valued internal and boundary coefficient field is allocated
//  per mesh boundary patch.
template<class Type>
Foam::gpufvMatrix<Type>::gpufvMatrix
(
    const GeometricgpuField<Type, fvPatchgpuField, gpuvolMesh>& psi,
    Istream& is
)
:
    gpulduMatrix(psi.mesh()),
    psi_(psi),
    useImplicit_(false),
    lduAssemblyName_(word::null),
    nMatrix_(0),
    dimensions_(is),
    source_(is),
    internalCoeffs_(psi.mesh().boundary().size()),
    boundaryCoeffs_(psi.mesh().boundary().size()),
    faceFluxCorrectionPtr_(nullptr)
{
    DebugInFunction
        << "Constructing fvMatrix<Type> for field " << psi_.name() << endl;

    checkImplicit();

    // Zero coupling coefficients, one pair of fields per patch
    const auto& patches = psi.mesh().boundary();

    forAll(patches, patchi)
    {
        const auto nPatchFaces = patches[patchi].size();

        internalCoeffs_.set(patchi, new gpuField<Type>(nPatchFaces, Zero));
        boundaryCoeffs_.set(patchi, new gpuField<Type>(nPatchFaces, Zero));
    }
}


//- Return a copy of this matrix wrapped in a tmp.
template<class Type>
Foam::tmp<Foam::gpufvMatrix<Type>> Foam::gpufvMatrix<Type>::clone() const
{
    typedef tmp<gpufvMatrix<Type>> tmpMatrixType;

    return tmpMatrixType::New(*this);
}


// * * * * * * * * * * * * * * * Destructor * * * * * * * * * * * * * * * * * //

//- Destructor: frees the face-flux correction field and clears the list
//  of sub-matrices.
template<class Type>
Foam::gpufvMatrix<Type>::~gpufvMatrix()
{
    DebugInFunction
        << "Destroying fvMatrix<Type> for field " << psi_.name() << endl;

    // Release the demand-driven correction field and reset the pointer
    delete faceFluxCorrectionPtr_;
    faceFluxCorrectionPtr_ = nullptr;

    subMatrices_.clear();
}


// * * * * * * * * * * * * * * * Member Functions  * * * * * * * * * * * * * //

//- Populate the interface list of the assembled matrix.
//
//  \c interfaces is sized to the number of internal coefficient fields.
//  For every field interface that is set and maps to a valid (!= -1)
//  global patch:
//  - cyclic, cyclicAMI and cyclicACMI interfaces get a newly allocated
//    patch field of the matching type, built on the assembly-mesh
//    interface; it is stored in \c newInterfaces and referenced from
//    \c interfaces;
//  - any other interface is referenced directly.
//
//  \param interfaces     list of interface pointers to fill
//  \param newInterfaces  storage owning the newly created patch fields
template<class Type>
void Foam::gpufvMatrix<Type>::setInterfaces
(
    lduInterfacegpuFieldPtrsList& interfaces,
    PtrDynList<lduInterfacegpuField>& newInterfaces
)
{
    interfaces.setSize(internalCoeffs_.size());

    for (label fieldi = 0; fieldi < nMatrices(); ++fieldi)
    {
        const auto& bpsi = this->psi(fieldi).boundaryField();
        lduInterfacegpuFieldPtrsList fieldInterfaces(bpsi.scalarInterfaces());

        forAll (fieldInterfaces, patchi)
        {
            const label globalId = lduMeshPtr()->patchMap()[fieldi][patchi];

            // Nothing to do for unmapped patches or unset interfaces
            if (globalId == -1 || !fieldInterfaces.set(patchi))
            {
                continue;
            }

            const auto& patchField = bpsi[patchi];

            if (isA<cyclicLduInterfacegpuField>(patchField))
            {
                newInterfaces.append
                (
                    new cyclicFvPatchgpuField<Type>
                    (
                        refCast<const gpufvPatch>
                        (
                            lduMeshPtr()->interfaces()[globalId]
                        ),
                        patchField.internalField()
                    )
                );
                interfaces.set(globalId, &newInterfaces.last());
            }
            else if (isA<cyclicAMILduInterfacegpuField>(patchField))
            {
                newInterfaces.append
                (
                    new cyclicAMIFvPatchgpuField<Type>
                    (
                        refCast<const gpufvPatch>
                        (
                            lduMeshPtr()->interfaces()[globalId]
                        ),
                        patchField.internalField()
                    )
                );
                interfaces.set(globalId, &newInterfaces.last());
            }
            else if (isA<cyclicACMILduInterfacegpuField>(patchField))
            {
                newInterfaces.append
                (
                    new cyclicACMIFvPatchgpuField<Type>
                    (
                        refCast<const gpufvPatch>
                        (
                            lduMeshPtr()->interfaces()[globalId]
                        ),
                        patchField.internalField()
                    )
                );
                interfaces.set(globalId, &newInterfaces.last());
            }
            else
            {
                // Reuse the interface of the field itself
                interfaces.set(globalId, &fieldInterfaces[patchi]);
            }
        }
    }
}


//- Map patch flux coefficients of the assembled matrix back onto the
//  patches of field \c fieldi.
//
//  For each patch of \c contrib:
//  - mapped patch (global id != -1): contrib[patchi] becomes the
//    component-wise product of the saved flux coefficients with the patch
//    internal field (\c internal true) or, for coupled patches only, the
//    patch neighbour field (\c internal false); otherwise it stays zero;
//  - unmapped patch (global id == -1) that is the master of an implicit
//    coupling: contrib of this patch and of its neighbour patch are
//    resized and zeroed.
//
//  \param fieldi       index of the field/matrix
//  \param fluxContrib  flux coefficients, indexed by global patch id
//  \param contrib      per-patch result, overwritten
//  \param internal     select internal (true) or boundary (false) mapping
//
//  NOTE(review): the per-face accumulation of the implicit branch is
//  commented out below, so those contributions are currently left at
//  zero - confirm whether this is intended or an unfinished port.
template<class Type>
void Foam::gpufvMatrix<Type>::mapContributions
(
    label fieldi,
    const FieldField<gpuField, Type>& fluxContrib,
    FieldField<gpuField, Type>& contrib,
    bool internal
) const
{
    const gpulduPrimitiveMeshAssembly* ptr = lduMeshPtr();

    // Local patch index -> global patch index (-1: not mapped)
    const labelList& patchMap = ptr->patchMap()[fieldi];

    forAll(contrib, patchi)
    {
        const label globalPtchId = patchMap[patchi];

        if (globalPtchId != -1)
        {
            // Cache contrib before overwriting
            const gpuField<Type> saveContrib(fluxContrib[globalPtchId]);

            // NOTE: the line below ends in a comma, not a semicolon; the
            // comma operator still evaluates the resize first and the
            // zero assignment second
            contrib[patchi].setSize(psi_.boundaryField()[patchi].size()),
            contrib[patchi] = pTraits<Type>::zero;

            if (internal)
            {
                contrib[patchi] =
                    cmptMultiply
                    (
                        saveContrib,
                        psi_.boundaryField()[patchi].patchInternalField()
                    );
            }
            else
            {
                // Non-coupled patches keep the zero assigned above
                if (this->psi(fieldi).boundaryField()[patchi].coupled())
                {
                    contrib[patchi] =
                        cmptMultiply
                        (
                            saveContrib,
                            psi_.boundaryField()[patchi].patchNeighbourField()
                        );
                }
            }
        }
        else if (globalPtchId == -1)
        {
            const polyPatch& pp =
                this->psi(fieldi).mesh().hostmesh().boundaryMesh()[patchi];

            // Only the master side handles the pair of patches
            if (pp.masterImplicit())
            {
                label virtualPatch =
                    ptr->patchLocalToGlobalMap()[fieldi][patchi];

                const label nbrPatchId = pp.neighbPolyPatchID();

                // Copy contrib before overwriting
                const gpuField<Type> saveContrib(fluxContrib[virtualPatch]);

                gpuField<Type>& coeffs = contrib[patchi];
                gpuField<Type>& nbrCoeffs = contrib[nbrPatchId];

                coeffs.setSize(psi_.boundaryField()[patchi].size());
                nbrCoeffs.setSize(psi_.boundaryField()[nbrPatchId].size());

                coeffs = pTraits<Type>::zero;
                nbrCoeffs = pTraits<Type>::zero;

                // Neighbour cells
                const labelList& nbrCellIds =
                    ptr->cellBoundMap()[fieldi][patchi];

                const labelList& cellIds =
                    ptr->cellBoundMap()[fieldi][nbrPatchId];

                const GeometricgpuField<Type, fvPatchgpuField, gpuvolMesh>& psi =
                    this->psi(fieldi);

                // The indices below are looked up but not used while the
                // accumulation stays disabled
                forAll(saveContrib, subFaceI)
                {
                    const label faceId =
                        ptr->facePatchFaceMap()[fieldi][patchi][subFaceI];
                    const label nbrFaceId =
                        ptr->facePatchFaceMap()[fieldi][nbrPatchId][subFaceI];

                    const label nbrCellId = nbrCellIds[subFaceI];
                    const label cellId = cellIds[subFaceI];

                    // Disabled per-face accumulation
                    /*if (internal)
                    {
                        coeffs[faceId] +=
                            cmptMultiply(saveContrib[subFaceI], psi[cellId]);

                        nbrCoeffs[nbrFaceId] +=
                            cmptMultiply(saveContrib[subFaceI], psi[nbrCellId]);
                    }
                    else //boundary
                    {
                        coeffs[faceId] +=
                            cmptMultiply(saveContrib[subFaceI], psi[nbrCellId]);

                        nbrCoeffs[nbrFaceId] +=
                            cmptMultiply(saveContrib[subFaceI], psi[cellId]);
                    }*/
                }
            }
        }
    }
}


// Size and fill internalCoeffs_ / boundaryCoeffs_ from the sub-matrices.
// Patches whose assembly patchMap entry is not -1 are stored at that entry;
// patches mapped to -1 (the implicit ones) are stored at the index given by
// patchLocalToGlobalMap.
template<class Type>
void Foam::gpufvMatrix<Type>::setBounAndInterCoeffs()
{
    // A multi-fvMatrix needs one coefficient slot per patch of every field
    if (nMatrix_ > 0)
    {
        label nSlots = 0;
        for (label fieldi = 0; fieldi < nMatrices(); ++fieldi)
        {
            nSlots += this->psi(fieldi).mesh().boundary().size();
        }

        internalCoeffs_.setSize(nSlots);
        boundaryCoeffs_.setSize(nSlots);

        // Start every slot as a zero field of the patch size
        label sloti = 0;
        for (label fieldi = 0; fieldi < nMatrices(); ++fieldi)
        {
            const auto& psii = this->psi(fieldi);

            forAll(psii.mesh().boundary(), patchi)
            {
                const label nPatchFaces = psii.mesh().boundary()[patchi].size();

                internalCoeffs_.set
                (
                    sloti,
                    new gpuField<Type>(nPatchFaces, Zero)
                );
                boundaryCoeffs_.set
                (
                    sloti,
                    new gpuField<Type>(nPatchFaces, Zero)
                );

                ++sloti;
            }
        }
    }

    for (label i = 0; i < nMatrices(); ++i)
    {
        const auto& bpsi = this->psi(i).boundaryField();

        // Copies of the implicit-patch coefficients, taken before any
        // slot below gets overwritten
        FieldField<gpuField, Type> cachedBoundary(bpsi.size());
        FieldField<gpuField, Type> cachedInternal(bpsi.size());

        label nCached = 0;
        forAll(bpsi, patchI)
        {
            if (lduMeshPtr()->patchMap()[i][patchI] != -1)
            {
                continue;
            }

            cachedBoundary.set
            (
                nCached,
                matrix(i).boundaryCoeffs()[patchI].clone()
            );
            cachedInternal.set
            (
                nCached,
                matrix(i).internalCoeffs()[patchI].clone()
            );
            ++nCached;
        }

        // Re-order the non-implicit patches into their mapped slots
        forAll(bpsi, patchI)
        {
            const label mappedPatchId = lduMeshPtr()->patchMap()[i][patchI];

            if (mappedPatchId == -1)
            {
                continue;
            }

            if (matrix(i).internalCoeffs().set(patchI))
            {
                internalCoeffs_.set
                (
                    mappedPatchId,
                    matrix(i).internalCoeffs()[patchI].clone()
                );
            }

            if (matrix(i).boundaryCoeffs().set(patchI))
            {
                boundaryCoeffs_.set
                (
                    mappedPatchId,
                    matrix(i).boundaryCoeffs()[patchI].clone()
                );
            }
        }

        // Place the cached implicit patches at their local-to-global slots
        label cachei = 0;
        forAll(bpsi, patchI)
        {
            if (lduMeshPtr()->patchMap()[i][patchI] != -1)
            {
                continue;
            }

            const label implicitPatchId =
                lduMeshPtr()->patchLocalToGlobalMap()[i][patchI];

            internalCoeffs_.set
            (
                implicitPatchId,
                cachedInternal[cachei].clone()
            );
            boundaryCoeffs_.set
            (
                implicitPatchId,
                cachedBoundary[cachei].clone()
            );

            ++cachei;
        }
    }

//    forAll(internalCoeffs_, patchI)
//    {
//         DebugVar(patchI)
//         DebugVar(internalCoeffs_[patchI])
//         DebugVar(boundaryCoeffs_[patchI])
//    }
}


// For every field, let the boundary condition of each patch whose assembly
// patchMap entry is -1 manipulate this matrix for component cmp.
template<class Type>
void Foam::gpufvMatrix<Type>::manipulateMatrix(direction cmp)
{
    for (label fieldi = 0; fieldi < nMatrices(); ++fieldi)
    {
        forAll(psi(fieldi).boundaryField(), patchI)
        {
            const label mappedPatchId =
                lduMeshPtr()->patchMap()[fieldi][patchI];

            // Only patches without a mapped id are handled here
            if (mappedPatchId != -1)
            {
                continue;
            }

            psi(fieldi).boundaryFieldRef()[patchI].manipulateMatrix
            (
                *this,
                fieldi,
                cmp
            );
        }
    }
}


// Gather lower/upper/diag/source of all sub-matrices into buffers that use
// the assembled addressing (faceMap / cellOffsets of the assembly mesh) and
// store the result as the coefficients of this matrix.
template<class Type>
void Foam::gpufvMatrix<Type>::transferFvMatrixCoeffs()
{
    const labelListList& faceMap = lduMeshPtr()->faceMap();
    const labelList& cellMap = lduMeshPtr()->cellOffsets();

    // Sizes of the assembled system
    label nAssembledFaces = lduMeshPtr()->lduAddr().upperAddr().size();
    label nAssembledCells = lduMeshPtr()->lduAddr().size();

    // Staging buffers in assembled numbering
    scalarField lowerAssemb(nAssembledFaces, Zero);
    scalarField upperAssemb(nAssembledFaces, Zero);
    scalarField diagAssemb(nAssembledCells, Zero);
    Field<Type> sourceAssemb(nAssembledCells, Zero);

    // A lower triangle is assembled as soon as any sub-matrix is asymmetric
    bool anyAsymmetric = false;
    for (label i = 0; i < nMatrices(); ++i)
    {
        if (matrix(i).asymmetric())
        {
            anyAsymmetric = true;
        }
    }

    // Scatter the coefficients of each sub-matrix into the staging buffers
    for (label i = 0; i < nMatrices(); ++i)
    {
        const labelList& faceMapi = faceMap[i];
        const label cellOffseti = cellMap[i];

        if (anyAsymmetric)
        {
            const scalarField lowerSub(matrix(i).gpuLower());
            forAll(lowerSub, facei)
            {
                lowerAssemb[faceMapi[facei]] = lowerSub[facei];
            }
        }

        scalarField upperSub(matrix(i).gpuUpper().size());
        scalarField diagSub(matrix(i).gpuDiag().size());
        Field<Type> sourceSub(matrix(i).source());

        // Copy the gpuUpper()/gpuDiag() contents into the scalarField buffers
        thrust::copy
        (
            matrix(i).gpuUpper().begin(),
            matrix(i).gpuUpper().end(),
            upperSub.begin()
        );
        thrust::copy
        (
            matrix(i).gpuDiag().begin(),
            matrix(i).gpuDiag().end(),
            diagSub.begin()
        );

        forAll(upperSub, facei)
        {
            upperAssemb[faceMapi[facei]] = upperSub[facei];
        }

        forAll(diagSub, celli)
        {
            const label assembledCelli = cellOffseti + celli;
            diagAssemb[assembledCelli] = diagSub[celli];
            sourceAssemb[assembledCelli] = sourceSub[celli];
        }
    }

    // Resize the coefficients of this matrix and take over the buffers
    if (anyAsymmetric)
    {
        gpuLower().setSize(nAssembledFaces, Zero);
        gpuLower() = lowerAssemb;
    }

    gpuUpper().setSize(nAssembledFaces, Zero);
    gpuUpper() = upperAssemb;

    gpuDiag().setSize(nAssembledCells, Zero);
    gpuDiag() = diagAssemb;

    source().setSize(nAssembledCells, Zero);
    source() = sourceAssemb;
}


// Look up the assembly mesh registered under lduAssemblyName_ in the
// database of psi_'s mesh and return it as a non-const pointer.
template<class Type>
Foam::gpulduPrimitiveMeshAssembly* Foam::gpufvMatrix<Type>::lduMeshPtr()
{
    // The lookup result is held as pointer-to-const in the original code;
    // the constness is cast away for the non-const accessor
    return const_cast<gpulduPrimitiveMeshAssembly*>
    (
        static_cast<const gpulduPrimitiveMeshAssembly*>
        (
            psi_.mesh().thisDb().objectRegistry::template findObject
            <
                gpulduPrimitiveMeshAssembly
            > (lduAssemblyName_)
        )
    );
}


// Const lookup of the assembly mesh registered under lduAssemblyName_ in
// the database of psi_'s mesh.
template<class Type>
const Foam::gpulduPrimitiveMeshAssembly* Foam::gpufvMatrix<Type>::lduMeshPtr() const
{
    const gpulduPrimitiveMeshAssembly* assemblyPtr =
        psi_.mesh().thisDb().objectRegistry::template cfindObject
        <
            gpulduPrimitiveMeshAssembly
        > (lduAssemblyName_);

    return assemblyPtr;
}


// Create the assembly mesh named lduAssemblyName_ if it is not yet
// registered; otherwise update it when the host mesh is changing and its
// points are newer than the assembly; otherwise re-use it unchanged.
template<class Type>
void Foam::gpufvMatrix<Type>::createOrUpdateLduPrimitiveAssembly()
{
    typedef GeometricgpuField<Type, fvPatchgpuField, gpuvolMesh> psiFieldType;

    gpulduPrimitiveMeshAssembly* assemblyPtr = lduMeshPtr();

    IOobject io
    (
        lduAssemblyName_,
        psi_.mesh().time().timeName(),
        psi_.mesh().hostmesh(),
        IOobject::NO_READ,
        IOobject::NO_WRITE
    );

    // Non-owning lists of the meshes and fields of all sub-matrices
    UPtrList<gpulduMesh> meshPtrs(nMatrices());
    UPtrList<psiFieldType> fieldPtrs(nMatrices());

    for (label fieldi = 0; fieldi < nMatrices(); ++fieldi)
    {
        const gpufvMesh& meshi = this->psi(fieldi).mesh();

        meshPtrs.set(fieldi, &const_cast<gpufvMesh&>(meshi));
        fieldPtrs.set(fieldi, &this->psi(fieldi));
    }

    if (!assemblyPtr)
    {
        // Nothing registered yet: build, store, then update with the fields
        gpulduPrimitiveMeshAssembly* newAssemblyPtr =
            new gpulduPrimitiveMeshAssembly(io, meshPtrs);

        newAssemblyPtr->store();
        newAssemblyPtr->update(fieldPtrs);

        Info
            << "Creating gpulduPrimitiveAssembly: " << lduAssemblyName_ << endl;

        return;
    }

    auto& hostMesh = psi_.mesh().hostmesh();

    if (hostMesh.changing() && !hostMesh.upToDatePoints(*assemblyPtr))
    {
        // Clear losortPtr_, ownerStartPtr_, losortStartPtr_
        assemblyPtr->lduAddr().clearOut();
        assemblyPtr->update(fieldPtrs);
        hostMesh.setUpToDatePoints(*assemblyPtr);

        Info
            << "Updating lduPrimitiveAssembly: " << lduAssemblyName_ << endl;
    }
    else
    {
        Info
            << "Using lduPrimitiveAssembly: " << lduAssemblyName_ << endl;
    }
}


// Single-value overload: wraps value in a UniformList and forwards to
// setValuesFromList together with cellLabels.
template<class Type>
void Foam::gpufvMatrix<Type>::setValues
(
    const labelgpuList& cellLabels,
    const Type& value
)
{
    const UniformList<Type> uniformValues(value);

    this->setValuesFromList(cellLabels, uniformValues);
}


// gpuList overload: forwards cellLabels and values unchanged to
// setValuesFromList.
// NOTE(review): presumably values[i] belongs to cellLabels[i] - confirm
// against setValuesFromList.
template<class Type>
void Foam::gpufvMatrix<Type>::setValues
(
    const labelgpuList& cellLabels,
    const gpuList<Type>& values
)
{
    this->setValuesFromList(cellLabels, values);
}


// UIndirectList overload: forwards cellLabels and values unchanged to
// setValuesFromList.
// NOTE(review): presumably values[i] belongs to cellLabels[i] - confirm
// against setValuesFromList.
template<class Type>
void Foam::gpufvMatrix<Type>::setValues
(
    const labelgpuList& cellLabels,
    const UIndirectList<Type>& values
)
{
    this->setValuesFromList(cellLabels, values);
}


// Add diag*value to the source of cell celli and double its diagonal.
// Applied only when forceReference is set or psi_ needs a reference, and
// only for a non-negative celli.
template<class Type>
void Foam::gpufvMatrix<Type>::setReference
(
    const label celli,
    const Type& value,
    const bool forceReference
)
{
    const bool referenceWanted = (forceReference || psi_.needReference());

    if (!referenceWanted || celli < 0)
    {
        return;
    }

    // Read the diagonal entry once; it feeds both updates
    const scalar diagValue = gpuDiag().get(celli);

    source().set(celli, source().get(celli) + diagValue*value);
    gpuDiag().set(celli, 2*diagValue);
}


// Apply the same reference value to every cell listed in cellLabels:
// add diag*value to the cell's source and double the cell's diagonal.
// Applied only when forceReference is set or psi_ needs a reference;
// negative cell labels are skipped.
template<class Type>
void Foam::gpufvMatrix<Type>::setReferences
(
    const labelgpuList& cellLabels,
    const Type& value,
    const bool forceReference
)
{
    if (!(forceReference || psi_.needReference()))
    {
        return;
    }

    forAll(cellLabels, celli)
    {
        const label cellId = cellLabels.get(celli);

        if (cellId >= 0)
        {
            // Index the matrix by the cell label, not by the position in
            // cellLabels (consistent with setReference for a single cell)
            const scalar diagValue = gpuDiag().get(cellId);

            source().set(cellId, source().get(cellId) + diagValue*value);
            gpuDiag().set(cellId, 2*diagValue);
        }
    }
}


// Apply per-cell reference values: for entry i of cellLabels, add
// diag*values[i] to the source of that cell and double the cell's diagonal.
// Applied only when forceReference is set or psi_ needs a reference;
// negative cell labels are skipped.
template<class Type>
void Foam::gpufvMatrix<Type>::setReferences
(
    const labelgpuList& cellLabels,
    const gpuList<Type>& values,
    const bool forceReference
)
{
    if (!(forceReference || psi_.needReference()))
    {
        return;
    }

    forAll(cellLabels, celli)
    {
        const label cellId = cellLabels.get(celli);

        if (cellId >= 0)
        {
            // The matrix is indexed by the cell label; the value list is
            // indexed by the position in cellLabels
            const scalar diagValue = gpuDiag().get(cellId);

            source().set
            (
                cellId,
                source().get(cellId) + diagValue*values.get(celli)
            );
            gpuDiag().set(cellId, 2*diagValue);
        }
    }
}


// Append a clone of matrix to the sub-matrices, check that its dimensions
// agree with this matrix, run checkImplicit over the fields until one
// returns a positive id, and reset the boundary coefficient lists.
template<class Type>
void Foam::gpufvMatrix<Type>::addFvMatrix(gpufvMatrix& matrix)
{
    subMatrices_.append(matrix.clone());
    ++nMatrix_;

    if (dimensions_ != matrix.dimensions())
    {
        FatalErrorInFunction
            << "incompatible dimensions for matrix addition "
            << endl << "    "
            << "[" << dimensions_  << " ] "
            << " [" << matrix.dimensions() << " ]"
            << abort(FatalError);
    }

    // Stop at the first field for which checkImplicit reports a positive id
    label fieldi = 0;
    while (fieldi < nMatrices())
    {
        const label implicitId = checkImplicit(fieldi);

        if (implicitId > 0)
        {
            break;
        }

        ++fieldi;
    }

    // Both coefficient lists are emptied here
    internalCoeffs_.setSize(0);
    boundaryCoeffs_.setSize(0);
}


namespace Foam
{
    template<class Type>
    struct fvMatrixRelaxDiagonalDominanceFunctor
    {
        __host__ __device__
        scalar operator()(const scalar& s1, const scalar& s2)
        {
            return max(mag(s1), s2);
        }
    };


    template<class Type,class Fun>
    struct fvMatrixRelaxAddToDiagonalFunctor : public std::binary_function<label,scalar,scalar>
    {
        const Fun f;
        const Type* iCoeffs;
        const label* neiStart;
        const label* losort;

        fvMatrixRelaxAddToDiagonalFunctor
        (
            const Fun _f,
            const Type* _iCoeffs,
            const label* _neiStart,
            const label* _losort
        ):
             f(_f),
             iCoeffs(_iCoeffs),
             neiStart(_neiStart),
             losort(_losort)
        {}

        __host__ __device__
        scalar operator()(const label& id,const scalar& s)
        {
            scalar out = s;

            label nStart = neiStart[id];
            label nSize = neiStart[id+1] - nStart;

            for(label i = 0; i<nSize; i++)
            {
                label face = losort[nStart + i];
                out += f(iCoeffs[face]);
            }

            return out;
        }
    };
    
    template<class Type>
    struct componetZeroFunctor
    {
        __host__ __device__
        scalar operator ()(const Type& t) const
        {
            return component(t, 0);
        }
    };
	
    template<class Type>
    struct negativeComponetZeroFunctor
    {
        __host__ __device__
        scalar operator ()(const Type& t) const
        {
            return -component(t, 0);
        }
    };
	
    template<class Type>
    struct magComponetZeroFunctor
    {
        __host__ __device__
        scalar operator ()(const Type& t) const
        {
            return mag(component(t, 0));
        }
    };
	
    template<class Type>
    struct maxComponentMagComponetFunctor
    {
        __host__ __device__
        scalar operator ()(const Type& t) const
        {
            return cmptMax(cmptMag(t));
        }
    };
	
    template<class Type>
    struct negativeComponetMinFunctor
    {
        __host__ __device__
        scalar operator ()(const Type& t) const
        {
            return - cmptMin(t);
        }
    };
}

// Relax the matrix with factor alpha.
//
// Steps, in order:
//  1. add boundary coefficient contributions to the diagonal D (and, for
//     coupled patches, to the off-diagonal sum sumOff)
//  2. replace D by max(|D|, sumOff)
//  3. divide D by alpha
//  4. subtract boundary coefficient contributions from D again
//  5. add (D - D0)*psi to the source, D0 being the diagonal on entry
//
// Does nothing when alpha <= 0.
template<class Type>
void Foam::gpufvMatrix<Type>::relax(const scalar alpha)
{
    if (alpha <= 0)
    {
        return;
    }

    DebugInFunction
        << "Relaxing " << psi_.name() << " by " << alpha << endl;

    gpuField<Type>& S = source();
    scalargpuField& D = gpuDiag();

    // Store the current unrelaxed diagonal for use in updating the source
    scalargpuField D0(D);

    // Calculate the sum-mag off-diagonal from the interior faces
    scalargpuField sumOff(D.size(), Zero);
    sumMagOffDiag(sumOff);

    // Handle the boundary contributions to the diagonal
    forAll(psi_.boundaryField(), patchi)
    {
        const fvPatchgpuField<Type>& ptf = psi_.boundaryField()[patchi];

        if (ptf.size())
        {
            gpuField<Type>& iCoeffs = internalCoeffs_[patchi];

            // Per-patch addressing: entry id of the transforms below updates
            // the field at index pcells[id], summing over the losort entries
            // in the range [losortStart[id], losortStart[id+1])
            const labelgpuList& pcells = lduAddr().gpuPatchSortCells(patchi);
            const labelgpuList& losort = lduAddr().gpuPatchSortAddr(patchi);
            const labelgpuList& losortStart = lduAddr().gpuPatchSortStartAddr(patchi);

            if (ptf.coupled())
            {
                const gpuField<Type>& pCoeffs = boundaryCoeffs_[patchi];

                // For coupled boundaries add component 0 of the internal
                // coefficients to the diagonal ...
                thrust::transform
                (
                    thrust::make_counting_iterator(0),
                    thrust::make_counting_iterator(0)+pcells.size(),
                    thrust::make_permutation_iterator
                    (
                        D.begin(),
                        pcells.begin()
                    ),
                    thrust::make_permutation_iterator
                    (
                        D.begin(),
                        pcells.begin()
                    ),
                    fvMatrixRelaxAddToDiagonalFunctor<Type,componetZeroFunctor<Type> >
                    (
                        componetZeroFunctor<Type>(),
                        iCoeffs.data(),
                        losortStart.data(),
                        losort.data()
                    )
                );

                // ... and the magnitude of component 0 of the boundary
                // coefficients to the off-diagonal sum
                thrust::transform
                (
                    thrust::make_counting_iterator(0),
                    thrust::make_counting_iterator(0)+pcells.size(),
                    thrust::make_permutation_iterator
                    (
                        sumOff.begin(),
                        pcells.begin()
                    ),
                    thrust::make_permutation_iterator
                    (
                        sumOff.begin(),
                        pcells.begin()
                    ),
                    fvMatrixRelaxAddToDiagonalFunctor<Type,magComponetZeroFunctor<Type> >
                    (
                        magComponetZeroFunctor<Type>(),
                        pCoeffs.data(),
                        losortStart.data(),
                        losort.data()
                    )
                );
            }
            else
            {
                // For non-coupled boundaries add the maximum magnitude diagonal
                // contribution to ensure stability
                thrust::transform
                (
                    thrust::make_counting_iterator(0),
                    thrust::make_counting_iterator(0)+pcells.size(),
                    thrust::make_permutation_iterator
                    (
                        D.begin(),
                        pcells.begin()
                    ),
                    thrust::make_permutation_iterator
                    (
                        D.begin(),
                        pcells.begin()
                    ),
                    fvMatrixRelaxAddToDiagonalFunctor<Type,maxComponentMagComponetFunctor<Type> >
                    (
                        maxComponentMagComponetFunctor<Type>(),
                        iCoeffs.data(),
                        losortStart.data(),
                        losort.data()
                    )
                );
            }
        }
    }

/*
    if (debug)
    {
        // Calculate amount of non-dominance.
        label nNon = 0;
        scalar maxNon = 0.0;
        scalar sumNon = 0.0;
        forAll(D, celli)
        {
            scalar d = (sumOff[celli] - D[celli])/mag(D[celli]);

            if (d > 0)
            {
                nNon++;
                maxNon = max(maxNon, d);
                sumNon += d;
            }
        }

        reduce(nNon, sumOp<label>(), UPstream::msgType(), psi_.mesh().comm());
        reduce
        (
            maxNon,
            maxOp<scalar>(),
            UPstream::msgType(),
            psi_.mesh().comm()
        );
        reduce
        (
            sumNon,
            sumOp<scalar>(),
            UPstream::msgType(),
            psi_.mesh().comm()
        );
        sumNon /= returnReduce
        (
            D.size(),
            sumOp<label>(),
            UPstream::msgType(),
            psi_.mesh().comm()
        );

        InfoInFunction
            << "Matrix dominance test for " << psi_.name() << nl
            << "    number of non-dominant cells   : " << nNon << nl
            << "    maximum relative non-dominance : " << maxNon << nl
            << "    average relative non-dominance : " << sumNon << nl
            << endl;
    }
*/

    // Ensure the matrix is diagonally dominant...
    // Assumes that the central coefficient is positive and ensures it is
    // (element-wise D = max(|D|, sumOff))
    thrust::transform(D.begin(),D.end(),sumOff.begin(),D.begin(),
                      fvMatrixRelaxDiagonalDominanceFunctor<Type>());

    // ... then relax
    D /= alpha;

    // Now remove the boundary contributions from the diagonal again
    forAll(psi_.boundaryField(), patchi)
    {
        const fvPatchgpuField<Type>& ptf = psi_.boundaryField()[patchi];

        if (ptf.size())
        {
            gpuField<Type>& iCoeffs = internalCoeffs_[patchi];
            const labelgpuList& pcells = lduAddr().gpuPatchSortCells(patchi);
            const labelgpuList& losort = lduAddr().gpuPatchSortAddr(patchi);
            const labelgpuList& losortStart = lduAddr().gpuPatchSortStartAddr(patchi);

            if (ptf.coupled())
            {
                // Coupled: subtract component 0 of the internal coefficients
                thrust::transform
                (
                    thrust::make_counting_iterator(0),
                    thrust::make_counting_iterator(0)+pcells.size(),
                    thrust::make_permutation_iterator
                    (
                        D.begin(),
                        pcells.begin()
                    ),
                    thrust::make_permutation_iterator
                    (
                        D.begin(),
                        pcells.begin()
                    ),
                    fvMatrixRelaxAddToDiagonalFunctor<Type,negativeComponetZeroFunctor<Type> >(
                        negativeComponetZeroFunctor<Type>(),
                        iCoeffs.data(),
                        losortStart.data(),
                        losort.data()
                    )
                );
            }
            else
            {
                // Non-coupled: subtract the minimum component of the
                // internal coefficients
                thrust::transform
                (
                    thrust::make_counting_iterator(0),
                    thrust::make_counting_iterator(0)+pcells.size(),
                    thrust::make_permutation_iterator
                    (
                        D.begin(),
                        pcells.begin()
                    ),
                    thrust::make_permutation_iterator
                    (
                        D.begin(),
                        pcells.begin()
                    ),
                    fvMatrixRelaxAddToDiagonalFunctor<Type,negativeComponetMinFunctor<Type> >
                    (
                        negativeComponetMinFunctor<Type>(),
                        iCoeffs.data(),
                        losortStart.data(),
                        losort.data()
                    )
                );
            }
        }
    }

    // Finally add the relaxation contribution to the source.
    S += (D - D0)*psi_.primitiveField();
}


// Relax with the equation relaxation factor of the host mesh, if one is
// specified for the name selected by psi_ from the "finalIteration" flag.
template<class Type>
void Foam::gpufvMatrix<Type>::relax()
{
    const bool finalIter =
        psi_.mesh().hostmesh().data::template getOrDefault<bool>
        ("finalIteration", false);

    const word eqnName = psi_.select(finalIter);

    if (!psi_.mesh().hostmesh().relaxEquation(eqnName))
    {
        return;
    }

    relax(psi_.mesh().hostmesh().equationRelaxationFactor(eqnName));
}


// Call manipulateMatrix(*this) on every patch field in bFields.
template<class Type>
void Foam::gpufvMatrix<Type>::boundaryManipulate
(
    typename GeometricgpuField<Type, fvPatchgpuField, gpuvolMesh>::
        Boundary& bFields
)
{
    for (label patchi = 0; patchi < bFields.size(); ++patchi)
    {
        bFields[patchi].manipulateMatrix(*this);
    }
}


// Return a copy of the diagonal after addCmptAvBoundaryDiag has been
// applied to it.
template<class Type>
Foam::tmp<Foam::scalargpuField> Foam::gpufvMatrix<Type>::D() const
{
    tmp<scalargpuField> tdiag(new scalargpuField(gpuDiag()));

    scalargpuField& diagCopy = tdiag.ref();
    addCmptAvBoundaryDiag(diagCopy);

    return tdiag;
}


// Return the diagonal expanded to Type (pTraits<Type>::one*diag) with the
// internal coefficients of every non-empty, non-coupled patch added.
template<class Type>
Foam::tmp<Foam::gpuField<Type>> Foam::gpufvMatrix<Type>::DD() const
{
    tmp<gpuField<Type>> tdiag(pTraits<Type>::one*gpuDiag());

    forAll(psi_.boundaryField(), patchi)
    {
        const fvPatchgpuField<Type>& patchField =
            psi_.boundaryField()[patchi];

        // Coupled and empty patches do not contribute
        if (patchField.coupled() || !patchField.size())
        {
            continue;
        }

        addToInternalField
        (
            lduAddr().gpuPatchSortCells(patchi),
            lduAddr().gpuPatchSortAddr(patchi),
            lduAddr().gpuPatchSortStartAddr(patchi),
            internalCoeffs_[patchi],
            tdiag.ref()
        );
    }

    return tdiag;
}


// Return the field "A(<psi name>)": D() divided by the cell volumes, with
// extrapolatedCalculated patch fields and corrected boundary conditions.
template<class Type>
Foam::tmp<Foam::volScalargpuField> Foam::gpufvMatrix<Type>::A() const
{
    const IOobject io
    (
        "A("+psi_.name()+')',
        psi_.instance(),
        psi_.mesh().hostmesh(),
        IOobject::NO_READ,
        IOobject::NO_WRITE
    );

    tmp<volScalargpuField> tAphi
    (
        new volScalargpuField
        (
            io,
            psi_.mesh(),
            dimensions_/psi_.dimensions()/dimVol,
            extrapolatedCalculatedFvPatchScalargpuField::typeName
        )
    );
    volScalargpuField& Aphi = tAphi.ref();

    Aphi.primitiveFieldRef() = D().ref()/psi_.mesh().V();
    Aphi.correctBoundaryConditions();

    return tAphi;
}


// Return the field "H(<psi name>)".
// Per component: (addCmptAvBoundaryDiag applied to the negated boundary
// diagonal) * psi; then the gpulduMatrix::H contribution, the source and the
// boundary source are added, the result is divided by the cell volumes and
// the components flagged -1 by validComponents are zeroed.
template<class Type>
Foam::tmp<Foam::GeometricgpuField<Type, Foam::fvPatchgpuField, Foam::gpuvolMesh>>
Foam::gpufvMatrix<Type>::H() const
{
    typedef GeometricgpuField<Type, fvPatchgpuField, gpuvolMesh> psiFieldType;

    tmp<psiFieldType> tHphi
    (
        new psiFieldType
        (
            IOobject
            (
                "H("+psi_.name()+')',
                psi_.instance(),
                psi_.mesh().hostmesh(),
                IOobject::NO_READ,
                IOobject::NO_WRITE
            ),
            psi_.mesh(),
            dimensions_/dimVol,
            extrapolatedCalculatedFvPatchScalargpuField::typeName
        )
    );
    psiFieldType& Hphi = tHphi.ref();

    // Boundary-diagonal part, one field component at a time
    for (direction cmpt=0; cmpt<Type::nComponents; cmpt++)
    {
        scalargpuField psiCmpt(psi_.primitiveField().component(cmpt));

        scalargpuField bDiagCmpt(psi_.size(), Zero);
        addBoundaryDiag(bDiagCmpt, cmpt);
        bDiagCmpt.negate();
        addCmptAvBoundaryDiag(bDiagCmpt);

        Hphi.primitiveFieldRef().replace(cmpt, bDiagCmpt*psiCmpt);
    }

    // Off-diagonal and source parts
    Hphi.primitiveFieldRef() +=
    (
        gpulduMatrix::H(psi_.primitiveField()).ref() + source_
    );
    addBoundarySource(Hphi.primitiveFieldRef());

    Hphi.primitiveFieldRef() /= psi_.mesh().V();
    Hphi.correctBoundaryConditions();

    // Zero the components that the host mesh reports as -1
    typename Type::labelType validComponents
    (
        psi_.mesh().hostmesh().template validComponents<Type>()
    );

    for (direction cmpt=0; cmpt<Type::nComponents; cmpt++)
    {
        if (validComponents[cmpt] != -1)
        {
            continue;
        }

        Hphi.replace
        (
            cmpt,
            dimensionedScalar(Hphi.dimensions(), Zero)
        );
    }

    return tHphi;
}


// Return the field "H(1)": gpulduMatrix::H1() plus component 0 of the
// boundary coefficients of every non-empty coupled patch, divided by the
// cell volumes.
template<class Type>
Foam::tmp<Foam::volScalargpuField> Foam::gpufvMatrix<Type>::H1() const
{
    const IOobject io
    (
        "H(1)",
        psi_.instance(),
        psi_.mesh().hostmesh(),
        IOobject::NO_READ,
        IOobject::NO_WRITE
    );

    tmp<volScalargpuField> tH1
    (
        new volScalargpuField
        (
            io,
            psi_.mesh(),
            dimensions_/(dimVol*psi_.dimensions()),
            extrapolatedCalculatedFvPatchScalargpuField::typeName
        )
    );
    volScalargpuField& H1Field = tH1.ref();

    H1Field.primitiveFieldRef() = gpulduMatrix::H1();

    forAll(psi_.boundaryField(), patchi)
    {
        const fvPatchgpuField<Type>& patchField =
            psi_.boundaryField()[patchi];

        // Only non-empty coupled patches contribute
        if (!(patchField.coupled() && patchField.size()))
        {
            continue;
        }

        addToInternalField
        (
            lduAddr().gpuPatchSortCells(patchi),
            lduAddr().gpuPatchSortAddr(patchi),
            lduAddr().gpuPatchSortStartAddr(patchi),
            boundaryCoeffs_[patchi].component(0),
            H1Field
        );
    }

    H1Field.primitiveFieldRef() /= psi_.mesh().V();
    H1Field.correctBoundaryConditions();

    return tH1;
}



// Return the face-flux field "flux(<psi name>)".
// Internal faces take gpulduMatrix::faceH per component; each boundary patch
// takes (internal contribution - neighbour contribution); a stored
// faceFluxCorrection is added at the end.
// Aborts when the flux is not listed as required for psi_ or when this
// matrix holds more than one sub-matrix.
template<class Type>
Foam::tmp<Foam::GeometricgpuField<Type, Foam::fvsPatchgpuField, Foam::gpusurfaceMesh>>
Foam::gpufvMatrix<Type>::
flux() const
{
    typedef GeometricgpuField<Type, fvsPatchgpuField, gpusurfaceMesh>
        fluxFieldType;

    if (!psi_.mesh().hostmesh().fluxRequired(psi_.name()))
    {
        FatalErrorInFunction
            << "flux requested but " << psi_.name()
            << " not specified in the fluxRequired sub-dictionary"
               " of fvSchemes."
            << abort(FatalError);
    }

    if (nMatrices() > 1)
    {
        FatalErrorInFunction
            << "Flux requested but " << psi_.name()
            << " can't handle multiple fvMatrix."
            << abort(FatalError);
    }

    // Result field
    tmp<fluxFieldType> tfieldFlux
    (
        new fluxFieldType
        (
            IOobject
            (
                "flux("+psi_.name()+')',
                psi_.instance(),
                psi_.mesh().hostmesh(),
                IOobject::NO_READ,
                IOobject::NO_WRITE
            ),
            psi_.mesh(),
            dimensions()
        )
    );
    fluxFieldType& fieldFlux = tfieldFlux.ref();

    fieldFlux.setOriented();

    // Internal faces, component by component
    for (direction cmpt=0; cmpt<pTraits<Type>::nComponents; cmpt++)
    {
        fieldFlux.primitiveFieldRef().replace
        (
            cmpt,
            gpulduMatrix::faceH(psi_.primitiveField().component(cmpt))
        );
    }

    // Contribution of the cell next to each boundary face
    FieldField<gpuField, Type> internalContrib = internalCoeffs_;

    label fieldi = 0;
    if (useImplicit_)
    {
        FieldField<gpuField, Type> fluxInternalContrib(internalCoeffs_);

        mapContributions(fieldi, fluxInternalContrib, internalContrib, true);
    }
    else
    {
        forAll(internalContrib, patchi)
        {
            internalContrib[patchi] =
                cmptMultiply
                (
                    internalContrib[patchi],
                    psi_.boundaryField()[patchi].patchInternalField()
                );
        }
    }

    // Contribution of the neighbour side (coupled patches only when the
    // matrix is not implicit)
    FieldField<gpuField, Type> neighbourContrib = boundaryCoeffs_;

    if (useImplicit_)
    {
        FieldField<gpuField, Type> fluxBoundaryContrib(boundaryCoeffs_);

        mapContributions(fieldi, fluxBoundaryContrib, neighbourContrib, false);
    }
    else
    {
        forAll(neighbourContrib, patchi)
        {
            if (!psi_.boundaryField()[patchi].coupled())
            {
                continue;
            }

            neighbourContrib[patchi] =
                cmptMultiply
                (
                    neighbourContrib[patchi],
                    psi_.boundaryField()[patchi].patchNeighbourField()
                );
        }
    }

    typename fluxFieldType::Boundary& fluxBoundary =
        fieldFlux.boundaryFieldRef();

    forAll(fluxBoundary, patchi)
    {
        fluxBoundary[patchi] =
            internalContrib[patchi] - neighbourContrib[patchi];
        //DebugVar(gSum(fluxBoundary[patchi]))
    }

    if (faceFluxCorrectionPtr_)
    {
        fieldFlux += *faceFluxCorrectionPtr_;
    }

    return tfieldFlux;
}


// Return the solver dictionary of the host mesh for the name selected by
// psi_ from the "finalIteration" flag (default false).
template<class Type>
const Foam::dictionary& Foam::gpufvMatrix<Type>::solverDict() const
{
    const bool finalIter =
        psi_.mesh().hostmesh().data::template getOrDefault<bool>
        ("finalIteration", false);

    const word dictName = psi_.select(finalIter);

    return psi_.mesh().hostmesh().solverDict(dictName);
}


// * * * * * * * * * * * * * * * Member Operators  * * * * * * * * * * * * * //

// Assignment from another matrix of the same field.
// Aborts when fvmv refers to a different psi. A face-flux correction held
// by fvmv is copied; one held only by this matrix is left untouched.
template<class Type>
void Foam::gpufvMatrix<Type>::operator=(const gpufvMatrix<Type>& fvmv)
{
    typedef GeometricgpuField<Type, fvsPatchgpuField, gpusurfaceMesh>
        fluxFieldType;

    // Assigning a matrix to itself changes nothing
    if (this == &fvmv)
    {
        return;
    }

    if (&psi_ != &(fvmv.psi_))
    {
        FatalErrorInFunction
            << "different fields"
            << abort(FatalError);
    }

    dimensions_ = fvmv.dimensions_;
    gpulduMatrix::operator=(fvmv);
    source_ = fvmv.source_;
    internalCoeffs_ = fvmv.internalCoeffs_;
    boundaryCoeffs_ = fvmv.boundaryCoeffs_;

    if (fvmv.faceFluxCorrectionPtr_)
    {
        if (faceFluxCorrectionPtr_)
        {
            // Overwrite the existing correction in place
            *faceFluxCorrectionPtr_ = *fvmv.faceFluxCorrectionPtr_;
        }
        else
        {
            faceFluxCorrectionPtr_ =
                new fluxFieldType(*fvmv.faceFluxCorrectionPtr_);
        }
    }

    useImplicit_ = fvmv.useImplicit_;
    lduAssemblyName_ = fvmv.lduAssemblyName_;
}


// Assignment from a tmp matrix: assign from the referenced matrix, then
// clear the tmp.
template<class Type>
void Foam::gpufvMatrix<Type>::operator=(const tmp<gpufvMatrix<Type>>& tfvmv)
{
    const gpufvMatrix<Type>& rhs = tfvmv();

    operator=(rhs);
    tfvmv.clear();
}


// Negate the ldu coefficients, the source, the internal and boundary
// coefficients and, when present, the face-flux correction.
template<class Type>
void Foam::gpufvMatrix<Type>::negate()
{
    gpulduMatrix::negate();
    source_.negate();
    internalCoeffs_.negate();
    boundaryCoeffs_.negate();

    if (!faceFluxCorrectionPtr_)
    {
        return;
    }

    faceFluxCorrectionPtr_->negate();
}


//- Add another matrix to this one after checkMethod() has validated the pair
template<class Type>
void Foam::gpufvMatrix<Type>::operator+=(const gpufvMatrix<Type>& fvmv)
{
    typedef GeometricgpuField<Type, fvsPatchgpuField, gpusurfaceMesh>
        corrFieldType;

    checkMethod(*this, fvmv, "+=");

    dimensions_ += fvmv.dimensions_;
    gpulduMatrix::operator+=(fvmv);
    source_ += fvmv.source_;
    internalCoeffs_ += fvmv.internalCoeffs_;
    boundaryCoeffs_ += fvmv.boundaryCoeffs_;

    // Assembly settings are taken over from the right-hand side
    useImplicit_ = fvmv.useImplicit_;
    lduAssemblyName_ = fvmv.lduAssemblyName_;
    nMatrix_ = fvmv.nMatrix_;

    // Accumulate the face-flux correction, allocating a copy if this
    // matrix does not have one yet
    if (fvmv.faceFluxCorrectionPtr_)
    {
        if (faceFluxCorrectionPtr_)
        {
            *faceFluxCorrectionPtr_ += *fvmv.faceFluxCorrectionPtr_;
        }
        else
        {
            faceFluxCorrectionPtr_ =
                new corrFieldType(*fvmv.faceFluxCorrectionPtr_);
        }
    }
}


//- Add a temporary matrix to this one, then clear the temporary
template<class Type>
void Foam::gpufvMatrix<Type>::operator+=(const tmp<gpufvMatrix<Type>>& tfvmv)
{
    const gpufvMatrix<Type>& rhs = tfvmv();

    this->operator+=(rhs);
    tfvmv.clear();
}


//- Subtract another matrix from this one after checkMethod() has
//  validated the pair
template<class Type>
void Foam::gpufvMatrix<Type>::operator-=(const gpufvMatrix<Type>& fvmv)
{
    typedef GeometricgpuField<Type, fvsPatchgpuField, gpusurfaceMesh>
        corrFieldType;

    checkMethod(*this, fvmv, "-=");

    dimensions_ -= fvmv.dimensions_;
    gpulduMatrix::operator-=(fvmv);
    source_ -= fvmv.source_;
    internalCoeffs_ -= fvmv.internalCoeffs_;
    boundaryCoeffs_ -= fvmv.boundaryCoeffs_;

    // Assembly settings are taken over from the right-hand side
    useImplicit_ = fvmv.useImplicit_;
    lduAssemblyName_ = fvmv.lduAssemblyName_;
    nMatrix_ = fvmv.nMatrix_;

    // Subtract the face-flux correction; if this matrix has none yet it
    // receives the negated correction of the right-hand side
    if (fvmv.faceFluxCorrectionPtr_)
    {
        if (faceFluxCorrectionPtr_)
        {
            *faceFluxCorrectionPtr_ -= *fvmv.faceFluxCorrectionPtr_;
        }
        else
        {
            faceFluxCorrectionPtr_ =
                new corrFieldType(-*fvmv.faceFluxCorrectionPtr_);
        }
    }
}


//- Subtract a temporary matrix from this one, then clear the temporary
template<class Type>
void Foam::gpufvMatrix<Type>::operator-=(const tmp<gpufvMatrix<Type>>& tfvmv)
{
    const gpufvMatrix<Type>& rhs = tfvmv();

    this->operator-=(rhs);
    tfvmv.clear();
}


//- Add the field su to the matrix: V*su is subtracted from source()
template<class Type>
void Foam::gpufvMatrix<Type>::operator+=
(
    const DimensionedgpuField<Type, gpuvolMesh>& su
)
{
    checkMethod(*this, su, "+=");

    this->source() -= su.mesh().V()*su.field();
}


//- Add a temporary field to the matrix, then clear the temporary
template<class Type>
void Foam::gpufvMatrix<Type>::operator+=
(
    const tmp<DimensionedgpuField<Type, gpuvolMesh>>& tsu
)
{
    const DimensionedgpuField<Type, gpuvolMesh>& su = tsu();

    this->operator+=(su);
    tsu.clear();
}


//- Add a temporary volume field to the matrix, then clear the temporary
template<class Type>
void Foam::gpufvMatrix<Type>::operator+=
(
    const tmp<GeometricgpuField<Type, fvPatchgpuField, gpuvolMesh>>& tsu
)
{
    const GeometricgpuField<Type, fvPatchgpuField, gpuvolMesh>& su = tsu();

    this->operator+=(su);
    tsu.clear();
}


//- Subtract the field su from the matrix: V*su is added to source()
template<class Type>
void Foam::gpufvMatrix<Type>::operator-=
(
    const DimensionedgpuField<Type, gpuvolMesh>& su
)
{
    checkMethod(*this, su, "-=");

    this->source() += su.mesh().V()*su.field();
}


//- Subtract a temporary field from the matrix, then clear the temporary
template<class Type>
void Foam::gpufvMatrix<Type>::operator-=
(
    const tmp<DimensionedgpuField<Type, gpuvolMesh>>& tsu
)
{
    const DimensionedgpuField<Type, gpuvolMesh>& su = tsu();

    this->operator-=(su);
    tsu.clear();
}


//- Subtract a temporary volume field from the matrix, then clear the
//  temporary
template<class Type>
void Foam::gpufvMatrix<Type>::operator-=
(
    const tmp<GeometricgpuField<Type, fvPatchgpuField, gpuvolMesh>>& tsu
)
{
    const GeometricgpuField<Type, fvPatchgpuField, gpuvolMesh>& su = tsu();

    this->operator-=(su);
    tsu.clear();
}


//- Add the dimensioned value su to the matrix: V*su is subtracted from
//  source(). No checkMethod() call is made here.
template<class Type>
void Foam::gpufvMatrix<Type>::operator+=
(
    const dimensioned<Type>& su
)
{
    this->source() -= this->psi().mesh().V()*su;
}


//- Subtract the dimensioned value su from the matrix: V*su is added to
//  source(). No checkMethod() call is made here.
template<class Type>
void Foam::gpufvMatrix<Type>::operator-=
(
    const dimensioned<Type>& su
)
{
    this->source() += this->psi().mesh().V()*su;
}


//- Adding zero leaves the matrix unchanged
template<class Type>
void Foam::gpufvMatrix<Type>::operator+=
(
    const zero&
)
{
    // Intentionally empty
}


//- Subtracting zero leaves the matrix unchanged
template<class Type>
void Foam::gpufvMatrix<Type>::operator-=
(
    const zero&
)
{
    // Intentionally empty
}


//- Scale the matrix cell-by-cell with the scalar field dsf.
//  The dimensions, ldu coefficients and source are multiplied by dsf;
//  the internal and boundary coefficients of each patch are multiplied by
//  the patch-internal values of dsf.
//  Raises a FatalError if the matrix holds a faceFluxCorrection.
template<class Type>
void Foam::gpufvMatrix<Type>::operator*=
(
    const volScalargpuField::Internal& dsf
)
{
    // Check the precondition before modifying anything, so that the matrix
    // is not left scaled when the error is raised
    if (faceFluxCorrectionPtr_)
    {
        FatalErrorInFunction
            << "cannot scale a matrix containing a faceFluxCorrection"
            << abort(FatalError);
    }

    dimensions_ *= dsf.dimensions();
    gpulduMatrix::operator*=(dsf.field());
    source_ *= dsf.field();

    forAll(boundaryCoeffs_, patchi)
    {
        // Values of dsf in the cells adjacent to this patch
        scalargpuField pisf
        (
            dsf.mesh().boundary()[patchi].patchInternalField(dsf.field())
        );

        internalCoeffs_[patchi] *= pisf;
        boundaryCoeffs_[patchi] *= pisf;
    }
}


//- Scale the matrix with a temporary internal scalar field, then clear
//  the temporary
template<class Type>
void Foam::gpufvMatrix<Type>::operator*=
(
    const tmp<volScalargpuField::Internal>& tdsf
)
{
    const volScalargpuField::Internal& dsf = tdsf();

    this->operator*=(dsf);
    tdsf.clear();
}


//- Scale the matrix with a temporary volume scalar field, then clear the
//  temporary
template<class Type>
void Foam::gpufvMatrix<Type>::operator*=
(
    const tmp<volScalargpuField>& tvsf
)
{
    const volScalargpuField& vsf = tvsf();

    this->operator*=(vsf);
    tvsf.clear();
}


//- Scale the whole matrix, including any face-flux correction, by the
//  dimensioned scalar ds
template<class Type>
void Foam::gpufvMatrix<Type>::operator*=
(
    const dimensioned<scalar>& ds
)
{
    const scalar factor = ds.value();

    dimensions_ *= ds.dimensions();
    gpulduMatrix::operator*=(factor);
    source_ *= factor;
    internalCoeffs_ *= factor;
    boundaryCoeffs_ *= factor;

    // Without a face-flux correction there is nothing left to scale
    if (!faceFluxCorrectionPtr_)
    {
        return;
    }

    *faceFluxCorrectionPtr_ *= factor;
}


// * * * * * * * * * * * * * * * Global Functions  * * * * * * * * * * * * * //

//- Raise a FatalError unless both matrices refer to the same field object
//  and, when dimension checking is enabled, have equal dimensions
template<class Type>
void Foam::checkMethod
(
    const gpufvMatrix<Type>& fvm1,
    const gpufvMatrix<Type>& fvm2,
    const char* op
)
{
    // Both matrices must refer to the very same field object
    const bool sameField = (&fvm1.psi() == &fvm2.psi());

    if (!sameField)
    {
        FatalErrorInFunction
            << "incompatible fields for operation "
            << endl << "    "
            << "[" << fvm1.psi().name() << "] "
            << op
            << " [" << fvm2.psi().name() << "]"
            << abort(FatalError);
    }

    // Dimensions are only compared while dimension checking is enabled
    if (!dimensionSet::checking())
    {
        return;
    }

    if (fvm1.dimensions() != fvm2.dimensions())
    {
        FatalErrorInFunction
            << "incompatible dimensions for operation "
            << endl << "    "
            << "[" << fvm1.psi().name() << fvm1.dimensions()/dimVolume << " ] "
            << op
            << " [" << fvm2.psi().name() << fvm2.dimensions()/dimVolume << " ]"
            << abort(FatalError);
    }
}


//- Raise a FatalError if, with dimension checking enabled, the matrix
//  dimensions divided by dimVolume differ from the dimensions of the
//  field df.
//  \param fvm  matrix operand
//  \param df   field operand
//  \param op   operator symbol printed in the error message
template<class Type>
void Foam::checkMethod
(
    const gpufvMatrix<Type>& fvm,
    const DimensionedgpuField<Type, gpuvolMesh>& df,
    const char* op
)
{
    if
    (
        dimensionSet::checking()
     && fvm.dimensions()/dimVolume != df.dimensions()
    )
    {
        // Print the same lead-in as the other checkMethod overloads so the
        // user can tell what kind of mismatch was detected
        FatalErrorInFunction
            << "incompatible dimensions for operation "
            << endl << "    "
            << "[" << fvm.psi().name() << fvm.dimensions()/dimVolume << " ] "
            << op
            << " [" << df.name() << df.dimensions() << " ]"
            << abort(FatalError);
    }
}


//- Raise a FatalError if, with dimension checking enabled, the matrix
//  dimensions divided by dimVolume differ from the dimensions of dt
template<class Type>
void Foam::checkMethod
(
    const gpufvMatrix<Type>& fvm,
    const dimensioned<Type>& dt,
    const char* op
)
{
    // Nothing to verify while dimension checking is switched off
    if (!dimensionSet::checking())
    {
        return;
    }

    if (fvm.dimensions()/dimVolume != dt.dimensions())
    {
        FatalErrorInFunction
            << "incompatible dimensions for operation "
            << endl << "    "
            << "[" << fvm.psi().name() << fvm.dimensions()/dimVolume << " ] "
            << op
            << " [" << dt.name() << dt.dimensions() << " ]"
            << abort(FatalError);
    }
}


//- Solve the matrix with the given solver controls; forwards to
//  gpufvMatrix::solve(const dictionary&)
template<class Type>
Foam::SolverPerformance<Type> Foam::solve
(
    gpufvMatrix<Type>& fvm,
    const dictionary& solverControls
)
{
    SolverPerformance<Type> solverPerf(fvm.solve(solverControls));

    return solverPerf;
}

//- Solve a temporary matrix with the given solver controls, then clear
//  the temporary
template<class Type>
Foam::SolverPerformance<Type> Foam::solve
(
    const tmp<gpufvMatrix<Type>>& tfvm,
    const dictionary& solverControls
)
{
    // solve() is non-const, so constness of the held matrix is cast away
    gpufvMatrix<Type>& fvm = const_cast<gpufvMatrix<Type>&>(tfvm());

    SolverPerformance<Type> solverPerf = fvm.solve(solverControls);

    tfvm.clear();

    return solverPerf;
}


//- Solve the matrix; forwards to the argument-less gpufvMatrix::solve()
template<class Type>
Foam::SolverPerformance<Type> Foam::solve(gpufvMatrix<Type>& fvm)
{
    SolverPerformance<Type> solverPerf(fvm.solve());

    return solverPerf;
}

//- Solve a temporary matrix, then clear the temporary
template<class Type>
Foam::SolverPerformance<Type> Foam::solve(const tmp<gpufvMatrix<Type>>& tfvm)
{
    // solve() is non-const, so constness of the held matrix is cast away
    gpufvMatrix<Type>& fvm = const_cast<gpufvMatrix<Type>&>(tfvm());

    SolverPerformance<Type> solverPerf = fvm.solve();

    tfvm.clear();

    return solverPerf;
}


//- Return A - (A & A.psi()) with the face-flux correction removed
template<class Type>
Foam::tmp<Foam::gpufvMatrix<Type>> Foam::correction
(
    const gpufvMatrix<Type>& A
)
{
    // Evaluate the matrix on its own field first
    tmp<GeometricgpuField<Type, fvPatchgpuField, gpuvolMesh>> tApsi
    (
        A & A.psi()
    );

    tmp<Foam::gpufvMatrix<Type>> tAcorr(A - tApsi);

    // The faceFluxCorrection has no clear meaning or purpose on the
    // correction matrix, so it is deleted
    deleteDemandDrivenData(tAcorr.ref().faceFluxCorrectionPtr());

    return tAcorr;
}


//- Return tA - (tA() & tA().psi()) with the face-flux correction removed
template<class Type>
Foam::tmp<Foam::gpufvMatrix<Type>> Foam::correction
(
    const tmp<gpufvMatrix<Type>>& tA
)
{
    // Evaluate the matrix on its own field before tA is handed over
    tmp<GeometricgpuField<Type, fvPatchgpuField, gpuvolMesh>> tApsi
    (
        tA() & tA().psi()
    );

    tmp<Foam::gpufvMatrix<Type>> tAcorr(tA - tApsi);

    // The faceFluxCorrection has no clear meaning or purpose on the
    // correction matrix, so it is deleted
    deleteDemandDrivenData(tAcorr.ref().faceFluxCorrectionPtr());

    return tAcorr;
}


// * * * * * * * * * * * * * * * Global Operators  * * * * * * * * * * * * * //

//- Equation A == B, returned as the matrix A - B
template<class Type>
Foam::tmp<Foam::gpufvMatrix<Type>> Foam::operator==
(
    const gpufvMatrix<Type>& A,
    const gpufvMatrix<Type>& B
)
{
    checkMethod(A, B, "==");

    tmp<gpufvMatrix<Type>> tresult(A - B);

    return tresult;
}

//- Equation tA == B, returned as the matrix tA - B
template<class Type>
Foam::tmp<Foam::gpufvMatrix<Type>> Foam::operator==
(
    const tmp<gpufvMatrix<Type>>& tA,
    const gpufvMatrix<Type>& B
)
{
    checkMethod(tA(), B, "==");

    tmp<gpufvMatrix<Type>> tresult(tA - B);

    return tresult;
}

//- Equation A == tB, returned as the matrix A - tB
template<class Type>
Foam::tmp<Foam::gpufvMatrix<Type>> Foam::operator==
(
    const gpufvMatrix<Type>& A,
    const tmp<gpufvMatrix<Type>>& tB
)
{
    checkMethod(A, tB(), "==");

    tmp<gpufvMatrix<Type>> tresult(A - tB);

    return tresult;
}

//- Equation tA == tB, returned as the matrix tA - tB
template<class Type>
Foam::tmp<Foam::gpufvMatrix<Type>> Foam::operator==
(
    const tmp<gpufvMatrix<Type>>& tA,
    const tmp<gpufvMatrix<Type>>& tB
)
{
    checkMethod(tA(), tB(), "==");

    tmp<gpufvMatrix<Type>> tresult(tA - tB);

    return tresult;
}

//- Equation A == su: a copy of A with V*su added to its source
template<class Type>
Foam::tmp<Foam::gpufvMatrix<Type>> Foam::operator==
(
    const gpufvMatrix<Type>& A,
    const DimensionedgpuField<Type, gpuvolMesh>& su
)
{
    checkMethod(A, su, "==");

    tmp<gpufvMatrix<Type>> tresult(new gpufvMatrix<Type>(A));
    gpufvMatrix<Type>& result = tresult.ref();

    result.source() += su.mesh().V()*su.field();

    return tresult;
}

//- Equation A == tsu: a copy of A with V*tsu added to its source; the
//  temporary field is cleared afterwards
template<class Type>
Foam::tmp<Foam::gpufvMatrix<Type>> Foam::operator==
(
    const gpufvMatrix<Type>& A,
    const tmp<DimensionedgpuField<Type, gpuvolMesh>>& tsu
)
{
    checkMethod(A, tsu(), "==");

    tmp<gpufvMatrix<Type>> tresult(new gpufvMatrix<Type>(A));
    const DimensionedgpuField<Type, gpuvolMesh>& su = tsu();

    tresult.ref().source() += su.mesh().V()*su.field();
    tsu.clear();

    return tresult;
}

//- Equation A == tsu: a copy of A with V times the primitive field of tsu
//  added to its source; the temporary field is cleared afterwards
template<class Type>
Foam::tmp<Foam::gpufvMatrix<Type>> Foam::operator==
(
    const gpufvMatrix<Type>& A,
    const tmp<GeometricgpuField<Type, fvPatchgpuField, gpuvolMesh>>& tsu
)
{
    checkMethod(A, tsu(), "==");

    tmp<gpufvMatrix<Type>> tresult(new gpufvMatrix<Type>(A));
    const GeometricgpuField<Type, fvPatchgpuField, gpuvolMesh>& su = tsu();

    tresult.ref().source() += su.mesh().V()*su.primitiveField();
    tsu.clear();

    return tresult;
}

//- Equation tA == su: takes over the matrix held by tA and adds V*su to
//  its source
template<class Type>
Foam::tmp<Foam::gpufvMatrix<Type>> Foam::operator==
(
    const tmp<gpufvMatrix<Type>>& tA,
    const DimensionedgpuField<Type, gpuvolMesh>& su
)
{
    checkMethod(tA(), su, "==");

    tmp<gpufvMatrix<Type>> tresult(tA.ptr());
    gpufvMatrix<Type>& result = tresult.ref();

    result.source() += su.mesh().V()*su.field();

    return tresult;
}

//- Equation tA == tsu: takes over the matrix held by tA and adds V*tsu to
//  its source; the temporary field is cleared afterwards
template<class Type>
Foam::tmp<Foam::gpufvMatrix<Type>> Foam::operator==
(
    const tmp<gpufvMatrix<Type>>& tA,
    const tmp<DimensionedgpuField<Type, gpuvolMesh>>& tsu
)
{
    checkMethod(tA(), tsu(), "==");

    tmp<gpufvMatrix<Type>> tresult(tA.ptr());
    const DimensionedgpuField<Type, gpuvolMesh>& su = tsu();

    tresult.ref().source() += su.mesh().V()*su.field();
    tsu.clear();

    return tresult;
}

//- Equation tA == tsu: takes over the matrix held by tA and adds V times
//  the primitive field of tsu to its source; tsu is cleared afterwards
template<class Type>
Foam::tmp<Foam::gpufvMatrix<Type>> Foam::operator==
(
    const tmp<gpufvMatrix<Type>>& tA,
    const tmp<GeometricgpuField<Type, fvPatchgpuField, gpuvolMesh>>& tsu
)
{
    checkMethod(tA(), tsu(), "==");

    tmp<gpufvMatrix<Type>> tresult(tA.ptr());
    const GeometricgpuField<Type, fvPatchgpuField, gpuvolMesh>& su = tsu();

    tresult.ref().source() += su.mesh().V()*su.primitiveField();
    tsu.clear();

    return tresult;
}

//- Equation A == su: a copy of A with V*su.value() added to its source
template<class Type>
Foam::tmp<Foam::gpufvMatrix<Type>> Foam::operator==
(
    const gpufvMatrix<Type>& A,
    const dimensioned<Type>& su
)
{
    checkMethod(A, su, "==");

    tmp<gpufvMatrix<Type>> tresult(new gpufvMatrix<Type>(A));
    gpufvMatrix<Type>& result = tresult.ref();

    result.source() += A.psi().mesh().V()*su.value();

    return tresult;
}

//- Equation tA == su: takes over the matrix held by tA and adds
//  V*su.value() to its source
template<class Type>
Foam::tmp<Foam::gpufvMatrix<Type>> Foam::operator==
(
    const tmp<gpufvMatrix<Type>>& tA,
    const dimensioned<Type>& su
)
{
    checkMethod(tA(), su, "==");

    tmp<gpufvMatrix<Type>> tresult(tA.ptr());
    gpufvMatrix<Type>& result = tresult.ref();

    result.source() += tresult().psi().mesh().V()*su.value();

    return tresult;
}

//- Equation A == 0: returns a tmp constructed from A itself
template<class Type>
Foam::tmp<Foam::gpufvMatrix<Type>> Foam::operator==
(
    const gpufvMatrix<Type>& A,
    const zero&
)
{
    return tmp<gpufvMatrix<Type>>(A);
}


//- Equation tA == 0: returns a tmp constructed from tA
template<class Type>
Foam::tmp<Foam::gpufvMatrix<Type>> Foam::operator==
(
    const tmp<gpufvMatrix<Type>>& tA,
    const zero&
)
{
    return tmp<gpufvMatrix<Type>>(tA);
}


//- Unary minus: a negated copy of A
template<class Type>
Foam::tmp<Foam::gpufvMatrix<Type>> Foam::operator-
(
    const gpufvMatrix<Type>& A
)
{
    tmp<gpufvMatrix<Type>> tresult(new gpufvMatrix<Type>(A));
    gpufvMatrix<Type>& result = tresult.ref();

    result.negate();

    return tresult;
}

//- Unary minus: takes over the matrix held by tA and negates it
template<class Type>
Foam::tmp<Foam::gpufvMatrix<Type>> Foam::operator-
(
    const tmp<gpufvMatrix<Type>>& tA
)
{
    tmp<gpufvMatrix<Type>> tresult(tA.ptr());
    gpufvMatrix<Type>& result = tresult.ref();

    result.negate();

    return tresult;
}


//- Sum A + B: a copy of A to which B is added
template<class Type>
Foam::tmp<Foam::gpufvMatrix<Type>> Foam::operator+
(
    const gpufvMatrix<Type>& A,
    const gpufvMatrix<Type>& B
)
{
    checkMethod(A, B, "+");

    tmp<gpufvMatrix<Type>> tresult(new gpufvMatrix<Type>(A));
    gpufvMatrix<Type>& result = tresult.ref();

    result += B;

    return tresult;
}

//- Sum tA + B: takes over the matrix held by tA and adds B to it
template<class Type>
Foam::tmp<Foam::gpufvMatrix<Type>> Foam::operator+
(
    const tmp<gpufvMatrix<Type>>& tA,
    const gpufvMatrix<Type>& B
)
{
    checkMethod(tA(), B, "+");

    tmp<gpufvMatrix<Type>> tresult(tA.ptr());
    gpufvMatrix<Type>& result = tresult.ref();

    result += B;

    return tresult;
}

//- Sum A + tB: takes over the matrix held by tB and adds A to it
template<class Type>
Foam::tmp<Foam::gpufvMatrix<Type>> Foam::operator+
(
    const gpufvMatrix<Type>& A,
    const tmp<gpufvMatrix<Type>>& tB
)
{
    checkMethod(A, tB(), "+");

    tmp<gpufvMatrix<Type>> tresult(tB.ptr());
    gpufvMatrix<Type>& result = tresult.ref();

    result += A;

    return tresult;
}

//- Sum tA + tB: takes over the matrix held by tA, adds tB to it and
//  clears tB
template<class Type>
Foam::tmp<Foam::gpufvMatrix<Type>> Foam::operator+
(
    const tmp<gpufvMatrix<Type>>& tA,
    const tmp<gpufvMatrix<Type>>& tB
)
{
    checkMethod(tA(), tB(), "+");

    tmp<gpufvMatrix<Type>> tresult(tA.ptr());
    gpufvMatrix<Type>& result = tresult.ref();

    result += tB();
    tB.clear();

    return tresult;
}

//- A + su: a copy of A with V*su subtracted from its source
template<class Type>
Foam::tmp<Foam::gpufvMatrix<Type>> Foam::operator+
(
    const gpufvMatrix<Type>& A,
    const DimensionedgpuField<Type, gpuvolMesh>& su
)
{
    checkMethod(A, su, "+");

    tmp<gpufvMatrix<Type>> tresult(new gpufvMatrix<Type>(A));
    gpufvMatrix<Type>& result = tresult.ref();

    result.source() -= su.mesh().V()*su.field();

    return tresult;
}

//- A + tsu: a copy of A with V*tsu subtracted from its source; the
//  temporary field is cleared afterwards
template<class Type>
Foam::tmp<Foam::gpufvMatrix<Type>> Foam::operator+
(
    const gpufvMatrix<Type>& A,
    const tmp<DimensionedgpuField<Type, gpuvolMesh>>& tsu
)
{
    checkMethod(A, tsu(), "+");

    tmp<gpufvMatrix<Type>> tresult(new gpufvMatrix<Type>(A));
    const DimensionedgpuField<Type, gpuvolMesh>& su = tsu();

    tresult.ref().source() -= su.mesh().V()*su.field();
    tsu.clear();

    return tresult;
}

//- A + tsu: a copy of A with V times the primitive field of tsu
//  subtracted from its source; tsu is cleared afterwards
template<class Type>
Foam::tmp<Foam::gpufvMatrix<Type>> Foam::operator+
(
    const gpufvMatrix<Type>& A,
    const tmp<GeometricgpuField<Type, fvPatchgpuField, gpuvolMesh>>& tsu
)
{
    checkMethod(A, tsu(), "+");

    tmp<gpufvMatrix<Type>> tresult(new gpufvMatrix<Type>(A));
    const GeometricgpuField<Type, fvPatchgpuField, gpuvolMesh>& su = tsu();

    tresult.ref().source() -= su.mesh().V()*su.primitiveField();
    tsu.clear();

    return tresult;
}

//- tA + su: takes over the matrix held by tA and subtracts V*su from its
//  source
template<class Type>
Foam::tmp<Foam::gpufvMatrix<Type>> Foam::operator+
(
    const tmp<gpufvMatrix<Type>>& tA,
    const DimensionedgpuField<Type, gpuvolMesh>& su
)
{
    checkMethod(tA(), su, "+");

    tmp<gpufvMatrix<Type>> tresult(tA.ptr());
    gpufvMatrix<Type>& result = tresult.ref();

    result.source() -= su.mesh().V()*su.field();

    return tresult;
}

//- tA + tsu: takes over the matrix held by tA and subtracts V*tsu from
//  its source; the temporary field is cleared afterwards
template<class Type>
Foam::tmp<Foam::gpufvMatrix<Type>> Foam::operator+
(
    const tmp<gpufvMatrix<Type>>& tA,
    const tmp<DimensionedgpuField<Type, gpuvolMesh>>& tsu
)
{
    checkMethod(tA(), tsu(), "+");

    tmp<gpufvMatrix<Type>> tresult(tA.ptr());
    const DimensionedgpuField<Type, gpuvolMesh>& su = tsu();

    tresult.ref().source() -= su.mesh().V()*su.field();
    tsu.clear();

    return tresult;
}

//- tA + tsu: takes over the matrix held by tA and subtracts V times the
//  primitive field of tsu from its source; tsu is cleared afterwards
template<class Type>
Foam::tmp<Foam::gpufvMatrix<Type>> Foam::operator+
(
    const tmp<gpufvMatrix<Type>>& tA,
    const tmp<GeometricgpuField<Type, fvPatchgpuField, gpuvolMesh>>& tsu
)
{
    checkMethod(tA(), tsu(), "+");

    tmp<gpufvMatrix<Type>> tresult(tA.ptr());
    const GeometricgpuField<Type, fvPatchgpuField, gpuvolMesh>& su = tsu();

    tresult.ref().source() -= su.mesh().V()*su.primitiveField();
    tsu.clear();

    return tresult;
}

//- su + A: a copy of A with V*su subtracted from its source
template<class Type>
Foam::tmp<Foam::gpufvMatrix<Type>> Foam::operator+
(
    const DimensionedgpuField<Type, gpuvolMesh>& su,
    const gpufvMatrix<Type>& A
)
{
    checkMethod(A, su, "+");

    tmp<gpufvMatrix<Type>> tresult(new gpufvMatrix<Type>(A));
    gpufvMatrix<Type>& result = tresult.ref();

    result.source() -= su.mesh().V()*su.field();

    return tresult;
}

//- tsu + A: a copy of A with V*tsu subtracted from its source; the
//  temporary field is cleared afterwards
template<class Type>
Foam::tmp<Foam::gpufvMatrix<Type>> Foam::operator+
(
    const tmp<DimensionedgpuField<Type, gpuvolMesh>>& tsu,
    const gpufvMatrix<Type>& A
)
{
    checkMethod(A, tsu(), "+");

    tmp<gpufvMatrix<Type>> tresult(new gpufvMatrix<Type>(A));
    const DimensionedgpuField<Type, gpuvolMesh>& su = tsu();

    tresult.ref().source() -= su.mesh().V()*su.field();
    tsu.clear();

    return tresult;
}

//- tsu + A: a copy of A with V times the primitive field of tsu
//  subtracted from its source; tsu is cleared afterwards
template<class Type>
Foam::tmp<Foam::gpufvMatrix<Type>> Foam::operator+
(
    const tmp<GeometricgpuField<Type, fvPatchgpuField, gpuvolMesh>>& tsu,
    const gpufvMatrix<Type>& A
)
{
    checkMethod(A, tsu(), "+");

    tmp<gpufvMatrix<Type>> tresult(new gpufvMatrix<Type>(A));
    const GeometricgpuField<Type, fvPatchgpuField, gpuvolMesh>& su = tsu();

    tresult.ref().source() -= su.mesh().V()*su.primitiveField();
    tsu.clear();

    return tresult;
}

//- su + tA: takes over the matrix held by tA and subtracts V*su from its
//  source
template<class Type>
Foam::tmp<Foam::gpufvMatrix<Type>> Foam::operator+
(
    const DimensionedgpuField<Type, gpuvolMesh>& su,
    const tmp<gpufvMatrix<Type>>& tA
)
{
    checkMethod(tA(), su, "+");

    tmp<gpufvMatrix<Type>> tresult(tA.ptr());
    gpufvMatrix<Type>& result = tresult.ref();

    result.source() -= su.mesh().V()*su.field();

    return tresult;
}

//- tsu + tA: takes over the matrix held by tA and subtracts V*tsu from
//  its source; the temporary field is cleared afterwards
template<class Type>
Foam::tmp<Foam::gpufvMatrix<Type>> Foam::operator+
(
    const tmp<DimensionedgpuField<Type, gpuvolMesh>>& tsu,
    const tmp<gpufvMatrix<Type>>& tA
)
{
    checkMethod(tA(), tsu(), "+");

    tmp<gpufvMatrix<Type>> tresult(tA.ptr());
    const DimensionedgpuField<Type, gpuvolMesh>& su = tsu();

    tresult.ref().source() -= su.mesh().V()*su.field();
    tsu.clear();

    return tresult;
}

//- tsu + tA: takes over the matrix held by tA and subtracts V times the
//  primitive field of tsu from its source; tsu is cleared afterwards
template<class Type>
Foam::tmp<Foam::gpufvMatrix<Type>> Foam::operator+
(
    const tmp<GeometricgpuField<Type, fvPatchgpuField, gpuvolMesh>>& tsu,
    const tmp<gpufvMatrix<Type>>& tA
)
{
    checkMethod(tA(), tsu(), "+");

    tmp<gpufvMatrix<Type>> tresult(tA.ptr());
    const GeometricgpuField<Type, fvPatchgpuField, gpuvolMesh>& su = tsu();

    tresult.ref().source() -= su.mesh().V()*su.primitiveField();
    tsu.clear();

    return tresult;
}


//- Difference A - B: a copy of A from which B is subtracted
template<class Type>
Foam::tmp<Foam::gpufvMatrix<Type>> Foam::operator-
(
    const gpufvMatrix<Type>& A,
    const gpufvMatrix<Type>& B
)
{
    checkMethod(A, B, "-");

    tmp<gpufvMatrix<Type>> tresult(new gpufvMatrix<Type>(A));
    gpufvMatrix<Type>& result = tresult.ref();

    result -= B;

    return tresult;
}

//- Difference tA - B: takes over the matrix held by tA and subtracts B
template<class Type>
Foam::tmp<Foam::gpufvMatrix<Type>> Foam::operator-
(
    const tmp<gpufvMatrix<Type>>& tA,
    const gpufvMatrix<Type>& B
)
{
    checkMethod(tA(), B, "-");

    tmp<gpufvMatrix<Type>> tresult(tA.ptr());
    gpufvMatrix<Type>& result = tresult.ref();

    result -= B;

    return tresult;
}

//- Difference A - tB: takes over the matrix held by tB, subtracts A from
//  it and negates the outcome
template<class Type>
Foam::tmp<Foam::gpufvMatrix<Type>> Foam::operator-
(
    const gpufvMatrix<Type>& A,
    const tmp<gpufvMatrix<Type>>& tB
)
{
    checkMethod(A, tB(), "-");

    tmp<gpufvMatrix<Type>> tresult(tB.ptr());
    gpufvMatrix<Type>& result = tresult.ref();

    // (tB - A) negated gives A - tB
    result -= A;
    result.negate();

    return tresult;
}

//- Difference tA - tB: takes over the matrix held by tA, subtracts tB
//  from it and clears tB
template<class Type>
Foam::tmp<Foam::gpufvMatrix<Type>> Foam::operator-
(
    const tmp<gpufvMatrix<Type>>& tA,
    const tmp<gpufvMatrix<Type>>& tB
)
{
    checkMethod(tA(), tB(), "-");

    tmp<gpufvMatrix<Type>> tresult(tA.ptr());
    gpufvMatrix<Type>& result = tresult.ref();

    result -= tB();
    tB.clear();

    return tresult;
}

//- A - su: a copy of A with V*su added to its source
template<class Type>
Foam::tmp<Foam::gpufvMatrix<Type>> Foam::operator-
(
    const gpufvMatrix<Type>& A,
    const DimensionedgpuField<Type, gpuvolMesh>& su
)
{
    checkMethod(A, su, "-");

    tmp<gpufvMatrix<Type>> tresult(new gpufvMatrix<Type>(A));
    gpufvMatrix<Type>& result = tresult.ref();

    result.source() += su.mesh().V()*su.field();

    return tresult;
}

//- A - tsu: a copy of A with V*tsu added to its source; the temporary
//  field is cleared afterwards
template<class Type>
Foam::tmp<Foam::gpufvMatrix<Type>> Foam::operator-
(
    const gpufvMatrix<Type>& A,
    const tmp<DimensionedgpuField<Type, gpuvolMesh>>& tsu
)
{
    checkMethod(A, tsu(), "-");

    tmp<gpufvMatrix<Type>> tresult(new gpufvMatrix<Type>(A));
    const DimensionedgpuField<Type, gpuvolMesh>& su = tsu();

    tresult.ref().source() += su.mesh().V()*su.field();
    tsu.clear();

    return tresult;
}

//- A - tsu: a copy of A with V times the primitive field of tsu added to
//  its source; tsu is cleared afterwards
template<class Type>
Foam::tmp<Foam::gpufvMatrix<Type>> Foam::operator-
(
    const gpufvMatrix<Type>& A,
    const tmp<GeometricgpuField<Type, fvPatchgpuField, gpuvolMesh>>& tsu
)
{
    checkMethod(A, tsu(), "-");

    tmp<gpufvMatrix<Type>> tresult(new gpufvMatrix<Type>(A));
    const GeometricgpuField<Type, fvPatchgpuField, gpuvolMesh>& su = tsu();

    tresult.ref().source() += su.mesh().V()*su.primitiveField();
    tsu.clear();

    return tresult;
}

//- tA - su: takes over the matrix held by tA and adds V*su to its source
template<class Type>
Foam::tmp<Foam::gpufvMatrix<Type>> Foam::operator-
(
    const tmp<gpufvMatrix<Type>>& tA,
    const DimensionedgpuField<Type, gpuvolMesh>& su
)
{
    checkMethod(tA(), su, "-");

    tmp<gpufvMatrix<Type>> tresult(tA.ptr());
    gpufvMatrix<Type>& result = tresult.ref();

    result.source() += su.mesh().V()*su.field();

    return tresult;
}

//- tA - tsu: takes over the matrix held by tA and adds V*tsu to its
//  source; the temporary field is cleared afterwards
template<class Type>
Foam::tmp<Foam::gpufvMatrix<Type>> Foam::operator-
(
    const tmp<gpufvMatrix<Type>>& tA,
    const tmp<DimensionedgpuField<Type, gpuvolMesh>>& tsu
)
{
    checkMethod(tA(), tsu(), "-");

    tmp<gpufvMatrix<Type>> tresult(tA.ptr());
    const DimensionedgpuField<Type, gpuvolMesh>& su = tsu();

    tresult.ref().source() += su.mesh().V()*su.field();
    tsu.clear();

    return tresult;
}

//- tA - tsu: takes over the matrix held by tA and adds V times the
//  primitive field of tsu to its source; tsu is cleared afterwards
template<class Type>
Foam::tmp<Foam::gpufvMatrix<Type>> Foam::operator-
(
    const tmp<gpufvMatrix<Type>>& tA,
    const tmp<GeometricgpuField<Type, fvPatchgpuField, gpuvolMesh>>& tsu
)
{
    checkMethod(tA(), tsu(), "-");

    tmp<gpufvMatrix<Type>> tresult(tA.ptr());
    const GeometricgpuField<Type, fvPatchgpuField, gpuvolMesh>& su = tsu();

    tresult.ref().source() += su.mesh().V()*su.primitiveField();
    tsu.clear();

    return tresult;
}

//- su - A: a negated copy of A with V*su subtracted from its source
template<class Type>
Foam::tmp<Foam::gpufvMatrix<Type>> Foam::operator-
(
    const DimensionedgpuField<Type, gpuvolMesh>& su,
    const gpufvMatrix<Type>& A
)
{
    checkMethod(A, su, "-");

    tmp<gpufvMatrix<Type>> tresult(new gpufvMatrix<Type>(A));
    gpufvMatrix<Type>& result = tresult.ref();

    result.negate();
    result.source() -= su.mesh().V()*su.field();

    return tresult;
}

//- tsu - A: a negated copy of A with V*tsu subtracted from its source;
//  the temporary field is cleared afterwards
template<class Type>
Foam::tmp<Foam::gpufvMatrix<Type>> Foam::operator-
(
    const tmp<DimensionedgpuField<Type, gpuvolMesh>>& tsu,
    const gpufvMatrix<Type>& A
)
{
    checkMethod(A, tsu(), "-");

    tmp<gpufvMatrix<Type>> tresult(new gpufvMatrix<Type>(A));
    gpufvMatrix<Type>& result = tresult.ref();
    const DimensionedgpuField<Type, gpuvolMesh>& su = tsu();

    result.negate();
    result.source() -= su.mesh().V()*su.field();
    tsu.clear();

    return tresult;
}

//- tsu - A: a negated copy of A with V times the primitive field of tsu
//  subtracted from its source; tsu is cleared afterwards
template<class Type>
Foam::tmp<Foam::gpufvMatrix<Type>> Foam::operator-
(
    const tmp<GeometricgpuField<Type, fvPatchgpuField, gpuvolMesh>>& tsu,
    const gpufvMatrix<Type>& A
)
{
    checkMethod(A, tsu(), "-");

    tmp<gpufvMatrix<Type>> tresult(new gpufvMatrix<Type>(A));
    gpufvMatrix<Type>& result = tresult.ref();
    const GeometricgpuField<Type, fvPatchgpuField, gpuvolMesh>& su = tsu();

    result.negate();
    result.source() -= su.mesh().V()*su.primitiveField();
    tsu.clear();

    return tresult;
}

//- su - tA: takes over the matrix held by tA, negates it and subtracts
//  V*su from its source
template<class Type>
Foam::tmp<Foam::gpufvMatrix<Type>> Foam::operator-
(
    const DimensionedgpuField<Type, gpuvolMesh>& su,
    const tmp<gpufvMatrix<Type>>& tA
)
{
    checkMethod(tA(), su, "-");

    tmp<gpufvMatrix<Type>> tresult(tA.ptr());
    gpufvMatrix<Type>& result = tresult.ref();

    result.negate();
    result.source() -= su.mesh().V()*su.field();

    return tresult;
}

//- tsu - tA: takes over the matrix held by tA, negates it and subtracts
//  V*tsu from its source; the temporary field is cleared afterwards
template<class Type>
Foam::tmp<Foam::gpufvMatrix<Type>> Foam::operator-
(
    const tmp<DimensionedgpuField<Type, gpuvolMesh>>& tsu,
    const tmp<gpufvMatrix<Type>>& tA
)
{
    checkMethod(tA(), tsu(), "-");

    tmp<gpufvMatrix<Type>> tresult(tA.ptr());
    gpufvMatrix<Type>& result = tresult.ref();
    const DimensionedgpuField<Type, gpuvolMesh>& su = tsu();

    result.negate();
    result.source() -= su.mesh().V()*su.field();
    tsu.clear();

    return tresult;
}

//- tsu - tA: takes over the matrix held by tA, negates it and subtracts
//  V times the primitive field of tsu from its source; tsu is cleared
//  afterwards
template<class Type>
Foam::tmp<Foam::gpufvMatrix<Type>> Foam::operator-
(
    const tmp<GeometricgpuField<Type, fvPatchgpuField, gpuvolMesh>>& tsu,
    const tmp<gpufvMatrix<Type>>& tA
)
{
    checkMethod(tA(), tsu(), "-");

    tmp<gpufvMatrix<Type>> tresult(tA.ptr());
    gpufvMatrix<Type>& result = tresult.ref();
    const GeometricgpuField<Type, fvPatchgpuField, gpuvolMesh>& su = tsu();

    result.negate();
    result.source() -= su.mesh().V()*su.primitiveField();
    tsu.clear();

    return tresult;
}

//- A + su: a copy of A with su.value()*V subtracted from its source
template<class Type>
Foam::tmp<Foam::gpufvMatrix<Type>> Foam::operator+
(
    const gpufvMatrix<Type>& A,
    const dimensioned<Type>& su
)
{
    checkMethod(A, su, "+");

    tmp<gpufvMatrix<Type>> tresult(new gpufvMatrix<Type>(A));
    gpufvMatrix<Type>& result = tresult.ref();

    result.source() -= su.value()*A.psi().mesh().V();

    return tresult;
}

//- tA + su: takes over the matrix held by tA and subtracts su.value()*V
//  from its source
template<class Type>
Foam::tmp<Foam::gpufvMatrix<Type>> Foam::operator+
(
    const tmp<gpufvMatrix<Type>>& tA,
    const dimensioned<Type>& su
)
{
    checkMethod(tA(), su, "+");

    tmp<gpufvMatrix<Type>> tresult(tA.ptr());
    gpufvMatrix<Type>& result = tresult.ref();

    result.source() -= su.value()*tresult().psi().mesh().V();

    return tresult;
}

//- su + A: a copy of A with su.value()*V subtracted from its source
template<class Type>
Foam::tmp<Foam::gpufvMatrix<Type>> Foam::operator+
(
    const dimensioned<Type>& su,
    const gpufvMatrix<Type>& A
)
{
    checkMethod(A, su, "+");

    tmp<gpufvMatrix<Type>> tresult(new gpufvMatrix<Type>(A));
    gpufvMatrix<Type>& result = tresult.ref();

    result.source() -= su.value()*A.psi().mesh().V();

    return tresult;
}

//- su + tA: takes over the matrix held by tA and subtracts su.value()*V
//  from its source
template<class Type>
Foam::tmp<Foam::gpufvMatrix<Type>> Foam::operator+
(
    const dimensioned<Type>& su,
    const tmp<gpufvMatrix<Type>>& tA
)
{
    checkMethod(tA(), su, "+");

    tmp<gpufvMatrix<Type>> tresult(tA.ptr());
    gpufvMatrix<Type>& result = tresult.ref();

    result.source() -= su.value()*tresult().psi().mesh().V();

    return tresult;
}

//- A - su: a copy of A with su.value()*V added to its source
template<class Type>
Foam::tmp<Foam::gpufvMatrix<Type>> Foam::operator-
(
    const gpufvMatrix<Type>& A,
    const dimensioned<Type>& su
)
{
    checkMethod(A, su, "-");

    tmp<gpufvMatrix<Type>> tresult(new gpufvMatrix<Type>(A));
    gpufvMatrix<Type>& result = tresult.ref();

    result.source() += su.value()*tresult().psi().mesh().V();

    return tresult;
}

//- tA - su: takes over the matrix held by tA and adds su.value()*V to
//  its source
template<class Type>
Foam::tmp<Foam::gpufvMatrix<Type>> Foam::operator-
(
    const tmp<gpufvMatrix<Type>>& tA,
    const dimensioned<Type>& su
)
{
    checkMethod(tA(), su, "-");

    tmp<gpufvMatrix<Type>> tresult(tA.ptr());
    gpufvMatrix<Type>& result = tresult.ref();

    result.source() += su.value()*tresult().psi().mesh().V();

    return tresult;
}

//- su - A: a negated copy of A with su.value()*V subtracted from its
//  source
template<class Type>
Foam::tmp<Foam::gpufvMatrix<Type>> Foam::operator-
(
    const dimensioned<Type>& su,
    const gpufvMatrix<Type>& A
)
{
    checkMethod(A, su, "-");

    tmp<gpufvMatrix<Type>> tresult(new gpufvMatrix<Type>(A));
    gpufvMatrix<Type>& result = tresult.ref();

    result.negate();
    result.source() -= su.value()*A.psi().mesh().V();

    return tresult;
}

//- su - tA: takes over the matrix held by tA, negates it and subtracts
//  su.value()*V from its source
template<class Type>
Foam::tmp<Foam::gpufvMatrix<Type>> Foam::operator-
(
    const dimensioned<Type>& su,
    const tmp<gpufvMatrix<Type>>& tA
)
{
    checkMethod(tA(), su, "-");

    tmp<gpufvMatrix<Type>> tresult(tA.ptr());
    gpufvMatrix<Type>& result = tresult.ref();

    result.negate();
    result.source() -= su.value()*tresult().psi().mesh().V();

    return tresult;
}


//- dsf * A: a copy of A scaled by the internal scalar field dsf
template<class Type>
Foam::tmp<Foam::gpufvMatrix<Type>> Foam::operator*
(
    const volScalargpuField::Internal& dsf,
    const gpufvMatrix<Type>& A
)
{
    tmp<gpufvMatrix<Type>> tresult(new gpufvMatrix<Type>(A));
    gpufvMatrix<Type>& result = tresult.ref();

    result *= dsf;

    return tresult;
}

//- tdsf * A: a copy of A scaled by the temporary internal scalar field
template<class Type>
Foam::tmp<Foam::gpufvMatrix<Type>> Foam::operator*
(
    const tmp<volScalargpuField::Internal>& tdsf,
    const gpufvMatrix<Type>& A
)
{
    tmp<gpufvMatrix<Type>> tresult(new gpufvMatrix<Type>(A));
    gpufvMatrix<Type>& result = tresult.ref();

    // The tmp overload of operator*= clears tdsf
    result *= tdsf;

    return tresult;
}

//- tvsf * A: a copy of A scaled by the temporary volume scalar field
template<class Type>
Foam::tmp<Foam::gpufvMatrix<Type>> Foam::operator*
(
    const tmp<volScalargpuField>& tvsf,
    const gpufvMatrix<Type>& A
)
{
    tmp<gpufvMatrix<Type>> tresult(new gpufvMatrix<Type>(A));
    gpufvMatrix<Type>& result = tresult.ref();

    // The tmp overload of operator*= clears tvsf
    result *= tvsf;

    return tresult;
}

//- dsf * tA: takes over the matrix held by tA and scales it by dsf
template<class Type>
Foam::tmp<Foam::gpufvMatrix<Type>> Foam::operator*
(
    const volScalargpuField::Internal& dsf,
    const tmp<gpufvMatrix<Type>>& tA
)
{
    tmp<gpufvMatrix<Type>> tresult(tA.ptr());
    gpufvMatrix<Type>& result = tresult.ref();

    result *= dsf;

    return tresult;
}

//- tdsf * tA: takes over the matrix held by tA and scales it by the
//  temporary internal scalar field
template<class Type>
Foam::tmp<Foam::gpufvMatrix<Type>> Foam::operator*
(
    const tmp<volScalargpuField::Internal>& tdsf,
    const tmp<gpufvMatrix<Type>>& tA
)
{
    tmp<gpufvMatrix<Type>> tresult(tA.ptr());
    gpufvMatrix<Type>& result = tresult.ref();

    // The tmp overload of operator*= clears tdsf
    result *= tdsf;

    return tresult;
}

//- tvsf * tA: takes over the matrix held by tA and scales it by the
//  temporary volume scalar field
template<class Type>
Foam::tmp<Foam::gpufvMatrix<Type>> Foam::operator*
(
    const tmp<volScalargpuField>& tvsf,
    const tmp<gpufvMatrix<Type>>& tA
)
{
    tmp<gpufvMatrix<Type>> tresult(tA.ptr());
    gpufvMatrix<Type>& result = tresult.ref();

    // The tmp overload of operator*= clears tvsf
    result *= tvsf;

    return tresult;
}

//- ds * A: a copy of A scaled by the dimensioned scalar ds
template<class Type>
Foam::tmp<Foam::gpufvMatrix<Type>> Foam::operator*
(
    const dimensioned<scalar>& ds,
    const gpufvMatrix<Type>& A
)
{
    tmp<gpufvMatrix<Type>> tresult(new gpufvMatrix<Type>(A));
    gpufvMatrix<Type>& result = tresult.ref();

    result *= ds;

    return tresult;
}

//- ds * tA: takes over the matrix held by tA and scales it by ds
template<class Type>
Foam::tmp<Foam::gpufvMatrix<Type>> Foam::operator*
(
    const dimensioned<scalar>& ds,
    const tmp<gpufvMatrix<Type>>& tA
)
{
    tmp<gpufvMatrix<Type>> tresult(tA.ptr());
    gpufvMatrix<Type>& result = tresult.ref();

    result *= ds;

    return tresult;
}


//- Evaluate the matrix M on the field psi.
//  Returns a new volume field named "M&" + psi.name() with dimensions
//  M.dimensions()/dimVol. Its internal values are assembled from the
//  diagonal (including boundary diagonal contributions), gpulduMatrix::H,
//  the matrix source and the boundary source, then divided by -V.
template<class Type>
Foam::tmp<Foam::GeometricgpuField<Type, Foam::fvPatchgpuField, Foam::gpuvolMesh>>
Foam::operator&
(
    const gpufvMatrix<Type>& M,
    const DimensionedgpuField<Type, gpuvolMesh>& psi
)
{
    // Result field: registered on the host mesh as NO_READ/NO_WRITE, with
    // the extrapolatedCalculated patch field type on its boundaries
    tmp<GeometricgpuField<Type, fvPatchgpuField, gpuvolMesh>> tMphi
    (
        new GeometricgpuField<Type, fvPatchgpuField, gpuvolMesh>
        (
            IOobject
            (
                "M&" + psi.name(),
                psi.instance(),
                psi.mesh().hostmesh(),
                IOobject::NO_READ,
                IOobject::NO_WRITE
            ),
            psi.mesh(),
            M.dimensions()/dimVol,
            extrapolatedCalculatedFvPatchScalargpuField::typeName
        )
    );
    GeometricgpuField<Type, fvPatchgpuField, gpuvolMesh>& Mphi = tMphi.ref();

    // Loop over field components
    if (M.hasDiag())
    {
        for (direction cmpt=0; cmpt<pTraits<Type>::nComponents; cmpt++)
        {
            scalargpuField psiCmpt(psi.field().component(cmpt));

            // Start from a copy of the matrix diagonal and let
            // addBoundaryDiag() add the boundary part for this component
            scalargpuField boundaryDiagCmpt(M.gpuDiag());
            M.addBoundaryDiag(boundaryDiagCmpt, cmpt);

            // Store -diag*psi for this component
            Mphi.primitiveFieldRef().replace(cmpt, -boundaryDiagCmpt*psiCmpt);
        }
    }
    else
    {
        // No diagonal: start from zero
        Mphi.primitiveFieldRef() = Zero;
    }

    // Add H(psi) and the matrix source, then the boundary source
    Mphi.primitiveFieldRef() += M.gpulduMatrix::H(psi.field()) + M.source();
    M.addBoundarySource(Mphi.primitiveFieldRef());

    // Divide by the negated mesh V() field and update the boundary values
    Mphi.primitiveFieldRef() /= -psi.mesh().V();
    Mphi.correctBoundaryConditions();

    return tMphi;
}

//- Evaluate M on a temporary field, then clear the temporary
template<class Type>
Foam::tmp<Foam::GeometricgpuField<Type, Foam::fvPatchgpuField, Foam::gpuvolMesh>>
Foam::operator&
(
    const gpufvMatrix<Type>& M,
    const tmp<DimensionedgpuField<Type, gpuvolMesh>>& tpsi
)
{
    const DimensionedgpuField<Type, gpuvolMesh>& psi = tpsi();

    tmp<GeometricgpuField<Type, fvPatchgpuField, gpuvolMesh>> tresult(M & psi);

    tpsi.clear();

    return tresult;
}

//- Evaluate M on a temporary volume field, then clear the temporary
template<class Type>
Foam::tmp<Foam::GeometricgpuField<Type, Foam::fvPatchgpuField, Foam::gpuvolMesh>>
Foam::operator&
(
    const gpufvMatrix<Type>& M,
    const tmp<GeometricgpuField<Type, fvPatchgpuField, gpuvolMesh>>& tpsi
)
{
    const GeometricgpuField<Type, fvPatchgpuField, gpuvolMesh>& psi = tpsi();

    tmp<GeometricgpuField<Type, fvPatchgpuField, gpuvolMesh>> tresult(M & psi);

    tpsi.clear();

    return tresult;
}

//- Evaluate a temporary matrix on psi, then clear the temporary matrix
template<class Type>
Foam::tmp<Foam::GeometricgpuField<Type, Foam::fvPatchgpuField, Foam::gpuvolMesh>>
Foam::operator&
(
    const tmp<gpufvMatrix<Type>>& tM,
    const DimensionedgpuField<Type, gpuvolMesh>& psi
)
{
    const gpufvMatrix<Type>& M = tM();

    tmp<GeometricgpuField<Type, fvPatchgpuField, gpuvolMesh>> tresult(M & psi);

    tM.clear();

    return tresult;
}

//- Evaluate a temporary matrix on a temporary field, then clear both
//  temporaries
template<class Type>
Foam::tmp<Foam::GeometricgpuField<Type, Foam::fvPatchgpuField, Foam::gpuvolMesh>>
Foam::operator&
(
    const tmp<gpufvMatrix<Type>>& tM,
    const tmp<DimensionedgpuField<Type, gpuvolMesh>>& tpsi
)
{
    const gpufvMatrix<Type>& M = tM();
    const DimensionedgpuField<Type, gpuvolMesh>& psi = tpsi();

    tmp<GeometricgpuField<Type, fvPatchgpuField, gpuvolMesh>> tresult(M & psi);

    tM.clear();
    tpsi.clear();

    return tresult;
}

//- Evaluate a temporary matrix on a temporary volume field, then clear
//  both temporaries
template<class Type>
Foam::tmp<Foam::GeometricgpuField<Type, Foam::fvPatchgpuField, Foam::gpuvolMesh>>
Foam::operator&
(
    const tmp<gpufvMatrix<Type>>& tM,
    const tmp<GeometricgpuField<Type, fvPatchgpuField, gpuvolMesh>>& tpsi
)
{
    const gpufvMatrix<Type>& M = tM();
    const GeometricgpuField<Type, fvPatchgpuField, gpuvolMesh>& psi = tpsi();

    tmp<GeometricgpuField<Type, fvPatchgpuField, gpuvolMesh>> tresult(M & psi);

    tM.clear();
    tpsi.clear();

    return tresult;
}


// * * * * * * * * * * * * * * * IOstream Operators  * * * * * * * * * * * * //

//- Write the ldu matrix, dimensions, source, internal coefficients and
//  boundary coefficients to the stream, one item per line
template<class Type>
Foam::Ostream& Foam::operator<<(Ostream& os, const gpufvMatrix<Type>& fvm)
{
    os  << static_cast<const gpulduMatrix&>(fvm) << nl;
    os  << fvm.dimensions_ << nl;
    os  << fvm.source_ << nl;
    os  << fvm.internalCoeffs_ << nl;
    os  << fvm.boundaryCoeffs_ << endl;

    os.check(FUNCTION_NAME);

    return os;
}


// * * * * * * * * * * * * * * * * Solvers * * * * * * * * * * * * * * * * * //

#include "gpufvMatrixSolve.C"

// ************************************************************************* //
