/*---------------------------------------------------------------------------*\
  =========                 |
  \\      /  F ield         | OpenFOAM: The Open Source CFD Toolbox
   \\    /   O peration     |
    \\  /    A nd           | www.openfoam.com
     \\/     M anipulation  |
-------------------------------------------------------------------------------
    Copyright (C) 2011-2017 OpenFOAM Foundation
    Copyright (C) 2019-2021 OpenCFD Ltd.
-------------------------------------------------------------------------------
License
    This file is part of OpenFOAM.

    OpenFOAM is free software: you can redistribute it and/or modify it
    under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    OpenFOAM is distributed in the hope that it will be useful, but WITHOUT
    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    for more details.

    You should have received a copy of the GNU General Public License
    along with OpenFOAM.  If not, see <http://www.gnu.org/licenses/>.

\*---------------------------------------------------------------------------*/

#include "processorGGAMGInterfaceField.H"
#include "addToRunTimeSelectionTable.H"
#include "gpulduMatrix.H"

#include "GAMGInterfaceFunctors.H"

// * * * * * * * * * * * * * * Static Data Members * * * * * * * * * * * * * //

namespace Foam
{
    // Type-name/debug definition for this class; the second argument (0) is
    // the value handed to the macro for the debug switch.
    // NOTE(review): macro is defined outside this file - presumably the
    // usual OpenFOAM typeName/debug definition; confirm in the headers.
    defineTypeNameAndDebug(processorGGAMGInterfaceField, 0);

    // Register this class in the GGAMGInterfaceField run-time selection
    // table identified by 'gpulduInterface'
    addToRunTimeSelectionTable
    (
        GGAMGInterfaceField,
        processorGGAMGInterfaceField,
        gpulduInterface
    );

    // Register this class in the GGAMGInterfaceField run-time selection
    // table identified by 'lduInterfacegpuField'
    addToRunTimeSelectionTable
    (
        GGAMGInterfaceField,
        processorGGAMGInterfaceField,
        lduInterfacegpuField
    );
}


// * * * * * * * * * * * * * * * * Constructors  * * * * * * * * * * * * * * //

// Construct from the coarse-level interface and the fine-level interface
// field.
//
// GAMGCp is downcast (refCast) to processorGGAMGInterface and kept as
// procInterface_. The transform flag and the rank are taken from
// fineInterface after downcasting it to processorLduInterfacegpuField.
Foam::processorGGAMGInterfaceField::processorGGAMGInterfaceField
(
    const GGAMGInterface& GAMGCp,
    const lduInterfacegpuField& fineInterface
)
:
    GGAMGInterfaceField(GAMGCp, fineInterface),
    procInterface_(refCast<const processorGGAMGInterface>(GAMGCp)),
    doTransform_(false),
    rank_(0)
{
    // The initialisers above are placeholders only; the real values come
    // from the fine-level processor field
    const processorLduInterfacegpuField& fineProcField =
        refCast<const processorLduInterfacegpuField>(fineInterface);

    doTransform_ = fineProcField.doTransform();
    rank_ = fineProcField.rank();
}


// Construct from the coarse-level interface plus explicitly supplied
// transform flag and rank.
//
// GAMGCp is downcast (refCast) to processorGGAMGInterface and kept as
// procInterface_; doTransform and rank are forwarded to the base class and
// also stored locally in doTransform_ and rank_.
Foam::processorGGAMGInterfaceField::processorGGAMGInterfaceField
(
    const GGAMGInterface& GAMGCp,
    const bool doTransform,
    const int rank
)
:
    GGAMGInterfaceField(GAMGCp, doTransform, rank),
    procInterface_(refCast<const processorGGAMGInterface>(GAMGCp)),
    doTransform_(doTransform),
    rank_(rank)
{}


// * * * * * * * * * * * * * * * Member Functions  * * * * * * * * * * * * * //

/*void Foam::processorGGAMGInterfaceField::initInterfaceMatrixUpdate
(
    solveScalarField&,
    const bool,
    const gpulduAddressing& lduAddr,
    const label patchId,
    const solveScalarField& psiInternal,
    const scalarField&,
    const direction,
    const Pstream::commsTypes commsType
) const
{
    procInterface_.interfaceInternalField(psiInternal, scalarSendBuf_);

    if
    (
        commsType == Pstream::commsTypes::nonBlocking
     && !Pstream::floatTransfer
    )
    {
        // Fast path.
        scalarReceiveBuf_.setSize(scalarSendBuf_.size());
        outstandingRecvRequest_ = UPstream::nRequests();
        IPstream::read
        (
            Pstream::commsTypes::nonBlocking,
            procInterface_.neighbProcNo(),
            scalarReceiveBuf_.data_bytes(),
            scalarReceiveBuf_.size_bytes(),
            procInterface_.tag(),
            comm()
        );

        outstandingSendRequest_ = UPstream::nRequests();
        OPstream::write
        (
            Pstream::commsTypes::nonBlocking,
            procInterface_.neighbProcNo(),
            scalarSendBuf_.cdata_bytes(),
            scalarSendBuf_.size_bytes(),
            procInterface_.tag(),
            comm()
        );
    }
    else
    {
        procInterface_.compressedSend(commsType, scalarSendBuf_);
    }

    const_cast<processorGGAMGInterfaceField&>(*this).updatedMatrix() = false;
}*/

// Initiate the exchange of interface values with the neighbour processor.
//
// The interface-internal values of psiInternal are gathered into
// scalargpuSendBuf_. Then:
//   - for non-blocking comms without float transfer, a non-blocking
//     receive and a non-blocking send are posted on raw buffers. The
//     buffers are the GPU buffers themselves when
//     Pstream::gpuDirectTransfer is set, otherwise host buffers (the send
//     data is first copied from the GPU buffer to the host one);
//   - otherwise scalargpuSendBuf_ is passed to compressedSend().
// Finally the updatedMatrix flag is reset to false.
//
// The result, add, coeffs and cmpt arguments are unnamed and unused here;
// lduAddr and patchId are named but not referenced.
void Foam::processorGGAMGInterfaceField::initInterfaceMatrixUpdate
(
    scalargpuField&,
    const bool,
    const gpulduAddressing& lduAddr,
    const label patchId,
    const scalargpuField& psiInternal,
    const scalargpuField&,
    const direction,
    const Pstream::commsTypes commsType
) const
{
    procInterface_.interfaceInternalField(psiInternal, scalargpuSendBuf_);

    const bool rawNonBlocking =
    (
        commsType == Pstream::commsTypes::nonBlocking
     && !Pstream::floatTransfer
    );

    if (!rawNonBlocking)
    {
        procInterface_.compressedSend(commsType, scalargpuSendBuf_);
    }
    else
    {
        // Byte count is taken from the GPU send buffer in both variants
        const std::streamsize nBytes = scalargpuSendBuf_.byteSize();

        scalar* recvPtr = nullptr;
        const scalar* sendPtr = nullptr;

        if (Pstream::gpuDirectTransfer)
        {
            // Fast path: communicate directly on the GPU buffers
            scalargpuReceiveBuf_.setSize(scalargpuSendBuf_.size());

            sendPtr = scalargpuSendBuf_.data();
            recvPtr = scalargpuReceiveBuf_.data();
        }
        else
        {
            // Stage through host buffers of matching size
            scalarSendBuf_.setSize(scalargpuSendBuf_.size());
            scalarReceiveBuf_.setSize(scalarSendBuf_.size());

            thrust::copy
            (
                scalargpuSendBuf_.begin(),
                scalargpuSendBuf_.end(),
                scalarSendBuf_.begin()
            );

            sendPtr = scalarSendBuf_.begin();
            recvPtr = scalarReceiveBuf_.begin();
        }

        // Remember the index of the receive request about to be posted
        outstandingRecvRequest_ = UPstream::nRequests();
        IPstream::read
        (
            Pstream::commsTypes::nonBlocking,
            procInterface_.neighbProcNo(),
            reinterpret_cast<char*>(recvPtr),
            nBytes,
            procInterface_.tag(),
            comm()
        );

        // Remember the index of the send request about to be posted
        outstandingSendRequest_ = UPstream::nRequests();
        OPstream::write
        (
            Pstream::commsTypes::nonBlocking,
            procInterface_.neighbProcNo(),
            reinterpret_cast<const char*>(sendPtr),
            nBytes,
            procInterface_.tag(),
            comm()
        );
    }

    const_cast<processorGGAMGInterfaceField&>(*this).updatedMatrix() = false;
}


/*void Foam::processorGGAMGInterfaceField::updateInterfaceMatrix
(
    solveScalarField& result,
    const bool add,
    const gpulduAddressing& lduAddr,
    const label patchId,
    const solveScalarField&,
    const scalarField& coeffs,
    const direction cmpt,
    const Pstream::commsTypes commsType
) const
{
    if (updatedMatrix())
    {
        return;
    }

    const labelUList& faceCells = lduAddr.patchAddr(patchId);

    if
    (
        commsType == Pstream::commsTypes::nonBlocking
     && !Pstream::floatTransfer
    )
    {
        // Fast path.
        if
        (
            outstandingRecvRequest_ >= 0
         && outstandingRecvRequest_ < Pstream::nRequests()
        )
        {
            UPstream::waitRequest(outstandingRecvRequest_);
        }
        // Recv finished so assume sending finished as well.
        outstandingSendRequest_ = -1;
        outstandingRecvRequest_ = -1;

        // Consume straight from scalarReceiveBuf_

        // Transform according to the transformation tensor
        transformCoupleField(scalarReceiveBuf_, cmpt);

        // Multiply the field by coefficients and add into the result
        addToInternalField(result, !add, faceCells, coeffs, scalarReceiveBuf_);
    }
    else
    {
        solveScalarField pnf
        (
            procInterface_.compressedReceive<solveScalar>
            (
                commsType,
                coeffs.size()
            )
        );
        transformCoupleField(pnf, cmpt);

        addToInternalField(result, !add, faceCells, coeffs, pnf);
    }

    const_cast<processorGGAMGInterfaceField&>(*this).updatedMatrix() = true;
}*/

// Complete the exchange started by initInterfaceMatrixUpdate and apply the
// received neighbour values to result.
//
// Returns immediately if updatedMatrix() is already true. Otherwise the
// neighbour values are brought into scalargpuReceiveBuf_:
//   - for non-blocking comms without float transfer, by waiting on the
//     outstanding receive request (if still valid) and, unless
//     Pstream::gpuDirectTransfer is set, assigning the host receive buffer
//     to the GPU one;
//   - otherwise via compressedReceive().
// The buffer is then passed through transformCoupleField() and
// GAMGUpdateInterfaceMatrix() (called with !add), and the updatedMatrix
// flag is set to true.
//
// lduAddr and patchId are named but not referenced; the psiInternal
// argument is unnamed and unused.
void Foam::processorGGAMGInterfaceField::updateInterfaceMatrix
(
    scalargpuField& result,
    const bool add,
    const gpulduAddressing& lduAddr,
    const label patchId,
    const scalargpuField&,
    const scalargpuField& coeffs,
    const direction cmpt,
    const Pstream::commsTypes commsType
) const
{
    if (updatedMatrix())
    {
        return;
    }

    const bool rawNonBlocking =
    (
        commsType == Pstream::commsTypes::nonBlocking
     && !Pstream::floatTransfer
    );

    if (rawNonBlocking)
    {
        // Fast path: only wait if the stored request index is still valid
        const bool recvPending =
        (
            outstandingRecvRequest_ >= 0
         && outstandingRecvRequest_ < Pstream::nRequests()
        );

        if (recvPending)
        {
            UPstream::waitRequest(outstandingRecvRequest_);
        }

        // Recv finished so assume sending finished as well.
        outstandingSendRequest_ = -1;
        outstandingRecvRequest_ = -1;

        // Without direct GPU transfer the data arrived in the host buffer
        if (!Pstream::gpuDirectTransfer)
        {
            scalargpuReceiveBuf_ = scalarReceiveBuf_;
        }
    }
    else
    {
        scalargpuReceiveBuf_.setSize(coeffs.size());

        procInterface_.compressedReceive<solveScalar>
        (
            commsType,
            scalargpuReceiveBuf_
        );
    }

    // Transform according to the transformation tensor
    transformCoupleField(scalargpuReceiveBuf_, cmpt);

    // Multiply the field by coefficients and add into the result
    GAMGUpdateInterfaceMatrix
    (
        result,
        coeffs,
        scalargpuReceiveBuf_,
        procInterface_,
        !add
    );

    const_cast<processorGGAMGInterfaceField&>(*this).updatedMatrix() = true;
}


// ************************************************************************* //
