/*---------------------------------------------------------------------------*\
  =========                 |
  \\      /  F ield         | OpenFOAM: The Open Source CFD Toolbox
   \\    /   O peration     |
    \\  /    A nd           | www.openfoam.com
     \\/     M anipulation  |
-------------------------------------------------------------------------------
    Copyright (C) 2011-2017 OpenFOAM Foundation
    Copyright (C) 2016-2021 OpenCFD Ltd.
-------------------------------------------------------------------------------
License
    This file is part of OpenFOAM.

    OpenFOAM is free software: you can redistribute it and/or modify it
    under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    OpenFOAM is distributed in the hope that it will be useful, but WITHOUT
    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    for more details.

    You should have received a copy of the GNU General Public License
    along with OpenFOAM.  If not, see <http://www.gnu.org/licenses/>.

Class
    Foam::gpulduMatrix

Description
    gpulduMatrix is a general matrix class in which the coefficients are
    stored as three arrays, one for the upper triangle, one for the
    lower triangle and a third for the diagonal.

    Addressing arrays must be supplied for the upper and lower triangles.

    It might be better if this class were organised as a hierarchy starting
    from an empty matrix, then deriving diagonal, symmetric and asymmetric
    matrices.

SourceFiles
    lduMatrixATmul.C
    lduMatrix.C
    gpulduMatrixTemplates.C
    lduMatrixOperations.C
    lduMatrixSolver.C
    lduMatrixPreconditioner.C
    lduMatrixTests.C
    lduMatrixUpdateMatrixInterfaces.C

\*---------------------------------------------------------------------------*/

#ifndef gpulduMatrix_H
#define gpulduMatrix_H

#include "gpulduMesh.H"
#include "primitiveFieldsFwd.H"
#include "FieldField.H"
#include "lduInterfacegpuFieldPtrsList.H"
#include "typeInfo.H"
#include "autoPtr.H"
#include "runTimeSelectionTables.H"
#include "solverPerformance.H"
#include "InfoProxy.H"
#include "profilingTrigger.H"
#include <functional>

// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //

namespace Foam
{

// Forward Declarations

class gpulduMatrix;

//- Write a gpulduMatrix to a stream (declared a friend of the class)
Ostream& operator<<(Ostream&, const gpulduMatrix&);

//- Write the info proxy obtained from gpulduMatrix::info() to a stream
//- (declared a friend of the class)
Ostream& operator<<(Ostream&, const InfoProxy<gpulduMatrix>&);


/*---------------------------------------------------------------------------*\
                         Class gpulduMatrix Declaration
\*---------------------------------------------------------------------------*/

class gpulduMatrix
{
    // Private Data

        //- LDU mesh reference.
        //  Held in a std::reference_wrapper instead of a plain reference
        //  (formerly: const lduMesh& lduMesh_), which allows setLduMesh()
        //  to re-seat it.
        std::reference_wrapper<const gpulduMesh> lduMesh_;

        //- Coefficients (not including interfaces).
        //  A null pointer means that part of the matrix is not stored; see
        //  hasLower(), hasDiag(), hasUpper() and the derived
        //  diagonal()/symmetric()/asymmetric() queries.
        //  NOTE(review): allocation and release are not visible in this
        //  header - presumably owned by the matrix; confirm in the .C file.
      	mutable scalargpuField *lowerPtr_, *diagPtr_, *upperPtr_;	
		
        //- Coefficients for better memory access.
        //  Exposed read-only through gpuLowerSort()/gpuUpperSort().
        //  NOTE(review): presumably filled through calcSortCoeffs() -
        //  confirm in the .C file.
        mutable scalargpuField *gLowerSortPtr_, *gUpperSortPtr_;

        //- Flag read and written through the coarsestLevel() accessors.
        //  NOTE(review): presumably marks the coarsest level of a multigrid
        //  hierarchy - confirm against the callers.
        bool coarsestLevel_;

        //- Helper taking an input coefficient field and an output field.
        //  NOTE(review): presumably writes the reordered ('sorted') form of
        //  'in' into 'out'; the ordering is defined in the .C file.
        void calcSortCoeffs(scalargpuField& out, const scalargpuField& in) const;

public:

    //- Abstract base-class for gpulduMatrix solvers.
    //  Stores the field name, references to the matrix and its interface
    //  coefficients, the interface list (by value) and the solver controls.
    //  Derived classes must provide type() and solve().
    class solver
    {
    protected:

        // Protected Data

            //- Default maximum number of iterations in the solver
            static const label defaultMaxIter_;

            //- Name of the field, as returned by fieldName()
            word fieldName_;

            //- The matrix, held by reference
            const gpulduMatrix& matrix_;

            //- Interface boundary coefficients, held by reference
            const FieldField<gpuField, scalar>& interfaceBouCoeffs_;

            //- Interface internal coefficients, held by reference
            const FieldField<gpuField, scalar>& interfaceIntCoeffs_;

            //- Interface list, stored by value
            //  (gpulduMatrix::smoother holds its list by reference instead)
            lduInterfacegpuFieldPtrsList interfaces_;

            //- Dictionary of controls
            dictionary controlDict_;

            //- Level of verbosity in the solver output statements
            int log_;

            //- Minimum number of iterations in the solver
            label minIter_;

            //- Maximum number of iterations in the solver
            label maxIter_;

            //- Final convergence tolerance
            scalar tolerance_;

            //- Convergence tolerance relative to the initial
            scalar relTol_;

            //- Profiling trigger held by this solver
            profilingTrigger profiling_;


        // Protected Member Functions

            //- Read the control parameters from the controlDict_
            virtual void readControls();


    public:

        //- Runtime type information (pure virtual, supplied by the
        //- derived solver)
        virtual const word& type() const = 0;


        // Declare run-time constructor selection tables

            //- Constructor table named 'symMatrix'.
            //  NOTE(review): presumably consulted by New() when the matrix
            //  is symmetric - confirm in the selector implementation.
            declareRunTimeSelectionTable
            (
                autoPtr,
                solver,
                symMatrix,
                (
                    const word& fieldName,
                    const gpulduMatrix& matrix,
                    const FieldField<gpuField, scalar>& interfaceBouCoeffs,
                    const FieldField<gpuField, scalar>& interfaceIntCoeffs,
                    const lduInterfacegpuFieldPtrsList& interfaces,
                    const dictionary& solverControls
                ),
                (
                    fieldName,
                    matrix,
                    interfaceBouCoeffs,
                    interfaceIntCoeffs,
                    interfaces,
                    solverControls
                )
            );

            //- Constructor table named 'asymMatrix', with the same
            //- argument list as the 'symMatrix' table.
            //  NOTE(review): presumably consulted by New() when the matrix
            //  is asymmetric - confirm in the selector implementation.
            declareRunTimeSelectionTable
            (
                autoPtr,
                solver,
                asymMatrix,
                (
                    const word& fieldName,
                    const gpulduMatrix& matrix,
                    const FieldField<gpuField, scalar>& interfaceBouCoeffs,
                    const FieldField<gpuField, scalar>& interfaceIntCoeffs,
                    const lduInterfacegpuFieldPtrsList& interfaces,
                    const dictionary& solverControls
                ),
                (
                    fieldName,
                    matrix,
                    interfaceBouCoeffs,
                    interfaceIntCoeffs,
                    interfaces,
                    solverControls
                )
            );


        // Constructors

            //- Construct from field name, matrix, interface coefficients,
            //- interface list and solver controls dictionary.
            //  The matrix and coefficient arguments are kept as references
            //  (see the protected data), so they must outlive the solver.
            solver
            (
                const word& fieldName,
                const gpulduMatrix& matrix,
                const FieldField<gpuField, scalar>& interfaceBouCoeffs,
                const FieldField<gpuField, scalar>& interfaceIntCoeffs,
                const lduInterfacegpuFieldPtrsList& interfaces,
                const dictionary& solverControls
            );

        // Selectors

            //- Return a new solver
            //  Takes the same arguments as the constructor.
            static autoPtr<solver> New
            (
                const word& fieldName,
                const gpulduMatrix& matrix,
                const FieldField<gpuField, scalar>& interfaceBouCoeffs,
                const FieldField<gpuField, scalar>& interfaceIntCoeffs,
                const lduInterfacegpuFieldPtrsList& interfaces,
                const dictionary& solverControls
            );


        //- Destructor
        virtual ~solver() = default;


        // Member Functions

            //- The field name
            const word& fieldName() const noexcept
            {
                return fieldName_;
            }

            //- The referenced matrix
            const gpulduMatrix& matrix() const noexcept
            {
                return matrix_;
            }

            //- The referenced interface boundary coefficients
            const FieldField<gpuField, scalar>& interfaceBouCoeffs() const noexcept
            {
                return interfaceBouCoeffs_;
            }

            //- The referenced interface internal coefficients
            const FieldField<gpuField, scalar>& interfaceIntCoeffs() const noexcept
            {
                return interfaceIntCoeffs_;
            }

            //- The interface list stored in this solver
            const lduInterfacegpuFieldPtrsList& interfaces() const noexcept
            {
                return interfaces_;
            }


            //- Read and reset the solver parameters from the given stream
            virtual void read(const dictionary&);

            //- Solve with given field and rhs
            //  \param psi  field passed by non-const reference
            //  \param source  right-hand side
            //  \param cmpt  component, defaults to 0
            //  \return the solverPerformance of the solve
            virtual solverPerformance solve
            (
                scalargpuField& psi,
                const scalargpuField& source,
                const direction cmpt=0
            ) const = 0;

            //- Solve with given field and rhs (in solveScalar precision).
            //  Default is to call solve routine
            virtual solverPerformance scalarSolve
            (
                scalargpuField& psi,
                const scalargpuField& source,
                const direction cmpt=0
            ) const;

            //- Return the matrix norm used to normalise the residual for the
            //- stopping criterion
            //  NOTE(review): tmpField is taken by non-const reference -
            //  presumably caller-supplied scratch storage; confirm in the
            //  implementation.
            solveScalarField::cmptType normFactor
            (
                const scalargpuField& psi,
                const scalargpuField& source,
                const scalargpuField& Apsi,
                scalargpuField& tmpField
            ) const;
    };


    //- Abstract base-class for gpulduMatrix smoothers.
    //  Stores the field name and references to the matrix, its interface
    //  coefficients and the interface list.
    //  Derived classes must provide type(), smooth() and scalarSmooth().
    class smoother
    {
    protected:

        // Protected Data

            //- Name of the field, as returned by fieldName()
            word fieldName_;

            //- The matrix, held by reference
            const gpulduMatrix& matrix_;

            //- Interface boundary coefficients, held by reference
            const FieldField<gpuField, scalar>& interfaceBouCoeffs_;

            //- Interface internal coefficients, held by reference
            const FieldField<gpuField, scalar>& interfaceIntCoeffs_;

            //- Interface list, held by reference
            //  (gpulduMatrix::solver stores its list by value instead)
            const lduInterfacegpuFieldPtrsList& interfaces_;


    public:

        //- Find the smoother name (directly or from a sub-dictionary)
        static word getName(const dictionary&);

        //- Runtime type information (pure virtual, supplied by the
        //- derived smoother)
        virtual const word& type() const = 0;


        // Declare run-time constructor selection tables

            //- Constructor table named 'symMatrix'.
            //  Unlike the solver tables, no controls dictionary is passed.
            declareRunTimeSelectionTable
            (
                autoPtr,
                smoother,
                symMatrix,
                (
                    const word& fieldName,
                    const gpulduMatrix& matrix,
                    const FieldField<gpuField, scalar>& interfaceBouCoeffs,
                    const FieldField<gpuField, scalar>& interfaceIntCoeffs,
                    const lduInterfacegpuFieldPtrsList& interfaces
                ),
                (
                    fieldName,
                    matrix,
                    interfaceBouCoeffs,
                    interfaceIntCoeffs,
                    interfaces
                )
            );

            //- Constructor table named 'asymMatrix', with the same
            //- argument list as the 'symMatrix' table
            declareRunTimeSelectionTable
            (
                autoPtr,
                smoother,
                asymMatrix,
                (
                    const word& fieldName,
                    const gpulduMatrix& matrix,
                    const FieldField<gpuField, scalar>& interfaceBouCoeffs,
                    const FieldField<gpuField, scalar>& interfaceIntCoeffs,
                    const lduInterfacegpuFieldPtrsList& interfaces
                ),
                (
                    fieldName,
                    matrix,
                    interfaceBouCoeffs,
                    interfaceIntCoeffs,
                    interfaces
                )
            );


        // Constructors

            //- Construct from field name, matrix, interface coefficients
            //- and interface list.
            //  All but the field name are kept as references (see the
            //  protected data), so they must outlive the smoother.
            smoother
            (
                const word& fieldName,
                const gpulduMatrix& matrix,
                const FieldField<gpuField, scalar>& interfaceBouCoeffs,
                const FieldField<gpuField, scalar>& interfaceIntCoeffs,
                const lduInterfacegpuFieldPtrsList& interfaces
            );


        // Selectors

            //- Return a new smoother
            //  Takes a controls dictionary in addition to the constructor
            //  arguments.
            static autoPtr<smoother> New
            (
                const word& fieldName,
                const gpulduMatrix& matrix,
                const FieldField<gpuField, scalar>& interfaceBouCoeffs,
                const FieldField<gpuField, scalar>& interfaceIntCoeffs,
                const lduInterfacegpuFieldPtrsList& interfaces,
                const dictionary& solverControls
            );


        //- Destructor
        virtual ~smoother() = default;


        // Member Functions

            //- The field name
            const word& fieldName() const noexcept
            {
                return fieldName_;
            }

            //- The referenced matrix
            const gpulduMatrix& matrix() const noexcept
            {
                return matrix_;
            }

            //- The referenced interface boundary coefficients
            const FieldField<gpuField, scalar>& interfaceBouCoeffs() const noexcept
            {
                return interfaceBouCoeffs_;
            }

            //- The referenced interface internal coefficients
            const FieldField<gpuField, scalar>& interfaceIntCoeffs() const noexcept
            {
                return interfaceIntCoeffs_;
            }

            //- The referenced interface list
            const lduInterfacegpuFieldPtrsList& interfaces() const noexcept
            {
                return interfaces_;
            }


            //- Smooth the solution for a given number of sweeps
            //  \param psi  field passed by non-const reference
            //  \param source  right-hand side
            //  \param cmpt  component
            //  \param nSweeps  number of sweeps
            virtual void smooth
            (   
                scalargpuField& psi,
                const scalargpuField& source,
                const direction cmpt,
                const label nSweeps
            ) const = 0;

            //- Smooth the solution for a given number of sweeps
            //  NOTE(review): identical signature to smooth(); by analogy
            //  with solver::scalarSolve() this is presumably the
            //  solveScalar-precision variant - confirm in derived classes.
            virtual void scalarSmooth
            (
                scalargpuField& psi,
                const scalargpuField& source,
                const direction cmpt,
                const label nSweeps
            ) const = 0;
    };

    //- Abstract base-class for gpulduMatrix preconditioners.
    //  Holds a reference to the solver it is used with.
    //  Derived classes must provide type() and precondition().
    class preconditioner
    {
    protected:

        // Protected Data

            //- Reference to the base-solver this preconditioner is used with
            const solver& solver_;


    public:

        //- Find the preconditioner name (directly or from a sub-dictionary)
        static word getName(const dictionary&);

        //- Runtime type information (pure virtual, supplied by the
        //- derived preconditioner)
        virtual const word& type() const = 0;


        // Declare run-time constructor selection tables

            //- Constructor table named 'symMatrix', taking the solver and
            //- the controls dictionary
            declareRunTimeSelectionTable
            (
                autoPtr,
                preconditioner,
                symMatrix,
                (
                    const solver& sol,
                    const dictionary& solverControls
                ),
                (sol, solverControls)
            );

            //- Constructor table named 'asymMatrix', with the same
            //- argument list as the 'symMatrix' table
            declareRunTimeSelectionTable
            (
                autoPtr,
                preconditioner,
                asymMatrix,
                (
                    const solver& sol,
                    const dictionary& solverControls
                ),
                (sol, solverControls)
            );


        // Constructors

            //- Construct for the given solver.
            //  Only the reference is stored, so the solver must outlive
            //  the preconditioner.
            preconditioner
            (
                const solver& sol
            )
            :
                solver_(sol)
            {}


        // Selectors

            //- Return a new preconditioner
            static autoPtr<preconditioner> New
            (
                const solver& sol,
                const dictionary& solverControls
            );


        //- Destructor
        virtual ~preconditioner() = default;


        // Member Functions

            //- Read and reset the preconditioner parameters
            //- from the given stream
            //  The base-class implementation does nothing.
            virtual void read(const dictionary&)
            {}

            //- Return wA the preconditioned form of residual rA
            //  \param wA  output field
            //  \param rA  residual
            //  \param cmpt  component, defaults to 0
            virtual void precondition
            (
                scalargpuField& wA,
                const scalargpuField& rA,
                const direction cmpt=0
            ) const =0;

            //- Return wT the transpose-matrix preconditioned form of
            //- residual rT.
            //  This is only required for preconditioning asymmetric matrices.
            //  The base-class implementation invokes NotImplemented.
            virtual void preconditionT
            (
                scalargpuField& wT,
                const scalargpuField& rT,
                const direction cmpt=0
            ) const
            {
                NotImplemented;
            }
    };


    // Static Data

        // Declare name of the class and its debug switch
        ClassName("gpulduMatrix");


    // Constructors

        //- Construct given an LDU addressed mesh.
        //  The coefficients are initially empty for subsequent setting.
        //  Not declared explicit, so a gpulduMesh converts implicitly.
        gpulduMatrix(const gpulduMesh&);

        //- Construct as copy
        gpulduMatrix(const gpulduMatrix&);

        //- Construct as copy or re-use as specified.
        //  The source is taken by non-const reference.
        //  NOTE(review): presumably its coefficient storage is taken over
        //  when reuse is true - confirm in the .C file.
        gpulduMatrix(gpulduMatrix&, bool reuse);

        //- Construct given an LDU addressed mesh and an Istream
        //- from which the coefficients are read
        gpulduMatrix(const gpulduMesh&, Istream&);


    //- Destructor (non-virtual)
    ~gpulduMatrix();


    // Member Functions

        // Access to addressing

            //- The LDU mesh that currently supplies the addressing
            const gpulduMesh& mesh() const
            {
                return lduMesh_.get();
            }

            //- Set the LDU mesh containing the addressing is obtained
            void setLduMesh(const gpulduMesh& m)
            {
                lduMesh_ = m;
            }

            //- The LDU addressing provided by the current mesh
            const gpulduAddressing& lduAddr() const
            {
                const gpulduMesh& currentMesh = mesh();
                return currentMesh.lduAddr();
            }

            //- The patch evaluation schedule, taken from the LDU addressing
            const lduSchedule& patchSchedule() const
            {
                const gpulduAddressing& addr = lduAddr();
                return addr.patchSchedule();
            }

        // Access to coefficients

            //- Non-const access to the lower, diagonal and upper
            //- coefficient fields.
            //  NOTE(review): whether these allocate the field on first use
            //  is decided in the .C file - confirm there.
            scalargpuField& gpuLower();
            scalargpuField& gpuDiag();
            scalargpuField& gpuUpper();

            // Size with externally provided sizes (for constructing with 'fake'
            // mesh in GAMG)

                //- As above, with the size supplied by the caller
                //  (named 'size' here and 'nCoeffs' in the two below)
                scalargpuField& gpuLower(const label size);
                scalargpuField& gpuDiag(const label nCoeffs);
                scalargpuField& gpuUpper(const label nCoeffs);


            //- Const access to the lower, diagonal and upper
            //- coefficient fields
            const scalargpuField& gpuLower() const;
            const scalargpuField& gpuDiag() const;
            const scalargpuField& gpuUpper() const;


            //- The level reported by the LDU addressing
            label level() const
            {
                const gpulduAddressing& addr = lduAddr();
                return addr.level();
            }

            //- The coarsest-level flag (read-only).
            //  Trivial accessor, marked noexcept like the accessors of the
            //  nested solver/smoother classes.
            bool coarsestLevel() const noexcept
            {
                return coarsestLevel_;
            }

            //- Writable reference to the coarsest-level flag, so that the
            //- caller can set it by assignment
            bool& coarsestLevel() noexcept
            {
                return coarsestLevel_;
            }
            			
            //- Const access to the 'sorted' lower and upper coefficient
            //- fields (the private "coefficients for better memory access").
            //  NOTE(review): presumably built on demand through
            //  calcSortCoeffs() - confirm in the .C file.
            const scalargpuField& gpuLowerSort() const;
            const scalargpuField& gpuUpperSort() const;

			
            //- True if the diagonal coefficient pointer is set (non-null)
            bool hasDiag() const noexcept
            {
                return diagPtr_ != nullptr;
            }

            //- True if the upper coefficient pointer is set (non-null)
            bool hasUpper() const noexcept
            {
                return upperPtr_ != nullptr;
            }

            //- True if the lower coefficient pointer is set (non-null)
            bool hasLower() const noexcept
            {
                return lowerPtr_ != nullptr;
            }

            //- True if only the diagonal is stored:
            //- diagonal set, lower and upper both unset
            bool diagonal() const noexcept
            {
                return hasDiag() && !hasLower() && !hasUpper();
            }

            //- True if the diagonal and upper are set but the lower is not.
            //  A matrix that has a lower pointer set is never reported as
            //  symmetric, whatever its values.
            bool symmetric() const noexcept
            {
                return hasDiag() && !hasLower() && hasUpper();
            }

            //- True if the diagonal, lower and upper are all set
            bool asymmetric() const noexcept
            {
                return hasDiag() && hasLower() && hasUpper();
            }


        // operations

            //- Modify the diagonal in place.
            //  NOTE(review): undocumented in this header - by name,
            //  presumably sets/accumulates the diagonal from the sum
            //  (sumDiag) or negated sum (negSumDiag) of the off-diagonal
            //  coefficients; confirm in the .C file.
            void sumDiag();
            void negSumDiag();

            //- Writes into sumOff without modifying the matrix (const).
            //  NOTE(review): by name, presumably the per-row sum of the
            //  magnitudes of the off-diagonal coefficients - confirm.
            void sumMagOffDiag(scalargpuField& sumOff) const;

            //- Matrix multiplication with updated interfaces.
            //  Arguments, in order: a non-const field, a tmp field, the
            //  interface coefficients, the interface list and the component.
            //  NOTE(review): presumably the first field receives the result
            //  and the tmp field is the one multiplied - confirm.
            void Amul
            (
                scalargpuField&,
                const tmp<scalargpuField>&,
                const FieldField<gpuField, scalar>&,
                const lduInterfacegpuFieldPtrsList&,
                const direction cmpt
            ) const;

            //- Matrix transpose multiplication with updated interfaces.
            //  Same argument list as Amul().
            void Tmul
            (
                scalargpuField&,
                const tmp<scalargpuField>&,
                const FieldField<gpuField, scalar>&,
                const lduInterfacegpuFieldPtrsList&,
                const direction cmpt
            )const;

            //- Sum the coefficients on each row of the matrix
            //  Arguments, in order: a non-const field, the interface
            //  coefficients and the interface list.
            void sumA
            (
                scalargpuField&,
                const FieldField<gpuField, scalar>&,
                const lduInterfacegpuFieldPtrsList&
            ) const;

            //- Residual overload writing into the caller-supplied field rA.
            //  \param rA  output field
            //  \param psi  field
            //  \param source  right-hand side
            //  \param interfaceBouCoeffs  interface boundary coefficients
            //  \param interfaces  interface list
            //  \param cmpt  component
            void residual
            (
                scalargpuField& rA,
                const scalargpuField& psi,
                const scalargpuField& source,
                const FieldField<gpuField, scalar>& interfaceBouCoeffs,
                const lduInterfacegpuFieldPtrsList& interfaces,
                const direction cmpt
            ) const;

            //- Residual overload returning the result as a tmp field;
            //- other arguments as for the overload above
            tmp<scalargpuField> residual
            (
                const scalargpuField& psi,
                const scalargpuField& source,
                const FieldField<gpuField, scalar>& interfaceBouCoeffs,
                const lduInterfacegpuFieldPtrsList& interfaces,
                const direction cmpt
            ) const;


            // Disabled overload taking host-side types
            // (FieldField<Field, scalar>, solveScalarField); kept for
            // reference only:
/*            void initMatrixInterfaces
            (
                const bool add,
                const FieldField<Field, scalar>& interfaceCoeffs,
                const lduInterfacegpuFieldPtrsList& interfaces,
                const solveScalarField& psiif,
                solveScalarField& result,
                const direction cmpt
            ) const;*/

            //- Initialise the update of interfaced interfaces
            //- for matrix operations
            //  \param add  flag forwarded to the interface update
            //      NOTE(review): presumably selects adding versus
            //      subtracting the interface contribution - confirm
            //  \param interfaceCoeffs  interface coefficients
            //  \param interfaces  interface list
            //  \param psiif  input field
            //  \param result  field passed by non-const reference
            //  \param cmpt  component
            void initMatrixInterfaces
            (
                const bool add,
                const FieldField<gpuField, scalar>& interfaceCoeffs,
                const lduInterfacegpuFieldPtrsList& interfaces,
                const scalargpuField& psiif,
                scalargpuField& result,
                const direction cmpt
            ) const;

            // Disabled overload taking host-side types
            // (FieldField<Field, scalar>, solveScalarField); kept for
            // reference only:
 /*           void updateMatrixInterfaces
            (   
                const bool add,
                const FieldField<Field, scalar>& interfaceCoeffs,
                const lduInterfacegpuFieldPtrsList& interfaces,
                const solveScalarField& psiif,
                solveScalarField& result,
                const direction cmpt,
                const label startRequest // starting request (for non-blocking)
            ) const;*/

            //- Update interfaced interfaces for matrix operations
            //  Same arguments as initMatrixInterfaces(), plus
            //  \param startRequest  starting request (for non-blocking)
            void updateMatrixInterfaces
            (
                const bool add,
                const FieldField<gpuField, scalar>& interfaceCoeffs,
                const lduInterfacegpuFieldPtrsList& interfaces,
                const scalargpuField& psiif,
                scalargpuField& result,
                const direction cmpt,
                const label startRequest // starting request (for non-blocking)
            ) const;

            //- Set the residual field using an IOField on the object registry
            //- if it exists
            //  NOTE(review): the residual is passed as a scalarField here,
            //  not a scalargpuField as elsewhere in this class - confirm
            //  that callers are expected to supply a host-side field.
            void setResidualField
            (
                const scalarField& residual,
                const word& fieldName,
                const bool initial
            ) const;

            //- H operator on a field of arbitrary value type, returned as
            //- a tmp field.
            //  NOTE(review): undocumented in this header; the template
            //  members are presumably defined in gpulduMatrixTemplates.C,
            //  which is included at the end of this file under NoRepository.
            template<class Type>
            tmp<gpuField<Type>> H(const gpuField<Type>&) const;

            //- As above, taking the field as a tmp
            template<class Type>
            tmp<gpuField<Type>> H(const tmp<gpuField<Type>>&) const;

            //- H1 operator, returned as a tmp scalar field.
            //  NOTE(review): undocumented in this header - see the .C file.
            tmp<scalargpuField> H1() const;

            //- faceH operator on a field of arbitrary value type, returned
            //- as a tmp field.
            //  NOTE(review): undocumented in this header - see the
            //  templates file.
            template<class Type>
            tmp<gpuField<Type>> faceH(const gpuField<Type>&) const;

            //- As above, taking the field as a tmp
            template<class Type>
            tmp<gpuField<Type>> faceH(const tmp<gpuField<Type>>&) const;


        // Info

            //- Info proxy wrapping this matrix.
            //  Pass it to a stream to print matrix information
            InfoProxy<gpulduMatrix> info() const
            {
                const gpulduMatrix& self = *this;
                return self;
            }


    // Member operators

        //- Assignment from another matrix.
        //  Returns void, so assignments cannot be chained.
        void operator=(const gpulduMatrix&);

        //- Negate the matrix in place.
        //  NOTE(review): presumably negates every stored coefficient
        //  field - confirm in the .C file.
        void negate();

        //- Add / subtract another matrix in place
        void operator+=(const gpulduMatrix&);
        void operator-=(const gpulduMatrix&);

        //- Multiply in place by a field or by a single scalar.
        //  NOTE(review): how the field overload maps its entries onto the
        //  lower/upper coefficients is defined in the .C file.
        void operator*=(const scalargpuField&);
        void operator*=(scalar);


    // Ostream operator

        //- Stream output of the matrix itself and of its info proxy
        //- (see info()); friends so they can reach the private data
        friend Ostream& operator<<(Ostream&, const gpulduMatrix&);
        friend Ostream& operator<<(Ostream&, const InfoProxy<gpulduMatrix>&);
};


// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //

} // End namespace Foam

// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //

#ifdef NoRepository
    #include "gpulduMatrixTemplates.C"
#endif

// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //

#endif

// ************************************************************************* //
