/*******************************************************************************
* Copyright (C) 2022 Intel Corporation
*
* This software and the related documents are Intel copyrighted  materials,  and
* your use of  them is  governed by the  express license  under which  they were
* provided to you (License).  Unless the License provides otherwise, you may not
* use, modify, copy, publish, distribute,  disclose or transmit this software or
* the related documents without Intel's prior written permission.
*
* This software and the related documents  are provided as  is,  with no express
* or implied  warranties,  other  than those  that are  expressly stated  in the
* License.
*******************************************************************************/

/*
!  Content:
!      Intel(R) oneAPI Math Kernel Library ScaLAPACK C example source file
!
!*******************************************************************************
!===============================================================================
!  Matrix generator routines for ScaLAPACK p?getrf and p?getrs example program 
!===============================================================================
! List of routines are given in the file:
!
! pdmatgen_random
! psmatgen_random
! pzmatgen_random
! psmatgen_random
!
! Definitions for element index numbers of ScaLAPACK array descriptors 
*/
#include "mkl.h"
#include "mkl_blacs.h"
#include "mkl_pblas.h"
#include "mkl_scalapack.h"

#define DTYPE_ 1
#define CTXT_ 2
#define M_ 3
#define N_ 4
#define MB_ 5
#define NB_ 6
#define RSRC_ 7
#define CSRC_ 8
#define LLD_ 9
/* Additional  definitions */
#ifndef min 
#define min(a, b) ((a) < (b) ? (a) : (b))
#endif
#ifndef max
#define max(a, b) ((a) > (b) ? (a) : (b)) 
#endif
/* Local real and complex constants */
#define RZERO 0.0
#define RONE  1.0
#define RNEGONE  -1.0
#define CZERO {0.0f, 0.0f}
#define CONE  {1.0f, 0.0f}
#define CNEGONE  {-1.0f, 0.0f}

const MKL_INT izero = 0, ione = 1, isix = 6, inegone = -1, dist=1;

void pdmatgen_random(MKL_INT type, MKL_INT* desca, double* a, MKL_INT*  seed,  MKL_INT nprow, MKL_INT npcol)
/* 
*  Purpose
*  =======
*
*  pdmatgen_random: parallel real double precision matrix generator.
*  Generate a sub-matrix of A.
*
*  Arguments
*  =========
*
* type    (global input)
*          Type of global matrix to be generated.
*          If type==0, local matrices are parts of the matrix 
*          which possesses 1 as all elements along its leading (main) diagonal;
*          while all other elements in the matrix are 0. 
*          A random matrix is generated if type != 0.
*
*  desca   (global and local input) an array of (at least) length 9.
*          The array descriptor for the distributed matrix A.
*          See the notes below for the decription of each element.
*
*  a       (local output), pointer into the local
*          memory to an array containing the
*          local pieces of the distributed matrix.
*
*  seed    (local input and output) integer array, dimension (4)
*          On entry, the seed of the random number generator.
*          On exit, the seed is updated. 
*
*  nprow   (global input)
*          The number of process rows in the grid.
*
*  npcol   (global input)
*          The number of process columns in the grid.
*
*
*  Notes:
*  =====
*
*  Each global data object is described by an associated description
*  vector.  This vector stores the information required to establish
*  the mapping between an object element and its corresponding process
*  and memory location.
*
*
*  In  the  following  comments,   the character _  should  be  read  as
*  "of  the  distributed  matrix".  Let  A  be a generic term for any 2D
*  block cyclicly distributed matrix.  Its description vector is DESC_A:
*
*  NOTATION         STORED IN       EXPLANATION
*  ---------------- --------------- ------------------------------------
*  DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
*  CTXT_A  (global) DESCA[ CTXT_  ] The BLACS context handle, indicating
*                                   the NPROW x NPCOL BLACS process grid
*                                   A  is  distributed over. The context
*                                   itself  is  global,  but  the handle
*                                   (the integer value) may vary.
*  M_A     (global) DESCA[ M_     ] The  number of rows in the distribu-
*                                   ted matrix A, M_A >= 0.
*  N_A     (global) DESCA[ N_     ] The number of columns in the distri-
*                                   buted matrix A, N_A >= 0.
*  IMB_A   (global) DESCA[ IMB_   ] The number of rows of the upper left
*                                   block of the matrix A, IMB_A > 0.
*  INB_A   (global) DESCA[ INB_   ] The  number  of columns of the upper
*                                   left   block   of   the  matrix   A,
*                                   INB_A > 0.
*  MB_A    (global) DESCA[ MB_    ] The blocking factor used to  distri-
*                                   bute the last  M_A-IMB_A  rows of A,
*                                   MB_A > 0.
*  NB_A    (global) DESCA[ NB_    ] The blocking factor used to  distri-
*                                   bute the last  N_A-INB_A  columns of
*                                   A, NB_A > 0.
*  RSRC_A  (global) DESCA[ RSRC_  ] The process row over which the first
*                                   row of the matrix  A is distributed,
*                                   NPROW > RSRC_A >= 0.
*  CSRC_A  (global) DESCA[ CSRC_  ] The  process column  over  which the
*                                   first column of  A  is  distributed.
*                                   NPCOL > CSRC_A >= 0.
*  LLD_A   (local)  DESCA[ LLD_   ] The  leading dimension  of the local
*                                   array  storing  the  local blocks of
*                                   the distributed matrix A,
*                                   IF( Lc( 1, N_A ) > 0 )
*                                      LLD_A >= MAX( 1, Lr( 1, M_A ) )
*                                   ELSE
*                                      LLD_A >= 1. 
*/
{
    MKL_INT myrow, mycol;
    MKL_INT iarow = 0, iacol = 0;
    MKL_INT ictxt = desca[CTXT_-1];
    MKL_INT m     = desca[M_-1];
    MKL_INT n     = desca[N_-1];
    MKL_INT mb    = desca[MB_-1];
    MKL_INT nb    = desca[NB_-1];
    MKL_INT lld   = desca[LLD_-1];
    MKL_INT i, j, ii, jj, il = 0, jl = 0;

    blacs_gridinfo(&ictxt, &nprow, &npcol, &myrow, &mycol);
    for (j = 0; j < n; j += nb) {
       for (i = 0; i < m; i += mb) {
          i++; j++;
          infog2l(&i, &j, desca, &nprow, &npcol, &myrow, &mycol, &il, &jl, &iarow, &iacol);
          i--; j--; il--; jl--;
          if (iarow==myrow && iacol==mycol) {
             for (jj = 0; jj <  min(nb, n - j); jj++) {
                for (ii = 0; ii < min(mb, m - i); ii++) {
                    if(type) {
                       a[il+ii+(jl+jj)*lld] = (double) dlarnd( &dist, seed );
                    } else {
                        if(i==j) a[il+ii+(jl+jj)*lld] = RONE;
                        else a[il+ii+(jl+jj)*lld] = RZERO;
                    }
                }
             }
          }
       }
    }
}

void psmatgen_random(MKL_INT type, MKL_INT* desca, float* a, MKL_INT*  seed,  MKL_INT nprow, MKL_INT npcol)
{
/* 
*  Purpose
*  =======
*
*  psmatgen_random: parallel real float precision matrix generator.
*  Generate a sub-matrix of A.
*
*  Arguments
*  =========
*
* type    (global input)
*          Type of global matrix to be generated.
*          If type==0, local matrices are parts of the matrix 
*          which possesses 1 as all elements along its leading (main) diagonal;
*          while all other elements in the matrix are 0. 
*          A random matrix is generated if type != 0.
*
*  desca   (global and local input) an array of (at least) length 9.
*          The array descriptor for the distributed matrix A.
*          See the notes below for the decription of each element.
*
*  a       (local output), pointer into the local
*          memory to a float array containing the
*          local pieces of the distributed matrix.
*
*  seed    (local input and output) integer array, dimension (4)
*          On entry, the seed of the random number generator.
*          On exit, the seed is updated.
*
*  nprow   (global input)
*          The number of process rows in the grid.
*
*  npcol   (global input)
*          The number of process columns in the grid.
*
*
*  Notes:
*  =====
*
*  Each global data object is described by an associated description
*  vector.  This vector stores the information required to establish
*  the mapping between an object element and its corresponding process
*  and memory location.
* 
* 
*  In  the  following  comments,   the character _  should  be  read  as
*  "of  the  distributed  matrix".  Let  A  be a generic term for any 2D
*  block cyclicly distributed matrix.  Its description vector is DESC_A:
*
*  NOTATION         STORED IN       EXPLANATION
*  ---------------- --------------- ------------------------------------
*  DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
*  CTXT_A  (global) DESCA[ CTXT_  ] The BLACS context handle, indicating
*                                   the NPROW x NPCOL BLACS process grid
*                                   A  is  distributed over. The context
*                                   itself  is  global,  but  the handle
*                                   (the integer value) may vary.
*  M_A     (global) DESCA[ M_     ] The  number of rows in the distribu-
*                                   ted matrix A, M_A >= 0.
*  N_A     (global) DESCA[ N_     ] The number of columns in the distri-
*                                   buted matrix A, N_A >= 0.
*  IMB_A   (global) DESCA[ IMB_   ] The number of rows of the upper left
*                                   block of the matrix A, IMB_A > 0.
*  INB_A   (global) DESCA[ INB_   ] The  number  of columns of the upper
*                                   left   block   of   the  matrix   A,
*                                   INB_A > 0.
*  MB_A    (global) DESCA[ MB_    ] The blocking factor used to  distri-
*                                   bute the last  M_A-IMB_A  rows of A,
*                                   MB_A > 0.
*  NB_A    (global) DESCA[ NB_    ] The blocking factor used to  distri-
*                                   bute the last  N_A-INB_A  columns of
*                                   A, NB_A > 0.
*  RSRC_A  (global) DESCA[ RSRC_  ] The process row over which the first
*                                   row of the matrix  A is distributed,
*                                   NPROW > RSRC_A >= 0.
*  CSRC_A  (global) DESCA[ CSRC_  ] The  process column  over  which the
*                                   first column of  A  is  distributed.
*                                   NPCOL > CSRC_A >= 0.
*  LLD_A   (local)  DESCA[ LLD_   ] The  leading dimension  of the local
*                                   array  storing  the  local blocks of
*                                   the distributed matrix A,
*                                   IF( Lc( 1, N_A ) > 0 )
*                                      LLD_A >= MAX( 1, Lr( 1, M_A ) )
*                                   ELSE
*                                      LLD_A >= 1.
*/

    MKL_INT myrow, mycol;
    MKL_INT iarow = 0, iacol = 0;
    MKL_INT ictxt = desca[CTXT_-1];
    MKL_INT m     = desca[M_-1];
    MKL_INT n     = desca[N_-1];
    MKL_INT mb    = desca[MB_-1];
    MKL_INT nb    = desca[NB_-1];
    MKL_INT lld   = desca[LLD_-1];
    MKL_INT i, j, ii, jj, il = 0, jl = 0;

    blacs_gridinfo(&ictxt, &nprow, &npcol, &myrow, &mycol);
    for (j = 0; j < n; j += nb) {
       for (i = 0; i < m; i += mb) {
          i++; j++;
          infog2l(&i, &j, desca, &nprow, &npcol, &myrow, &mycol, &il, &jl, &iarow, &iacol);
          i--; j--; il--; jl--;
          if ( iarow==myrow && iacol==mycol ) {
             for (jj = 0; jj <  min(nb, n - j); jj++) {
                for (ii = 0; ii < min(mb, m - i); ii++) {
                    if( type ) {
                       a[il+ii+(jl+jj)*lld] = (float) dlarnd( &dist, seed );
                    } else {
                        if(i==j) a[il+ii+(jl+jj)*lld] = RONE;
                        else a[il+ii+(jl+jj)*lld] = RZERO;
                    }
                }
             }
          }
       }
    }
}

void pzmatgen_random(MKL_INT type, MKL_INT* desca, MKL_Complex16* a, MKL_INT*  seed,  MKL_INT nprow, MKL_INT npcol)
{
/*
*  Purpose
*  =======
*
*  pzmatgen_random: parallel double complex precision matrix generator.
*  Generate a sub-matrix of A.
*
*  Arguments
*  =========
*
*  type    (global input)
*          Type of global matrix to be generated.
*          If type==0, local matrices are parts of the matrix 
*          which possesses 1 as all elements along its leading (main) diagonal;
*          while all other elements in the matrix are 0. 
*          Otherwise a random matrix is generated.

*  type    (global input)
*          Type of global matrix to be generated.
*          If type==0, local matrices are parts of the identity matrix. 
*          Otherwise a random matrix is generated.
*
*  desca   (global and local input) an array of (at least) length 9.
*          The array descriptor for the distributed matrix A.
*          See the notes below for the decription of each element.
*
*  a       (local output), pointer into the local
*          memory to a MKL_Complex16 array containing the
*          local pieces of the distributed matrix.
*
*  seed    (local input and output) integer array, dimension (4)
*          On entry, the seed of the random number generator.
*          On exit, the seed is updated.
* 
*  nprow   (global input)
*          The number of process rows in the grid.
*
*  npcol   (global input)
*          The number of process columns in the grid.
*
*  Notes:
*  =====
*
*  Each global data object is described by an associated description
*  vector.  This vector stores the information required to establish
*  the mapping between an object element and its corresponding process
*  and memory location.
*
*  In  the  following  comments,   the character _  should  be  read  as
*  "of  the  distributed  matrix".  Let  A  be a generic term for any 2D
*  block cyclicly distributed matrix.  Its description vector is DESC_A:
*
*  NOTATION         STORED IN       EXPLANATION
*  ---------------- --------------- ------------------------------------
*  DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
*  CTXT_A  (global) DESCA[ CTXT_  ] The BLACS context handle, indicating
*                                   the NPROW x NPCOL BLACS process grid
*                                   A  is  distributed over. The context
*                                   itself  is  global,  but  the handle
*                                   (the integer value) may vary.
*  M_A     (global) DESCA[ M_     ] The  number of rows in the distribu-
*                                   ted matrix A, M_A >= 0.
*  N_A     (global) DESCA[ N_     ] The number of columns in the distri-
*                                   buted matrix A, N_A >= 0.
*  IMB_A   (global) DESCA[ IMB_   ] The number of rows of the upper left
*                                   block of the matrix A, IMB_A > 0.
*  INB_A   (global) DESCA[ INB_   ] The  number  of columns of the upper
*                                   left   block   of   the  matrix   A,
*                                   INB_A > 0.
*  MB_A    (global) DESCA[ MB_    ] The blocking factor used to  distri-
*                                   bute the last  M_A-IMB_A  rows of A,
*                                   MB_A > 0.
*  NB_A    (global) DESCA[ NB_    ] The blocking factor used to  distri-
*                                   bute the last  N_A-INB_A  columns of
*                                   A, NB_A > 0.
*  RSRC_A  (global) DESCA[ RSRC_  ] The process row over which the first
*                                   row of the matrix  A is distributed,
*                                   NPROW > RSRC_A >= 0.
*  CSRC_A  (global) DESCA[ CSRC_  ] The  process column  over  which the
*                                   first column of  A  is  distributed.
*                                   NPCOL > CSRC_A >= 0.
*  LLD_A   (local)  DESCA[ LLD_   ] The  leading dimension  of the local
*                                   array  storing  the  local blocks of
*                                   the distributed matrix A,
*                                   IF( Lc( 1, N_A ) > 0 )
*                                      LLD_A >= MAX( 1, Lr( 1, M_A ) )
*                                   ELSE
*                                      LLD_A >= 1.
*/

    MKL_INT myrow, mycol;
    MKL_INT iarow = 0, iacol = 0;
    MKL_INT ictxt = desca[CTXT_-1];
    MKL_INT m     = desca[M_-1];
    MKL_INT n     = desca[N_-1];
    MKL_INT mb    = desca[MB_-1];
    MKL_INT nb    = desca[NB_-1];
    MKL_INT lld   = desca[LLD_-1];
    MKL_INT i, j, ii, jj, il = 0, jl = 0;

    blacs_gridinfo(&ictxt, &nprow, &npcol, &myrow, &mycol);
    for (j = 0; j < n; j += nb) {
       for (i = 0; i < m; i += mb) {
          i++; j++;
          infog2l(&i, &j, desca, &nprow, &npcol, &myrow, &mycol, &il, &jl, &iarow, &iacol);
          i--; j--; il--; jl--;
          if (iarow==myrow && iacol==mycol) {
             for (jj = 0; jj <  min(nb, n - j); jj++) {
                for (ii = 0; ii < min(mb, m - i); ii++) {
                    if( type ) {
                       a[il+ii+(jl+jj)*lld].real = (double) dlarnd( &dist, seed );
                       a[il+ii+(jl+jj)*lld].imag = (double) dlarnd( &dist, seed );
                    } else {
                        if( i==j ) {
                            a[il+ii+(jl+jj)*lld].real = RONE;
                            a[il+ii+(jl+jj)*lld].imag = RZERO;
                        }  else {
                            a[il+ii+(jl+jj)*lld].real = RZERO;
                            a[il+ii+(jl+jj)*lld].imag = RZERO;
                        }
                    }
                }
             }
          }
       }
    }
}
 
void pcmatgen_random(MKL_INT type, MKL_INT* desca, MKL_Complex8* a, MKL_INT*  seed,  MKL_INT nprow, MKL_INT npcol)
{
/*
*  Purpose
*  =======
*
*  pcmatgen_random: parallel single complex precision matrix generator.
*  Generate a sub-matrix of A.
*
*  Arguments
*  =========
*
*  type    (global input)
*          Type of global matrix to be generated.
*          If type==0, local matrices are parts of the matrix 
*          which possesses 1 as all elements along its leading (main) diagonal;
*          while all other elements in the matrix are 0. 
*          Otherwise a random matrix is generated.
*
*  desca   (global and local input) an array of (at least) length 9.
*          The array descriptor for the distributed matrix A.
*          See the notes below for the decription of each element.
*
*  a       (local output), pointer into the local
*          memory to a MKL_Complex8 array containing the
*          local pieces of the distributed matrix.
*
*  seed    (local input and output) integer array, dimension (4)
*          On entry, the seed of the random number generator.
*          On exit, the seed is updated.
*
*  nprow   (global input)
*          The number of process rows in the grid.
*
*  npcol   (global input)
*          The number of process columns in the grid.
*
*
*  Notes:
*  =====
*
*  Each global data object is described by an associated description
*  vector.  This vector stores the information required to establish
*  the mapping between an object element and its corresponding process
*  and memory location.
*
*
*  In  the  following  comments,   the character _  should  be  read  as
*  "of  the  distributed  matrix".  Let  A  be a generic term for any 2D
*  block cyclicly distributed matrix.  Its description vector is DESC_A:
*
*  NOTATION         STORED IN       EXPLANATION
*  ---------------- --------------- ------------------------------------
*  DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
*  CTXT_A  (global) DESCA[ CTXT_  ] The BLACS context handle, indicating
*                                   the NPROW x NPCOL BLACS process grid
*                                   A  is  distributed over. The context
*                                   itself  is  global,  but  the handle
*                                   (the integer value) may vary.
*  M_A     (global) DESCA[ M_     ] The  number of rows in the distribu-
*                                   ted matrix A, M_A >= 0.
*  N_A     (global) DESCA[ N_     ] The number of columns in the distri-
*                                   buted matrix A, N_A >= 0.
*  IMB_A   (global) DESCA[ IMB_   ] The number of rows of the upper left
*                                   block of the matrix A, IMB_A > 0.
*  INB_A   (global) DESCA[ INB_   ] The  number  of columns of the upper
*                                   left   block   of   the  matrix   A,
*                                   INB_A > 0.
*  MB_A    (global) DESCA[ MB_    ] The blocking factor used to  distri-
*                                   bute the last  M_A-IMB_A  rows of A,
*                                   MB_A > 0.
*  NB_A    (global) DESCA[ NB_    ] The blocking factor used to  distri-
*                                   bute the last  N_A-INB_A  columns of
*                                   A, NB_A > 0.
*  RSRC_A  (global) DESCA[ RSRC_  ] The process row over which the first
*                                   row of the matrix  A is distributed,
*                                   NPROW > RSRC_A >= 0.
*  CSRC_A  (global) DESCA[ CSRC_  ] The  process column  over  which the
*                                   first column of  A  is  distributed.
*                                   NPCOL > CSRC_A >= 0.
*  LLD_A   (local)  DESCA[ LLD_   ] The  leading dimension  of the local
*                                   array  storing  the  local blocks of
*                                   the distributed matrix A,
*                                   IF( Lc( 1, N_A ) > 0 )
*                                      LLD_A >= MAX( 1, Lr( 1, M_A ) )
*                                   ELSE
*                                      LLD_A >= 1.
*/
    MKL_INT myrow, mycol;
    MKL_INT iarow = 0, iacol = 0;
    MKL_INT ictxt = desca[CTXT_-1];
    MKL_INT m     = desca[M_-1];
    MKL_INT n     = desca[N_-1];
    MKL_INT mb    = desca[MB_-1];
    MKL_INT nb    = desca[NB_-1];
    MKL_INT lld   = desca[LLD_-1];
    MKL_INT i, j, ii, jj, il = 0, jl = 0;

    blacs_gridinfo(&ictxt, &nprow, &npcol, &myrow, &mycol);
    for (j = 0; j < n; j += nb) {
       for (i = 0; i < m; i += mb) {
          i++; j++;
          infog2l(&i, &j, desca, &nprow, &npcol, &myrow, &mycol, &il, &jl, &iarow, &iacol);
          i--; j--; il--; jl--;
          if (iarow==myrow && iacol==mycol) {
             for (jj = 0; jj <  min(nb, n - j); jj++) {
                for (ii = 0; ii < min(mb, m - i); ii++) {
                    if( type ) {
                       a[il+ii+(jl+jj)*lld].real = (float) dlarnd( &dist, seed );
                       a[il+ii+(jl+jj)*lld].imag = (float) dlarnd( &dist, seed );
                    } else {
                        if( i==j ) {
                            a[il+ii+(jl+jj)*lld].real = RONE;
                            a[il+ii+(jl+jj)*lld].imag = RZERO;
                        }  else {
                            a[il+ii+(jl+jj)*lld].real = RZERO;
                            a[il+ii+(jl+jj)*lld].imag = RZERO;
                        }
                    }
                }
             }
          }
       }
    }
}
