#include "tools.h"
/*
 * CpsagemvA
 *
 * Forms, on a 2D process grid,
 *
 *    Y0 := alpha * abs( sub(A) ) * abs( X0 ) + beta0 * Y0
 *
 * where sub(A) is the M x N piece of the distributed matrix A that starts
 * at global position (IA, JA).  Element-wise absolute values of A and X0 are
 * taken (via the ABS macro) before the product; beta0*Y0 is NOT made
 * absolute.
 *
 * Arguments
 *    M, N           rows / columns of sub(A).
 *    alpha          scalar applied to the abs matrix-vector product.
 *    A              local part of the distributed matrix.
 *    IA, JA         global row/column offset of sub(A) inside A.  They are
 *                   handed to the checkers as IA+1, JA+1, so they appear to
 *                   be zero-based here -- TODO confirm against callers.
 *    descA          descriptor of A; entries CTXT_, MB_, NB_, RSRC_, CSRC_
 *                   and LLD_ (index constants from tools.h) are read.
 *    X0, IX0, JX0,
 *    descX0, incX0  input vector (length N), its offsets, descriptor and
 *                   increment; X0 itself is only read through Cpscopy1.
 *    beta0          scalar applied to Y0 before the product is added.
 *    Y0, IY0, JY0,
 *    descY0, incY0  in/out vector (length M), its offsets, descriptor and
 *                   increment.
 *
 * Errors
 *    If pchkmat/pchkvec flag an argument, or the three descriptors do not
 *    share one BLACS context, pberror_ is called with routine name "PSAGEMV"
 *    and the routine returns without touching Y0.
 *
 * NOTE(review): this is an old-style (K&R) definition with float parameters.
 * Callers without a prototype pass those promoted to double, so turning this
 * into a prototyped definition would change the calling convention.
 */
void CpsagemvA(M, N, alpha, A, IA, JA, descA, X0, IX0, JX0, descX0, incX0, 
               beta0, Y0, IY0, JY0, descY0, incY0)
int M;
int N;
float alpha;
float *A;
int IA;
int JA;
int *descA;
float *X0;
int IX0;
int JX0;
int *descX0;
int incX0;
float beta0;
float *Y0;
int IY0;
int JY0;
int *descY0;
int  incY0;
/*
 *            ======(X)======
 *                   N
 *            ---------------
 *      ||   |               |          A - M x N
 *      ||   |               |          Y - M x 1
 *      ||   |               |          X - 1 x N
 *     (Y)   |      (A)      |  M
 *      ||   |               |          X will be replicated across all rows
 *      ||   |               |          Y will have space on all columns
 *      ||   |               |
 *      ||   |               |
 *            ---------------
 *
 * X and Y above are the internal work vectors (absX and Y below), laid out
 * to line up with A's column and row distribution respectively.
 */
{
/*
 * .. External routines ..
 */
   char *ptop();
   void pchkmat();
   void pchkvec();
   void pberror_();
   void Cinfog2l();
   int Cnumroc2();
   void Cblacs_gridinfo();
   void Csgebs2d();
   void Csgebr2d();
   void Csgsum2d();
   F_INTG_FCT sgemv_();
   void Cpsscal1();
   void Cpscopy1();
   void Cpsaxpy1();

/*
 * .. Local variables ..
 *
 * i, j, k, h are scratch integers that are reused for several purposes
 * (local indices, workspace sizes, loop counters, panel width).
 * beta is the LOCAL sgemv_ beta (not the caller's beta0): it starts at 0.0 so
 * the first panel overwrites y, and is switched to 1.0 so later panels
 * accumulate.
 */
   char *top;
   F_CHAR trans;
   int ctxt, nprow, npcol, myrow, mycol;
   int IX, JX, descX[DLEN_], incX, IY, JY, descY[DLEN_], incY;
   int i, j, k, h, arow, acol, LOCp, LOCq, nb, info=0, one=1;
   float *absX, *Y, *absA, *x, *y, *y0, *a, *aa, beta=0.0;

/*
 * Always the non-transposed product; fetch grid shape and our coordinates
 */
   trans = C2F_CHAR("n");
   ctxt = descA[CTXT_];
   Cblacs_gridinfo(ctxt, &nprow, &npcol, &myrow, &mycol);
/*
 * Argument checks.  The small integer constants (2, 3, 8, 12, 18) look like
 * argument positions used for error reporting -- presumably those of the
 * PSAGEMV interface named in the pberror_ call below; verify.
 */
   pchkmat(M, 2, N, 3, IA+1, JA+1, descA, 8, &info, nprow, npcol, myrow, mycol);
   pchkvec(N, 3, IX0+1, JX0+1, descX0, incX0, 12, &info, nprow, npcol,
           myrow, mycol);
   pchkvec(M, 2, IY0+1, JY0+1, descY0, incY0, 18, &info, nprow, npcol,
           myrow, mycol);
/*
 * All three operands must live in the same context.  An earlier error in
 * info is kept; otherwise the context entry of descX0 (12) or descY0 (18)
 * is flagged.
 */
   if (descA[CTXT_] != descX0[CTXT_])
   {
      if (info == 0) info = -(1200+CTXT_+1);
   }
   else if (descX0[CTXT_] != descY0[CTXT_])
   {
      if (info == 0) info = -(1800+CTXT_+1);
   }
   if (info)
   {
      pberror_(&ctxt, "PSAGEMV", &info);
      return;
   }
/*
 * Quick return, if possible: empty operand, or the update would leave Y0
 * unchanged (alpha == 0 and beta0 == 1)
 */
   if ( (M == 0) || (N == 0) || ((alpha == 0.0) && (beta0 == 1.0)) ) return;

/*
 * Scale Y0 by beta: Y0 = beta * Y0; this allows us to later add in
 * alpha*abs(A)*abs(x) to get Y0 = alpha*abs(A)*abs(x) + Y0*beta
 */
   Cpsscal1(M, beta0, Y0, IY0, JY0, descY0, incY0);
/*
 * Get local information about our matrix:
 *   (i, j)       local row/column index of the start of sub(A),
 *   (arow, acol) process coordinates reported by Cinfog2l for that start,
 *   a            pointer to the first local entry of sub(A),
 *   LOCp, LOCq   number of local rows / columns of sub(A) on this process.
 */
   Cinfog2l(IA, JA, descA, nprow, npcol, myrow, mycol, &i, &j, &arow, &acol);
   a = &A[ i+j*descA[LLD_] ];
   nb = descA[NB_];
   LOCp = Cnumroc2(M, IA, descA[MB_], myrow, descA[RSRC_], nprow);
   LOCq = Cnumroc2(N, JA, nb, mycol, descA[CSRC_], npcol);

/*
 * Allocate space for absX, Y, and absA in ONE buffer, laid out as
 *   absX : i = JA % nb + LOCq floats          (row work vector)
 *   Y    : j = IA % MB + LOCp floats          (column work vector)
 *   absA : nb * LOCp floats                   (one packed panel of abs(A))
 * The leading JA % nb / IA % MB slots leave room for the offset of sub(A)
 * inside its first block.
 *
 * NOTE(review): h receives whatever status Mmalloc reports and is not
 * examined here (it is reused as the panel width below) -- confirm that
 * Mmalloc deals with allocation failure itself.
 */
   i = JA % nb + LOCq;
   j = IA % descA[MB_] + LOCp;

   Mmalloc(absX, float, i+j+nb*LOCp, h, ctxt);
   Y = &absX[i];
   absA = &Y[j];
/*
 * Set up absX, a row vector aligned with A, copy X to it, take abs( X ),
 * and give all process rows a copy.  descX places the vector in the process
 * row that owns X0's starting row and starts it in process column acol.
 */
   IX = 0;
   JX = JA % nb;
   Mdescset(descX, 1, N + JX, 1, nb,
            MCindxg2p(IX0, descX0[MB_], descX0[RSRC_], nprow), acol, ctxt, 1);
   incX = 1;
   Cpscopy1(N, X0, IX0, JX0, descX0, incX0, absX, IX, JX, descX, incX);
/*
 * Set local pointer into absX: only the source process column carries the
 * JX leading offset
 */
   if (mycol == descX[CSRC_]) x = &absX[JX];
   else x = absX;

/*
 * The process row holding the copy takes absolute values in place and
 * broadcasts its LOCq local entries down each process column; every other
 * process row receives them.
 */
   top = ptop("B", "C", "!");
   if (myrow == descX[RSRC_])
   {
      for (i=0; i != LOCq; i++) x[i] = ABS( x[i] );
      Csgebs2d(ctxt, "c", top, LOCq, 1, x, LOCq);
   }
   else Csgebr2d(ctxt, "c", top, LOCq, 1, x, LOCq, descX[RSRC_], mycol);
/*
 * Set up Y, a column vector aligned with A.  It starts in process row arow
 * and is assigned to the process column that owns Y0's starting column.
 */
   IY = IA % descA[MB_];
   JY = 0;
   Mdescset(descY, M + IY, 1, descA[MB_], 1, arow,
            MCindxg2p(JY0, descY0[NB_], descY0[CSRC_], npcol), ctxt, LOCp+IY+1);
   incY = 1;
/*
 * Set local pointer into Y: only the source process row carries the IY
 * leading offset
 */
   if (myrow == descY[RSRC_]) y = &Y[IY];
   else y = Y;
/*
 * Figure local portion of abs matrix vector product by looping over NB wide
 * panels.  Processes with no local rows (LOCp == 0) skip both the compute
 * and the row-wise sum.
 */
   if (LOCp != 0)
   {
/*
 *    With local columns: accumulate panel by panel.  Without any local
 *    columns: contribute a zero vector to the sum below.
 */
      if (LOCq != 0)
      {
         j = 0;
         do
         {
/*
 *          h = width of this panel (the last one may be narrower than nb).
 *          The panel is packed into absA with leading dimension LOCp while
 *          a walks through A column by column (stride descA[LLD_]); a is
 *          never reset, so it carries over to the next panel.
 */
            aa = absA;
            h = MIN(nb, LOCq-j);
            for (k=0; k != h; k++)  /* set absA = abs( sub(A) ) */
            {
               for(i=0; i != LOCp; i++) aa[i] = ABS( a[i] );
               a += descA[LLD_];
               aa += LOCp;
            }
/*
 *          y = alpha * absA * x[j..j+h-1] + beta * y, with the local beta
 *          being 0.0 for the first panel and 1.0 afterwards
 */
            sgemv_(trans, &LOCp, &h, &alpha, absA, &LOCp, &x[j], &one, &beta,
                   y, &one);
            j += nb;
            beta = 1.0;
         }
         while(j < LOCq);
      }
      else for (i=0; i != LOCp; i++) y[i] = 0.0;
/*
 *    Figure global answer: sum the partial products across this process
 *    row, with process column descY[CSRC_] named as the destination
 */
      top = ptop("C", "R", "!");
      Csgsum2d(ctxt, "row", top, LOCp, 1, y, descY[LLD_], myrow, descY[CSRC_]);
   }
/*
 * Y contains ALPHA*abs(A)*abs(x). Y0 contains BETA*y.  Use psaxpy to set
 * Y0 = ALPHA*abs(A)*abs(x) + BETA*y
 */
   Cpsaxpy1(M, 1.0, Y, IY, JY, descY, incY, Y0, IY0, JY0, descY0, incY0);

/*
 * One free releases absX, Y and absA, which share the single allocation
 */
   if (absX) free(absX);
}
