/* Created RJudd August 30, 2002 */
/* SPAWARSYSCEN  */
/**********************************************************************
// For TASP VSIPL Documentation and Code neither the United States    /
// Government, the United States Navy, nor any of their employees,    /
// makes any warranty, express or implied, including the warranties   /
// of merchantability and fitness for a particular purpose, or        /
// assumes any legal liability or responsibility for the accuracy,    /
// completeness, or usefulness of any information, apparatus,         /
// product, or process disclosed, or represents that its use would    /
// not infringe privately owned rights                                /
**********************************************************************/
/* $Id: vsip_ccholsol_f.c,v 2.0 2003/02/22 15:18:40 judd Exp $ */

#include<vsip.h>
#include<vsip_blockattributes_f.h>
#include<vsip_cblockattributes_f.h>
#include<vsip_cmviewattributes_f.h>
#include<vsip_cvviewattributes_f.h>
#include<vsip_ccholdattributes_f.h>

/* View construction helpers.
 *
 * Each macro fills in the block, offset, length and stride fields of a vector
 * view (first argument, a pointer) so that it addresses part of the matrix
 * view A (a pointer).  Consecutive elements of a matrix row are row_stride
 * apart; consecutive elements of a matrix column are col_stride apart.
 *
 * The bodies are wrapped in do { } while (0) so that an invocation followed
 * by a semicolon is exactly one statement, which keeps the macros safe as the
 * unbraced body of an if/else.  Arguments other than the view pointers are
 * evaluated once; A is evaluated several times, so pass it without side
 * effects.
 */

/* a_row <- all of row i of A. */
#define VI_CMROWVIEW_F(a_row, A, i) do { \
          (a_row)->block  = (A)->block; \
          (a_row)->offset = (A)->offset + (i) * (A)->col_stride; \
          (a_row)->length = (A)->row_length; \
          (a_row)->stride = (A)->row_stride; } while (0)

/* a_row <- n elements of row i of A, starting at column j. */
#define VI_CMROWSUBVIEW_F(a_row, A, i,j,n) do { \
          (a_row)->block  = (A)->block; \
          (a_row)->offset = (A)->offset + (i) * (A)->col_stride + (j) * (A)->row_stride; \
          (a_row)->length = (n); \
          (a_row)->stride = (A)->row_stride; } while (0)

/* a_col <- n elements of column j of A, starting at row i. */
#define VI_CMCOLSUBVIEW_F(a_col, A, i,j,n) do { \
          (a_col)->block  = (A)->block; \
          (a_col)->offset = (A)->offset + (i) * (A)->col_stride + (j) * (A)->row_stride; \
          (a_col)->length = (n); \
          (a_col)->stride = (A)->col_stride; } while (0)


/* Back substitution with the conjugate transpose of a lower triangular
 * factor: B is overwritten, row by row from the bottom, with the solution X
 * of L^H X = B.
 *
 * The last row of B is scaled by the real part of the last diagonal element
 * of A.  Each earlier row r then has subtracted from it the conjugated
 * entries of column r of A below the diagonal multiplied by the already
 * solved rows r+1..M-1 of B, and is divided by the real part of A[r][r].
 * The imaginary part of the diagonal of A is never read.
 *
 * M is taken from A->row_length and K from B->row_length.  a_row, b_row and
 * b_col are scratch views: they are assigned before being read and are left
 * holding the last views built.
 */
static
void
VI_cback_spd_subh_f(
          const vsip_cmview_f *A,     /* Lower Triangular used; size M by M */
          vsip_cvview_f *a_row, /* scratch: column segment of A       */
          const vsip_cmview_f *B,     /* In/out; size M by K                */
          vsip_cvview_f *b_row, /* scratch: row of B being solved     */
          vsip_cvview_f *b_col) /* scratch: solved column part of B   */
{
   const vsip_length order = A->row_length;
   const vsip_length n_rhs = B->row_length;
   vsip_index k,col;

   /* Bottom row: nothing to eliminate, only divide by the pivot. */
   VI_CMROWVIEW_F(b_row, B, order - 1);
   {
      const vsip_offset diag = A->block->cstride * (A->offset + (order - 1) * (A->row_stride + A->col_stride));
      const vsip_scalar_f pivot = A->block->R->array[diag];
      const vsip_stride step = b_row->stride * b_row->block->cstride;
      vsip_scalar_f *x_re = b_row->block->R->array + b_row->offset * b_row->block->cstride;
      vsip_scalar_f *x_im = b_row->block->I->array + b_row->offset * b_row->block->cstride;
      vsip_length left;
      for(left = b_row->length; left > 0; left--){
         *x_re /= pivot;
         *x_im /= pivot;
         x_re += step;
         x_im += step;
      }
   }

   /* Remaining rows, walking upward; k rows are already solved. */
   for(k = 1; k < order; k++){
      const vsip_index row = order - 1 - k;
      const vsip_offset diag = A->block->cstride * (A->offset + row * (A->row_stride + A->col_stride));
      const vsip_scalar_f pivot = A->block->R->array[diag];
      VI_CMROWVIEW_F(b_row, B, row);
      VI_CMCOLSUBVIEW_F(a_row, A, row + 1, row, k);
      for(col = 0; col < n_rhs; col++){
         VI_CMCOLSUBVIEW_F(b_col, B, row + 1, col, k);
         {
            const vsip_stride l_step = a_row->stride * a_row->block->cstride;
            const vsip_stride x_step = b_col->stride * b_col->block->cstride;
            const vsip_offset l_first = a_row->offset * a_row->block->cstride;
            const vsip_offset x_first = b_col->offset * b_col->block->cstride;
            const vsip_offset target = (b_row->offset + col * b_row->stride) * b_row->block->cstride;
            const vsip_scalar_f *l_re = a_row->block->R->array + l_first;
            const vsip_scalar_f *l_im = a_row->block->I->array + l_first;
            const vsip_scalar_f *x_re = b_col->block->R->array + x_first;
            const vsip_scalar_f *x_im = b_col->block->I->array + x_first;
            vsip_scalar_f sum_re = 0;
            vsip_scalar_f sum_im = 0;
            vsip_length left;
            /* sum of conj(a) * b over the solved part of the column */
            for(left = a_row->length; left > 0; left--){
               sum_re += (*l_re * *x_re + *l_im * *x_im);
               sum_im += (*l_re * *x_im - *l_im * *x_re);
               l_re += l_step; l_im += l_step;
               x_re += x_step; x_im += x_step;
            }
            b_row->block->R->array[target] = (b_row->block->R->array[target] - sum_re)/pivot;
            b_row->block->I->array[target] = (b_row->block->I->array[target] - sum_im)/pivot;
         }
      }
   }
}

/* Back substitution with an upper triangular factor: B is overwritten, row
 * by row from the bottom, with the solution X of R X = B.
 *
 * The last row of B is scaled by the real part of the last diagonal element
 * of A.  Each earlier row r then has subtracted from it the entries of row r
 * of A to the right of the diagonal multiplied (without conjugation) by the
 * already solved rows r+1..M-1 of B, and is divided by the real part of
 * A[r][r].  The imaginary part of the diagonal of A is never read.
 *
 * M is taken from A->row_length and K from B->row_length.  a_row, b_row and
 * b_col are scratch views: they are assigned before being read and are left
 * holding the last views built.
 */
static
void
VI_cback_spd_sub_f(
          const vsip_cmview_f *A,     /* Upper triangle is read; size M by M */
          vsip_cvview_f *a_row, /* scratch: row segment of A           */
          const vsip_cmview_f *B,     /* In/out; size M by K                 */
          vsip_cvview_f *b_row, /* scratch: row of B being solved      */
          vsip_cvview_f *b_col) /* scratch: solved column part of B    */
{
   const vsip_length order = A->row_length;
   const vsip_length n_rhs = B->row_length;
   vsip_index k,col;

   /* Bottom row: nothing to eliminate, only divide by the pivot. */
   VI_CMROWVIEW_F(b_row, B, order - 1);
   {
      const vsip_offset diag = A->block->cstride * (A->offset + (order - 1) * (A->row_stride + A->col_stride));
      const vsip_scalar_f pivot = A->block->R->array[diag];
      const vsip_stride step = b_row->stride * b_row->block->cstride;
      vsip_scalar_f *x_re = b_row->block->R->array + b_row->offset * b_row->block->cstride;
      vsip_scalar_f *x_im = b_row->block->I->array + b_row->offset * b_row->block->cstride;
      vsip_length left;
      for(left = b_row->length; left > 0; left--){
         *x_re /= pivot;
         *x_im /= pivot;
         x_re += step;
         x_im += step;
      }
   }

   /* Remaining rows, walking upward; k rows are already solved. */
   for(k = 1; k < order; k++){
      const vsip_index row = order - 1 - k;
      const vsip_offset diag = A->block->cstride * (A->offset + row * (A->row_stride + A->col_stride));
      const vsip_scalar_f pivot = A->block->R->array[diag];
      VI_CMROWVIEW_F(b_row, B, row);
      VI_CMROWSUBVIEW_F(a_row, A, row, row + 1, k);
      for(col = 0; col < n_rhs; col++){
         VI_CMCOLSUBVIEW_F(b_col, B, row + 1, col, k);
         {
            const vsip_stride r_step = a_row->stride * a_row->block->cstride;
            const vsip_stride x_step = b_col->stride * b_col->block->cstride;
            const vsip_offset r_first = a_row->offset * a_row->block->cstride;
            const vsip_offset x_first = b_col->offset * b_col->block->cstride;
            const vsip_offset target = (b_row->offset + col * b_row->stride) * b_row->block->cstride;
            const vsip_scalar_f *r_re = a_row->block->R->array + r_first;
            const vsip_scalar_f *r_im = a_row->block->I->array + r_first;
            const vsip_scalar_f *x_re = b_col->block->R->array + x_first;
            const vsip_scalar_f *x_im = b_col->block->I->array + x_first;
            vsip_scalar_f sum_re = 0;
            vsip_scalar_f sum_im = 0;
            vsip_length left;
            /* sum of a * b over the solved part of the column */
            for(left = a_row->length; left > 0; left--){
               sum_re += (*r_re * *x_re - *r_im * *x_im);
               sum_im += (*r_re * *x_im + *r_im * *x_re);
               r_re += r_step; r_im += r_step;
               x_re += x_step; x_im += x_step;
            }
            b_row->block->R->array[target] = (b_row->block->R->array[target] - sum_re)/pivot;
            b_row->block->I->array[target] = (b_row->block->I->array[target] - sum_im)/pivot;
         }
      }
   }
}



/* Complex forward substitution with the conjugate transpose of an upper
 * triangular factor R, for a Hermitian positive definite problem: B is
 * overwritten, row by row from the top, with the solution Y of R^H Y = B.
 *
 * Row 0 of B is scaled by the real part of A[0][0].  Each later row r then
 * has subtracted from it the conjugated entries of column r of A above the
 * diagonal multiplied by the already solved rows 0..r-1 of B, and is divided
 * by the real part of A[r][r].
 *
 * Note that only the real part of the diagonal of A is read, so the diagonal
 * must have zero imaginary part.
 *
 * M is taken from A->row_length and K from B->row_length.  a_row, b_row and
 * b_col are scratch views: they are assigned before being read and are left
 * holding the last views built.
 */
static
void
VI_cfwd_spd_subh_f(
          const vsip_cmview_f *A,     /* Upper triangle is read; size M by M */
          vsip_cvview_f *a_row, /* scratch: column segment of A        */
          const vsip_cmview_f *B,     /* In/out; size M by K                 */
          vsip_cvview_f *b_row, /* scratch: row of B being solved      */
          vsip_cvview_f *b_col) /* scratch: solved column part of B    */
{
   const vsip_length order = A->row_length;
   const vsip_length n_rhs = B->row_length;
   vsip_index row,col;

   /* Top row: nothing to eliminate, only divide by the pivot. */
   VI_CMROWVIEW_F(b_row, B, 0);
   {
      const vsip_scalar_f pivot = A->block->R->array[A->offset * A->block->cstride];
      const vsip_stride step = b_row->stride * b_row->block->cstride;
      vsip_scalar_f *y_re = b_row->block->R->array + b_row->offset * b_row->block->cstride;
      vsip_scalar_f *y_im = b_row->block->I->array + b_row->offset * b_row->block->cstride;
      vsip_length left;
      for(left = b_row->length; left > 0; left--){
         *y_re /= pivot;
         *y_im /= pivot;
         y_re += step;
         y_im += step;
      }
   }

   /* Remaining rows, walking downward; rows 0..row-1 are already solved. */
   for(row = 1; row < order; row++){
      const vsip_scalar_f pivot = A->block->R->array[(A->offset + row * (A->row_stride + A->col_stride)) * A->block->cstride];
      VI_CMROWVIEW_F(b_row, B, row);
      VI_CMCOLSUBVIEW_F(a_row, A, 0, row, row);
      for(col = 0; col < n_rhs; col++){
         VI_CMCOLSUBVIEW_F(b_col, B, 0, col, row);
         {
            const vsip_stride r_step = a_row->stride * a_row->block->cstride;
            const vsip_stride y_step = b_col->stride * b_col->block->cstride;
            const vsip_offset r_first = a_row->offset * a_row->block->cstride;
            const vsip_offset y_first = b_col->offset * b_col->block->cstride;
            const vsip_offset target = (b_row->offset + col * b_row->stride) * b_row->block->cstride;
            const vsip_scalar_f *r_re = a_row->block->R->array + r_first;
            const vsip_scalar_f *r_im = a_row->block->I->array + r_first;
            const vsip_scalar_f *y_re = b_col->block->R->array + y_first;
            const vsip_scalar_f *y_im = b_col->block->I->array + y_first;
            vsip_scalar_f sum_re = 0;
            vsip_scalar_f sum_im = 0;
            vsip_length left;
            /* sum of conj(a) * b over the solved part of the column */
            for(left = a_row->length; left > 0; left--){
               sum_re += (*r_re * *y_re + *r_im * *y_im);
               sum_im += (*r_re * *y_im - *r_im * *y_re);
               r_re += r_step; r_im += r_step;
               y_re += y_step; y_im += y_step;
            }
            b_row->block->R->array[target] = (b_row->block->R->array[target] - sum_re)/pivot;
            b_row->block->I->array[target] = (b_row->block->I->array[target] - sum_im)/pivot;
         }
      }
   }
}

/* Forward substitution with a lower triangular factor: B is overwritten, row
 * by row from the top, with the solution Y of L Y = B.
 *
 * Row 0 of B is scaled by the real part of A[0][0].  Each later row r then
 * has subtracted from it the entries of row r of A to the left of the
 * diagonal multiplied (without conjugation) by the already solved rows
 * 0..r-1 of B, and is divided by the real part of A[r][r].  The imaginary
 * part of the diagonal of A is never read.
 *
 * M is taken from A->row_length and K from B->row_length.  a_row, b_row and
 * b_col are scratch views: they are assigned before being read and are left
 * holding the last views built.
 */
static
void
VI_cfwd_spd_sub_f(
          const vsip_cmview_f *A,     /* Lower Triangular used; size M by M */
          vsip_cvview_f *a_row, /* scratch: row segment of A          */
          const vsip_cmview_f *B,     /* In/out; size M by K                */
          vsip_cvview_f *b_row, /* scratch: row of B being solved     */
          vsip_cvview_f *b_col) /* scratch: solved column part of B   */
{
   const vsip_length order = A->row_length;
   const vsip_length n_rhs = B->row_length;
   vsip_index row,col;

   /* Top row: nothing to eliminate, only divide by the pivot. */
   VI_CMROWVIEW_F(b_row, B, 0);
   {
      const vsip_scalar_f pivot = A->block->R->array[A->offset * A->block->cstride];
      const vsip_stride step = b_row->stride * b_row->block->cstride;
      vsip_scalar_f *y_re = b_row->block->R->array + b_row->offset * b_row->block->cstride;
      vsip_scalar_f *y_im = b_row->block->I->array + b_row->offset * b_row->block->cstride;
      vsip_length left;
      for(left = b_row->length; left > 0; left--){
         *y_re /= pivot;
         *y_im /= pivot;
         y_re += step;
         y_im += step;
      }
   }

   /* Remaining rows, walking downward; rows 0..row-1 are already solved. */
   for(row = 1; row < order; row++){
      const vsip_scalar_f pivot = A->block->R->array[(A->offset + row * (A->row_stride + A->col_stride)) * A->block->cstride];
      VI_CMROWVIEW_F(b_row, B, row);
      VI_CMROWSUBVIEW_F(a_row, A, row, 0, row);
      for(col = 0; col < n_rhs; col++){
         VI_CMCOLSUBVIEW_F(b_col, B, 0, col, row);
         {
            const vsip_stride l_step = a_row->stride * a_row->block->cstride;
            const vsip_stride y_step = b_col->stride * b_col->block->cstride;
            const vsip_offset l_first = a_row->offset * a_row->block->cstride;
            const vsip_offset y_first = b_col->offset * b_col->block->cstride;
            const vsip_offset target = (b_row->offset + col * b_row->stride) * b_row->block->cstride;
            const vsip_scalar_f *l_re = a_row->block->R->array + l_first;
            const vsip_scalar_f *l_im = a_row->block->I->array + l_first;
            const vsip_scalar_f *y_re = b_col->block->R->array + y_first;
            const vsip_scalar_f *y_im = b_col->block->I->array + y_first;
            vsip_scalar_f sum_re = 0;
            vsip_scalar_f sum_im = 0;
            vsip_length left;
            /* sum of a * b over the solved part of the column */
            for(left = a_row->length; left > 0; left--){
               sum_re += (*l_re * *y_re - *l_im * *y_im);
               sum_im += (*l_re * *y_im + *l_im * *y_re);
               l_re += l_step; l_im += l_step;
               y_re += y_step; y_im += y_step;
            }
            b_row->block->R->array[target] = (b_row->block->R->array[target] - sum_re)/pivot;
            b_row->block->I->array[target] = (b_row->block->I->array[target] - sum_im)/pivot;
         }
      }
   }
}



/* Solve in place using a previously computed complex Cholesky factor.
 *
 * XB holds the right-hand sides on entry and is overwritten by the two
 * triangular substitutions:
 *   - chol->uplo == VSIP_TR_UPP: forward substitution with the conjugate
 *     transpose of the stored factor, then back substitution with the factor;
 *   - otherwise (taken to be VSIP_TR_LOW): forward substitution with the
 *     stored factor, then back substitution with its conjugate transpose.
 *
 * Always returns 0.
 */
int
vsip_ccholsol_f(
          const vsip_cchol_f *chol,
          const vsip_cmview_f *XB)
{
   /* Scratch vector views handed to the substitution routines. */
   vsip_cvview_f fac_seg,xb_row,xb_col;
   const vsip_cmview_f *factor = chol->matrix;

   if(chol->uplo != VSIP_TR_UPP){ /* must be VSIP_TR_LOW */
      VI_cfwd_spd_sub_f(factor,&fac_seg,XB,&xb_row,&xb_col);
      VI_cback_spd_subh_f(factor,&fac_seg,XB,&xb_row,&xb_col);
      return 0;
   }

   VI_cfwd_spd_subh_f(factor,&fac_seg,XB,&xb_row,&xb_col);
   VI_cback_spd_sub_f(factor,&fac_seg,XB,&xb_row,&xb_col);
   return 0;
}  
