Information Retrieval

Development Platform:
Unix_Linux

blas.cc：Code Content
							/**************************************************************
 *                                                            *
 * multiplication of matrix A by vector x, where A is m by n  *
 * (m >> n) and is stored using the Harwell-Boeing compressed *
 * column sparse matrix format.  y stores product vector.     *
 *                                                            *
 **************************************************************/
void opa(long m, long n, double *x, double *y)
{
   long end,i,j;
   mxvcount += 1;
   for (i = 0; i < m; i++) y[i] = ZERO;
   for (i = 0; i < n; i++) {
      end = pointr[i+1];
      for (j = pointr[i]; j < end; j++)
	 y[rowind[j]] += value[j] * x[i]; 
   }
   return;
}
/**************************************************************
 *                                                            *
 * multiplication of an n by m matrix A' by a vector X, store *
 * in Y.                                                      *
 *                                                            *
 **************************************************************/
void opat(long n, double *x, double *y)
{
   long end,i,j;
   
   mtxvcount += 1;
   for (i = 0; i < n; i++) y[i] = ZERO;
   for (i = 0; i < n; i++) {
      end = pointr[i+1];
      for (j = pointr[i]; j < end; j++)
	 y[i] += value[j] * x[rowind[j]]; 
   }
   return;
}
double fsign(double a,double b)
  /************************************************************** 
   * returns |a| if b is positive; else fsign returns -|a|      *
   **************************************************************/ 
{
  
  if ((a>=0.0 && b>=0.0) || (a<0.0 && b<0.0))return(a);
  if  ((a<0.0 && b>=0.0) || (a>=0.0 && b<0.0))return(-a);
}
double dmax(double a, double b)
  /************************************************************** 
   * returns the larger of two double precision numbers         *
   **************************************************************/ 
{
  
  if (a > b) return(a);
  else return(b);
}
double dmin(double a, double b)
  /************************************************************** 
   * returns the smaller of two double precision numbers        *
   **************************************************************/ 
{
  
  if (a < b) return(a);
  else return(b);
}
long imin(long a, long b)
  /************************************************************** 
   * returns the smaller of two integers                        *
   **************************************************************/ 
{
  
  if (a < b) return(a);
  else return(b);
}
long imax(long a,long b)
  /************************************************************** 
   * returns the larger of two integers                         *
   **************************************************************/ 
{
  
  if (a > b) return(a);
  else return(b);
}
 
/***********************************************************************
 *                                                                     *
 *                        orthg()                                      *
 *         Gram-Schmidt orthogonalization procedure                    *
 *                                                                     *
 ***********************************************************************/
/***********************************************************************
   Description
   -----------
   The p by n matrix Z stored row-wise in rows f to (f+p-1) of
   array X is reorthogonalized w.r.t. the first f rows of array X.
   The resulting matrix Z is then factored into the product of a
   p by n orthonormal matrix (stored over matrix Z) and a p by p
   upper-triangular matrix (stored in the first p rows and columns 
   of array B).  (Based on orthog from Rutishauser) 
   Parameters
   ----------
   (input)
   p           number of consecutive vectors of array x (stored row-wise)
	       to be orthogonalized
   f           number of rows against which the next p rows are to be
	       orthogonalized
   n           column dimension of x
   x           2-dimensional array whose p rows are to be orthogonalized
	       against its first f rows
   temp        work array
   (output)
   x           output matrix whose f+p rows are orthonormalized
   b           p by p upper-triangular matrix
   Functions called
   --------------
   BLAS         dgemv, ddot, dscal, daxpy, dcopy
 ***********************************************************************/
void orthg(long p, long f, long n, double **b, double **x, double *temp)
{
   long fp, k, km1;
   long orig, small;
   double t, s;
   if (!p) return;
   if (f == 0 && p > n) {
      fprintf(stderr,"%sn",
         "*** ON ENTRY TO ORTHG, MATRIX TO BE ORTHONORMALIZED IS SINGULAR");
      return;
   }
   fp = f + p;
   for (k = f; k < fp; k++) {
      km1 = k - 1;
      orig = TRUE;
      while(TRUE) {
         t = ZERO;
	 if (km1 >= 0) {
	    if (km1 > 0) {
	       dgemv(NTRANSP, k, n, ONE, x, x[k], ZERO, temp);
	       t += ddot(k, temp, 1, temp, 1);
	    }
	    else {
	       temp[0] = ddot(n, x[0], 1, x[k], 1);
	       t += temp[0] * temp[0];
	    }
	    if (orig && km1 >= f) 
               dcopy(k - f, &temp[f], 1, &b[k - f][0], 1); 
            if (km1 > 0) 
	       dgemv(TRANSP, k, n, -ONE, x, temp, ONE, &x[k][0]);
            else
	       daxpy(n, -temp[0], x[0], 1, x[k], 1);
         }
	 if (km1 < 0 || p != 1) {
	    s = ddot(n, x[k], 1, x[k], 1);
	    t += s;
	    if (s > t/CONST) {
	       small = FALSE;
	       s = sqrt(s);
               b[k - f][k - f] = s;
	       if (s != ZERO) s = ONE/s;
	       dscal(n, s, x[k], 1);
	    }
	    else {
	       small = TRUE;
	       orig  = FALSE;
	    }
	 }
	 if (small == FALSE || p == 1) break;
      }
   }
}
/***********************************************************************
 *                                                                     *
 *                         dgemv()                                     *
 * A C-translation of the level 2 BLAS routine DGEMV by Dongarra,      *
 * du Croz, and Hammarling, and Hanson (see LAPACK Users' Guide).      *
 *                                                                     *
 ***********************************************************************/
/***********************************************************************
   Description
   -----------
   dgemv() performs one of the matrix-vector operations
   y := alpha * A * x + beta * y  or  y := alpha * A' * x + beta * y
   where alpha and beta are scalars, X, Y are vectors and A is an
   m by n matrix.
void dgemv(long transa, long m, long n, 
           double alpha, double **a, double *x, double beta, double *y)
   Parameters
   ----------
   (input)
   transa   TRANSP indicates op(A) = A' is to be used in the multiplication
	    NTRANSP indicates op(A) = A is to be used in the multiplication
   m        on entry, m specifies the number of rows of the matrix A.
	    m must be at least zero.  Unchanged upon exit.
   n        on entry, n specifies the number of columns of the matrix A.
	    n must be at least zero.  Unchanged upon exit.
   alpha    a scalar multiplier.  Unchanged upon exit.
   a        matrix A as a 2-dimensional array.  Before entry, the leading
	    m by n part of the array a must contain the matrix A.
   x        linear array of dimension of at least n if transa = NTRANSP
	    and at least m otherwise.
   beta     a scalar multiplier.  When beta is supplied as zero then y
	    need not be set on input.  Unchanged upon exit.
   y        linear array of dimension of at least m if transa = NTRANSP
	    and at leat n otherwise.  Before entry with beta nonzero,
	    the array y must contain the vector y.  On exit, y is 
	    overwritten by the updated vector y.
 ***********************************************************************/
void dgemv(long transa, long m, long n, 
           double alpha, double **a, double *x, double beta, double *y)
{
   long info, leny, i, j;
   double temp, *ptrtemp;
   info = 0;
   if      ( transa != TRANSP && transa != NTRANSP ) info = 1;
   else if ( m < 0 ) 				     info = 2;
   else if ( n < 0 )				     info = 3;
   if (info) {
      fprintf(stderr, "%s %1ld %sn",
      "*** ON ENTRY TO DGEMV, PARAMETER NUMBER",info,"HAD AN ILLEGAL VALUE");
      return;
      //throw new svdpack_error(info);
   }
   if (transa) leny = n;
   else        leny = m;
   if (!m || !n || (alpha == ZERO && beta == ONE))
      return;
   ptrtemp = y; 
   /* form Y := beta * Y */
   if (beta == ZERO) 
      for (i = 0; i < leny; i++) *ptrtemp++ = ZERO;
   else if (beta != ONE) 
      for (i = 0; i < leny; i++) *ptrtemp++ *= beta;
   if (alpha == ZERO) return;
   switch(transa) {
      /* form Y := alpha * A * X + Y */
      case NTRANSP:  for(i = 0; i < m; i++) {
                        ptrtemp = *a++;
		        temp = ZERO;
		        for(j = 0; j < n; j++) 
			   temp += *ptrtemp++ * x[j];
			y[i] += alpha * temp;
		     }
		     break;
		     
      /* form Y := alpha * A' * X + Y */
      case TRANSP:   for(i = 0; i < m; i++) { 
                        ptrtemp = *a++;
			if (x[i] != ZERO) {
			   temp = alpha * x[i];
			   for(j = 0; j < n; j++)
			      y[j] += temp * (*ptrtemp++);
			}
		     }
		     break;
   }
}
/***********************************************************************
 *                                                                     *
 *                         dgemm()                                     *
 *                                                                     *
 * A C-translation of the level 3 BLAS routine DGEMM by Dongarra,      *
 * Duff, du Croz, and Hammarling (see LAPACK Users' Guide).            *
 * In this version, two of the three arrays which store the matrices   *
 * used in this matrix-matrix multiplication are accessed as linear    *
 * arrays.                                                             *
 *                                                                     *
 ***********************************************************************/
/***********************************************************************
   Description
   -----------
   dgemm() performs one of the matrix-matrix operations
	      C := alpha * op(A) * op(B) + beta * C,
   where op(X) = X or op(X) = X', alpha and beta are scalars, and A, B
   and C are matrices, with op(A) an m by k matrix, op(B) a k by n
   matrix and C an m by n matrix.
   Note that the arrays storing matrices B and C are linear arrays while
   the array of A is two-dimensional.
   Parameters
   ----------
   (input)
   transa   TRANSP indicates op(A) = A' is to be used in the multiplication
	    NTRANSP indicates op(A) = A is to be used in the multiplication
   transb   TRANSP indicates op(B) = B' is to be used in the multiplication
	    NTRANSP indicates op(B) = B is to be used in the multiplication
   m        on entry, m specifies the number of rows of the matrix op(A)
	    and of the matrix C.  m must be at least zero.  Unchanged
	    upon exit.
   n        on entry, n specifies the number of columns of the matrix op(B)
	    and of the matrix C.  n must be at least zero.  Unchanged
	    upon exit.
   k        on entry, k specifies the number of columns of the matrix op(A)
	    and the number of rows of the matrix B.  k must be at least 
	    zero.  Unchanged upon exit.
   alpha    a scalar multiplier
   a        matrix A as a 2-dimensional array.  When transa = NTRANSP, the
	    first k columns of the first m rows must contain the matrix A.
	    Otherwise, the first m columns of the first k rows must contain
	    the matrix A.
   b        matrix B as a linear array.  The leading (k * n) elements of
	    b must contain the matrix B.
   beta     a scalar multiplier.  When beta is supplied as zero then C
	    need not be set on input.
   c        matrix C as a linear array.  Before entry, the leading (m * n)
	    elements of c must contain the matrix C except when beta = 0.
	    In this case, c need not be set on entry.
	    On exit, c is overwritten by the (m * n) elements of matrix
	    (alpha * op(A) * op(B) + beta * C).
 ***********************************************************************/
void dgemm(long transa, long transb, long m, long n, long k, 
           double alpha, double **a, double *b, double beta, double *c)
{
   long info;
   long i, j, l, nrowa, ncola, nrowb, ncolb, nc;
   double temp, *atemp, *btemp1, *ptrtemp, *ctemp;
   info = 0;
   if      ( transa != TRANSP && transa != NTRANSP ) info = 1;
   else if ( transb != TRANSP && transb != NTRANSP ) info = 2;
   else if ( m < 0 ) 				     info = 3;
   else if ( n < 0 )				     info = 4;
   else if ( k < 0 )        			     info = 5;
   if (info) {
      fprintf(stderr, "%s %1ld %sn",
      "*** ON ENTRY TO DGEMM, PARAMETER NUMBER",info,"HAD AN ILLEGAL VALUE");
      return; //exit(info);
   }
   if (transa) {
      nrowa = k;
      ncola = m;
   }
   else { 
      nrowa = m;
      ncola = k;
   }
   if (transb) {
      nrowb = n;
      ncolb = k;
   }
   else {
      nrowb = k;
      ncolb = n;
   }
   nc = m * n;
   if (!m || !n || ((alpha == ZERO || !k) && beta == ONE))
      return;
   ctemp = c; 
   if (alpha == ZERO) {
      if (beta == ZERO)
         for (i = 0; i < nc; i++) *ctemp++ = ZERO;
      else if (beta != ONE)
         for (i = 0; i < nc; i++) *ctemp++ *= beta;
      return;
   }
   if (beta == ZERO) 
      for (i = 0; i < nc; i++) *ctemp++ = ZERO;
   else if (beta != ONE) 
      for (i = 0; i < nc; i++) *ctemp++ *= beta;
   if (!transb) { 
      switch(transa) {
	 /* form C := alpha * A * B + beta * C */
	 case NTRANSP:  ptrtemp = c;
		        for(l = 0; l < nrowa; l++) {
                           atemp = *a++;
      	       		   btemp1 = b;
      		  	   for(j = 0; j < ncola; j++) {
	 	     	      temp = *atemp * alpha;
	             	      ctemp = ptrtemp;
         	     	      for(i = 0; i < ncolb; i++) 
	    			 (*ctemp++) += temp * (*btemp1++);
	 	     	      atemp++;
      		  	   }
   	       		   ptrtemp = ctemp;
   	    	        }
			break;
	 /* form C := alpha * A' * B + beta * C */
	 case TRANSP:   ptrtemp = b;
	                for(l = 0; l < nrowa; l++) {
                           atemp = *a++;
      	       		   ctemp = c;
      		  	   for(j = 0; j < ncola; j++) {
	 	     	      temp = *atemp * alpha;
	             	      btemp1 = ptrtemp;
         	     	      for(i = 0; i < ncolb; i++) 
	    			 (*ctemp++) += temp * (*btemp1++);
	 	     	      atemp++;
      		  	   }
   	       		   ptrtemp = btemp1;
   	    		}
			break;
      }
   }
   else { 
      ctemp = c;
      switch(transa) {
	 /* form C := alpha * A * B' + beta * C */
	 case NTRANSP: for(l = 0; l < nrowa; l++) {
      	       		   btemp1 = b;
      		  	   for(j = 0; j < nrowb; j++) {
	 	     	      atemp = *a;
         	     	      for(i = 0; i < ncolb; i++) 
	    			 *ctemp += (*atemp++) * alpha * (*btemp1++);
	 	     	      ctemp++;
      		  	   }
			   a++;
   	    		}
			break;
	 /* form C := alpha * A' * B' + beta * C */
	 case TRANSP:   for(i = 0; i < ncola; i++) {
			   btemp1 = b;
			   for (l = 0; l < nrowb; l++) {
      	       		      temp = ZERO;
	 		      for(j = 0; j < nrowa; j++) 
			         temp += a[j][i] * (*btemp1++);
	    		      *ctemp++ += alpha * temp;
			   }
   	    		}
			break;
      }
   }
}
/***********************************************************************
 *                                                                     *
 *                        enorm()                                      *
 *  a C translation of the Fortran-77 version by Burton, Garbow,       *
 *  Hillstrom and More of Argonne National Laboratory.                 *
 *                                                                     *
 ***********************************************************************/
/***********************************************************************
   Description
   -----------
   given an n-vector x, this function calculates the Euclidean norm of x.
   The Euclidean norm is computed by accumulating the sum of squares in 
   three different sums.  The sums of squares for the small and large 
   components are scaled so that no overflows occur.  Non-destructive 
   underflows are permitted.  Underflows and overflows do not occur in the 
   computation of the unscaled sum of squares for the intermediate components. 
   The definitions of small, intermediate and large components depend on two
   constants, rdwarf and rgiant.  The restrictions on these constants are 
   that rdwarf**2 not underflow and rgiant**2 not overflow.  The constants
   given here are suitable for every known computer.
   The function returns the Euclidean norm of vector x in double precision.
   Parameters
   ----------
   n         number of elements in vector x
   x         linear array of vector x whose Euclidean norm is to be 
		calculated
 ***********************************************************************/
double enorm(long n, double *x)
{
   double norm2, agiant, floatn, s1, s2, s3, xabs, x1max, x3max;
   long i;
   s1     = ZERO;
   s2     = ZERO;
   s3     = ZERO;
   x1max  = ZERO;
   x3max  = ZERO;
   floatn = (double)n;
   agiant = RGIANT / floatn;
   for (i = 0; i < n; i++) {
      xabs = fabs(x[i]);
      /* summing components of vector that need no scaling */
      if (xabs > RDWARF && xabs < agiant)
	 s2 += xabs * xabs;
      else {
	 /* underflow... */
	 if (xabs <= RDWARF) {
            if (xabs > x3max) {
	       s3 = ONE + s3 * (x3max/xabs) * (x3max/xabs);
	       x3max = xabs;
	    }
	    else if (xabs != 0)
	       s3 += (xabs/x3max) * (xabs/x3max);
	 }
	 /* overflow... */
	 else {
	    /* summing large components of vector */
	    if (xabs <= x1max)
	       s1 += (xabs/x1max) * (xabs/x1max);
            else {
	       s1 = ONE + s1 * (x1max/xabs) * (x1max/xabs);
	       x1max = xabs;
	    }
	 }
      }
   }
   if (s1 != ZERO)
      norm2 = x1max * sqrt(s1 + (s2/x1max) / x1max);
   else if (s2 != ZERO) {
      if (s2 >= x3max)
	 norm2 = sqrt(s2 * (ONE + (x3max/s2) * (x3max*s3)));
      else 
	 norm2 = sqrt(x3max * ((s2/x3max) + (x3max*s3)));
   }
   else
      norm2 = x3max * sqrt(s3);
   return(norm2);
}
/***********************************************************************
 *                                                                     *
 *                          dtbmv()                                    *
 *                                                                     *
 ***********************************************************************/
/***********************************************************************
   Description
   -----------
   The function performs one of the matrix-vector operations
	x := A * x,  or  x := A' * x,
   where A is an upper-triangular matrix.
   Parameters
   ----------
   trans     if trans = TRANSP, A' is to be used in the multiplication
             if trans = NTRANSP, A is to be used in the multiplication
   n         number of rows of matrix A; n must be at least 0.  Unchanged
	     upon exit.
   k         number of super-diagonals of matrix A
   a         2-dimensional array whose leading n by (k + 1) part must 
	     contain the upper triangular band part of the matrix of 
	     coefficients, supplied row by row, with the leading diagonal
	     of the matrix in column (k + 1) of the array, the first super-
	     diagonal starting at position 2 in column k, and so on.
	     The top left k by k triangle of the array A is not referenced.
   x         linear array of dimension of at least n.  Before entry,
	     x must contain the n elements of vector x.  On exit, x is
	     overwritten with the transformed vector x.
   Functions called
   --------------
   MISC      imax  
 ***********************************************************************/
void dtbmv(long trans, long n, long k, double **a, double *x)
{
   long info, j, i, l, end;
   double temp;
   info = 0;
   if      ( trans != TRANSP && trans != NTRANSP )   info = 1;
   else if ( n < 0 )                                 info = 2;
   else if ( k < 0 )                                 info = 3;
   if (info) {
      fprintf(stderr, "%s %1ld %sn",
      "*** ON ENTRY TO DTBMV, PARAMETER NUMBER",info,"HAD AN ILLEGAL VALUE");
      return; //exit(info);
   }
   switch(trans) {
      case NTRANSP:  for (j = 0; j < n; j++) {
                        temp = x[j];
                        l = k - j;
                        for (i = imax(0, j - k); i < j; i++) 
                           x[i] += temp * a[j][l+i];
                        x[j] *= a[j][k];
                     }
		     break;
      case TRANSP:   for (j = n - 1; j >= 0; j--) {
			temp = x[j] * a[j][k];
			l = k - j;
			end = imax(0, j - k);
			for (i = j - 1; i >= end; i--)
			   temp += x[i] * a[j][l+i];
                        x[j] = temp;
		     }
		     break;
   }
}
/************************************************************** 
 * Function interchanges two vectors		     	      *
 * Based on Fortran-77 routine from Linpack by J. Dongarra    *
 **************************************************************/ 
void dswap(long n,double *dx,long incx,double *dy,long incy)
{
   long i;
   double dtemp;
   if (n <= 0 || incx == 0 || incy == 0) return;
   if (incx == 1 && incy == 1) {
      for (i=0; i < n; i++) {
	 dtemp = *dy;
	 *dy++ = *dx;
	 *dx++ = dtemp;
      }	
   }
   else {
      if (incx < 0) dx += (-n+1) * incx;
      if (incy < 0) dy += (-n+1) * incy;
      for (i=0; i < n; i++) {
         dtemp = *dy;
         *dy = *dx;
         *dx = dtemp;
         dx += incx;
         dy += incy;
      }
   }
}
/* am keeping these temporarily here will do away with later on */
#define               MAXIT     30
#define               CASE1     1
#define               CASE2     2
#define               CASE3     3
#define               CONVERGE  4
/***********************************************************************
 *                                                                     *
 *                        qriter2()                                    *
 *                                                                     *
 ***********************************************************************/
/***********************************************************************
   Description
   -----------
   This function reduces an upper bidiagonal matrix B to diagonal form.
   It is a C translation of a portion of DSVDC from Linpack.  In this 
   version, vectors are accumulated and B is assumed to be a square matrix.
   Parameters
   ---------
   (input)
   n           order of B
   s           linear array containing the diagonal elements of B
   e           linear array containing the off-diagonal elements of B
   (output)
   s           contains the singular values of the original matrix B
   up          2-dimensional array containing left singular vectors of B
   vp          2-dimensional array containing right singular vectors of B
   Functions called
   --------------
   BLAS         dscal, dswap, drot, drotg 
   MISC         dmax
 ***********************************************************************/
void qriter2(long n, double *s, double *e, double **up, double **vp)
{
   long negligible, iter, m, mm1, k, l, qrcase;
   double ztest, test, sl, g, t, smm1;
   double f, t1, cs, sn, scale, sm, el, emm1, b, c, shift;
   m     = n - 1;
   iter  = 0;
   while (m >= 0) {
      if (iter >= MAXIT) return;
      negligible = FALSE;
      /* this portion of the code inspects for negligible elements
       * in the s and e arrays.  On completion the variable qrcase
       * is set as follows:
       * qrcase = CASE1 if s[m] and e[l-1] are negligible and l < m
       * qrcase = CASE2 if s[l] is negligible and l < m
       * qrcase = CASE3 if s[l], ..., s[m] are not negligible (QR step),
       *                e[l-1] is negligible and l < m
       * qrcase = CONVERGENCE if e[m-1] is negligible */
      for (l = m - 1; l >= 0; l--) {
	 test = fabs(s[l]) + fabs(s[l + 1]);
	 ztest = test + fabs(e[l]);
	 if (ztest == test) {
	    e[l] = ZERO;
	    negligible = TRUE;
	 }
	 if (negligible) break;
      }
      if (l == m - 1) qrcase = CONVERGE;
      else {
         negligible = FALSE;
         for (k = m; k > l; k--) {
	    test = ZERO;
	    if (k != m) test += fabs(e[k]);
	    if (k != l + 1) test += fabs(e[k-1]);
	    ztest = test + fabs(s[k]);
	    if (ztest == test) {
	       s[k] = ZERO;
	       negligible = TRUE;
	    }
	    if (negligible) break;
	 }
	 if (k == l) qrcase = CASE3;
	 else if (k == m) qrcase = CASE1;
	 else {
	    qrcase = CASE2;
	    l = k;
	 }
      }
      l += 1;
      switch(qrcase) {
         /* deflate negligible s[m] */
	 case CASE1:    mm1 = m - 1;
			f = e[mm1];
			e[mm1] = ZERO;
			for (k = mm1; k >= l; k--) {
			   t1 = s[k];
			   drotg(&t1, &f, &cs, &sn);
			   s[k] = t1;
			   if (k != l) {
			      f = -sn * e[k - 1];
			      e[k - 1] *= cs;
			   }
			   drot(n, vp[k], vp[m], cs, sn);
                        }
			break;
         /* split at negligible s[l] */
	 case CASE2:    f = e[l - 1];
			e[l - 1] = ZERO;
			for (k = l; k <= m; k++) {
			   t1 = s[k];
			   drotg(&t1, &f, &cs, &sn);
			   s[k] = t1;
			   f = -sn * e[k];
			   e[k] *= cs;
			   drot(n, up[k], up[l - 1], cs, sn);
                        }
			break;
         /* perform one QR step */
	 case CASE3:    f = e[l - 1];
			/* calculate the shift */
			scale = dmax(fabs(s[m]), fabs(s[m - 1]));
			if (scale < fabs(e[m - 1])) scale = fabs(e[m - 1]);
			if (scale < fabs(s[l])) scale = fabs(s[l]);
			if (scale < fabs(e[l])) scale = fabs(e[l]);
			sm = s[m] / scale;
			smm1 = s[m - 1] / scale;
			emm1 = e[m - 1] / scale;
			sl = s[l] / scale;
			el = e[l] / scale;
			b = ((smm1 + sm) * (smm1 - sm) + emm1 * emm1) / 2.0;
			c = (sm * emm1);
			c *= c;
			shift = ZERO;
			if (b != ZERO || c !=ZERO) {
			   shift = sqrt(b * b + c);
			   if (b < ZERO) shift = -shift;
			   shift = c / (b + shift);
			}
			f = (sl + sm) * (sl - sm) + shift;
			g = sl * el;
			/* chase zeros */
			mm1 = m - 1;
			for (k = l; k <= mm1; k++) {
			   drotg(&f, &g, &cs, &sn);
			   if (k != l) e[k - 1] = f;
			   f = cs * s[k] + sn * e[k];
			   e[k] = cs * e[k] - sn * s[k];
			   g = sn * s[k + 1];
			   s[k + 1] = cs * s[k + 1];
			   drot(n, vp[k], vp[k + 1], cs, sn);
			   drotg(&f, &g, &cs, &sn);
			   s[k] = f;
			   f = cs * e[k] + sn * s[k + 1];
			   s[k + 1] = -sn * e[k] + cs * s[k + 1];
			   g = sn * e[k + 1];
			   e[k + 1] = cs * e[k + 1];
			   if (k < n - 1)
			      drot(n, up[k], up[k + 1], cs, sn);
			}
			e[mm1] = f;
			iter += 1;
			break;
         /* convergence */
	 case CONVERGE: if (s[l] < ZERO) {
			   /* make singular value positive */
	                   s[l] = -s[l];
			   dscal (n, -ONE, vp[l], 1); 
                        }
			/* order the singular value */
			while (l < n - 1) {
			   if (s[l] < s[l + 1]) {
			      t = s[l];
			      s[l] = s[l + 1];
			      s[l + 1] = t;
			      if (l < n - 1) 
				 dswap(n, vp[l], 1, vp[l + 1], 1);
			      if (l < n - 1) 
				 dswap(n, up[l], 1, up[l + 1], 1);
                              l += 1;
			   }
			   else break;
			}
			iter = 0;
			m -= 1;
			break;
      }
   }
}
/***************************************************************** 
 * applies a plane rotation;  assume a stride of 1 for dx and dy *
 * based on FORTRAN 77 routine from Linpack by J. Dongarra       *
 *****************************************************************/ 
void drot(long n, double *dx, double *dy, double c, double s)
{
   long i;
   double temp;
   if (n <= 0) return;
   for (i = 0; i < n; i++) {
      temp = c * (*dx) + s * (*dy);
      *dy = c * (*dy) - s * (*dx);
      dy++;
      *dx++ = temp;
   }
   return;
}
/***************************************************************** 
 * constructs Givens plane rotation                              *
 * based on FORTRAN 77 routine from Linpack by J. Dongarra       *
 *****************************************************************/ 
void drotg(double *da, double *db, double *c, double *s)
{
   double r, roe, scale, z, temp1, temp2;
   roe = *db;
   temp1 = fabs(*da);
   temp2 = fabs(*db);
   if (temp1 > temp2) roe = *da;
   scale = temp1 + temp2;
   if (scale != ZERO) {
      temp1 = *da / scale;
      temp2 = *db / scale;
      r = scale * sqrt(temp1 * temp1 + temp2 * temp2);
      r *= fsign(ONE, roe);
      *c = *da / r;
      *s = *db / r;
   }
   else {
      *c = ONE;
      *s = ZERO;
      r = ZERO;
   }
   z = *s;
   temp1 = fabs(*c);
   if (temp1 > ZERO && temp1 <= *s) z = ONE / *c;
   *da = r;
   *db = z;
   return;
}
/************************************************************** 
 * Function forms the dot product of two vectors.      	      *
 * Based on Fortran-77 routine from Linpack by J. Dongarra    *
 **************************************************************/ 
double ddot(long n,double *dx,long incx,double *dy,long incy)
{
   long i;
   double dot_product;
   if (n <= 0 || incx == 0 || incy == 0) return(0.0);
   dot_product = 0.0;
   if (incx == 1 && incy == 1) 
      for (i=0; i < n; i++) dot_product += (*dx++) * (*dy++);
   else {
      if (incx < 0) dx += (-n+1) * incx;
      if (incy < 0) dy += (-n+1) * incy;
      for (i=0; i < n; i++) {
         dot_product += (*dx) * (*dy);
         dx += incx;
         dy += incy;
      }
   }
   return(dot_product);
}
/************************************************************** 
 * Function scales a vector by a constant.     		      *
 * Based on Fortran-77 routine from Linpack by J. Dongarra    *
 **************************************************************/ 
void dscal(long n,double da,double *dx,long incx)
{
   long i;
   if (n <= 0 || incx == 0) return;
   if (incx < 0) dx += (-n+1) * incx;
   for (i=0; i < n; i++) {
      *dx *= da;
      dx += incx;
   }
   return;
}
/************************************************************** 
 * Constant times a vector plus a vector     		      *
 * Based on Fortran-77 routine from Linpack by J. Dongarra    *
 **************************************************************/ 
void daxpy (long n,double da,double *dx,long incx,double *dy,long incy)
{
   long i;
   if (n <= 0 || incx == 0 || incy == 0 || da == 0.0) return;
   if (incx == 1 && incy == 1) 
      for (i=0; i < n; i++) {
	 *dy += da * (*dx++);
	 dy++;
      }
   else {
      if (incx < 0) dx += (-n+1) * incx;
      if (incy < 0) dy += (-n+1) * incy;
      for (i=0; i < n; i++) {
         *dy += da * (*dx);
         dx += incx;
         dy += incy;
      }
   }
   return;
}
/***********************************************************************
 *                                                                     *
 *				mrandom()                               *
 *                        (double precision)                           *
 ***********************************************************************/
/***********************************************************************
   Description
   -----------
   This is a translation of a Fortran-77 uniform random number
   generator.  The code is based  on  theory and suggestions  given in
   D. E. Knuth (1969),  vol  2.  The argument to the function should 
   be initialized to an arbitrary integer prior to the first call to 
   random.  The calling program should  not  alter  the value of the
   argument between subsequent calls to random.  Random returns values
   within the the interval (0,1).
   Arguments 
   ---------
   (input)
   iy	   an integer seed whose value must not be altered by the caller
	   between subsequent calls
   (output)
   random  a double precision random number between (0,1)
 ***********************************************************************/
double mrandom(long *iy)
{
   static long m2 = 0;
   static long ia, ic, mic;
   static double halfm, s;
   /* If first entry, compute (max int) / 2 */
   if (!m2) {
      m2 = 1 << (8 * (int)sizeof(int) - 2); 
      halfm = m2;
      /* compute multiplier and increment for linear congruential 
       * method */
      ia = 8 * (long)(halfm * atan(1.0) / 8.0) + 5;
      ic = 2 * (long)(halfm * (0.5 - sqrt(3.0)/6.0)) + 1;
      mic = (m2-ic) + m2;
      /* s is the scale factor for converting to floating point */
      s = 0.5 / halfm;
   }
   /* compute next random number */
   *iy = *iy * ia;
   /* for computers which do not allow integer overflow on addition */
   if (*iy > mic) *iy = (*iy - m2) - m2;
   *iy = *iy + ic;
   /* for computers whose word length for addition is greater than
    * for multiplication */
   if (*iy / 2 > m2) *iy = (*iy - m2) - m2;
  
   /* for computers whose integer overflow affects the sign bit */
   if (*iy < 0) *iy = (*iy + m2) + m2;
   return((double)(*iy) * s);
}
/************************************************************** 
 * Function copies a vector x to a vector y	     	      *
 * Based on Fortran-77 routine from Linpack by J. Dongarra    *
 **************************************************************/ 
void dcopy(long n,double *dx,long incx,double *dy,long incy)
{
   long i;
   if (n <= 0 || incx == 0 || incy == 0) return;
   if (incx == 1 && incy == 1) 
      for (i=0; i < n; i++) *dy++ = *dx++;
   else {
      if (incx < 0) dx += (-n+1) * incx;
      if (incy < 0) dy += (-n+1) * incy;
      for (i=0; i < n; i++) {
         *dy = *dx;
         dx += incx;
         dy += incy;
      }
   }
   return;
}
/***********************************************************************
 *                                                                     *
 *                         dgemm2()                                    *
 *                                                                     *
 * A C-translation of the level 3 BLAS routine DGEMM by Dongarra,      *
 * Duff, du Croz, and Hammarling (see LAPACK Users' Guide).            *
 * In this version, the arrays which store the matrices used in this   *
 * matrix-matrix multiplication are accessed as two-dimensional arrays.*
 *                                                                     *
 ***********************************************************************/
/***********************************************************************
   Description
   -----------
   dgemm2() performs one of the matrix-matrix operations
	      C := alpha * op(A) * op(B) + beta * C,
   where op(X) = X or op(X) = X', alpha and beta are scalars, and A, B
   and C are matrices, with op(A) an m by k matrix, op(B) a k by n
   matrix and C an m by n matrix.
   Parameters
   ----------
   (input)
   transa   TRANSP indicates op(A) = A' is to be used in the multiplication
	    NTRANSP indicates op(A) = A is to be used in the multiplication
   transb   TRANSP indicates op(B) = B' is to be used in the multiplication
	    NTRANSP indicates op(B) = B is to be used in the multiplication
   m        on entry, m specifies the number of rows of the matrix op(A)
	    and of the matrix C.  m must be at least zero.  Unchanged
	    upon exit.
   n        on entry, n specifies the number of columns of the matrix op(B)
	    and of the matrix C.  n must be at least zero.  Unchanged
	    upon exit.
   k        on entry, k specifies the number of columns of the matrix op(A)
	    and the number of rows of the matrix B.  k must be at least 
	    zero.  Unchanged upon exit.
   alpha    a scalar multiplier
   a        matrix A as a 2-dimensional array.  When transa = NTRANSP, the
            leading m by k part of a must contain the matrix A. Otherwise,
	    the leading k by m part of a must contain  the matrix A.
   b        matrix B as a 2-dimensional array.  When transb = NTRANSP, the
            leading k by n part of a must contain the matrix B. Otherwise,
	    the leading n by k part of a must contain  the matrix B.
   beta     a scalar multiplier.  When beta is supplied as zero then C
	    need not be set on input.
   c        matrix C as a 2-dimensional array.  On entry, the leading
	    m by n part of c must contain the matrix C, except when
	    beta = 0.  In that case, c need not be set on entry. 
	    On exit, c is overwritten by the m by n matrix
	    (alpha * op(A) * op(B) + beta * C).
 ***********************************************************************/
void dgemm2(long transa, long transb, long m, long n, long k, 
            double alpha, double **a, double **b, double beta, double **c)
{
   long info;
   long i, j, l, nrowa, ncola, nrowb, ncolb;
   double temp, *atemp;
   info = 0;
   if      ( transa != TRANSP && transa != NTRANSP ) info = 1;
   else if ( transb != TRANSP && transb != NTRANSP ) info = 2;
   else if ( m < 0 ) 				     info = 3;
   else if ( n < 0 )				     info = 4;
   else if ( k < 0 )        			     info = 5;
   if (info) {
      fprintf(stderr, "%s %1d %sn",
      "*** ON ENTRY TO DGEMM2, PARAMETER NUMBER",info,"HAD AN ILLEGAL VALUE");
      exit(info);
   }
   if (transa) {
      nrowa = k;
      ncola = m;
   }
   else { 
      nrowa = m;
      ncola = k;
   }
   if (transb) {
      nrowb = n;
      ncolb = k;
   }
   else {
      nrowb = k;
      ncolb = n;
   }
   if (!m || !n || ((alpha == ZERO || !k) && beta == ONE))
      return;
   if (alpha == ZERO) {
      if (beta == ZERO) 
         for (i = 0; i < m; i++)
            for (j = 0; j < n; j++) c[i][j] = ZERO;
      else if (beta != ONE)
         for (i = 0; i < m; i++)
            for (j = 0; j < n; j++) c[i][j] *= beta;
      return;
   }
   if (beta == ZERO)
      for (i = 0; i < m; i++)
         for (j = 0; j < n; j++) c[i][j] = ZERO;
   else if (beta != ONE)
      for (i = 0; i < m; i++)
         for (j = 0; j < n; j++) c[i][j] *= beta;
   if (!transb) { 
      switch(transa) {
	 /* form C := alpha * A * B + beta * C */
	 case NTRANSP:  for(l = 0; l < nrowa; l++) {
                           atemp = *a++;
      		  	   for(j = 0; j < ncola; j++) {
	 	     	      temp = *atemp * alpha;
         	     	      for(i = 0; i < ncolb; i++) 
	    			 c[l][i] += temp * b[j][i];
	 	     	      atemp++;
      		  	   }
   	    	        }
			break;
	 /* form C := alpha * A' * B + beta * C */
	 case TRANSP:   for(l = 0; l < nrowa; l++) {
                           atemp = *a++;
      		  	   for(j = 0; j < ncola; j++) {
	 	     	      temp = *atemp * alpha;
         	     	      for(i = 0; i < ncolb; i++) 
	    			 c[j][i] += temp * b[l][i];
	 	     	      atemp++;
      		  	   }
   	    		}
			break;
      }
   }
   else { 
      switch(transa) {
	 /* form C := alpha * A * B' + beta * C */
	 case NTRANSP: for(l = 0; l < nrowa; l++) {
      		  	   for(j = 0; j < nrowb; j++) {
	 	     	      atemp = *a;
         	     	      for(i = 0; i < ncolb; i++) 
	    			 c[l][j] += (*atemp++) * alpha * b[j][i];
      		  	   }
			   a++;
   	    		}
			break;
	 /* form C := alpha * A' * B' + beta * C */
	 case TRANSP:   for(i = 0; i < ncola; i++) {
			   for (l = 0; l < nrowb; l++) {
      	       		      temp = ZERO;
	 		      for(j = 0; j < nrowa; j++) 
				 temp += a[j][i] * b[l][j];
                              c[i][l] += alpha * temp;
			   }
   	    		}
			break;
      }
   }
}
/***********************************************************************
 *                                                                     *
 *                          dsbmv()                                    *
 *                                                                     *
 ***********************************************************************/
/***********************************************************************
   Description
   -----------
   The function performs the matrix-vector operation
	  y := alpha * A * y + beta * y,
   where alpha and beta are scalars, x and y are n-element vectors and
   A is an n by n symmetric band matrix, with k super-diagonals.
   Parameters
   ----------
   n         number of rows of matrix A; n must be at least 0.  Unchanged
	     upon exit.
   k         number of super-diagonals of matrix A
   a         2-dimensional array whose leading n by (k + 1) part must 
	     contain the upper triangular band part of the symmetric matrix,
	     supplied row by row, with the leading diagonal of the matrix 
	     in column (k + 1) of the array, the first super-diagonal 
	     starting at position 2 in column k, and so on.
	     The top left k by k triangle of the array A is not referenced.
   x         linear array of dimension of at least n.  Before entry,
	     x must contain the n elements of vector x.  Unchanged on exit.
   y         linear array of dimension of at least n.  Before entry,
	     y must contain the n elements of vector y.  On exit, y is
	     overwritten by the updated vector y.
   Functions called
   --------------
   MISC      imax  
 ***********************************************************************/
void dsbmv(long n, long k, double alpha, double **a, 
           double *x, double beta, double *y)
{
   long info, j, i, l;
   double *ptrtemp, temp1, temp2;
   info = 0;
   if ( n < 0 )                                      info = 1;
   else if ( k < 0 )                                 info = 2;
   if (info) {
      fprintf(stderr, "%s %1d %sn",
      "*** ON ENTRY TO DSBMV, PARAMETER NUMBER",info,"HAD AN ILLEGAL VALUE");
      exit(info);
   }
   if (!n || (alpha == ZERO && beta == ONE))
      return;
   ptrtemp = y; 
   /* form y := beta * y */
   if (beta == ZERO) 
      for (i = 0; i < n; i++) *ptrtemp++ = ZERO;
   else if (beta != ONE) 
      for (i = 0; i < n; i++) *ptrtemp++ *= beta;
   if (alpha == ZERO) return;
   for (j = 0; j < n; j++) {
      temp1 = alpha * x[j];
      temp2 = ZERO;
      l = k - j;
      for (i = imax(0, j - k); i < j; i++) {
         y[i] = y[i] + temp1 * a[j][l+i];
         temp2 = temp2 + a[j][l+i] * x[i];
      }
      y[j] = y[j] + temp1 * a[j][k] + alpha * temp2;
   }
}
/***********************************************************************
 *                                                                     *
 *				tql2()   			       *
 *                                                                     *
 ***********************************************************************/
/***********************************************************************
   Description
   -----------
   tql2() is a translation of a Fortran version of the Algol
   procedure TQL2, Num. Math. 11, 293-306(1968) by Dowdler, Martin, 
   Reinsch and Wilkinson.
   Handbook for Auto. Comp., vol.II-Linear Algebra, 227-240(1971).  
   This function finds the eigenvalues and eigenvectors of a symmetric
   tridiagonal matrix by the QL method.
   Arguments
   ---------
   (input)                                                             
   n      order of the symmetric tridiagonal matrix           
   d      contains the diagonal elements of the input matrix        
   e      contains the subdiagonal elements of the input matrix in its
            first n-1 positions.
   z      contains the identity matrix				    
                                                                   
   (output)                                                       
   d      contains the eigenvalues in ascending order.  if an error
            exit is made, the eigenvalues are correct but unordered for
            for indices 0,1,...,ierr.				   
   e      has been destroyed.					  
   z      contains orthonormal eigenvectors of the symmetric   
            tridiagonal (or full) matrix.  if an error exit is made,
            z contains the eigenvectors associated with the stored 
          eigenvalues.					
   ierr   set to zero for normal return, j if the j-th eigenvalue has
            not been determined after 30 iterations.		    
   Functions used
   --------------
   UTILITY	fsign
   MISC		pythag
 ***********************************************************************/
long tql2(long n, double *d, double *e, double **z)
{
   long j, last, l, l1, l2, m, i, k, iteration;
   double tst1, tst2, g, r, s, s2, c, c2, c3, p, f, h, el1, dl1;
   if (n == 1) return(0);
   f = ZERO;
   last = n - 1;
   tst1 = ZERO;
   e[last] = ZERO;
   for (l = 0; l < n; l++) {
      iteration = 0;
      h = fabs(d[l]) + fabs(e[l]);
      if (tst1 < h) tst1 = h;
      /* look for small sub-diagonal element */
      for (m = l; m < n; m++) {
	 tst2 = tst1 + fabs(e[m]);
	 if (tst2 == tst1) break;
      }
      if (m != l) {
	 while (iteration < 30) {
	    iteration += 1;
            /*  form shift */
	    l1 = l + 1;
	    l2 = l1 + 1;
	    g = d[l];
            p = (d[l1] - g) / (2.0 * e[l]);
	    r = pythag(p, ONE);
	    d[l] = e[l] / (p + fsign(r, p));
	    d[l1] = e[l] * (p + fsign(r, p));
	    dl1 = d[l1];
	    h = g - d[l];
	    if (l2 < n) 
	       for (i = l2; i < n; i++) d[i] -= h;
            f += h;
	    /* QL transformation */
	    p = d[m];
	    c = ONE;
	    c2 = c;
	    el1 = e[l1];
	    s = ZERO;
	    i = m - 1;
	    while (i >= l) {
	       c3 = c2;
	       c2 = c;
	       s2 = s;
	       g = c * e[i];
	       h = c * p;
	       r = pythag(p, e[i]);
	       e[i + 1] = s * r;
	       s = e[i] / r;
	       c = p / r;
	       p = c * d[i] - s * g;
	       d[i + 1]= h + s * (c * g + s * d[i]);
	       /*  form vector */
	       for (k = 0; k < n; k ++) {
	          h = z[i + 1][k];
	          z[i + 1][k] = s * z[i][k] + c * h;
	          z[i][k] = c * z[i][k] - s * h;
	       }
	       i--;
	    }
	    p = -s * s2 * c3 *el1 * e[l] / dl1;
	    e[l] = s * p;
	    d[l] = c * p;
	    tst2 = tst1 + fabs(e[l]);
	    if (tst2 <= tst1) break;
	    if (iteration == 30) 
	       return(l);
         }
      }
      d[l] += f;
   }
   /* order the eigenvalues */
   for (l = 1; l < n; l++) {
      i = l - 1;
      k = i;
      p = d[i];
      for (j = l; j < n; j++) {
	 if (d[j] < p) {
	    k = j;
	    p = d[j];
	 }
      }
      /* ...and corresponding eigenvectors */
      if (k != i) {
	 d[k] = d[i];
	 d[i] = p;
	  for (j = 0; j < n; j ++) {
	     p = z[i][j];
	     z[i][j] = z[k][j];
	     z[k][j] = p;
	  }
      }   
   }
   return(0);
}
double pythag(double a, double b)
{
   double p, r, s, t, u, temp;
   p = dmax(fabs(a), fabs(b));
   if (p != 0.0) {
      temp = dmin(fabs(a), fabs(b)) / p;
      r = temp * temp; 
      t = 4.0 + r;
      while (t != 4.0) {
	 s = r / t;
	 u = 1.0 + 2.0 * s;
	 p *= u;
	 temp = s / u;
	 r *= temp * temp;
	 t = 4.0 + r;
      }
   }
   return(p);
}
/**************************************************************
 *                                                            *
 * multiplication of A'A by the transpose of an nc by n       *
 * matrix X.  A is nrow by ncol and is stored using the       *
 * Harwell-Boeing compressed column sparse matrix format.     *
 * The transpose of the n by nc product matrix Y is returned  *
 * in the two-dimensional array y.                            *
 *                                                            *
 *              Y = A'A * X'  and  y := Y'                    *
 *                                                            *
 **************************************************************/
void opm(long nrow, long ncol, long nc, double **x, double **y)
{
   long i, j, k, end;
   
   mxvcount  += nc;
   mtxvcount += nc;
   for (i = 0; i < nc; i++) 
      for (j = 0; j < nrow; j++) ztempp[i][j] = ZERO;
   for (i = 0; i < nc; i++) 
      for (j = 0; j < ncol; j++) y[i][j] = ZERO;
   /* multiply by sparse matrix A */
   for (i = 0; i < ncol; i++) {
      end = pointr[i+1];
      for (j = pointr[i]; j < end; j++)
         for (k = 0; k < nc; k++)
	    ztempp[k][rowind[j]] += value[j] * x[k][i]; 
   }
   /* multiply by transpose of A (A') */
   for (k = 0; k < nc; k++)
      for (i = 0; i < ncol; i++) {
         end = pointr[i+1];
         for (j = pointr[i]; j < end; j++) 
	    y[k][i] += value[j] * ztempp[k][rowind[j]];
      }
   return;
}
/**************************************************************
 *                                                            *
 * multiplication of matrix B by vector x, where B = A'A,     *
 * and A is nrow by ncol (nrow >> ncol) and is stored using   *
 * the Harwell-Boeing compressed column sparse matrix format. *
 * Hence, B is of order n = ncol.  y stores product vector.   *
 *                                                            *
 **************************************************************/
void opb(long nrow, long ncol, double *x, double *y)
{
   long i, j, end;
   
   mxvcount += 1;
   mtxvcount += 1;
   for (i = 0; i < ncol; i++) y[i] = ZERO;
   for (i = 0; i < nrow; i++) ztemp[i] = ZERO;
   for (i = 0; i < ncol; i++) {
      end = pointr[i+1];
      for (j = pointr[i]; j < end; j++) 
	 ztemp[rowind[j]] += value[j] * (*x); 
      x++;
   }
   for (i = 0; i < ncol; i++) {
      end = pointr[i+1];
      for (j = pointr[i]; j < end; j++) 
	 *y += value[j] * ztemp[rowind[j]];
      y++;
   }
   return;
}
/********************************************************************* 
 * Function sorts array1 and array2 into increasing order for array1 *
 *********************************************************************/
void dsort2(long igap,long n,double *array1,double *array2)
{
    double temp;
    long i, j, index;
    if (!igap) return;
    else {
	for (i = igap; i < n; i++) {
	    j = i - igap;
	    index = i;
	    while (j >= 0 && array1[j] > array1[index]) {
		temp = array1[j];
		array1[j] = array1[index];
		array1[index] = temp;
		temp = array2[j];
		array2[j] = array2[index];
		array2[index] = temp;
	        j -= igap;
		index = j + igap;
	    }
	} 
    }
    dsort2(igap/2,n,array1,array2);
}
/**************************************************************
 * multiplication of matrix B by vector x, where B = A'A,     *
 * and A is nrow by ncol (nrow >> ncol). Hence, B is of order *
 * n = ncol (y stores product vector).		              *
 **************************************************************/
void opb(long n, double *x, double *y, bool flag)
{
   long i, j, end;
   
   mxvcount += 2;
   for (i = 0; i < n; i++) y[i] = 0.0;
   for (i = 0; i < nrow; i++) ztemp[i] = 0.0;
   for (i = 0; i < ncol; i++) {
      end = pointr[i+1];
      for (j = pointr[i]; j < end; j++) 
	 ztemp[rowind[j]] += value[j] * (*x); 
      x++;
   }
   for (i = 0; i < ncol; i++) {
      end = pointr[i+1];
      for (j = pointr[i]; j < end; j++) 
	 *y += value[j] * ztemp[rowind[j]];
      y++;
   }
   return;
}
double norm_1()
{
/* find matrix 1-norm  */
    double alpha,sum;
    long i,j,last;
    alpha = 0.0;
    for (j=0; j<ncol; ++j) {
        sum = 0.0;
        last= pointr[j+1];
        for (i=pointr[j]; i<last ; ++i)
            sum += fabs(value[i]);
        alpha = dmax(alpha,sum);
    }
    return(alpha);
}
/************************************************************** 
 * multiplication of 2-cyclic matrix B by a vector x, where   *
 *							      *
 * B = [0  A]						      *
 *     [A' 0] , where A is nrow by ncol (nrow >> ncol). Hence,*
 * B is of order n = nrow+ncol (y stores product vector).     *
 **************************************************************/ 
void opb(long n,double *x, double *y)
{
   long i, j, end;
   double *tmp;
   
   mxvcount += 2;
   for (i = 0; i < n; i++) y[i] = 0.0;
   tmp = &x[nrow]; 
   for (i = 0; i < ncol; i++) {
      end = pointr[i+1];
      for (j = pointr[i]; j < end; j++) 
	 y[rowind[j]] += value[j] * (*tmp); 
      tmp++; 
   }
   for (i = nrow; i < n; i++) {
      end = pointr[i-nrow+1];
      for (j = pointr[i-nrow]; j < end; j++) 
	 y[i] += value[j] * x[rowind[j]];
   }
   return;
}
/************************************************************** 
 * function scales a vector by a constant.	     	      *
 * Based on Fortran-77 routine from Linpack by J. Dongarra    *
 **************************************************************/ 
void datx(long n,double da,double *dx,long incx,double *dy,long incy)
{
   long i;
   if (n <= 0 || incx == 0 || incy == 0 || da == 0.0) return;
   if (incx == 1 && incy == 1) 
      for (i=0; i < n; i++) *dy++ = da * (*dx++);
   else {
      if (incx < 0) dx += (-n+1) * incx;
      if (incy < 0) dy += (-n+1) * incy;
      for (i=0; i < n; i++) {
         *dy = da * (*dx);
         dx += incx;
         dy += incy;
      }
   }
   return;
}
/***********************************************************************
 *                                                                     *
 *                         dgemm3()                                    *
 *                                                                     *
 * A C-translation of the level 3 BLAS routine DGEMM by Dongarra,      *
 * Duff, du Croz, and Hammarling (see LAPACK Users' Guide).            *
 * In this version, the arrays which store the matrices used in this   *
 * matrix-matrix multiplication are accessed as two-dimensional arrays.*
 *                                                                     *
 ***********************************************************************/
/***********************************************************************
   Description
   -----------
   dgemm3() performs one of the matrix-matrix operations
	      C := alpha * op(A) * op(B) + beta * C,
   where op(X) = X or op(X) = X', alpha and beta are scalars, and A, B
   and C are matrices, with op(A) an m by k matrix, op(B) a k by n
   matrix and C an m by n matrix.
   Parameters
   ----------
   (input)
   transa   TRANSP indicates op(A) = A' is to be used in the multiplication
	    NTRANSP indicates op(A) = A is to be used in the multiplication
   transb   TRANSP indicates op(B) = B' is to be used in the multiplication
	    NTRANSP indicates op(B) = B is to be used in the multiplication
   m        on entry, m specifies the number of rows of the matrix op(A)
	    and of the matrix C.  m must be at least zero.  Unchanged
	    upon exit.
   n        on entry, n specifies the number of columns of the matrix op(B)
	    and of the matrix C.  n must be at least zero.  Unchanged
	    upon exit.
   k        on entry, k specifies the number of columns of the matrix op(A)
	    and the number of rows of the matrix B.  k must be at least 
	    zero.  Unchanged upon exit.
   alpha    a scalar multiplier
   a        matrix A as a 2-dimensional array.  When transa = NTRANSP, the
            leading m by k part of a must contain the matrix A. Otherwise,
	    the leading k by m part of a must contain  the matrix A.
   ira,ica  row and column indices of matrix a, where mxn part starts.
   b        matrix B as a 2-dimensional array.  When transb = NTRANSP, the
            leading k by n part of a must contain the matrix B. Otherwise,
	    the leading n by k part of a must contain  the matrix B.
   irb,icb  row and column indices of matrix b, where kxn starts.
   beta     a scalar multiplier.  When beta is supplied as zero then C
	    need not be set on input.
   c        matrix C as a 2-dimensional array.  On entry, the leading
	    m by n part of c must contain the matrix C, except when
	    beta = 0.  In that case, c need not be set on entry. 
	    On exit, c is overwritten by the m by n matrix
	    (alpha * op(A) * op(B) + beta * C).
   irc,icc  row and column indices of matrix c, where the mxn part is stored.
***********************************************************************/
void dgemm3(long transa, long transb, long m, long n, long k, 
            double alpha, double **a, long ira, long ica, double **b,
            long irb, long icb, double beta, double **c, long irc,
            long icc)
{
   long info;
   long i, j, l, nrowa, ncola, nrowb, ncolb;
   double temp;
   info = 0;
   if      ( transa != TRANSP && transa != NTRANSP ) info = 1;
   else if ( transb != TRANSP && transb != NTRANSP ) info = 2;
   else if ( m < 0 ) 				     info = 3;
   else if ( n < 0 )				     info = 4;
   else if ( k < 0 )        			     info = 5;
   if (info) {
      fprintf(stderr, "%s %1ld %sn",
      "*** ON ENTRY TO DGEMM3, PARAMETER NUMBER",info,"HAD AN ILLEGAL VALUE");
      exit(info);
   }
   if (transa) {
      nrowa = m;
      ncola = k;
   }
   else { 
      nrowa = k;
      ncola = m;
   }
   if (transb) {
      nrowb = k;
      ncolb = n;
   }
   else {
      nrowb = n;
      ncolb = k;
   }
   if (!m || !n || ((alpha == ZERO || !k) && beta == ONE))
      return;
   if (alpha == ZERO) {
      if (beta == ZERO) 
         for (j = 0; j < n; j++)
            for (i = 0; i < m; i++) c[icc+j][irc+i] = ZERO;
      else 
         for (j = 0; j < n; j++)
             for (i = 0; i < m; i++) c[icc+j][irc+i] *= beta;
      return;
   }
   if (!transb) { 
      switch(transa) {
	 /* form C := alpha * A * B + beta * C */
	 case NTRANSP: for(j = 0; j < n; j++) {
                          for(i=0; i<m; i++) c[icc+j][irc+i]=0.0;
                          for(l=0; l<k; l++) 
                             if(b[icb+j][irb+l]!=0.0) {
                               temp = alpha*b[icb+j][irb+l];
                               for(i=0; i<m; i++) 
                                c[icc+j][irc+i] += temp*a[ica+l][ira+i];
      		  	      }
                        }
			break;
	 /* form C := alpha * A' * B + beta * C */
	 case TRANSP:   for(j = 0; j < n; j++) {
                           for(i=0; i<m; i++) {
                              temp = 0.0;
      		  	      for(l = 0; l< k;l++) 
                                 temp += a[ica+i][ira+l]*b[icb+j][irb+l];
                              if(beta==0.0) c[icc+j][irc+i]=alpha*temp; 
                              else 
                       c[icc+j][irc+i] = alpha*temp + beta*c[icc+j][irc+i];
      		  	   }
   	    		}
			break;
      }
   }
   else { 
      switch(transa) {
	 /* form C := alpha * A * B' + beta * C */
	 case NTRANSP: for(j=0; j<n; j++) {
      		  	   for(i=0; i<m; i++) c[icc+j][irc+i]=ZERO;
      		  	   for(l=0; l<k; l++) {
	 	     	      temp = alpha*b[l+icb][j+irb];
         	     	      for(i=0; i<m; i++) 
	    			 c[j+icc][i+irc] += temp*a[l+ica][i+ira];
      		  	   }
   	    		}
			break;
	 /* form C := alpha * A' * B' + beta * C */
	 case TRANSP:   for(j=0; j<n; j++) {
			   for (i=0; i<m; i++) {
      	       		      temp = ZERO;
	 		      for(l=0; l<k; l++) 
				 temp += a[i+ica][l+ira]*b[l+icb][j+irb];
                              if(!beta) c[j+icc][i+irc] += alpha * temp;
                              else 
                                 c[j+icc][i+irc]= alpha*temp+
                                                  beta*c[j+icc][i+irc];
			   }
   	    		}
			break;
      }
   }
}
void opat(double *x, double *y)
{
  /*  multiplication of transpose (nrow by ncol sparse matrix A)
    by the vector x, store in y  */
  long i,j,upper;
  for(i=0; i<ncol; i++)
     y[i]=ZERO;
  for(i=0; i<ncol; i++) {
     upper = pointr[i+1];
     for(j=pointr[i]; j<upper; j++)
        y[i] += value[j]*x[rowind[j]];
  }
  return;
}
double dasum(long n, double *dx, long incx)
{
  /**************************************************************
   *  Function forms the sum of the absolute values.            *
   *  Uses unrolled loops for increment equal to one.           *
   *  Based on Fortran-77 routine from Linpack by J.Dongarra.
   **************************************************************/
  double dtemp,dsum;
  long   i,ix,m,mp1;
  dsum = ZERO;
  dtemp = ZERO;
  if(n <= 0) return 0;
  if(incx != 1) {
  /* code for increment not equal to 1 */
    ix = 0;
    if(incx < 0) ix = (-n+1)*incx + 1;
    for(i=0; i<n; i++) {
       dtemp += fabs(dx[ix]);
       ix    += incx;
    }
    dsum = dtemp;
    return(dsum);
  }  
  /* code for increment equal to 1 */
  
  /* clean-up loop */
  m = n % 6;
  if(m) {
    for(i=0; i<m; i++)
       dtemp += fabs(dx[i]);
  }
  if(n>=6) {
    for(i=m; i<n; i+=6)
       dtemp += fabs(dx[i]) + fabs(dx[i+1]) + fabs(dx[i+2]) +
                fabs(dx[i+3]) + fabs(dx[i+4]) + fabs(dx[i+5]);
  }
  dsum = dtemp;
  return(dsum);
}
/***********************************************************************
 *                                                                     *
 *                              tred2()                                *
 *                                                                     *
 ***********************************************************************/
/***********************************************************************
  Description
  -----------
  
  tred2() is a translation of the algol procedure TRED2, Num. Math. 11, 
  181-195 (1968) by Martin, Reinsch, and Wikinson.  Handbook for Auto.
  Comp., Vol. II- Linear Algebra, 212-226 (1971)
  This subroutine reduces a real symmetric matrix to a symmetric
  tridiagonal matrix using and accumulating orthogonal similarity
  transformations.
  Arguments
  ---------
  (input)
  offset index of the leading element of the matrix to be
         tridiagonalized. The matrix tridiagonalized should be 
         stored in a[offset:n-1, offset:n-1]
  n	 order of the matrix
  a	 contains the real symmetric input matrix. Only the upper
	 triangle of the matrix need be supplied
  (output)
  d	 contains the diagonal elements of the tridiagonal matrix.
  
  e	 contains the subdiagonal elements of the tridiagonal matrix
	 in its first n-1 positions.
  z	 contains the orthogonal transformation matrix produced in the
	 reduction.
  a and z may coincide. If distinct, a is unaltered.
  Functions used:
  UTILITY: fsign
***********************************************************************/
void tred2(long offset, long n, double **a, double *d, double *e, double **z)
{
 long jj,ii,i,j,k,l, jp1;
 double *zptr,*aptr,h, scale, f, g,  hh, tmp;
 long i1;
 for (i=offset;i<n;i++) 
  { 
   for (j=i;j<n;j++)
     {
      z[j][i]=a[i][j];   /*fix this later?.. the rest of the routine 
                           assumes that z has the lower triangular part
                           of the symmetric matrix */
     }
   d[i]=a[i][n-1];
  }
  if (n==1) 
   {
    for (i=offset;i<n;i++)
     {
       d[i]=z[n-1][i];
       z[n-1][i]=ZERO;
     }
    z[n-1][n-1]=ONE;
    e[1]=ZERO;
    return;
   }
  /*for i = n step -1 until 2 do*/
  for (ii=3;ii<n+2-offset;ii++)
   {
     i=n+2-ii;
     l=i-1;
     h=ZERO; 
     scale=ZERO;
    /*scale row (algol tol then not needed)*/
     if (l>=1)
       for (k=offset;k<=l;k++)
        {
         scale+= fabs(d[k]);
        }
	
    if ((scale==ZERO)||(l<1))
     {
      e[i]=d[l];
      for (j=offset;j<=l;j++)
          {
            d[j]=z[l][j];
            z[i][j]=ZERO;
            z[j][i]=ZERO;
          }
     }
   else                   /*scale <> ZERO */
     {
       for (k=offset;k<=l;k++)
        {
         d[k]=d[k]/scale;
         h+=d[k]*d[k];
        }
       f=d[l];
       g=-fsign(sqrt(h), f);
       e[i]=scale * g;
       h-=f*g;
       d[l]=f-g;
   
       /* form A*u */
  
       for (j=offset; j<=l; j++)
          e[j]=ZERO;
          
          for (j=offset;j<=l;j++)
            {
             f=d[j];
             z[j][i]=f;
             g= e[j] + z[j][j] * f;
             
             jp1= j + 1;
   
             if (l >= jp1) 
                 {
                  for (k=jp1; k<=l; k++)
                   {
                     g+= z[k][j] * d[k];
                     e[k] += z[k][j] * f;
                   }
                 };
             e[j]=g;
           }
       /* form P */
 
       f= ZERO;
 
       for (j=offset; j<=l; j++)
        {
          e[j]=e[j]/h;
          f+= e[j] * d[j];
        }
       hh= f/ (h+h);
  
       /* form Q */
  
      for (j=offset; j<=l; j++)
       e[j] -= hh * d[j];
      /* form reduced A */
      for (j=offset; j<=l; j++)
       {
         f= d[j];
         g = e[j];
         for (k=j; k<=l; k++)
          z[k][j]= z[k][j] - f * e[k] - g * d[k];
         d[j]=z[l][j];
         z[i][j]=ZERO;
       }
    }  /* end scale <> zero */
    d[i]=h;
   }   /* end for ii */
   /*accumulation of transformation matrices */
   for (i=offset + 1;i<n;i++)
    {
     l=i-1;
     z[n-1][l] = z[l][l];
     z[l][l] = ONE;
     h=d[i];
     if (h != ZERO) 
       {
        for (k=offset; k<=l; k++)
          d[k]= z[k][i]/h;
        for (j=offset; j<=l; j++)
         {
           g= ZERO;
           
           for (k=offset;k<=l; k++)
            g+= z[k][i]*z[k][j];
	   for (k=offset;k<=l;k++)
            z[k][j] -= g * d[k];
         }
       }
       for (k=offset;k<=l;k++) z[k][i]=ZERO;
     }
  
     for (i=offset;i<n;i++)
       {
        d[i]=z[n-1][i];
        z[n-1][i]=ZERO;
       }
     z[n-1][n-1]=ONE;
     e[0]=ZERO;
/*preparation for tql2.c.. reorder e[]*/
for (i=1+offset;i<n;i++) e[i-1]=e[i]; 
/*preparation for tql2.c.. z has to be transposed for 
  tql2 to give correct eigenvectors */
for (ii=offset; ii<n; ii++)
 for (jj=ii; jj<n; jj++)
 {
   tmp=z[ii][jj];
  z[ii][jj]=z[jj][ii];
  z[jj][ii]=tmp;
 }
     return;
}
/***********************************************************************
 *                                                                     *
 *				tql2()   			       *
 *                                                                     *
 ***********************************************************************/
/***********************************************************************
   Description
   -----------
   tql2() is a translation of a Fortran version of the Algol
   procedure TQL2, Num. Math. 11, 293-306(1968) by Dowdler, Martin, 
   Reinsch and Wilkinson.
   Handbook for Auto. Comp., vol.II-Linear Algebra, 227-240(1971).  
   This function finds the eigenvalues and eigenvectors of a symmetric
   tridiagonal matrix by the QL method.
   Arguments
   ---------
   (input)                                                             
   offset the index of the leading element  of the input(full) matrix
          to be factored.
   n      order of the symmetric tridiagonal matrix           
   d      contains the diagonal elements of the input matrix        
   e      contains the subdiagonal elements of the input matrix in its
            first n-1 positions.
   z      contains the identity matrix				    
                                                                   
   (output)                                                       
   d      contains the eigenvalues in ascending order.  if an error
            exit is made, the eigenvalues are correct but unordered for
            for indices 0,1,...,ierr.				   
   e      has been destroyed.					  
   z      contains orthonormal eigenvectors of the symmetric   
            tridiagonal (or full) matrix.  if an error exit is made,
            z contains the eigenvectors associated with the stored 
          eigenvalues.					
   ierr   set to zero for normal return, j if the j-th eigenvalue has
            not been determined after 30 iterations.		    
   Functions used
   --------------
   UTILITY	fsign
   MISC		pythag
 ***********************************************************************/
long tql2(long offset, long n, double *d, double *e, double **z)
{
   long j, last, l, l1, l2, m, i, k, iteration;
   double tst1, tst2, g, r, s, s2, c, c2, c3, p, f, h, el1, dl1;
   if (n == 1) return(0);
   f = ZERO;
   last = n - 1;
   tst1 = ZERO;
   e[last] = ZERO;
   for (l = offset; l < n; l++) {
      iteration = 0;
      h = fabs(d[l]) + fabs(e[l]);
      if (tst1 < h) tst1 = h;
      /* look for small sub-diagonal element */
      for (m = l; m < n; m++) {
	 tst2 = tst1 + fabs(e[m]);
	 if (tst2 == tst1) break;
      }
      if (m != l) {
	 while (iteration < 30) {
	    iteration += 1;
            /*  form shift */
	    l1 = l + 1;
	    l2 = l1 + 1;
	    g = d[l];
            p = (d[l1] - g) / (2.0 * e[l]);
	    r = pythag(p, ONE);
	    d[l] = e[l] / (p + fsign(r, p));
	    d[l1] = e[l] * (p + fsign(r, p));
	    dl1 = d[l1];
	    h = g - d[l];
	    if (l2 < n) 
	       for (i = l2; i < n; i++) d[i] -= h;
            f += h;
	    /* QL transformation */
	    p = d[m];
	    c = ONE;
	    c2 = c;
	    el1 = e[l1];
	    s = ZERO;
	    i = m - 1;
	    while (i >= l) {
	       c3 = c2;
	       c2 = c;
	       s2 = s;
	       g = c * e[i];
	       h = c * p;
	       r = pythag(p, e[i]);
	       e[i + 1] = s * r;
	       s = e[i] / r;
	       c = p / r;
	       p = c * d[i] - s * g;
	       d[i + 1]= h + s * (c * g + s * d[i]);
	       /*  form vector */
	       for (k = offset; k < n; k ++) {
	          h = z[i + 1][k];
	          z[i + 1][k] = s * z[i][k] + c * h;
	          z[i][k] = c * z[i][k] - s * h;
	       }
	       i--;
	    }
	    p = -s * s2 * c3 *el1 * e[l] / dl1;
	    e[l] = s * p;
	    d[l] = c * p;
	    tst2 = tst1 + fabs(e[l]);
	    if (tst2 <= tst1) break;
	    if (iteration == 30) 
	       return(l);
         }
      }
      d[l] += f;
   }
   /* order the eigenvalues */
   for (l = 1+offset; l < n; l++) {
      i = l - 1;
      k = i;
      p = d[i];
      for (j = l; j < n; j++) {
	 if (d[j] < p) {
	    k = j;
	    p = d[j];
	 }
      }
      /* ...and corresponding eigenvectors */
      if (k != i) {
	 d[k] = d[i];
	 d[i] = p;
	  for (j = offset; j < n; j ++) {
	     p = z[i][j];
	     z[i][j] = z[k][j];
	     z[k][j] = p;
	  }
      }   
   }
   return(0);
}
// void opb(long n, double *x, double *y, double shift)
// {
// /* multiplication of a shifted 2-cylic matrix c by a vector x , where
//        [D    A]  
//    c = [      ]
//        [A'   D] , where A is nrow by ncol (nrow>>ncol),
//                   and D = (alpha-shift)*I , alpha an upper bound
//                   for the largest singular value of A, and 
//                   shift is the approximate singular value of A.
//    Hence, C is of order N=nrow+ncol (y stores product vector) */
//   long i,j,upper;
//   for(i=0; i<n; i++)
//      y[i]=(nalpha-shift)*x[i];
//   for(i=0; i<ncol; i++) {
//      upper = pointr[i+1];
//      for(j=pointr[i]; j<upper; j++)
//         y[rowind[j]] += value[j]*x[nrow+i];
//   }
//   for(i=nrow; i<n; i++) {
//      upper = pointr[i-nrow+1];
//      for(j=pointr[i-nrow]; j<upper; j++)
//         y[i] += value[j]*x[rowind[j]];
//   }
//   return;
// }
/************************************************************** 
 * multiplication of matrix B by a vector x, where            *
 *							      *
 * B =  A'A, where A is nrow by ncol (nrow >> ncol)           *
 * Hence, B is of order n:=ncol                               *
 * y stores product vector                                    *
 **************************************************************/ 
void opa(double *x, double *y)
{
   long i, j,end;
   for (i = 0; i < nrow; i++) y[i] = ZERO;
/* multiply by sparse C */
   for (i = 0; i < ncol; i++) {
      end = pointr[i+1];
      for (j = pointr[i]; j < end; j++)
	y[rowind[j]] += value[j] * x[i];
   }
   return;
}
/***********************************************************************
 *                                                                     *
 *                        dgemv2()                                     *
 * A C-translation of the level 2 BLAS routine DGEMV by Dongarra,      *
 * du Croz, and Hammarling, and Hanson (see LAPACK Users' Guide).      *
 *                                                                     *
 ***********************************************************************
   Description
   -----------
   dgemv2() performs one of the matrix-vector operations
   y := alpha * A * x + beta * y  or  y := alpha * A' * x + beta * y
   where alpha and beta are scalars, X, Y are vectors and A is an
   m by n matrix.
   Parameters
   ----------
   (input)
   transa   TRANSP indicates op(A) = A' is to be used in the multiplication
	    NTRANSP indicates op(A) = A is to be used in the multiplication
   m        on entry, m specifies the number of rows of the matrix A.
	    m must be at least zero.  Unchanged upon exit.
   n        on entry, n specifies the number of columns of the matrix A.
	    n must be at least zero.  Unchanged upon exit.
   alpha    a scalar multiplier.  Unchanged upon exit.
   a        matrix A as a 2-dimensional array.  Before entry, the leading
	    m by n part of the array a must contain the matrix A.
   ira,ica  row and column indices of array A, where mxn part starts.
   x        linear array of dimension of at least n if transa = NTRANSP
	    and at least m otherwise.
   beta     a scalar multiplier.  When beta is supplied as zero then y
	    need not be set on input.  Unchanged upon exit.
   y        linear array of dimension of at least m if transa = NTRANSP
	    and at leat n otherwise.  Before entry with beta nonzero,
	    the array y must contain the vector y.  On exit, y is 
	    overwritten by the updated vector y.
***********************************************************************/
void dgemv2(long transa, long m, long n, 
           double alpha, double **a, long ira, long ica,
           double *x, double beta, double *y)
{
   long info, leny, i, j;
   double temp, *ptrtemp;
   info = 0;
   if      ( transa != TRANSP && transa != NTRANSP ) info = 1;
   else if ( m < 0 ) 				     info = 2;
   else if ( n < 0 )				     info = 3;
   if (info) {
      fprintf(stderr, "%s %1ld %sn",
      "*** ON ENTRY TO DGEMV, PARAMETER NUMBER",info,"HAD AN ILLEGAL VALUE");
      exit(info);
   }
   if (transa) leny = n;
   else        leny = m;
   if (!m || !n || (alpha == ZERO && beta == ONE))
      return;
   /* form Y := beta * Y */
   if (beta == 0.0) 
      for (i = 0; i < leny; i++) y[i] = ZERO;
   else if (beta != 1.0) 
      for (i = 0; i < leny; i++) y[i] *= beta;
   if (alpha == ZERO) return;
   switch(transa) {
      /* form Y := alpha * A * X + Y */
      case NTRANSP:  for(j = 0; j < n; j++) {
		        temp = alpha*x[j];
		        for(i = 0; i < m; i++) 
			y[i] += temp*a[j+ica][i+ira];
		     }
		     break;
		     
      /* form Y := alpha * A' * X + Y */
      case TRANSP:   for(j = 0; j < n; j++) { 
                        temp = ZERO;
			for(i=0; i<m; i++) 
			   temp += a[j+ica][i+ira]*x[i];
                        y[j] += alpha*temp;
		     }
		     break;
   }
}
void imtql2(long nm, long n, double d[], double e[], double z[])
{
   long index, nnm, j, last, l, m, i, k, iteration, convergence, underflow;
   double b, test, g, r, s, c, p, f;
   if (n == 1) return;
   ierr = 0;
   last = n - 1;
   for (i = 1; i < n; i++) e[i-1] = e[i];
   e[last] = 0.0;
   nnm = n * nm;
   for (l = 0; l < n; l++) {
      iteration = 0;
      /* look for small sub-diagonal element */
      while (iteration <= 30) {
	 for (m = l; m < n; m++) {
	    convergence = FALSE;
	    if (m == last) break;
	    else {
	       test = fabs(d[m]) + fabs(d[m+1]);
	       if (test + fabs(e[m]) == test) convergence = TRUE;
	    }
	    if (convergence) break;
	 }
	 if (m != l) {
	    /* set error -- no convergence to an eigenvalue after
	     * 30 iterations. */     
	    if (iteration == 30) {
	       ierr = l;
	       return;
	    }
	    p = d[l]; 
	    iteration += 1;
	    /* form shift */
	    g = (d[l+1] - p) / (2.0 * e[l]);
	    r = pythag(g, 1.0);
	    g = d[m] - p + e[l] / (g + fsign(r, g));
	    s = 1.0;
	    c = 1.0;
	    p = 0.0;
	    underflow = FALSE;
	    i = m - 1;
	    while (underflow == FALSE && i >= l) {
	       f = s * e[i];
	       b = c * e[i];
	       r = pythag(f, g);
	       e[i+1] = r;
	       if (r == 0.0) underflow = TRUE;
	       else {
		  s = f / r;
		  c = g / r;
		  g = d[i+1] - p;
		  r = (d[i] - g) * s + 2.0 * c * b;
		  p = s * r;
		  d[i+1] = g + p;
		  g = c * r - b;
		  /* form vector */
		  for (k = 0; k < nnm; k += n) {
		     index = k + i;
		     f = z[index+1];
		     z[index+1] = s * z[index] + c * f;
		     z[index] = c * z[index] - s * f;
		  } 
		  i--;
	       }
	    }   /* end while (underflow != FALSE && i >= l) */
	    /*........ recover from underflow .........*/
	    if (underflow) {
	       d[i+1] -= p;
	       e[m] = 0.0;
	    }
	    else {
	       d[l] -= p;
	       e[l] = g;
	       e[m] = 0.0;
	    }
	 }
	 else break;
      }		/*...... end while (iteration <= 30) .........*/
   }		/*...... end for (l=0; l<n; l++) .............*/
   /* order the eigenvalues */
   for (l = 1; l < n; l++) {
      i = l - 1;
      k = i;
      p = d[i];
      for (j = l; j < n; j++) {
	 if (d[j] < p) {
	    k = j;
	    p = d[j];
	 }
      }
      /* ...and corresponding eigenvectors */
      if (k != i) {
	 d[k] = d[i];
	 d[i] = p;
	  for (j = 0; j < nnm; j += n) {
	     p = z[j+i];
	     z[j+i] = z[j+k];
	     z[j+k] = p;
	  }
      }   
   }
   return;
}