Information Retrieval

Development Platform:
Unix_Linux

s_tms2.cc：Code Content
							// File: s_tms2.cc -- Implements the tms1 class
// Author: Suvrit Sra
// Date: 15 Nov, 2003
/*************************************************************************
 THE ORIGINAL SVDPAKC COPYRIGHT
                           (c) Copyright 1993
                        University of Tennessee
                          All Rights Reserved                          
 *************************************************************************/
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cerrno>
#include <cstring>
#include <unistd.h>
#include <fcntl.h>
#include "s_tms2.h"
using namespace ssvd;
/***********************************************************************
 *                                                                     *
 *                       tsvd2()                                       *
 *     Sparse SVD Via Eigensystem of Equivalent 2-Cyclic Matrix        *
 *                  (double precision)                                 *
 *                                                                     *
 ***********************************************************************
   Description
   -----------
   tsvd2 implements a trace-minimization svd method for determining
   the p-largest singular triplets of a large sparse matrix A.
   tms2 computes the singular triplets of the matrix A via the 
   equivalent symmetric eigenvalue problem.
   
   B = (alpha*alpha)*I - A'A, where A is m (=nrow) by n (=ncol),
   so that sqrt(alpha*alpha-lambda) is a singular value of A.
   alpha is chosen so that B is (symmetric) positive definite.  In
   the calling program, alpha is determined as the matrix 1-norm of
   A.  the user can set alpha to be any known upper bound for the
   the largest singular value of the matrix A.
   (A' = transpose of A)
     
   User supplied routines: opat, opb,timer,store
 
   opat(x,y)                takes an m-vector x and should return A'*x
                            in y.
   opb(nrow+ncol,x,y,shift) takes an (m+n)-vector x and should return
                            D*x in y, where D=[B-shift*I], I is the 
                            (m+n) order identity matrix.
   user should replace calls to timer() with an appropriate call to
   an instrinsic timing routine that returns delta user cpu time
   (i.e., user cpu time elapsed from a previous call to timing routine)
   tsvd2 utilizes ritz-shifting and chebyshev polynomials for acceleration of 
   convergence.
   External parameters
   -------------------
   Defined and documented in tmsg.h
   Local parameters:
   ----------------
   (input) 
   n               order of equivalent symmetric eigenvalue problem
                   should be equal to min(nrow,ncol), where nrow is the
                   number of rows of A and ncol is the number of columns of A.
   p               number of desired singular triplets (largest) of A.
   s               dimension of initial subspace (s should be great er
                   than p; s=2*p is usually safe but since the comp lexity
                   of the method is determined by s, the user should
                   try to keep s as close to p as possible).
   job             acceleration strategy switch:
                   job = 0, no acceleration used,
                       = 1, ritz-shifting used.
                       = 2, chebyshev polynomials and ritz-shifting used.
   maxi            maximum number of trace minimization steps allowed.
   tol             user-supplied tolerance for residual of an
                   equivalent eigenpair of B (singular triplet of A).
   red             user-supplied tolerance for residual reduction to
                   initiate ritz-shifting when job=1 or job=2.
   lwork(s)        (logical 0,1) work array.
   (output parameters):
    -----------------
   ierr            ierr=99, input parameter job invalid.
   (error flag)
   mem             number of bytes of memory needed
   
   maxd            maximum polynomial degree used (job = 2).
   mxv(3)          matrix-vector multiplication counters:
                   mxv[0] = number of A*x,
                   mxv[1] = number of A'*x,
                   mxv[2] = mxv[0] + mxv[1]
   sig(s)          contains p-desired singular values.
   y(nrow+ncol,s)  first p columns contain left and right singular
                   vectors, i.e.,
                   y(1:nrow+ncol,1:p)  = [u(1:r,1:p)' | v(1:c,1:p)']',
                   where r = no. of rows of matrix A and
                   c = no. of cols of matrix A.
   titer(s)        titer(i) := number of trace min. steps
                   need for i-th triplet of A.
   itcgt(s)        itcgt(i) := number of cg steps needed for
                   i-th triplet of A.
   time(5)         timing breakdown array:
                   time[0]  = gram-schmidt orthogonalization
                   time[1]  = section-formation (spectral decomposition)
                   time[2]  = convergence criteria
                   time[3]  = conjugate gradient method
                   time[4]  = total execution time (user-cpu)
   res(s)          2-norms of residual vectors
                   (A*y[i]-sig(i)*y[i]), i=1,2,...,p.
   Functions used
   --------------
    BLAS       daxpy, ddot, dgemm3, dgemv, dgemv2, xerbla,
               pmul, dtrsm, porth
    USER       timer,opb,opat,acosh (if not in math library)
    RNG        random
    PRECISION  epslon
    MATH       sqrt,fabs,acosh
    EISPACK    tred2,tql2,pythag
   Precision
   ---------
   All floating-point calculations use double precision;
   variables are declared as long and double. */
long s_tms2::tsvd2(FILE* fp, long n, long p, long s, long job,
           double tol, double red, double *sig, long maxi,
           long *mem, long *itcgt, long *titer,double *time,
           double *res, long *mxv, double **work1, double **work2,
           double **work3, double **work4, double **work5,
          double **y, long **iwork, long *lwork,long *maxd)
{
  double  *workptr5,total,*tempptr5,meps;
  double  tmp,tmpi,pparm1,pparm2,e,enew;
  double  t1,sec1,sec2,sec21,sec22,sec23,sec3,sec4;
  long  degree,ndegre,itmp,ierr,iptr,left,i,j,k,irand,iter;
  long  shift,memsize,temp_left,polyac;
  pparm1 = 0;
  pparm2 = 0;
  e = 0;
  degree = 0;
  ndegre = 0;
  /* allocate memory for temporary arrays in subroutines cgt() and cgts() */
  memsize = sizeof(double) * n * 6;
  if(!(workptr5 = (double *) malloc(memsize))) {
    perror("FIRST MALLOC FAILED IN TSVD2()n");
    exit(errno);
  }
  *mem += memsize;
  tempptr5 = workptr5;
  z = tempptr5;
  tempptr5 += n;
  v2 = tempptr5;
  tempptr5 +=n;
  r = tempptr5;
  tempptr5 += n;
  r2 = tempptr5;
  tempptr5 += n;
  pp = tempptr5;
  tempptr5 += n;
  z2 = tempptr5;
  /* allocate memory for temporary array in subroutine myopb */
  memsize = sizeof(double) * (ncol+nrow);
  if(!(zz = (double *) malloc(memsize))) {
     perror("SECOND MALLOC FAILED IN TSVD2()n");
     exit(errno);
    }
  *mem += memsize;
  /*get machine epsilon (meps) */
  meps = epslon(ONE);
  shift = FALSE;
  ierr = 0;
  if(job && (job != 1) && (job != 2)) {
     ierr = 99;
     return(ierr);
  }
  if(job == 1) {
    for(i=0; i<p; i++) {
        lwork[i] = FALSE;
        work5[2][i] = -ONE;
        work5[3][i] = -ONE;
    }
  }
  else {
    if(job==2) {
         degree = 0;
         ndegre = 0;
         pparm1 = 0.04;
         pparm2 = 4.0;
    }
  }
  /*initialize timers,counters and flags: */
  sec1 = ZERO;
  sec21 = ZERO;
  sec22 = ZERO;
  sec23 = ZERO;
  sec3 = ZERO;
  sec4 = ZERO;
  mxv[0] = 0;
  mxv[1] = 0;
  polyac = FALSE;
  /*initialize y(1:n,1:s) = random matrix
     (carry s vectors in the tmin iterations, assuming s.ge.p) */
  irand =SEED;
  for(k=0; k<s; k++) {
     sig[k] = ZERO;
     work5[1][k] = ZERO;
  }
  for(k=0; k<s; k++) {
      for (i=0; i<n; i++) y[k][i] = mrandom(&irand);
  }
  /*----------------------------------------------------------
       pointer and counter for hybrid monitor:
  
              1 2 3 4 5 ... i ... p
             -----------------------
         sig:| | | | | |...| |...| | (ascending order)
             -----------------------
                            ^
                            |
                          iptr : points to first e-value of B
                                 that has not converged
  
       left:=s-iptr+1 (ie how many y-vectors remain for tsvd2)
  -------------------------------------------------------------- */
  
  /* initialize a few pointers and counters: */
  total = ZERO;
  iptr = 1;
  left = s;
  for(i=0; i<s; i++) {
      titer[i] = 0;
      itcgt[i] = 0;
  }
  /*--------------------------------------------------------------
       main tmin iteration loop (nmax iterations)
   --------------------------------------------------------------*/
  for(iter=0; iter<maxi; iter++) {
     if((job==2) && (!shift)) { 
       if(ndegre) {
         degree = ndegre;
         *maxd = imax(*maxd,degree);
         polyac = TRUE;
       }
       else polyac = FALSE;
     }
/* section formation */
     t1 = timer();
     for(j=0; j<s; j++) {
        for(i=0; i<n; i++) work2[j][i] = y[j][i];
     }
     if(polyac) {
       porth(s,0,n,work2,work1[0],work1[1],work4[0],work4[1],
             work4[2],e,degree,alpha);
       mxv[0] += degree*s;
       mxv[1] += degree*s;
     }
     else {
       orthg(s,0,n,work1,work2,&work4[0][0]);
     }
   t1 = timer() - t1;
   sec1 = sec1 + t1;
  
     /* form work1(1:n,iptr:s) = b(1:n,1:n)*work2(1:n,iptr:s) */
  
     t1 = timer();
  
  if(polyac) {
    for(j=0; j<left; j++) {
       for(i=0; i<n; i++) work1[iptr+j-1][i] = work2[iptr+j-1][i];
    }
  }
  else {
     for(j=0; j<left; j++)
        myopb(n,work2[iptr+j-1],work1[iptr+j-1],ZERO);
  }
 
/* form work3(1:left,1:left) = work2(1:n,iptr:s)'*work1(1:n,iptr:s) */
  dgemm3(TRANSP, NTRANSP, left, left, n, ONE, work2, 0,iptr-1,
         work1, 0, iptr-1, ZERO, work3, 0, 0);
     
  if(job >= 1) {
    for(j=0; j<left; j++)
       work4[0][j] = dasum(left, &work3[0][j], s) - fabs(work3[j][j]);
  }
     t1 = timer() - t1;
     sec21 = sec21 + t1;
     /*    compute the eigenvalues and eigenvectors of work3:  */
     
     t1 = timer();
     /*    eigenvectors overwrite array work3(:,:)
         store current e-values in work5(:,2)  */
  if(polyac) {
    tred2(0,left,work3,work5[0],work4[1],work3);
    ierr = tql2(0,left,work5[0],work4[1],work3);
    if(ierr) printf("IERR FROM TQL2 = %ldn",ierr);
  }
  else {
     
     for(j=iptr-1; j<s; j++) work5[1][j] = sig[j];
     
     tred2(0,left, work3, &sig[iptr-1], &work4[1][0], work3);
     ierr = tql2(0,left, &sig[iptr-1], &work4[1][0], work3);
    if(ierr) printf("IERR FROM TQL2 = %ldn",ierr);
  } 
  t1 = timer() - t1;
  sec22 = sec22 + t1;
     
     /* form y(1:n,iptr:s) = work2(1:n,iptr:s)*work3(1:left,1:left) */
     t1 = timer();
  dgemm3(NTRANSP, NTRANSP, n, left, left, ONE, work2, 0, iptr-1,
           work3, 0, 0, ZERO, y, 0, iptr-1);
     t1 = timer() - t1;
     sec23 = sec23 + t1;
     
     /*    test for convergence here */
     
     t1 = timer();
     for(j=iptr-1; j<s; j++) {
        myopb(n, y[j], work2[j], ZERO);
        if(polyac) {
          work5[1][j]=sig[j];
          sig[j] = ddot(n,y[j],1,work2[j],1)/ddot(n,y[j],1,y[j],1);
        }
        daxpy(n, -sig[j], y[j], 1, work2[j],1);
        work4[2][j-iptr+1] = sqrt(ddot(n,work2[j],1,work2[j],1));
        if(j < p) titer[j] += 1;
     }
     mxv[0] += s-iptr+1;
     mxv[1] += s-iptr+1;
     t1 = timer() - t1;
     sec3 = sec3 + t1;
    temp_left=left; 
    /* array work4(:,3) stores the vector 2-norms of residual vectors */
     for(k=0; k<temp_left; k++) {
         if(fabs(work4[2][k]) <= tol) {
            iptr = iptr + 1;
            if(iptr > p)  {
                          goto L10;
                          }
            left = s-iptr+1;
         }
     }
     if((!shift) && (job>=1)) {
     
       /* work4(:,1) stores gershgorin radii  *
        * iwork(:,1) is the clustering vector */ 
       disk(left, &sig[iptr-1], work4[0], iwork[0], iwork[1]);
       
       for(k=iptr-1; k<s; k++) {
          if(iwork[0][k-iptr+1] == 1)
            isol(k, work4[2][k-iptr+1], red, lwork, work5[2],
                 work5[3],s);
          else
             if(iwork[0][k-iptr+1] > 1)
               clus(k,iwork[0][k-iptr+1],&work4[2][k-iptr+1],red,lwork,
                    work5[2], work5[3], work4[1],s);
       }
     }             
       
     /* continue algorithm...  */
       
     /* shift start */
     
     if((iter>0) && (!shift) && (job>=1)) {
        if(lwork[iptr-1]) {
           shift = TRUE;
           polyac = FALSE;
           orthg(s, 0, n, work1, y, &work4[0][0]);
        }
     }
     if(shift) {
     
     /* compute shifts in work5(:,1) here: */
  
      for(k=iptr-1; k<s; k++) {
         if((k>iptr-1) && (k<=p-1)) {
           work5[0][k] = ZERO;
           for(j=iptr-2; j<k; j++) {
              if((j!=-1) && (sig[j] <= (sig[k] - work4[2][k-iptr+1]))) 
                work5[0][k] = sig[j];
              }
 
           if((work5[0][k] == sig[k-1]) && (work5[0][k-1]==sig[k-1]))
             work5[0][k] = sig[k];
           if(work5[0][k] == ZERO) 
             work5[0][k] = work5[0][k-1];
         }
         else if(k>p-1) work5[0][k] = work5[0][p-1];
              else if(k==iptr-1) work5[0][k] = sig[k];
      }
     }
     t1 = timer();
/* monitor polynomial degree (if job=2) */
   if(job==2) {
     enew = sig[s-1];
     e = fabs(enew-alpha*alpha);
     if((sig[iptr-1]>pparm1*sig[s-1]) && (enew<alpha*alpha) &&
        (!shift)) {
       tmp = acosh(sig[s-1]/sig[iptr-1]);
       itmp = 2*imax(((int) (pparm2/tmp)),1);
       if(degree) ndegre = imin(2*degree,itmp);
       else ndegre = 2;
     }
   }
     
  if(polyac) {
    for(j=iptr-1; j<s; j++) {
       pmul(n,y[j],work2[j],work4[0],work4[1],work4[2],
            e,degree,alpha);
       }
    orthg(left,0,n,work1,&work2[iptr-1],work4[0]);
    for(j=iptr-1; j<s; j++) {
       for(i=0; i<n; i++)
          y[j][i]=work2[j][i];
    }
    dtrsm('R','U',TRANSP,'N',n,left,ONE,work1,&y[iptr-1]);
    mxv[0] += degree*left;
    mxv[1] += degree*left;
 }
 else {
  
     /* do cg iterations */
     /*load y into work1 array for orthog. projector in subroutine cgt*/
     for(j=0; j<left; j++) {
        for(i=0; i<n; i++)
            work1[j][i] = y[iptr+j-1][i];
     }
           
    /* cg loop for independent systems (no shifting used)  */
     if(!shift) 
       for(i=0; i<left; i++)
             cgt(n, left, y[iptr+i-1], work1, &itcgt[iptr+i-1],
                 sig[iptr+i-1], sig[s-1], &iwork[0][i], meps);
     else {
     
         /*shift with work5(:,1) in cg iterations */
       for(i=0; i<left; i++) {
                cgts(n, left, y[iptr+i-1], work1, &itcgt[iptr+i-1],
                     sig[iptr+i-1], work5[1][iptr+i-1], sig[s-1],
                     work5[0][iptr+i-1], &iwork[0][i], meps);
                }
     }
     for(i=0; i<left; i++) {
        mxv[0] += iwork[0][i];
        mxv[1] += iwork[0][i];
     }
  }
     t1 = timer() - t1;
     sec4 += t1;
}
L10:     
   /* compute final 2-norms of residual vectors w.r.t. B */
   for(j=0; j<p; j++) {
     myopb(n, y[j], work2[j], alpha*alpha);
     tmp = ddot(n,y[j],1,work2[j],1)/ddot(n,y[j],1,y[j],1);
     daxpy(n,-tmp,y[j],1,work2[j],1);
     tmp = sqrt(fabs(tmp));
     work4[2][j] = sqrt(ddot(n,work2[j],1,work2[j],1));
     opa(y[j],&y[j][n]);
     tmpi = 1.0/fabs(tmp);
     dscal(nrow,tmpi,&y[j][n],1);
     work4[2][j] = work4[2][j]*tmpi;
     sig[j]=tmp;
   }
   mxv[0] += 2*p;
   mxv[1] += p;
   
   sec2   = sec21 + sec22 + sec23;
   total += sec1 + sec2 + sec3 + sec4;
   
   /* load output time and mxv arrays */
   time[0]=sec1;
   time[1]=sec2;
   time[2]=sec3;
   time[3]=sec4;
   time[4]=total;
   mxv[2]=mxv[0]+mxv[1];
  
  /* load residual vector */
  for(i=0; i<p; i++)
      res[i] = work4[2][i];
  return(ierr);
}