QMcBeaver: Array2D.h Source File

00001 //            QMcBeaver
00002 //
00003 //         Constructed by
00004 //
00005 //     Michael Todd Feldmann
00006 //              and
00007 //   David Randall "Chip" Kent IV
00008 //
00009 // Copyright 2000.  All rights reserved.
00010 //
00011 // drkent@users.sourceforge.net mtfeldmann@users.sourceforge.net
00012 
00013 /**************************************************************************
00014 This SOFTWARE has been authored or contributed to by an employee or 
00015 employees of the University of California, operator of the Los Alamos 
00016 National Laboratory under Contract No. W-7405-ENG-36 with the U.S. 
00017 Department of Energy.  The U.S. Government has rights to use, reproduce, 
00018 and distribute this SOFTWARE.  Neither the Government nor the University 
00019 makes any warranty, express or implied, or assumes any liability or 
00020 responsibility for the use of this SOFTWARE.  If SOFTWARE is modified 
00021 to produce derivative works, such modified SOFTWARE should be clearly 
00022 marked, so as not to confuse it with the version available from LANL.   
00023  
00024 Additionally, this program is free software; you can distribute it and/or 
00025 modify it under the terms of the GNU General Public License. Accordingly, 
00026 this program is  distributed in the hope that it will be useful, but WITHOUT 
00027 ANY WARRANTY;  without even the implied warranty of MERCHANTABILITY or 
00028 FITNESS FOR A  PARTICULAR PURPOSE.  See the GNU General Public License 
00029 for more details. 
00030 **************************************************************************/
00031 
00032 #ifndef Array2D_H
00033 #define Array2D_H
00034 
00035 #include <iostream>
00036 #include <iomanip>
00037 #include <typeinfo>
00038 #include "Complex.h"
00039 #include <assert.h>
00040 #include "Array1D.h"
00041 #include "fastfunctions.h"
00042 #include "cppblas.h"
00043 
00044 /*
00045   Kahan's method is very slightly more accurate with
00046   single precision. It is implemented for curiousity's sake,
00047   since it's only available when gemm libraries are not linked.
00048   http://en.wikipedia.org/wiki/Kahan_summation_algorithm
00049 */
00050 static const bool USE_KAHAN = false;
00051 
00052 /*
00053   These are function prototypes for the LAPACK
00054   libraries that might be linked in.
00055   
00056   You may or may not need to modify the function names
00057   depending on the local fortran compiler's calling convention.
00058 */
00059 extern "C"
00060 {
00061   void FORTRAN_FUNC(dgesv)(int* N, int* NRHS,
00062               double *A, int* lda, int* ipiv,
00063               double *B, int* ldb,
00064               int* info);
00065   void FORTRAN_FUNC(sgesv)(int* N, int* NRHS,
00066               float *A, int* lda, int* ipiv,
00067               float *B, int* ldb,
00068               int* info);
00069   void FORTRAN_FUNC(dggev)(const char* jobvl, const char* jobvr, int* n,
00070               double *a, int* lda, double *b, int* ldb,
00071               double *alphar, double *alphai, double *beta,
00072               double *vl, int* ldvl,
00073               double *vr, int* ldvr,
00074               double *work, int* lwork,
00075               int *info);
00076 }
00077 
00078 //Array2D is included in all the classes where this change is relevant
00079 #if defined SINGLEPRECISION || defined QMC_GPU
00080 typedef float  qmcfloat;
00081 const static  float REALLYTINY = 1e-35f;
00082 #else
00083 typedef double qmcfloat;
00084 const static double REALLYTINY = 1e-300;
00085 #endif
00086 
00087 using namespace std;
00088 
00095 template <class T> class Array2D
00096 {
00097  private:
00102   int n_1;
00103   
00104   
00109   int n_2;
00110   
00114   T * pArray;
00115   
00120   Array1D<T> diCol;
00121   Array1D<int> diINDX;
00122   Array1D<T> diVV;
00123   
00124  public:
00129   T* array()
00130     {
00131       return pArray;
00132     }
00133   
00139   int dim1() const
00140     {
00141       return n_1;
00142     }
00143   
00149   int dim2() const
00150     {
00151       return n_2;
00152     }
00153   
00159   int size() const
00160     {
00161       return n_1*n_2;
00162     }
00163   
00171   void allocate(int i, int j)
00172     {
00173 #ifdef QMC_DEBUG
00174       if( i < 0 || j < 0)
00175         {
00176           cerr << "Error: invalid dimensions in Array2D::allocate\n"
00177                << " i = " << i << endl
00178                << " j = " << j << endl;
00179           assert(0);
00180         }
00181 #endif
00182       if( n_1 != i || n_2 != j )
00183         {
00184           deallocate();
00185           
00186           n_1 = i;
00187           n_2 = j;
00188           
00189           if(n_1 >= 1 && n_2 >= 1)
00190             {
00191               pArray = new T[ n_1*n_2 ];
00192             }
00193           else
00194             {
00195               pArray = 0;
00196             }
00197         }
00198     }
00199   
00204   void deallocate()
00205     {
00206       if(pArray != 0)
00207         delete [] pArray;
00208       pArray = 0;
00209       
00210       n_1 = 0;
00211       n_2 = 0;
00212       
00213       diCol.deallocate();
00214       diINDX.deallocate();
00215       diVV.deallocate();
00216     }
00217   
00218   void setupMatrixMultiply(const Array2D<T> & rhs, Array2D<T> & result,
00219                            const bool rhsIsTransposed) const
00220     {
00221       if(rhsIsTransposed)
00222         {
00223 #ifdef QMC_DEBUG
00224           if(n_2 != rhs.n_2)
00225             {
00226               cerr << "ERROR: Transposed Matrix multiplication: " << n_1 << "x"
00227                    << n_2 << " * " << rhs.n_2 << "x" << rhs.n_1 << endl;
00228               exit(1);
00229             }
00230           if(result.dim1() != n_1 || result.dim2() != rhs.n_1)
00231             {
00232               cerr << "Warning: we had to reallocate result: "
00233                    << result.dim1() << "x" << result.dim2() << " to "
00234                    << n_1 << "x" << rhs.n_1 << endl;
00235               //assert(0);
00236             }
00237 #endif
00238           result.allocate(n_1,rhs.n_1);
00239         }
00240       else
00241         {
00242 #ifdef QMC_DEBUG
00243           if(n_2 != rhs.n_1)
00244             {
00245               cerr << "ERROR: Matrix multiplication: " << n_1 << "x"
00246                    << n_2 << " * " << rhs.n_1 << "x" << rhs.n_2 << endl;
00247               exit(1);
00248             }
00249           if(result.dim1() != n_1 || result.dim2() != rhs.n_2)
00250             {
00251               cerr << "Warning: we had to reallocate result: "
00252                    << result.dim1() << "x" << result.dim2() << " to "
00253                    << n_1 << "x" << rhs.n_2 << endl;
00254             }
00255 #endif
00256           result.allocate(n_1,rhs.n_2);
00257         }
00258     }
00259   
00260 #ifdef USEBLAS
00261   
00262   void gemm(const Array2D<double> & rhs, Array2D<double> & result,
00263             const bool rhsIsTransposed) const
00264     {
00265       setupMatrixMultiply(rhs,result,rhsIsTransposed);
00266       
00267       //MxN = alpha * MxK * KxN + beta * MxN
00268       double alpha = 1.0;
00269       double beta  = 0.0;
00270       
00271 #ifdef USE_CBLAS
00272       int M        = result.n_1;
00273       int N        = result.n_2;
00274       int K        = n_2;
00275       int lda      = n_2;
00276       int ldb      = rhs.n_2;
00277       int ldc      = result.n_2;
00278       
00279       CBLAS_TRANSPOSE myTrans = rhsIsTransposed ? CblasTrans : CblasNoTrans;
00280       cblas_dgemm(CBLAS_ORDER(CblasRowMajor),
00281                   CBLAS_TRANSPOSE(CblasNoTrans), myTrans,
00282                   M, N, K,
00283                   alpha, pArray, lda,
00284                   rhs.pArray, ldb,
00285                   beta, result.pArray, ldc);
00286 #else
00287       /*
00288         since fortran is column major, we need to mess around a bit
00289         to trick it into handling our row major data correctly.
00290         a row major matrix as a transposed column major matrix.
00291       */
00292       int M        = result.n_2;
00293       int N        = result.n_1;
00294       int K        = n_2;
00295       int lda      = rhs.n_2;
00296       int ldb      = n_2;
00297       int ldc      = result.n_2;
00298       
00299       char transa = rhsIsTransposed ? 'T' : 'N';
00300       char transb = 'N';
00301       
00302       FORTRAN_FUNC(dgemm)(&transa, &transb,                 
00303              &M, &N, &K,
00304              &alpha,
00305              rhs.pArray, &lda,
00306              pArray, &ldb,
00307              &beta,
00308              result.pArray, &ldc);
00309 #endif
00310     }
00311   
00312   void gemm(const Array2D<float> & rhs, Array2D<float> & result,
00313             const bool rhsIsTransposed) const
00314     {
00315       setupMatrixMultiply(rhs,result,rhsIsTransposed);
00316       
00317       //MxN = alpha * MxK * KxN + beta * MxN
00318       float alpha = 1.0;
00319       float beta  = 0.0;
00320       
00321 #ifdef USE_CBLAS
00322       int M        = result.n_1;
00323       int N        = result.n_2;
00324       int K        = n_2;
00325       int lda      = n_2;
00326       int ldb      = rhs.n_2;
00327       int ldc      = result.n_2;
00328       
00329       CBLAS_TRANSPOSE myTrans = rhsIsTransposed ? CblasTrans : CblasNoTrans;
00330       cblas_sgemm(CBLAS_ORDER(CblasRowMajor),
00331                   CBLAS_TRANSPOSE(CblasNoTrans), myTrans,
00332                   M, N, K,
00333                   alpha, pArray, lda,
00334                   rhs.pArray, ldb,
00335                   beta, result.pArray, ldc);
00336 #else
00337       /*
00338         since fortran is column major, we need to mess around a bit
00339         to trick it into handling our row major data correctly.
00340         a row major matrix as a transposed column major matrix.
00341       */
00342       int M        = result.n_2;
00343       int N        = result.n_1;
00344       int K        = n_2;
00345       int lda      = rhs.n_2;
00346       int ldb      = n_2;
00347       int ldc      = result.n_2;
00348       
00349       char transa = rhsIsTransposed ? 'T' : 'N';
00350       char transb = 'N';
00351       
00352       FORTRAN_FUNC(sgemm)(&transa, &transb,                 
00353              &M, &N, &K,
00354              &alpha,
00355              rhs.pArray, &lda,
00356              pArray, &ldb,
00357              &beta,
00358              result.pArray, &ldc);
00359 #endif
00360     }
00361   
00366   double dotAllElectrons(const Array2D<double> & rhs)
00367     {
00368 #ifdef USE_CBLAS
00369       return cblas_ddot(n_1*n_2, pArray, 1, rhs.pArray, 1);
00370 #else
00371       int inc = 1;
00372       int num = n_1 * n_2;
00373       return FORTRAN_FUNC(ddot)(&num, pArray, &inc, rhs.pArray, &inc);
00374 #endif
00375     }
00376   
00377   float dotAllElectrons(const Array2D<float> & rhs)
00378     {
00379 #ifdef USE_CBLAS
00380       return cblas_sdot(n_1*n_2, pArray, 1, rhs.pArray, 1);
00381 #else
00382       int inc = 1;
00383       int num = n_1 * n_2;
00384       return FORTRAN_FUNC(sdot)(&num, pArray, &inc, rhs.pArray, &inc);
00385 #endif
00386     }
00387   
00392   double dotOneElectron(const Array2D<double> & rhs, int whichElectron)
00393     {
00394 #ifdef USE_CBLAS
00395       return cblas_ddot(n_1, pArray + whichElectron*n_2, 1, rhs.pArray + whichElectron*n_2, 1);
00396 #else
00397       int inc = 1;
00398       return FORTRAN_FUNC(ddot)(&n_1, pArray + whichElectron*n_2, &inc, rhs.pArray + whichElectron*n_2, &inc);
00399 #endif
00400     }
00401   
00402   float dotOneElectron(const Array2D<float> & rhs, int whichElectron)
00403     {
00404 #ifdef USE_CBLAS
00405       return cblas_sdot(n_1, pArray + whichElectron*n_2, 1, rhs.pArray + whichElectron*n_2, 1);
00406 #else
00407       int inc = 1;
00408       return FORTRAN_FUNC(sdot)(&n_1, pArray + whichElectron*n_2, &inc, rhs.pArray + whichElectron*n_2, &inc);
00409 #endif  
00410     }
00411 
00412   void operator+=( const Array2D<double> & rhs)
00413     {
00414 #ifdef USE_CBLAS
00415       cblas_daxpy(n_1*n_2, 1.0, rhs.pArray,1, pArray, 1);
00416 #else
00417       int num = n_1*n_2;
00418       int inc = 1;
00419       double a = 1.0;
00420       FORTRAN_FUNC(daxpy)(&num, &a, rhs.pArray, &inc, pArray, &inc);
00421 #endif
00422     }
00423 
00424 #else
00425   void gemm(const Array2D<T> & rhs, Array2D<T> & result,
00426             const bool rhsIsTransposed) const
00427     {
00428       setupMatrixMultiply(rhs,result,rhsIsTransposed);
00429       
00430       //MxN = MxK * KxN
00431       int M = n_1;
00432       int N = rhsIsTransposed ? rhs.n_1 : rhs.n_2;
00433       int K = n_2;
00434       T * A = pArray;
00435       T * B = rhs.pArray;
00436       T * C = result.pArray;
00437       
00438       if(rhsIsTransposed)
00439         {
00440           if(USE_KAHAN)
00441             {
00442               T Cee, Why, Tee;
00443               int i, j, k;
00444               
00445               for (i = 0; i < M; ++i)
00446                 {
00447                   const register T *Ai_ = A + i*K;
00448                   for (j = 0; j < N; ++j)
00449                     {
00450                       const register T *B_j = B + j*K;
00451                       register T cij = Ai_[0] * B_j[0];
00452                       Cee = 0;
00453                       for (k = 1; k < K; ++k)
00454                         {
00455                           /*
00456                             This is the KSF. The parenthesis
00457                             are critical to its success.
00458                           */
00459                           Why = Ai_[k] * B_j[k] - Cee;
00460                           Tee = cij + Why;
00461                           Cee = (Tee - cij) - Why;
00462                           cij = Tee;
00463                         }
00464                       C[i*N + j] = cij;
00465                     }
00466                 }
00467             }
00468           else//no kahan summation formula
00469             {
00470               int i, j, k;
00471               
00472               for (i = 0; i < M; ++i)
00473                 {
00474                   const register T *Ai_ = A + i*K;
00475                   for (j = 0; j < N; ++j)
00476                     {
00477                       const register T *B_j = B + j*K;
00478                       register T cij = 0;
00479                       for (k = 0; k < K; ++k)
00480                         {
00481                           cij += Ai_[k] * B_j[k];
00482                         }
00483                       C[i*N + j] = cij;
00484                     }
00485                 }
00486             }
00487         }
00488       else// rhs is not transposed, KSF not implemented here
00489         {
00490           for (int i = 0; i < M; ++i)
00491             {
00492               register T *Ai_ = A + i*K;
00493               for (int j = 0; j < N; ++j)
00494                 {
00495                   register T cij = 0;
00496                   for (int k = 0; k < K; ++k)
00497                     {
00498                       cij += Ai_[k] * B[k*N+j];
00499                     }
00500                   C[i*N + j] = cij;
00501                 }
00502             }
00503         }
00504     }
00505   
00510   T dotAllElectrons(const Array2D<T> & rhs)
00511     {
00512       register T temp = 0;
00513       for(int i=0; i<n_1*n_2; i++)
00514         temp += pArray[i]*rhs.pArray[i];
00515       return temp;
00516     }
00517   
00522   T dotOneElectron(const Array2D<T> & rhs, int whichElectron)
00523     {
00524       register T temp = 0;
00525       T * l = pArray + whichElectron*n_2;
00526       T * r = rhs.pArray + whichElectron*n_2;
00527       for(int i=0; i<n_1; i++)
00528         temp += l[i]*r[i];
00529       return temp;
00530     }
00531 
00532   void operator+=( const Array2D<T> & rhs)
00533     {
00534       for(int i=0; i<n_1*n_2; i++)
00535         pArray[i] += rhs.pArray[i];
00536     }
00537 
00538 #endif
00539   
00540   Array2D<T> operator*(const Array2D<T> & rhs) const
00541     {
00542       Array2D<T> TEMP(n_1,rhs.n_2);
00543       TEMP = 0;
00544       gemm(rhs,TEMP,false);
00545       return TEMP;
00546     }
00547   
00548   void setToIdentity(int i)
00549     {
00550       for(int j=0; j<min(n_1,n_2); j++)
00551         {
00552           (*this)(i,j) = (T)(0.0);
00553           (*this)(j,i) = (T)(0.0);
00554         }
00555       (*this)(i,i) = (T)(1.0);
00556     }
00557   
00558   void setToIdentity()
00559     {
00560       *this = (T)(0.0);
00561       for(int i=0; i<min(n_1,n_2); i++)
00562         (*this)(i,i) = (T)(1.0);
00563     }
00564   
00565   bool isIdentity()
00566     {
00567       for(int i=0; i<n_1; i++)
00568         for(int j=0; j<n_2; j++)
00569           {
00570             T val       = (*this)(i,j);
00571             if(i == j) val -= 1.0;
00572             if(fabs(val) > REALLYTINY) return false;
00573           }
00574       return true;
00575     }
00576   
00580   void transpose()
00581     {
00582       Array2D<T> old = *this;
00583       if(n_1 != n_2)
00584         {
00585           n_1 = old.dim2();
00586           n_2 = old.dim1();
00587         }
00588       for(int i=0; i<n_1; i++)
00589         for(int j=0; j<n_2; j++)
00590           (*this)(i,j) = old(j,i);
00591       old.deallocate();
00592     }
00593   
00598   double nonSymmetry()
00599     {
00600       if(n_1 != n_2)
00601         {
00602           cerr << "Array2D::symmetric just handles square matrices right now\n";
00603           return -1.0;
00604         }
00605       
00606       if(n_1 < 2)
00607         return 0.0;
00608       
00609       double diff = 0;
00610       for(int i=0; i<n_1; i++)
00611         for(int j=0; j<i; j++)
00612           {
00613             double term = 0;
00614             if(fabs((*this)(i,j)) > REALLYTINY)
00615               term = ((*this)(i,j) - (*this)(j,i))/(*this)(i,j);
00616             diff += term * term;
00617           }
00618       
00619       //relative error per pair
00620       diff = 2.0*diff/(n_1 * n_2 - n_1);
00621       
00622       //if the matrix is symmetric, it should return zero
00623       //otherwize, this measurement of symmetry.
00624       return diff;
00625     }
00626   
00633   template <class _RHS>
00634     double compare(const Array2D<_RHS> & rhs, double reallyBad, bool print, bool & same)
00635     {
00636       if(n_1 != rhs.dim1() || n_2 != rhs.dim2())
00637         {
00638           cerr << "ERROR: dims don't match for comparison: " << n_1 << "x"
00639                << n_2 << " * " << rhs.dim2() << "x" << rhs.dim1() << endl;
00640           return 0.0;
00641         }
00642       
00643       same = true;
00644       int largestI = 0, largestJ = 0;
00645       double badCount = 0;
00646       double curRelErr=0, largestError = 0;
00647       double curAbsErr=0;
00648       double error = 0, error2 = 0, sample_size = 0;
00649       double abs_error = 0;
00650 
00651       for(int i=0; i<n_1; i++)
00652         {
00653           for(int j=0; j<n_2; j++)
00654             {
00655               curAbsErr = fabs((double)(get(i,j)-rhs.get(i,j)));
00656               double den = 1.0;
00657               if(fabs(rhs.get(i,j)) > 1e-200)
00658                 den = rhs.get(i,j);
00659               curRelErr = fabs( curAbsErr / den );
00660               
00661               if(curRelErr > largestError)
00662                 {
00663                   largestError = curRelErr;
00664                   largestI = i; largestJ = j;
00665                 }
00666               
00667               if(curRelErr > reallyBad)
00668                 {
00669                   badCount++;
00670                   same = false;
00671                   if(print)
00672                     {
00673                       cout << scientific;
00674                       int prec = 8;
00675                       int width = 15;
00676                       cout << "Error at (" << i << "," << j << "): this "
00677                            << setprecision(prec) << setw(width) << get(i,j) << " rhs "
00678                            << setprecision(prec) << setw(width) << rhs.get(i,j) << " current error " << curRelErr << endl;
00679                       
00680                     }
00681                 }
00682               
00683               if(curRelErr < reallyBad)
00684                 {
00685                   abs_error += curAbsErr;
00686                   error  += curRelErr;
00687                   error2 += curRelErr * curRelErr;
00688                   sample_size++;
00689                 }
00690             }
00691         }
00692       
00693       if(print)
00694         {
00695           cout << scientific;
00696           int prec = 8;
00697           int width = 15;
00698           cout << "Largest Error at (" << largestI << "," << largestJ << "): this "
00699                << setprecision(prec) << setw(width) << get(largestI,largestJ) << " rhs "
00700                << setprecision(prec) << setw(width) << rhs.get(largestI,largestJ) << " current error " << largestError << endl;
00701         }
00702       
00703       error  =  error/sample_size;
00704       error2 = error2/sample_size;
00705       double std_dev = sqrt(error2 - error*error);
00706       
00707       if(print)
00708         printf("sample_size %5.2e ave_abs_error %6.3e ave_rel_error %6.3e std_dev %6.3e (%4.2e worse_than %5.3e)\n",
00709                (double)sample_size, abs_error, error, std_dev, badCount, reallyBad);
00710       
00711       //This cuttoff is very generous for double, but about right for float.
00712       //You probably shouldn't rely on "same" for anything important.
00713       if(fabs(error) > 1e-7) same = false;
00714       
00715       return error;
00716     }
00717   
00722   bool operator==(const Array2D & rhs)
00723     {
00724       bool same;
00725       compare(rhs,1e-5,false,same);
00726       return same;
00727     }
00728   
00729   void operator=(const Array2D & rhs)
00730     {
00731       if(n_1 != rhs.dim1() || n_2 != rhs.dim2())
00732         allocate(rhs.dim1(),rhs.dim2());
00733       
00734       memcpy(pArray, rhs.pArray, sizeof(T)*n_1*n_2);
00735     }
00736   
00741   template <class _RHS>
00742     void operator=(const Array2D<_RHS> & rhs)
00743     {
00744       if(n_1 != rhs.dim1() || n_2 != rhs.dim2())
00745         allocate(rhs.dim1(),rhs.dim2());
00746       
00747       for(int i=0; i<n_1; i++)
00748         for(int j=0; j<n_2; j++)
00749           pArray[map(i,j)] = (T)(rhs.get(i,j));
00750     }
00751   
00757   void operator=(const T C)
00758     {
00759 #ifdef QMC_DEBUG
00760       if(n_1 < 1 || n_2 < 1)
00761         {
00762           cerr << "Warning: using Array2D::operator=(const T C)"
00763                << " with dims " << n_1 << " and " << n_2
00764                << "; C = " << C << endl;
00765           //assert(0);
00766         }
00767 #endif
00768       if(C == 0)
00769         {
00770           memset(pArray,0,sizeof(T)*n_1*n_2);
00771           return;
00772         }
00773       
00774       for(int i=0; i<n_1*n_2; i++)
00775         pArray[i] = C;
00776     }
00777   
00778   double addLL()
00779     {
00780       double sum = 0;
00781       for(int i=0; i<n_1; i++)
00782         for(int j=0; j < i; j++)
00783           sum += pArray[n_2*i + j];
00784       return sum;
00785     }
00786 
00790   Array2D operator*(const T C)
00791     {
00792 #ifdef QMC_DEBUG
00793       if(n_1 < 1 || n_2 < 1)
00794         {
00795           cerr << "Warning: using Array2D::operator*(const T C)"
00796                << " with dims " << n_1 << " and " << n_2
00797                << "; C = " << C << endl;
00798           //assert(0);
00799         }
00800 #endif
00801       Array2D<T> TEMP(n_1,n_2);
00802       for(int i=0; i<n_1*n_2; i++)
00803         TEMP.pArray[i] = C*pArray[i];
00804       return TEMP;
00805     }
00806   
00810   Array2D<T> operator-(const Array2D<T> & rhs) const
00811     {
00812 #ifdef QMC_DEBUG
00813       if(n_1 < 1 || n_2 < 1)
00814         {
00815           cerr << "Warning: using Array2D::operator-(const T C)"
00816                << " with dims " << n_1 << " and " << n_2 << endl;
00817           //assert(0);
00818         }
00819 #endif
00820       Array2D<T> TEMP(n_1,n_2);
00821       for(int i=0; i<n_1*n_2; i++)
00822         TEMP.pArray[i] = pArray[i] - rhs.pArray[i];
00823       return TEMP;
00824     }
00825   
00829   Array2D<T> operator+(const Array2D<T> & rhs) const
00830     {
00831 #ifdef QMC_DEBUG
00832       if(n_1 < 1 || n_2 < 1)
00833         {
00834           cerr << "Warning: using Array2D::operator+(const T C)"
00835                << " with dims " << n_1 << " and " << n_2 << endl;
00836           //assert(0);
00837         }
00838 #endif
00839       Array2D<T> TEMP(n_1,n_2);
00840       for(int i=0; i<n_1*n_2; i++)
00841         TEMP.pArray[i] = pArray[i] + rhs.pArray[i];
00842       return TEMP;
00843     }
00844   
00848   void operator*=(const T C)
00849     {
00850 #ifdef QMC_DEBUG
00851       if(n_1 < 1 || n_2 < 1)
00852         {
00853           cerr << "Warning: using Array2D::operator*=(const T C)"
00854                << " with dims " << n_1 << " and " << n_2 
00855                << "; C = " << C << endl;
00856           //assert(0);
00857         }
00858 #endif
00859       for(int i=0; i<n_1*n_2; i++)
00860         pArray[i] *= C;
00861     }
00862   
00866   void operator/=(const T C)
00867     {
00868 #ifdef QMC_DEBUG
00869       if(n_1 < 1 || n_2 < 1)
00870         {
00871           cerr << "Warning: using Array2D::operator/=(const T C)"
00872                << " with dims " << n_1 << " and " << n_2
00873                << "; C = " << C << endl;
00874           //assert(0);
00875         }
00876       if(C == 0.0)
00877         {
00878           cerr << "Error: divide by zero in Array2D::operator/=" << endl;
00879         }
00880 #endif
00881       
00882       T inv = 1.0/C;
00883       operator*=(inv);
00884     }
00885   
00889   Array2D()
00890     {
00891       pArray = 0; n_1 = 0; n_2 = 0;
00892     }
00893 
00900   Array2D(int i, int j)
00901     {
00902       pArray = 0;
00903       n_1 = 0; n_2 = 0;
00904       allocate(i,j);
00905     }
00906   
00916   Array2D(int i, int j, double shift, double range, long & iseed)
00917     {
00918       pArray = 0;
00919       n_1 = 0; n_2 = 0;
00920       allocate(i,j);
00921       
00922       if(iseed > 0) srand(iseed);
00923       for(int idx=0; idx<n_1*n_2; idx++)
00924         {
00925           // between 0 and 1.0
00926           double val = (double)(rand())/RAND_MAX;
00927           val = range * (val + shift);
00928           pArray[idx] = (T)val;
00929         }
00930     }
00931   
00937   Array2D( const Array2D<T> & rhs)
00938     {
00939       n_1 = 0;
00940       n_2 = 0;
00941       pArray = 0;
00942       allocate(rhs.dim1(),rhs.dim2());
00943       operator=(rhs);
00944     }
00945   
00949   void setRow(int whichRow, Array1D<T> & rhs)
00950     {
00951 #ifdef QMC_DEBUG
00952       if(whichRow >= n_1)
00953         cerr << "Error: invalid row index in Array2D::setRow\n";
00954       if(rhs.dim1() > n_2)
00955         cerr << "Error: rhs length is incorrect in Array2D::setRow\n";
00956 #endif
00957       memcpy(pArray + n_2*whichRow, rhs.array(), sizeof(T)*n_2);
00958     }
00959   
00964   void setRows(int to, int from, int numRows, const Array2D<T> & rhs)
00965     {
00966 #ifdef QMC_DEBUG
00967       if(to < 0 || from < 0 || numRows < 0 ||
00968          (to+numRows) > n_1 || (from+numRows) > rhs.dim1() ||
00969          n_2 != rhs.dim2())
00970         {
00971           cerr << "Error: incompatible dimensions in setRows:\n"
00972                << "      to = " << to << endl
00973                << "    from = " << from << endl
00974                << " numRows = " << numRows << endl
00975                << "     n_1 = " << n_1 << endl
00976                << "     n_2 = " << n_2 << endl
00977                << " rhs.n_1 = " << rhs.dim1() << endl
00978                << " rhs.n_2 = " << rhs.dim2() << endl;
00979           assert(0);
00980         }
00981       assert(pArray);
00982       assert(rhs.pArray);
00983       if(numRows == 0)
00984         cout << "Warning: 0 rows in Array2D::setRows" << endl;
00985 #endif
00986       memcpy(pArray + n_2*to, rhs.pArray + n_2*from, sizeof(T)*n_2*numRows);
00987     }
00988 
00989   /*
00990     Interchange two rows in our data.
00991   */
00992   void swapRows(int i, int j)
00993     {
00994       Array2D<T> temp = *this;
00995       setRows(i,j,1,temp);
00996       setRows(j,i,1,temp);
00997     }
00998 
01002   void setColumn(int to, int from, const Array2D<T> & rhs)
01003     {
01004       for(int i=0; i<n_1; i++)
01005         pArray[map(i,to)] = rhs.pArray[rhs.map(i,from)];
01006     }
01007   
01011   ~Array2D()
01012     {
01013       deallocate();
01014     }
01015   
01019   inline int map(int i, int j) const
01020     {
01021 #ifdef QMC_DEBUG
01022       if(i >= n_1 || j >= n_2 ||
01023          i < 0 || j < 0)
01024         {
01025           cerr << "Error: bad index in Array2D::map(" << i << "," << j << "); "
01026                << " allocated (" << n_1 << "," << n_2 << ")" << endl;
01027           assert(0);
01028         }
01029       if(pArray == 0)
01030         {
01031           cerr << "Error: pArray == 0 in Array2D::map" << endl;
01032           assert(0);
01033         }
01034 #endif
01035       return n_2*i + j;
01036     }
01037   
01038   /*
01039     A pointer to a row.
01040   */
01041   T* operator[](int row)
01042     {
01043 #ifdef QMC_DEBUG
01044       if(row >= n_1 || row < 0)
01045         {
01046           cerr << "Error: bad dimension in Array2D::operator[]" << endl
01047                << " row = " << row << endl
01048                << " n_1 = " << n_1 << endl;
01049           assert(0);
01050         }
01051 #endif
01052       return & pArray[n_2 * row];
01053     }
01054   
01058   T& operator()(int i,int j)
01059     {
01060 #ifdef QMC_DEBUG
01061       if(i >= n_1 || j >= n_2 ||
01062          i < 0 || j < 0)
01063         {
01064           cerr << "Error: bad index in Array2D::operator()(" << i << "," << j << "); "
01065                << " allocated (" << n_1 << "," << n_2 << ")" << endl;
01066           assert(0);
01067         }
01068       if(pArray == 0)
01069         {
01070           cerr << "Error: pArray == 0 in Array2D::operator()" << endl;
01071           assert(0);
01072         }
01073 #endif
01074       return pArray[map(i,j)];
01075     }
01076   
01081   T get(int i, int j) const
01082     {
01083 #ifdef QMC_DEBUG
01084       if(i >= n_1 || j >= n_2 ||
01085          i < 0 || j < 0)
01086         {
01087           cerr << "Error: bad index in Array2D::get(" << i << "," << j << "); "
01088                << " allocated (" << n_1 << "," << n_2 << ")" << endl;
01089           assert(0);
01090         }
01091       if(pArray == 0)
01092         {
01093           cerr << "Error: pArray == 0 in Array2D::get(int,int)" << endl;
01094           assert(0);
01095         }
01096 #endif
01097       return pArray[map(i,j)];
01098     }
01099   
01100   double frobeniusNorm()
01101     {
01102       double sum = 0;
01103       for(int i=0; i<n_1*n_2; i++)
01104         sum += pArray[i]*pArray[i];
01105       return sqrt(sum);
01106     }
01107   
01108   double pInfNorm()
01109     {
01110       double norm = 0;
01111       double sum = 0;
01112       for(int i=0; i<n_1; i++)
01113         {
01114           for(int j=0; j<n_2; j++)
01115             {
01116               sum += fabs( get(i,j) );
01117             }
01118           norm = max(sum,norm);
01119           sum = 0;
01120         }
01121       return norm;
01122     }
01123   
01124   double p1Norm()
01125     {
01126       double norm = 0;
01127       double sum = 0;
01128       for(int j=0; j<n_2; j++)
01129         {
01130           for(int i=0; i<n_1; i++)
01131             {
01132               sum += fabs( get(i,j) );
01133             }
01134           norm = max(sum,norm);
01135           sum = 0;
01136         }
01137       return norm;
01138     }
01139   
01147   friend ostream& operator<<(ostream & strm, const Array2D<T> & rhs)
01148     {
01149       if(rhs.n_2 < 8)
01150         rhs.printArray2D(strm, 15, 10, -1, ',', true);
01151       else if(rhs.n_2 < 11)
01152         rhs.printArray2D(strm, 8, 10, -1, ',', true);
01153       else
01154         rhs.printArray2D(strm, 2, 7, -1, ',', false);
01155       
01156       return strm;
01157     }
01158   
01164   void printArray2D(ostream & strm, int numSigFig, int columnSpace, int maxJ, char sep, bool sci) const
01165     {
01166       strm.precision(4);
01167       if(maxJ < 0) maxJ = n_2;
01168       strm << "{";
01169       for(int i=0; i<n_1;i++)
01170         {
01171           if(i==0) strm <<  "{";
01172           else     strm << " {";
01173           for(int j=0; j<n_2 && j<maxJ; j++)
01174             {
01175               if(sci) strm.setf(ios::scientific);
01176               else    strm.unsetf(ios::scientific);
01177 
01178               strm << setprecision(numSigFig)
01179                    << setw(numSigFig+columnSpace);
01180               strm << pArray[map(i,j)];
01181 
01182               if(j<n_2-1) strm << sep;
01183             }
01184           if(i<n_1-1)  strm << "},";
01185           else         strm << "}}";
01186 
01187           //label the rows...
01188           //strm << " " << setw(4) << i;
01189           strm << endl;
01190         }
01191     }
01192   
01204   void ludcmp(double *d, bool *calcOK)
01205     {
01206       int i,j,k;
01207       int imax = -1;
01208       long double big,dum,temp;
01209       long double Why, Cee, Tee;
01210       register long double sum;
01211       long double one = (T)(1.0);
01212       long double zero = (T)(0.0);
01213       
01214       *calcOK = true;
01215       *d=1.0;
01216       
01217       diVV.allocate(n_1);
01218       
01219       //this section finds the largest value from each column
01220       //and puts its inverse in the vv vector.
01221       for (i=0;i<n_1;i++)
01222         {
01223           big=zero;
01224           for (j=0;j<n_1;j++)
01225             if ((temp=(T)(fabs((double)get(i,j)))) > big)
01226               big=temp;
01227           
01228           //checks if the column is full of zeros
01229           if (big == zero)
01230             {
01231               // cerr << "Singular matrix in routine ludcmp*********" << endl;
01232               *calcOK = false;
01233               return;
01234             }
01235           
01236           diVV(i)= (T)(one/big);
01237         }
01238       
01239       //loop over columns
01240       for (j=0;j<n_1;j++)
01241         {
01242           
01243           //part 1: i < j
01244           for (i=0;i<j;i++)
01245             {
01246               sum=-1.0*get(i,j);
01247               
01248               if(USE_KAHAN)
01249                 {
01250                   Cee = 0;
01251                   for (k=0;k<i;k++)
01252                     {
01253                       Why = get(i,k)*get(k,j) - Cee;
01254                       Tee = sum + Why;
01255                       Cee = (Tee - sum) - Why;
01256                       sum = Tee;
01257                     }
01258                 }
01259               else
01260                 {
01261                   
01262                   for (k=0;k<i;k++)
01263                     sum += get(i,k)*get(k,j);
01264                   
01265                 }
01266               
01267               pArray[map(i,j)] = (T)(-1.0*sum);
01268             }
01269           
01270           //part 2: i >= j
01271           big=zero;
01272           for (i=j;i<n_1;i++)
01273             {
01274               sum=-1.0*get(i,j);
01275               
01276               if(USE_KAHAN)
01277                 {
01278                   Cee = 0;
01279                   for (k=0;k<j;k++)
01280                     {
01281                       Why = get(i,k)*get(k,j) - Cee;
01282                       Tee = sum + Why;
01283                       Cee = (Tee - sum) - Why;
01284                       sum = Tee;
01285                     }
01286                 }
01287               else
01288                 {
01289                   
01290                   for (k=0;k<j;k++)
01291                     sum += get(i,k)*get(k,j);
01292                   
01293                 }
01294               
01295               pArray[map(i,j)] = (T)(-1.0*sum);
01296               
01297               //find the best row to pivot
01298               if ( (dum=diVV(i)*(T)(fabs((double)sum))) >= big)
01299                 {
01300                   big=dum;
01301                   imax=i;
01302                 }
01303             }
01304           
01305           //if our row isn't the best to pivot, we need to
01306           //change to the best
01307           if (j != imax)
01308             {
01309               
01310               //swap rows
01311               for (k=0;k<n_1;k++)
01312                 {
01313                   dum=get(imax,k);
01314                   pArray[map(imax,k)]=get(j,k);
01315                   pArray[map(j,k)] = (T)(dum);
01316                 }
01317               //a.swapRows(imax,j);
01318               
01319               //indicate permutation change
01320               *d = -(*d);
01321               
01322               //update vv
01323               diVV(imax)=diVV(j);
01324             }
01325           diINDX(j)=imax;
01326           
01327           //make sure we don't divide by zero
01328           if (get(j,j) == zero)
01329             pArray[map(j,j)]=(T)(REALLYTINY);
01330           
01331           //each element needs to be divided by it's diagonal
01332           if (j != n_1)
01333             {
01334               dum=one/(get(j,j));
01335               for (i=j+1;i<n_1;i++)
01336                 pArray[map(i,j)] *= (T)(dum);
01337             }
01338           
01339         }
01340     }
01341   
01352   void lubksb(Array1D<T> &b)
01353     {
01354       int ii=-1, ip;
01355       long double sum;
01356       long double Cee, Why, Tee;
01357       
01358       for (int i=0;i<n_1;i++)
01359         {
01360           ip=diINDX(i);
01361           sum= -1.0*b(ip);
01362           b(ip)=b(i);
01363           
01364           if (ii>=0)
01365             {
01366               //we do forward-subsitution to find "y"
01367               
01368               if(USE_KAHAN)
01369                 {
01370                   Cee = 0;
01371                   for (int j=ii;j<=i-1;j++)
01372                     {
01373                       Why = get(i,j)*b(j) - Cee;
01374                       Tee = sum + Why;
01375                       Cee = (Tee - sum) - Why;
01376                       sum = Tee;
01377                     }
01378                 }
01379               else
01380                 {
01381                   
01382                   for (int j=ii;j<=i-1;j++)
01383                     sum += get(i,j)*b(j);
01384                 }
01385               
01386             }
01387           else if (sum)
01388             {
01389               //our first non-zero element
01390               ii=i;
01391             }
01392           
01393           b(i) = (T)(-1.0*sum);
01394         }
01395       
01396       for (int i=n_1-1;i>=0;i--)
01397         {
01398           sum=-1.0*b(i);
01399           
01400           //back-substitution to find "x"
01401           if(USE_KAHAN)
01402             {
01403               Cee = 0;
01404               for (int j=i+1;j<n_1;j++)
01405                 {
01406                   Why = get(i,j)*b(j) - Cee;
01407                   Tee = sum + Why;
01408                   Cee = (Tee - sum) - Why;
01409                   sum = Tee;
01410                 }
01411             }
01412           else
01413             {
01414               
01415               for (int j=i+1;j<n_1;j++)
01416                 sum += get(i,j)*b(j);
01417             }
01418           
01419           b(i) = (T)(-1.0*sum/get(i,i));
01420         }
01421     }
01422   
01428   void mprove(const Array2D<T> & A, Array2D<T> & inv)
01429     {
01430       int i,j;
01431       Array1D<T> r = Array1D<T>(n_1);
01432       
01433       for(int k=0; k<n_1; k++)
01434         {
01435           
01436           for(int i=0; i<n_2; i++)
01437             {
01438               long double sdp = i == k ? -1.0 : 0.0;
01439               for(int j=0; j<n_2; j++)
01440                 sdp += (long double) A.get(i,j) * (long double) inv(k,j);
01441               r(i) = sdp;
01442             }
01443           
01444           lubksb(r);
01445           for(int i=0; i<n_1; i++) inv(k,i) -= r(i);
01446         }
01447     }
01448   
01459 #if defined USELAPACK || defined USECLPK
01460   void determinant_and_inverse(Array2D<double> &inv, double& det, bool *calcOK)
01461     {
01462       diINDX.allocate(n_1);
01463       
01464       inv.setToIdentity();
01465       
01466       int info = 0;
01467       int N    = n_1;
01468       int NRHS = n_1;
01469       int LDA  = n_1;
01470       int LDB  = n_1;
01471       
01472       //Warning: this will destroy pArray
01473       FORTRAN_FUNC(dgesv)(&N, &NRHS, pArray, &LDA, diINDX.array(), inv.array(), &LDB, &info);
01474       
01475       if(info == 0) *calcOK = true;
01476       else *calcOK = false;
01477       
01478       det = 1.0;
01479       for(int i=0; i<n_1; i++)
01480         {
01481           if(diINDX(i) != i+1)
01482             det *= -1;
01483           det *= get(i,i);
01484         }
01485     }
01486   
01487   void determinant_and_inverse(Array2D<float> &inv, double& det, bool *calcOK)
01488     {
01489       diINDX.allocate(n_1);
01490       
01491       inv.setToIdentity();
01492       
01493       int info = 0;
01494       int N    = n_1;
01495       int NRHS = n_1;
01496       int LDA  = n_1;
01497       int LDB  = n_1;
01498       
01499       //Warning: this will destroy pArray
01500       FORTRAN_FUNC(sgesv)(&N, &NRHS, pArray, &LDA, diINDX.array(), inv.array(), &LDB, &info);
01501       
01502       if(info == 0) *calcOK = true;
01503       else *calcOK = false;
01504 
01505       det = 1.0;
01506       for(int i=0; i<n_1; i++)
01507         {
01508           if(diINDX(i) != i+1)
01509             det *= -1;
01510           det *= get(i,i);
01511         }
01512     }
01513 #else
01514 
01521   void determinant_and_inverse(Array2D<T> &inv, double& det, bool *calcOK)
01522     {
01523       double d;
01524       T one = (T)1.0;
01525       T zero = (T)0.0;
01526       
01527       diINDX.allocate(n_1);
01528       diCol.allocate(n_1);
01529       inv.allocate(n_1,n_1);
01530       
01531       ludcmp(&d,calcOK);
01532       
01533       if( *calcOK )
01534         {
01535           for(int j=0; j<n_1; j++)
01536             {
01537               diCol = zero;
01538               diCol(j) = one;
01539               lubksb(diCol);
01540               for(int i=0; i<n_1; i++)
01541                 inv(i,j) = diCol(i);
01542             }
01543         }
01544       
01545       for(int i=0; i<n_1; i++)
01546         d *= get(i,i);
01547       det = d;
01548     }
01549 #endif
01550   
01551   /*
01552     Call this function on D^(-1) to update it given
01553     the new row in D.
01554     
01555     The return value will be |D new| / |D|.
01556     
01557     This is slightly different from the Sherman-Morrison
01558     formula because here we are replacing a row, as opposed
01559     to adding values to the row.
01560     
01561     @param row which we are updating the inverse with
01562     @param newD the matrix which has the new row
01563     @param rowInD which row in newD to use for the update
01564     @return the ratio of new to old determinants
01565   */
01566   template <class _RHS>
01567     double inverseUpdateOneRow(int row, Array2D<_RHS> & newD, int rowInD)
01568     {
01569       int d = n_1;
01570 #ifdef QMC_DEBUG
01571       if( rowInD >= newD.dim1() || row >= n_1 ||
01572           rowInD < 0 || row < 0 ||
01573           n_2 != newD.dim2())
01574         {
01575           cout << "Error: dimensions don't match in inverse update." << endl;
01576           cout << "   n_1 = " << n_1 << endl;
01577           cout << "   n_2 = " << n_2 << endl;
01578           cout << "   row = " << row << endl;
01579           cout << " D.n_1 = " << newD.dim1() << endl;
01580           cout << " D.n_2 = " << newD.dim2() << endl;
01581           cout << " D row = " << rowInD << endl;
01582           assert(0);
01583         }
01584 #endif
01585       double detRatio = 0;
01586       for(int r=0; r<d; r++)
01587         detRatio += (T)(newD.get(rowInD,r)) * pArray[map(r,row)];
01588       double q = 1.0/detRatio;
01589       
01590       for(int c=0; c<d; c++)
01591         {
01592           //We need to preserve this column for the moment
01593           if(c == row)
01594             continue;
01595           
01596           double val = 0.0;
01597           for(int l=0; l<d; l++)
01598             val += (T)(newD.get(rowInD,l)) * pArray[map(l,c)];
01599           val *= q;
01600           
01601           for(int r=0; r<d; r++)
01602             pArray[map(r,c)] -= val * pArray[map(r,row)];
01603         }
01604       
01605       //Lastly, we update the column
01606       for(int r=0; r<d; r++)
01607         pArray[map(r,row)] *= q;
01608       
01609       return detRatio;
01610     }
01611 
01612   /*
01613     Call this function on D^(-1) to update it given
01614     the new column in D.
01615     
01616     The return value will be |D new| / |D|.
01617     
01618     This is slightly different from the Sherman-Morrison
01619     formula because here we are replacing a column, as opposed
01620     to adding values to the column.
01621     
01622     @param column which we are updating the inverse with
01623     @param newD the matrix which has the new column
01624     @param columnInD which column in newD to use for the update
01625     @return the ratio of new to old determinants
01626   */
01627   template <class _RHS>
01628     double inverseUpdateOneColumn(int column, Array2D<_RHS> & newD, int columnInD)
01629     {
01630       int d = n_2;
01631 #ifdef QMC_DEBUG
01632       if( columnInD >= newD.dim2() || column >= n_2 ||
01633           columnInD < 0 || column < 0 ||
01634           n_1 != newD.dim1())
01635         {
01636           cout << "Error: dimensions don't match in inverse update." << endl;
01637           cout << "     n_1 = " << n_1 << endl;
01638           cout << "     n_2 = " << n_2 << endl;
01639           cout << "  column = " << column << endl;
01640           cout << "   D.n_1 = " << newD.dim1() << endl;
01641           cout << "   D.n_2 = " << newD.dim2() << endl;
01642           cout << "D column = " << columnInD << endl;
01643           assert(0);
01644         }
01645 #endif
01646       double detRatio = 0;
01647       for(int c=0; c<d; c++)
01648         detRatio += (T)(newD.get(c,columnInD)) * pArray[map(column,c)];
01649       double q = 1.0/detRatio;
01650       
01651       for(int r=0; r<d; r++)
01652         {
01653           //We need to preserve this row for the moment
01654           if(r == column)
01655             continue;
01656           
01657           double val = 0.0;
01658           for(int l=0; l<d; l++)
01659             val += (T)(newD.get(l,columnInD)) * pArray[map(r,l)];
01660           val *= q;
01661           
01662           for(int c=0; c<d; c++)
01663             pArray[map(r,c)] -= val * pArray[map(column,c)];
01664         }
01665       
01666       //Lastly, we update the row
01667       for(int c=0; c<d; c++)
01668         pArray[map(column,c)] *= q;
01669       
01670       return detRatio;
01671     }
01672   
01690   void rref()
01691     {
01692       double max_data, temp;
01693       int max_index, i, j, k, l;
01694       int order = min(n_1,n_2);
01695       Array1D<int> pivots(order);
01696       double tolerance = 1e-20;
01697       bool allowPivot = true;
01698       
01699       l = 0;
01700       for (k = 0; k <  order; k++)
01701         {
01702           max_data  = pArray[map(l,k)];
01703           max_index = l;
01704           
01705           if(allowPivot)
01706             {
01707               //try to figure out if there is a target row
01708               //we'd rather operate on
01709               for (i = l; i < n_1; i++)
01710                 {
01711                   if (fabs(pArray[map(i,k)]) > fabs(max_data))
01712                     {
01713                       max_data  = pArray[map(i,k)];
01714                       max_index = i;
01715                     }
01716                 }
01717             }
01718           
01719           pivots(k) = max_index;
01720           if (fabs(max_data) > tolerance)
01721             {
01722               //whether or not we pivot, we still want to scale
01723               //our target row so that the first element is 1.0
01724               for (j = k; j < n_2; j++)
01725                 {
01726                   temp                     = pArray[map(max_index,j)] / max_data;
01727                   pArray[map(max_index,j)] = pArray[map(l,j)];
01728                   pArray[map(l,j)]         = temp;
01729                 }
01730               
01731               //and we will subtract from all lower rows
01732               //the target row, multiplied by a scale factor to give
01733               //each row a 0.0 for this column.
01734               for (i = l + 1; i < n_1; i++)
01735                 {
01736                   double scale = pArray[map(i,l)];
01737                   for (j = k; j < n_2; j++)
01738                     pArray[map(i,j)] -= pArray[map(l,j)] * scale;
01739                 }
01740               
01741               l++;
01742             }
01743         }
01744       
01745       /*
01746         At this point, we have 1.0 along the diagonal, and 0.0 below
01747         the diagonal. Now we need to make the other
01748         half of that triangle to be zero.
01749       */
01750       for (k = 0; k < order-1; k++)
01751         {
01752           if (pArray[map(k+1,k+1)] != 0)
01753             {
01754               for (i = k; i >= 0; i--) 
01755                 {
01756                   double scale = pArray[map(i,k+1)];            
01757                   for (j = k+1; j < n_2; j++) 
01758                     pArray[map(i,j)] -= pArray[map(k+1,j)] * scale;
01759                 }
01760             }
01761         }
01762     }
01763 
01780   void rref(int t)
01781     {
01782       if(isDependent(t) != -1) 
01783         {
01784           //this variable is already dependent, so there's nothing to do
01785           return;
01786         }
01787 
01788       /*
01789         Nothing will be damaged by bad calls, but it might be annoying to print
01790         messages.
01791       */
01792       bool printWarnings = true;
01793       if(t < 0 || t >= n_2)
01794         {
01795           if(printWarnings)
01796             {
01797               cerr << "Error: invalid variable elimination. ";
01798               cerr << "Number variables = " << n_2 << " but ";
01799               cerr << "t = " << t << "." << endl;
01800             }
01801           return;
01802         }
01803 
01804       /*
01805         We just have to make sure that given enough calls
01806         to this function, all rows will be visited.
01807       */
01808       static int s = -1;
01809       s++;
01810       if(s >= n_1) s=0;
01811 
01812       double tolerance = 1e-10;
01813 
01814       /*
01815         We're going to divide by this scale, so
01816         we need to find a non zero value.
01817       */
01818       double scale = pArray[map(s,t)];
01819       int best = s;
01820       bool willUnsolve = false;
01821       for(int r=n_1-1; r>=0; r--)
01822         {
01823           scale  = pArray[map(r,t)];
01824           
01825           if(fabs(scale) < tolerance)
01826             continue;
01827           else
01828             best = r;
01829 
01830           willUnsolve = constraintUsed(r) == -1 ? false : true;
01831           if(constraintUsed(r) == -1)
01832             {
01833               best = r;
01834               break;
01835             }
01836         }
01837 
01838       if(best != s)
01839         swapRows(best,s);
01840       scale = pArray[map(s,t)];
01841 
01842       /*
01843         If we're given a column with all zeros, then there's nothing we
01844         can do for this variable.
01845       */
01846       if(fabs(scale) <= tolerance)
01847         {
01848           if(printWarnings)
01849             {
01850               cerr << "Warning: none of the constraints use variable t = " << t
01851                    << " so we can't eliminate it!" << endl;
01852             }
01853           return;
01854         }
01855 
01856       /*
01857         Scale the constraint row so that the variable column has
01858         a 1.0 on that row.
01859         
01860         If our scale is already 1.0, don't waste time.
01861        */
01862       if(fabs(scale - 1.0) > tolerance)
01863         {
01864           for(int c = 0; c < n_2; c++)
01865             pArray[map(s,c)] = pArray[map(s,c)] / scale;
01866         }
01867       /*
01868         Mathematically, this is already 1.0, so we set it
01869         to eliminate roundoff error.
01870       */
01871       pArray[map(s,t)] = 1.0;
01872 
01873       /*
01874         Subtract from all other rows the target row,
01875         multiplied by the scale factor which will leave 0.0
01876         in this column.
01877       */
01878       for(int r = 0; r < n_1; r++) 
01879         {
01880           scale = pArray[map(r,t)];
01881 
01882           if(r == s || fabs(scale) < tolerance) continue;
01883 
01884           for(int c = 0; c < n_2; c++) 
01885             {
01886               /*
01887                 This subtraction is highly vulnerable to
01888                 "catastrophic cancellation".
01889               */
01890               pArray[map(r,c)] -= pArray[map(s,c)] * scale;
01891 
01892 
01893               //So eliminate values that are near to zero
01894               if(fabs(pArray[map(r,c)]) < tolerance)
01895                 pArray[map(r,c)] = 0.0;
01896             }     
01897 
01898           //Mathematically, this is already 0.0
01899           pArray[map(r,t)] = 0.0;
01900         }
01901     }
01902 
01903   /*
01904     This function is useful in conjunction with rref(int). It will
01905     count the number of dependent variables in the data.
01906 
01907     Of course, you can not have more dependent variables than
01908     rows.
01909   */
01910   int numDependent()
01911     {
01912       int numDep = 0;
01913       for(int j=0; j<dim2(); j++)
01914         if(isDependent(j) > -1)
01915           numDep++;
01916       return numDep;
01917     }
01918 
01919   /*
01920     This function is useful in conjunction with rref(int).
01921     It will determine whether a particular column represents
01922     a dependent variable. If it does, then it will return
01923     the row index containing the 1.0.
01924   */
01925   int isDependent(int column)
01926     {
01927       double tolerance = 1e-50;
01928       int hasOne  = -1;
01929       bool zeros  = true;
01930       for(int i=0; i<dim1(); i++)
01931         {
01932           if(fabs(pArray[map(i,column)] - 1.0) < tolerance && hasOne < 0)
01933             hasOne = i;
01934           else if(fabs(pArray[map(i,column)]) > tolerance)
01935             zeros = false;
01936         }
01937       
01938       /*
01939         We know the variable is dependent if there is exactly
01940         one 1.0 in the column and the rest of the column is zero.
01941       */
01942       if(hasOne > -1 && zeros)
01943         return hasOne;
01944 
01945       return -1;
01946     }
01947 
01956   int constraintUsed(int r)
01957     {
01958       for(int c=0; c<n_2; c++)
01959         if(fabs(pArray[map(r,c)] - 1.0) < 1e-10)
01960           if(isDependent(c) == r)
01961             return c;
01962       return -1;
01963     }
01964 
01976   void generalizedEigenvectors(Array2D<double> A,
01977                                Array2D<double> B,
01978                                Array1D<Complex> & eigval)
01979     {
01980 #ifdef USELAPACK
01981       allocate(A.n_1,A.n_1);
01982       
01983       char vl  = 'N';
01984       char vr  = 'V';
01985       int n    = n_1;
01986       int lda  = A.n_1;
01987       int ldb  = B.n_1;
01988       int ldvl = 1;
01989       int ldvr = n_1;
01990       int info = 0;
01991       
01992       Array1D<double> alphar(n);
01993       Array1D<double> alphai(n);
01994       Array1D<double> beta(n);
01995       
01996       Array1D<double> work(8*n);
01997       int lwork = work.dim1();
01998       FORTRAN_FUNC(dggev)(&vl,&vr,&n,
01999              A.array(), &lda, B.array(), &ldb,
02000              alphar.array(), alphai.array(), beta.array(),
02001              (double*)0,&ldvl,
02002              pArray, &ldvr,
02003              work.array(), &lwork,
02004              &info);
02005       
02006       if(lwork == -1 && info == 0)
02007         {
02008           cout << "Notice: dggev recommends " << work(0) << " memory." << endl;
02009         }
02010       else if(info == 0)
02011         {
02012           eigval.allocate(n);
02013           for(int i=0; i<n; i++)
02014             {
02015               //if(fabs(beta(i)) < 1e-50)
02016               //  cout << "Eigenvalue " << setw(3) << i << " has small beta " << setw(15) << beta(i) << endl;
02017               double real = alphar(i) / beta(i);
02018               double imag = alphai(i) / beta(i);
02019               
02020               eigval(i) = Complex(real,imag);
02021             }
02022         }
02023       else
02024         {
02025           cout << "Error: dggev reported info = " << info << endl;
02026         }
02027       
02028       alphar.deallocate();
02029       alphai.deallocate();
02030       beta.deallocate();
02031       work.deallocate();
02032       
02033 #else
02034       cout << "Error: dggev requires you to compile with LAPACK!!" << endl;
02035 #endif
02036     }
02037   
02044   Array1D<double> eigenvaluesRS()
02045     {
02046       Array1D<double> wr;
02047       
02048       if(n_1 != n_2)
02049         {
02050           cout << "Error: no eigenvalues for non square matrix!" << endl
02051                << " n_1 = " << n_1 << endl
02052                << " n_2 = " << n_2 << endl
02053                << endl;
02054           return wr;
02055         }
02056       
02057       double check = nonSymmetry();
02058       if(check > 1e-5)
02059         {
02060           cout << "Error: eigenvaluesRS only works with symmetric matrices" << endl
02061                << " nonSymmetry = " << check << endl;
02062           return wr;
02063         }
02064       
02065       wr.allocate(n_1);
02066       wr = 0.0;
02067       
02068       Array1D<double> fv1;
02069       fv1.allocate(n_1);
02070       
02071       Array2D<double> A = *this;
02072       
02073       int i, ii, ierr = -1, j, k, l, l1, l2;
02074       double c, c2, c3, dl1, el1, f, g, h, p, r, s, s2, scale, tst1, tst2;
02075       // ======BEGINNING OF TRED1 ===================================
02076       
02077       ii = n_1 - 1;
02078       for (i = 0; i < ii; i++)
02079         {
02080           wr[i] = A[ii][i];
02081           A[ii][i] = A[i][i];
02082         }//End for i
02083       wr[ii] = A[ii][ii]; // Take last assignment out of loop
02084       
02085       for (i = (n_1 - 1); i >= 0; i--)
02086         {
02087           
02088           l = i - 1;
02089           scale = h = 0.0;
02090           
02091           if (l < 0)
02092             {
02093               fv1[i] = 0.0;
02094               continue;
02095             } // End if (l < 0)
02096           
02097           for (j = 0; j <= l; j++)
02098             scale += fabs(wr[j]);
02099           
02100           if (scale == 0.0)
02101             {
02102               for (j = 0; j <= l; j++)
02103                 {
02104                   wr[j] = A[l][j];
02105                   A[l][j] = A[i][j];
02106                   A[i][j] = 0.0;
02107                 }//End for j
02108               
02109               fv1[i] = 0.0;
02110               continue;
02111             } // End if (scale == 0.0)
02112           
02113           for (j = 0; j <= l; j++)
02114             {
02115               wr[j] /= scale;
02116               h += wr[j]*wr[j];
02117             }//End for j
02118           
02119           f = wr[l];
02120           g = ((f >= 0) ? -sqrt(h) : sqrt(h));
02121           fv1[i] = g*scale;
02122           h -= f*g;
02123           wr[l] = f - g;
02124           
02125           if (l != 0)
02126             {
02127               
02128               for (j = 0; j <= l; j++)
02129                 fv1[j] = 0.0;
02130               
02131               for (j = 0; j <= l; j++)
02132                 {
02133                   f = wr[j];
02134                   g = fv1[j] + f*A[j][j];
02135                   for (ii = (j + 1); ii <= l; ii++)
02136                     {
02137                       g += wr[ii]*A[ii][j];
02138                       fv1[ii] += f*A[ii][j];
02139                     } // End for ii
02140                   fv1[j] = g;
02141                 }//End for j
02142               
02143               // Form p
02144               
02145               f = 0.0;
02146               for (j = 0; j <= l; j++)
02147                 {
02148                   fv1[j] /= h;
02149                   f += fv1[j]*wr[j];
02150                 }//End for j
02151               
02152               h = f/(h*2);
02153               
02154               // Form q
02155               
02156               for (j = 0; j <= l; j++)
02157                 fv1[j] -= h*wr[j];
02158               
02159               // Form reduced A
02160               
02161               for (j = 0; j <= l; j++)
02162                 {
02163                   f = wr[j];
02164                   g = fv1[j];
02165                   
02166                   for (ii = j; ii <= l; ii++)
02167                     A[ii][j] -= f*fv1[ii] + g*wr[ii];
02168                   
02169                 }//End for j
02170               
02171             } // End if (l != 0)
02172           
02173           for (j = 0; j <= l; j++)
02174             {
02175               f = wr[j];
02176               wr[j] = A[l][j];
02177               A[l][j] = A[i][j];
02178               A[i][j] = f*scale;
02179             }//End for j
02180           
02181         }//End for i
02182       
02183       // ======END OF TRED1 =========================================
02184       
02185       // ======BEGINNING OF TQL1 ===================================
02186       
02187       for (i = 1; i < n_1; i++)
02188         fv1[i - 1] = fv1[i];
02189       
02190       fv1[n_1 - 1] = tst1 = f = 0.0;
02191       
02192       for (l = 0; l < n_1; l++)
02193         {
02194           
02195           j = 0;
02196           h = fabs(wr[l]) + fabs(fv1[l]);
02197           
02198           tst1 = ((tst1 < h) ? h : tst1);
02199           
02200           // Look for small sub-diagonal element
02201           
02202           for (k = l; k < n_1; k++)
02203             {
02204               tst2 = tst1 + fabs(fv1[k]);
02205               if (tst2 == tst1) break; // fv1[n_1-1] is always 0, so there is no exit through the bottom of the loop
02206             }//End for k
02207           
02208           if (k != l)
02209             {
02210               
02211               do
02212                 {
02213                   
02214                   if (j == 30)
02215                     {
02216                       ierr = l;
02217                       break;
02218                     } // End if (j == 30)
02219                   
02220                   j++;
02221                   
02222                   // Form shift
02223                   
02224                   l1 = l + 1;
02225                   l2 = l1 + 1;
02226                   g = wr[l];
02227                   p = (wr[l1] - g)/(2.0*fv1[l]);
02228                   r = pythag(p, 1.0);
02229                   scale = ((p >= 0) ? r : -r); // Use scale as a dummy variable
02230                   scale += p;
02231                   wr[l] = fv1[l]/scale;
02232                   dl1 = wr[l1] = fv1[l]*scale;
02233                   h = g - wr[l];
02234                   
02235                   for (i = l2; i < n_1; i++)
02236                     wr[i] -= h;
02237                   
02238                   f += h;
02239                   
02240                   // q1 transformation
02241                   
02242                   p = wr[k];
02243                   c2 = c = 1.0;
02244                   el1 = fv1[l1];
02245                   s = 0.0;
02246                   c3 = 99.9;
02247                   s2 = 99.9;
02248                   // Look for i = k - 1 until l in steps of -1
02249                   
02250                   for (i = (k - 1); i >= l; i--)
02251                     {
02252                       c3 = c2;
02253                       c2 = c;
02254                       s2 = s;
02255                       g = c*fv1[i];
02256                       h = c*p;
02257                       r = pythag(p, fv1[i]);
02258                       fv1[i + 1] = s*r;
02259                       s = fv1[i]/r;
02260                       c = p/r;
02261                       p = c*wr[i] - s*g;
02262                       wr[i + 1] = h + s*(c*g + s*wr[i]);
02263                     }//End for i
02264                   
02265                   p = -s*s2*c3*el1*fv1[l]/dl1;
02266                   fv1[l] = s*p;
02267                   wr[l] = c*p;
02268                   tst2 = tst1 + fabs(fv1[l]);
02269                 }
02270               while (tst2 > tst1); // End do-while loop
02271               
02272             } // End if (k != l)
02273           
02274           if (ierr >= 0) //This check required to ensure we break out of for loop too, not just do-while loop
02275             break;
02276           
02277           p = wr[l] + f;
02278           
02279           // Order eigenvalues
02280           
02281           // For i = l to 1, in steps of -1
02282           for (i = l; i >= 1; i--)
02283             {
02284               if (p >= wr[i - 1])
02285                 break;
02286               wr[i] = wr[i - 1];
02287             }//End for i
02288           
02289           wr[i] = p;
02290           
02291         }//End for l
02292       
02293       // ======END OF TQL1 =========================================
02294       
02295       A.deallocate();
02296       fv1.deallocate();
02297       return wr;
02298     }
02299 };
02300 
02301 #endif