00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033 #include "QMCSlater.h"
00034
00035 static const bool showTimings = false;
00036 static const bool printPsi = false;
00037
00038 QMCSlater::QMCSlater()
00039 {
00040 Start = 0;
00041 Stop = 0;
00042 }
00043
00044 void QMCSlater::operator=(const QMCSlater & rhs )
00045 {
00046 Input = rhs.Input;
00047 Dwc = rhs.Dwc;
00048 rDwc_x = rhs.rDwc_x;
00049 rDwc_xx = rhs.rDwc_xx;
00050
00051 if (Input->flags.calculate_bf_density == 1)
00052 Xw_Density = rhs.Xw_Density;
00053
00054 BF = rhs.BF;
00055 WF = rhs.WF;
00056 Start = rhs.Start;
00057 Stop = rhs.Stop;
00058 if (Input->flags.replace_electron_nucleus_cusps == 1)
00059 ElectronNucleusCusp = rhs.ElectronNucleusCusp;
00060
00061 Singular = rhs.Singular;
00062
00063
00064
00065
00066 Dw = rhs.Dw;
00067 allocate();
00068 }
00069
00070 void QMCSlater::initialize(QMCInput *INPUT,
00071 int startEl, int stopEl,
00072 bool selectAlpha)
00073 {
00074 Start = startEl;
00075 Stop = stopEl;
00076 isAlpha = selectAlpha;
00077 Input = INPUT;
00078 BF = &Input->BF;
00079 WF = &Input->WF;
00080
00081 if(getNumberElectrons() == 0)
00082 return;
00083
00084 allocate();
00085
00086 for(int i=0; i<Singular.dim1(); i++)
00087 Singular(i) = false;
00088
00089 if (Input->flags.replace_electron_nucleus_cusps == 1)
00090 {
00091 Array2D<qmcfloat> * coeffs = WF->getCoeff(isAlpha);
00092 ElectronNucleusCusp.initialize(Input, * coeffs);
00093 ElectronNucleusCusp.fitReplacementOrbitals();
00094 }
00095
00096 #ifdef QMC_GPU
00097
00098
00099 GPUQMCBasisFunction temp(*BF, getNumberElectrons(), Input->flags.getNumGPUWalkers());
00100 gpuBF = temp;
00101
00102 gpuMatMult = GPUQMCMatrix(Input, WF_coeffs,Input->flags.getNumGPUWalkers());
00103 #endif
00104 }
00105
00106 void QMCSlater::allocate()
00107 {
00108 int N = getNumberElectrons();
00109 int ndet = WF->getNumberDeterminants();
00110 int nbasisfunc = WF->getNumberBasisFunctions();
00111 int wpp = Input->flags.walkers_per_pass;
00112 int gpp = Input->flags.getNumGPUWalkers();
00113
00114 if(wpp - gpp > 0)
00115 {
00116 if(WF->getNumberDeterminants() > 1)
00117 ciDet = new Array2D<double>();
00118 Dw.allocate(wpp-gpp);
00119 Dwc_inv.allocate(wpp-gpp,ndet);
00120 Dw_xx.allocate(wpp-gpp);
00121 Dw_x.allocate(wpp-gpp,3);
00122 }
00123
00124 #ifdef QMC_GPU
00125 gpu_Dw.allocate(gpp);
00126 gpu_Dwc_inv.allocate(gpp);
00127 gpu_Dw_xx.allocate(gpp);
00128 gpu_Dw_x.allocate(gpp,3);
00129
00130
00131
00132 assert(0);
00133 for(int j=0; j<gpu_Dw.dim1(); j++)
00134 {
00135 for (int i=0; i<gpu_Dw.dim2(); i++)
00136 gpu_Dwc_inv(j,i).allocate(N,N);
00137
00138 gpu_D(j).allocate(N,N);
00139
00140 gpu_Dw_xx(j).allocate(N,N);
00141 for(int k=0; k<3; k++)
00142 gpu_Dw_x(j,k).allocate(N,N);
00143 }
00144 #endif
00145
00146 Singular.allocate(wpp);
00147 Dwc.allocate(wpp);
00148 rDwc_xx.allocate(wpp);
00149 rDwc_x.allocate(wpp);
00150
00151 pointer_Dwc.allocate(wpp);
00152 pointer_rDwc_xx.allocate(wpp);
00153 pointer_rDwc_x.allocate(wpp);
00154
00155 if (Input->flags.calculate_bf_density == 1)
00156 Xw_Density.allocate(wpp);
00157
00158 for(int w=0; w<wpp; w++)
00159 {
00160 Singular(w).allocate(ndet);
00161 Dwc(w).allocate(ndet);
00162 rDwc_xx(w).allocate(ndet);
00163 rDwc_x(w).allocate(ndet,N,3);
00164 if (Input->flags.calculate_bf_density == 1)
00165 Xw_Density(w).allocate(nbasisfunc);
00166 }
00167
00168 if (Input->flags.optimize_Orbitals == 1)
00169 {
00170 p_a.allocate(wpp);
00171 p2_xa.allocate(wpp);
00172 p3_xxa.allocate(wpp);
00173
00174 for(int w=0; w<wpp; w++)
00175 {
00176 p_a(w).allocate(ndet);
00177 p2_xa(w).allocate(ndet,N,3);
00178 p3_xxa(w).allocate(ndet);
00179
00180 for(int pi=0; pi<ndet; pi++)
00181 {
00182 (p_a(w))(pi).allocate(N,nbasisfunc);
00183 (p3_xxa(w))(pi).allocate(N,nbasisfunc);
00184
00185 for(int pj=0; pj<N; pj++)
00186 for(int pk=0; pk<3; pk++)
00187 (p2_xa(w))(pi,pj,pk).allocate(N,nbasisfunc);
00188 }
00189 }
00190 }
00191
00192 #ifdef QMC_GPU
00193
00194
00195
00196
00197
00198
00199
00200
00201 resultsCollector.allocate(ndet, gpp*5);
00202 for(int iDet=0; iDet<ndet; iDet++)
00203 {
00204 for(int iWalker=0; iWalker<gpp; iWalker++)
00205 {
00206 resultsCollector(iDet,iWalker*5 ) = & gpu_Dw(iWalker, iDet);
00207 resultsCollector(iDet,iWalker*5 + 1) = & gpu_Dw_x(iWalker, iDet, 0);
00208 resultsCollector(iDet,iWalker*5 + 2) = & gpu_Dw_x(iWalker, iDet, 1);
00209 resultsCollector(iDet,iWalker*5 + 3) = & gpu_Dw_x(iWalker, iDet, 2);
00210 resultsCollector(iDet,iWalker*5 + 4) = & gpu_Dw_xx(iWalker, iDet);
00211 }
00212 }
00213 #endif
00214 }
00215
00216 void QMCSlater::allocateIteration(int whichE, int & start, int & stop)
00217 {
00218 int nOR = WF->getNumberOrbitals(isAlpha);
00219 int nEL = getNumberElectrons();
00220 int nBF = WF->getNumberBasisFunctions();
00221 int wpp = Input->flags.walkers_per_pass;
00222
00223
00224 int nUP = whichE == -1 ? nEL : 1;
00225
00226 if(whichE == -1)
00227 {
00228 start = Start;
00229 stop = Stop;
00230 }
00231 else
00232 {
00233 start = whichE;
00234 stop = whichE;
00235 }
00236
00237 bool needToKeepChi = false;
00238 if(globalInput.flags.optimize_Orbitals)
00239 needToKeepChi = true;
00240
00241 if(needToKeepChi)
00242 {
00243
00244
00245
00246
00247
00248 Xw.allocate(wpp);
00249 Xw_x.allocate(wpp);
00250 Xw_xx.allocate(wpp);
00251 }
00252 else
00253 {
00254 Xw.allocate(1);
00255 Xw_x.allocate(1);
00256 Xw_xx.allocate(1);
00257 }
00258
00259 for(int w=0; w<Xw.dim1(); w++)
00260 {
00261 Xw(w).allocate(nUP,nBF);
00262 Xw_xx(w).allocate(nUP,nBF);
00263 Xw_x(w).allocate(3);
00264 for(int xyz=0; xyz<3; xyz++)
00265 (Xw_x(w))(xyz).allocate(nUP,nBF);
00266 }
00267
00268 for(int w=0; w<Dw.dim1(); w++)
00269 {
00270 for (int ci=0; ci<WF->getNumberDeterminants(); ci++)
00271 Dwc_inv(w,ci).allocate(nEL,nEL);
00272
00273 Dw(w).allocate(nUP,nOR);
00274
00275 Dw_xx(w).allocate(nUP,nOR);
00276 for(int xyz=0; xyz<3; xyz++)
00277 Dw_x(w,xyz).allocate(nUP,nOR);
00278 }
00279 }
00280
00281 QMCSlater::~QMCSlater()
00282 {
00283 if (Start != Stop)
00284 {
00285 for(int j=0; j<Dw.dim1(); j++)
00286 {
00287 for (int ci=0; ci<WF->getNumberDeterminants(); ci++)
00288 Dwc_inv(j,ci).deallocate();
00289
00290 Dw(j).deallocate();
00291 Dw_xx(j).deallocate();
00292 for(int k=0; k<3; k++)
00293 Dw_x(j,k).deallocate();
00294 }
00295 if(ciDet != 0)
00296 delete ciDet;
00297 Dw.deallocate();
00298 Dwc_inv.deallocate();
00299 Dw_xx.deallocate();
00300 Dw_x.deallocate();
00301
00302 #ifdef QMC_GPU
00303 for(int j=0; j<gpu_Dw.dim1(); j++)
00304 {
00305 for (int i=0; i<WF->getNumberDeterminants(); i++)
00306 gpu_Dwc_inv(j,i).deallocate();
00307
00308
00309 gpu_Dw(j).deallocate();
00310
00311 gpu_Dw_xx(j).deallocate();
00312 for(int k=0; k<3; k++)
00313 gpu_Dw_x(j,k).deallocate();
00314 }
00315 gpu_Dw.deallocate();
00316 gpu_Dwc_inv.deallocate();
00317 gpu_Dw_xx.deallocate();
00318 gpu_Dw_x.deallocate();
00319 #endif
00320
00321 for(int j=0; j < Input->flags.walkers_per_pass; j++)
00322 {
00323 Singular(j).deallocate();
00324 Dwc(j).deallocate();
00325 rDwc_xx(j).deallocate();
00326 rDwc_x(j).deallocate();
00327 if (Input->flags.calculate_bf_density == 1)
00328 Xw_Density(j).deallocate();
00329 }
00330
00331 if (Input->flags.optimize_Orbitals == 1)
00332 {
00333 for(int j=0; j<p_a.dim1(); j++)
00334 {
00335 for(int pi=0; pi<p_a(j).dim1(); pi++)
00336 {
00337 (p_a(j))(pi).deallocate();
00338 (p3_xxa(j))(pi).deallocate();
00339
00340 for(int pj=0; pj<p2_xa(j).dim2(); pj++)
00341 for(int pk=0; pk<3; pk++)
00342 (p2_xa(j))(pi,pj,pk).deallocate();
00343 }
00344
00345 p_a(j).deallocate();
00346 p2_xa(j).deallocate();
00347 p3_xxa(j).deallocate();
00348 }
00349 p_a.deallocate();
00350 p2_xa.deallocate();
00351 p3_xxa.deallocate();
00352 }
00353
00354 Singular.deallocate();
00355 Dwc.deallocate();
00356 rDwc_xx.deallocate();
00357 rDwc_x.deallocate();
00358
00359 if (Input->flags.calculate_bf_density == 1)
00360 Xw_Density.deallocate();
00361
00362 #ifdef QMC_GPU
00363
00364
00365
00366
00367 resultsCollector.deallocate();
00368
00369 gpuMatMult.destroy();
00370 #endif
00371
00372 for(int k=0; k<Xw.dim1(); k++)
00373 {
00374 Xw(k).deallocate();
00375 Xw_xx(k).deallocate();
00376 for (int j=0; j<3; j++)
00377 (Xw_x(k))(j).deallocate();
00378 Xw_x(k).deallocate();
00379 }
00380 Xw.deallocate();
00381 Xw_xx.deallocate();
00382 Xw_x.deallocate();
00383 }
00384 }
00385
00386 void QMCSlater::update_Ds(Array1D< QMCWalkerData * > & walkerData)
00387 {
00388 int whichE = walkerData(0)->whichE;
00389 int gpp = Input->flags.getNumGPUWalkers();
00390 int wpp = walkerData.dim1();
00391
00392 if(getNumberElectrons() == 0)
00393 return;
00394
00395 for(int w=0; w<wpp; w++)
00396 {
00397
00398
00399
00400
00401
00402
00403 if(globalInput.flags.one_e_per_iter ||
00404 globalInput.flags.use_psuedopotential == 1)
00405 {
00406 if(isAlpha)
00407 {
00408 pointer_Dwc(w) = & walkerData(w+gpp)->DcA;
00409 } else {
00410 pointer_Dwc(w) = & walkerData(w+gpp)->DcB;
00411 }
00412 } else {
00413 pointer_Dwc(w) = & Dwc(w);
00414 }
00415
00416 if(globalInput.flags.one_e_per_iter)
00417 {
00418 if(isAlpha)
00419 {
00420 pointer_rDwc_x(w) = & walkerData(w+gpp)->rDc_xA;
00421 pointer_rDwc_xx(w) = & walkerData(w+gpp)->rDc_xxA;
00422 } else {
00423 pointer_rDwc_x(w) = & walkerData(w+gpp)->rDc_xB;
00424 pointer_rDwc_xx(w) = & walkerData(w+gpp)->rDc_xxB;
00425 }
00426 } else {
00427 pointer_rDwc_x(w) = & rDwc_x(w);
00428 pointer_rDwc_xx(w) = & rDwc_xx(w);
00429 }
00430 }
00431
00432 if(whichE != -1)
00433 if(whichE < Start || whichE > Stop)
00434 return;
00435
00436 for(int w=0; w<wpp; w++)
00437 {
00438
00439 int off;
00440 Array2D<double> * inv;
00441 Array2D<qmcfloat> * gradx, * grady, * gradz, * lap;
00442
00443 #ifdef QMC_GPU
00444
00445
00446
00447 assert(0);
00448
00449
00450
00451
00452
00453 off = w;
00454 gpuMatMult.getResults(resultsCollector);
00455 calculate_DerivativeRatios(0,ci,gpu_D,gpu_Dwc_inv,
00456 gpu_Dw_xx,gpu_Dw_x);
00457
00458 #endif
00459 off = w + gpp;
00460
00461 if(globalInput.flags.one_e_per_iter)
00462 {
00463
00464 int numE = Dw(w).dim1();
00465
00466 int iEl = whichE == -1 ? 0 : whichE - Start;
00467
00468
00469 if(isAlpha)
00470 {
00471 lap = & walkerData(off)->D_xxA;
00472 gradx = & (walkerData(off)->D_xA)(0);
00473 grady = & (walkerData(off)->D_xA)(1);
00474 gradz = & (walkerData(off)->D_xA)(2);
00475 } else {
00476 lap = & walkerData(off)->D_xxB;
00477 gradx = & (walkerData(off)->D_xB)(0);
00478 grady = & (walkerData(off)->D_xB)(1);
00479 gradz = & (walkerData(off)->D_xB)(2);
00480 }
00481
00482
00483 lap->setRows(iEl,0,numE,Dw_xx(w));
00484 gradx->setRows(iEl,0,numE,Dw_x(w,0));
00485 grady->setRows(iEl,0,numE,Dw_x(w,1));
00486 gradz->setRows(iEl,0,numE,Dw_x(w,2));
00487
00488 } else {
00489 lap = & Dw_xx(w);
00490 gradx = & Dw_x(w,0);
00491 grady = & Dw_x(w,1);
00492 gradz = & Dw_x(w,2);
00493 }
00494
00495 for(int ci=0; ci<WF->getNumberDeterminants(); ci++)
00496 {
00497 if(globalInput.flags.one_e_per_iter ||
00498 globalInput.flags.use_psuedopotential == 1)
00499 {
00500 if(isAlpha) inv = & walkerData(off)->Dc_invA(ci);
00501 else inv = & walkerData(off)->Dc_invB(ci);
00502
00503 if(ci > 0 && whichE < 0)
00504 {
00505
00506
00507 if(isAlpha) (*inv) = walkerData(off)->Dc_invA(ci-1);
00508 else (*inv) = walkerData(off)->Dc_invB(ci-1);
00509 }
00510 } else {
00511
00512 inv = & Dwc_inv(w,ci);
00513
00514
00515
00516 if(ci > 0)
00517 (*inv) = Dwc_inv(w,ci-1);
00518 }
00519
00520 Array2D<double> * psi;
00521 #if defined SINGLEPRECISION || defined QMC_GPU
00522 static Array2D<double> temp;
00523 temp = Dw(w);
00524 psi = & temp;
00525 #else
00526 psi = & Dw(w);
00527 #endif
00528
00529 (Singular(off))(ci) = calculate_DerivativeRatios(ci,whichE - Start,
00530 *psi,*inv,*lap,
00531 *gradx, *grady, *gradz,
00532 *pointer_Dwc(w),
00533 *pointer_rDwc_x(w),
00534 *pointer_rDwc_xx(w));
00535
00536 }
00537 }
00538
00539 if(Input->flags.optimize_Orbitals)
00540 {
00541 #if defined SINGLEPRECISION || defined QMC_GPU
00542
00543
00544
00545
00546
00547 cout << "Error: Don't use single precision to optimize the orbitals.\n";
00548
00549 exit(0);
00550 #else
00551
00552
00553
00554
00555
00556
00557
00558
00559
00560
00561
00562
00563
00564
00565
00566
00567
00568
00569
00570
00571
00572
00573
00574
00575
00576 Array1D< Array2D<double> > nabDinvDdD;
00577 nabDinvDdD.allocate(3);
00578
00579
00580 Array2D<double> nab2DinvDdD;
00581
00582 Array2D<double> temp;
00583 Array2D<double> ciData(getNumberElectrons(),getNumberElectrons());
00584
00585 for(int w=0; w<Dw.dim1(); w++)
00586 {
00587 for(int ci=0; ci<WF->getNumberDeterminants(); ci++)
00588 {
00589 Array2D<double> * pa = & (p_a(w+gpp))(ci);
00590 Array2D<double> * p3xxa = & (p3_xxa(w+gpp))(ci);
00591 * pa = 0.0;
00592 * p3xxa = 0.0;
00593
00594
00595 Dwc_inv(w,ci).gemm(Xw(w),*pa,false);
00596 (*pa) *= (Dwc(w+gpp))(ci);
00597
00598 for(int xyz=0; xyz<3; xyz++)
00599 {
00600 WF->getDataForCI(ci,isAlpha,Dw_x(w,xyz),ciData);
00601 ciData.gemm(Dwc_inv(w,ci),temp,false);
00602 temp.gemm(Xw(w),nabDinvDdD(xyz),false);
00603
00604 }
00605 WF->getDataForCI(ci,isAlpha,Dw_xx(w),ciData);
00606 ciData.gemm(Dwc_inv(w,ci),temp,false);
00607 temp.gemm(Xw(w),nab2DinvDdD,false);
00608
00609 int nor = pa->dim1();
00610 int nbf = pa->dim2();
00611 for(int e=0; e<nor; e++)
00612 {
00613 for(int o=0; o<nor; o++)
00614 {
00615 for(int bf=0; bf<nbf; bf++)
00616 {
00617 for(int xyz=0; xyz<3; xyz++)
00618 {
00619 double d2xa = 0.0;
00620 d2xa += (Dwc_inv(w,ci))(o,e) * ((Xw_x(w))(xyz))(e,bf);
00621 d2xa -= (Dwc_inv(w,ci))(o,e) * (nabDinvDdD(xyz))(e,bf);
00622 ((p2_xa(w+gpp))(ci,e,xyz))(o,bf) = d2xa;
00623 }
00624 double d3xxa = 0.0;
00625 d3xxa += (Dwc_inv(w,ci))(o,e) * (Xw_xx(w))(e,bf);
00626 d3xxa -= (Dwc_inv(w,ci))(o,e) * nab2DinvDdD(e,bf);
00627 (*p3xxa)(o,bf) += d3xxa;
00628 }
00629 }
00630 }
00631 }
00632 }
00633
00634 for(int dim=0; dim<nabDinvDdD.dim1(); dim++)
00635 nabDinvDdD(dim).deallocate();
00636 nabDinvDdD.deallocate();
00637 nab2DinvDdD.deallocate();
00638 temp.deallocate();
00639 #endif
00640 }
00641 }
00642
00643 template <class T>
00644 bool QMCSlater::calculate_DerivativeRatios(int ci, int row,
00645 Array2D<double> & psi,
00646 Array2D<double> & inv,
00647 Array2D<T> & lap,
00648 Array2D<T> & gradx,
00649 Array2D<T> & grady,
00650 Array2D<T> & gradz,
00651 Array1D<double> & det,
00652 Array3D<double> & gradPR,
00653 Array1D<double> & lapPR)
00654 {
00655 Array2D<int> * occ = WF->getOccupations(isAlpha);
00656 Array2D<int> * swap = WF->getDeterminantSwaps(isAlpha);
00657
00658 bool calcOK = true;
00659 double ratio = 0;
00660
00661 if(WF->getNumberDeterminants() == 1)
00662 {
00663
00664 ciDet = & psi;
00665 } else {
00666
00667 ciDet->allocate(psi.dim1(),inv.dim1());
00668 WF->getDataForCI(ci,isAlpha,psi,*ciDet);
00669 }
00670 if(ciDet->dim1() > 1 || row < 0)
00671 {
00672 if(ci == 0)
00673 {
00674
00675
00676
00677 ciDet->determinant_and_inverse(inv,det(ci),&calcOK);
00678 } else {
00679
00680
00681
00682
00683
00684
00685 det(ci) = det(ci-1);
00686 for(int o=0; o<swap->dim2(); o++)
00687 {
00688 int so = swap->get(ci,o);
00689 if(so != -1)
00690 {
00691 ratio = inv.inverseUpdateOneColumn(so,*ciDet,so);
00692
00693 det[ci] *= ratio;
00694 if(ratio == 0) break;
00695 } else {
00696 break;
00697 }
00698 }
00699
00700
00701 if(det(ci) == 0)
00702 {
00703 calcOK = false;
00704 ciDet->determinant_and_inverse(inv,det(ci),&calcOK);
00705 }
00706 }
00707 }
00708 else
00709 {
00710
00711 ratio = inv.inverseUpdateOneRow(row,*ciDet,0);
00712
00713
00714
00715 if(ratio != 0.0)
00716 det[ci] *= ratio;
00717 else
00718 calcOK = false;
00719 }
00720
00721 if(WF->getNumberDeterminants() == 1)
00722 ciDet = 0;
00723
00724 if(!calcOK) return true;
00725
00726 int numE = getNumberElectrons();
00727 double & lap_PR = lapPR[ci];
00728 double ** grad_PR = gradPR.array()[ci];
00729 lap_PR = 0.0;
00730
00731 double * p_lap = lap.array();
00732 double * p_grx = gradx.array();
00733 double * p_gry = grady.array();
00734 double * p_grz = gradz.array();
00735
00736 for(int e=0; e<numE; e++)
00737 {
00738 grad_PR[e][0] = 0.0;
00739 grad_PR[e][1] = 0.0;
00740 grad_PR[e][2] = 0.0;
00741 }
00742
00743 for(int o=0; o<numE; o++)
00744 {
00745 int orb = occ->get(ci,o);
00746
00747 for(int e=0; e<numE; e++)
00748 {
00749 int ind = lap.map(e,orb);
00750 double inv_v = inv(o,e);
00751
00752 lap_PR += p_lap[ind] * inv_v;
00753 grad_PR[e][0] += p_grx[ind] * inv_v;
00754 grad_PR[e][1] += p_gry[ind] * inv_v;
00755 grad_PR[e][2] += p_grz[ind] * inv_v;
00756 }
00757 }
00758 return false;
00759 }
00760
00761 #if defined SINGLEPRECISION || defined QMC_GPU
00762 template bool QMCSlater::calculate_DerivativeRatios<float>
00763 (int ci, int row,
00764 Array2D<double> & psi,
00765 Array2D<double> & inv,
00766 Array2D<float> & lap,
00767 Array2D<float> & gradx,
00768 Array2D<float> & grady,
00769 Array2D<float> & gradz,
00770 Array1D<double> & det,
00771 Array3D<double> & gradPR,
00772 Array1D<double> & lapPR);
00773 #endif
00774
00775 template bool QMCSlater::calculate_DerivativeRatios<double>
00776 (int ci, int row,
00777 Array2D<double> & psi,
00778 Array2D<double> & inv,
00779 Array2D<double> & lap,
00780 Array2D<double> & gradx,
00781 Array2D<double> & grady,
00782 Array2D<double> & gradz,
00783 Array1D<double> & det,
00784 Array3D<double> & gradPR,
00785 Array1D<double> & lapPR);
00786
00800 #ifdef QMC_GPU
00801 void QMCSlater::gpuEvaluate(Array1D<Array2D<double>*> &X, int num)
00802 {
00803 if(getNumberElectrons == 0)
00804 return;
00805
00806 static double averageM = 0, timeM = 0;
00807 static double averageB = 0, timeB = 0;
00808 static double numT = -5;
00809 static int nBF = WF->getNumberBasisFunctions();
00810 static int nOE = getNumberElectrons();
00811 static int mat_multiplier = gpuMatMult.getNumIterations() * 1000;
00812 static int bas_multiplier = gpuBF.getNumIterations();
00813
00814 double numOps = 5*num*(nBF * nOE * nOE * 2.0 - nOE * nOE) / 1000.0;
00815
00816 GLuint texture;
00817 Stopwatch sw = Stopwatch();
00818 sw.reset();
00819
00820 if(showTimings)
00821 {
00822 sw.reset(); sw.start();
00823 }
00824
00825 texture = gpuBF.runCalculation(X,num, Start, Stop);
00826
00827 if(showTimings)
00828 {
00829 glFinish();
00830 sw.stop();
00831 timeB = sw.timeUS();
00832 if(numT >= 0) averageB += sw.timeUS();
00833 }
00834
00835 if(showTimings)
00836 {
00837 sw.reset(); sw.start();
00838 }
00839
00840 gpuMatMult.runCalculation(num,texture);
00841
00842 if(showTimings)
00843 {
00844
00845
00846 glFinish();
00847 sw.stop();
00848 timeM = sw.timeUS();
00849 if(numT >= 0) averageM += sw.timeUS();
00850
00851 if( num > 1) numT++;
00852 cout << "\ngpu bf: " << (int)(timeB/bas_multiplier+0.5) << " ( " << (int)(averageB/(numT*bas_multiplier)+0.5) << ") ";
00853 cout << "mm: " << (int)(timeM/mat_multiplier+0.5) << " (" << (int)(averageM/(numT*mat_multiplier)+0.5) << ") ";
00854 cout << "; mflops: " << (int)(mat_multiplier*numOps/timeM+0.5) <<
00855 " (" << (int)(mat_multiplier*numOps/(averageM/numT)+0.5) << ")\n";
00856 }
00857 }
00858 #endif
00859
00860 void QMCSlater::evaluate(Array1D<Array2D<double>*> &R,
00861 int num, int start, int whichE)
00862 {
00863 if(getNumberElectrons() == 0)
00864 return;
00865
00866
00867
00868 if(whichE != -1)
00869 if(whichE < Start || whichE > Stop)
00870 return;
00871
00872 static double averageM = 0, timeM = 0;
00873 static double averageB = 0, timeB = 0;
00874 static double numT = -5;
00875 static int nBF = WF->getNumberBasisFunctions();
00876 static int nOE = getNumberElectrons();
00877 static int mat_multiplier = 1000;
00878 static int bas_multiplier = 1000;
00879
00880 double numOps = 5*num*(nBF * nOE * nOE * 2.0 - nOE * nOE) / 1000.0;
00881 int aveB=0, aveM=0, aveF=0;
00882
00883 Stopwatch sw = Stopwatch();
00884 sw.reset();
00885
00886 timeB = 0; timeM = 0;
00887 for(int walker = 0; walker < num; walker++)
00888 {
00889 if(showTimings)
00890 {
00891 sw.reset(); sw.start();
00892 }
00893
00894 int Chi_i = Xw.dim1() == 1 ? 0 : walker;
00895 int startE, stopE;
00896
00897 allocateIteration(whichE,startE,stopE);
00898
00899 BF->evaluateBasisFunctions(*R(walker+start),
00900 startE,stopE,
00901 Xw(Chi_i),
00902 (Xw_x(Chi_i))(0),
00903 (Xw_x(Chi_i))(1),
00904 (Xw_x(Chi_i))(2),
00905 Xw_xx(Chi_i));
00906
00907 if(showTimings)
00908 {
00909 sw.stop();
00910 timeB += sw.timeUS();
00911 if(numT >= 0) averageB += sw.timeUS();
00912 sw.reset(); sw.start();
00913 }
00914
00915 Array2D<qmcfloat> * coeffs = WF->getCoeff(isAlpha);
00916
00917 Xw(Chi_i).gemm( *coeffs, Dw(walker),true);
00918 Xw_xx(Chi_i).gemm( *coeffs, Dw_xx(walker),true);
00919 (Xw_x(Chi_i))(0).gemm( *coeffs, Dw_x(walker,0),true);
00920 (Xw_x(Chi_i))(1).gemm( *coeffs, Dw_x(walker,1),true);
00921 (Xw_x(Chi_i))(2).gemm( *coeffs, Dw_x(walker,2),true);
00922
00923 if (Input->flags.replace_electron_nucleus_cusps == 1)
00924 ElectronNucleusCusp.replaceCusps(*R(walker+start),
00925 startE,stopE,
00926 Dw(walker),
00927 Dw_x(walker,0),
00928 Dw_x(walker,1),
00929 Dw_x(walker,2),
00930 Dw_xx(walker));
00931
00932 if(showTimings)
00933 {
00934 sw.stop();
00935 timeM += sw.timeUS();
00936 if(numT >= 0) averageM += sw.timeUS();
00937 }
00938 }
00939
00940 for(int walker = 0; walker < num; walker++)
00941 {
00942 int Chi_i = Xw.dim1() == 1 ? 0 : walker;
00943
00944 if (Input->flags.calculate_bf_density == 1)
00945 {
00946 Xw_Density(walker+start) = 0.0;
00947 for (int i=0; i<WF->getNumberBasisFunctions(); i++)
00948 for (int j=0; j<getNumberElectrons(); j++)
00949 {
00950 (Xw_Density(walker))(i) += (Xw(Chi_i))(j,i);
00951 }
00952 }
00953 }
00954
00955 if(showTimings)
00956 {
00957
00958
00959 if(num>1) numT++;
00960 if(numT > 0)
00961 {
00962 aveB = (int)(averageB/(numT*bas_multiplier)+0.5);
00963 aveM = (int)(averageM/(numT*mat_multiplier)+0.5);
00964 aveF = (int)(numOps/(averageM/(numT*mat_multiplier))+0.5);
00965 }
00966 else
00967 {
00968 aveB = 0;
00969 aveM = 0;
00970 aveF = 0;
00971 }
00972
00973 cout << "cpu bf: " << (int)(timeB/bas_multiplier+0.5) << " (" << aveB << ") ";
00974 cout << "mm: " << (int)(timeM/mat_multiplier+0.5) << " (" << aveM << ") ";
00975 cout << "; mflops: " << (int)(mat_multiplier*numOps/timeM+0.5) <<
00976 " (" << aveF << ")\n";
00977 }
00978 }
00979
00980 Array1D<double>* QMCSlater::getPsi(int i)
00981 {
00982 if(getNumberElectrons() != 0)
00983 return pointer_Dwc(i);
00984
00985 if(Dwc.dim1() == 0)
00986 {
00987 Dwc.allocate(1);
00988 Dwc(0).allocate(WF->getNumberDeterminants());
00989 Dwc(0) = 1.0;
00990 }
00991 return &Dwc(0);
00992 }
00993
00994 Array1D<double>* QMCSlater::getLaplacianPsiRatio(int i)
00995 {
00996 if(getNumberElectrons() != 0)
00997 return pointer_rDwc_xx(i);
00998 return 0;
00999 }
01000
01001 Array3D<double>* QMCSlater::getGradPsiRatio(int i)
01002 {
01003 if(getNumberElectrons() != 0)
01004 return pointer_rDwc_x(i);
01005
01006 return 0;
01007 }
01008
01009 Array1D<double>* QMCSlater::getChiDensity(int i)
01010 {
01011 if(getNumberElectrons() != 0)
01012 return &Xw_Density(i);
01013
01014 return 0;
01015 }
01016
01017 bool QMCSlater::isSingular(int j)
01018 {
01019 if(getNumberElectrons() == 0)
01020 return false;
01021
01022 for (int i=0; i<WF->getNumberDeterminants(); i++)
01023 {
01024 if (Singular(j)(i)) return true;
01025 }
01026 return false;
01027 }