00001
00002
00003
00004
00005
00006
00007
00008 #include "GPUQMCJastrowElectronElectron.h"
00009 #ifdef QMC_GPU
00010
00011
00012
00013 static const bool PRINT_SHADER = false; // not referenced in the visible part of this file
00014 static const int TIMING_REPS = 10; // repetitions of each stage when PRINT_TIMINGS is defined; also returned by getNumIterations()
00015 static const bool INT_FINISHES = false; // when true, glFinish() is called after each GPU stage
00016
00017 static const bool USE_TRIANGLES = true; // drawPrimative(): one oversized triangle (true) or a quad (false)
00018 static const bool REUSE_SHADERS = false; // loadShaders() generates shaders from source only while this and shadersCreated are both false
00019 static bool shadersCreated = false; // set to true at the end of loadShaders()
00020
00021
00022 #define TEXTURE_INTERNAL_FORMAT GL_FLOAT_RGBA32_NV // not referenced in the visible part of this file
00023 #define TEXTURE_TARGET GL_TEXTURE_RECTANGLE_NV // not referenced in the visible part of this file
00024
00025
00026 vector<CGprogram> GPUQMCJastrowElectronElectron::mapElectronsCG; // translation programs, filled by loadShaders()
00027 vector<CGparameter> GPUQMCJastrowElectronElectron::inputCGP; // "input" parameter of each translation program
00028 CGparameter GPUQMCJastrowElectronElectron::mixedInputCGP; // "inputMix" parameter of the ab translation program
00029
00030 vector<CGprogram> GPUQMCJastrowElectronElectron::polynomialCG; // polynomial programs, filled by loadShaders()
00031 vector<CGparameter> GPUQMCJastrowElectronElectron::polyInputCGP; // "input" parameter of each polynomial program
00032
00033 CGprogram GPUQMCJastrowElectronElectron::sumReductionCG; // sum reduction program used by sumAllJastrowValues()
00034 vector<CGparameter> GPUQMCJastrowElectronElectron::sumReductionCGP; // pushed in the order inputAA, inputBB, inputAB
00035
00036 vector<CGprogram> GPUQMCJastrowElectronElectron::gradientReductionCG; // two gradient reduction programs (which = 0, 1)
00037 vector<CGparameter> GPUQMCJastrowElectronElectron::gradientReductionCGP; // per program: inputParallel, then inputOpposite
00038
00039 GPUQMCJastrowElectronElectron::GPUQMCJastrowElectronElectron()
00040 {
00041 nCols = 0; nRows = 0;
00042 }
00043
00044 GPUQMCJastrowElectronElectron::GPUQMCJastrowElectronElectron(
00045 QMCJastrowElectronElectron jee, int max_calcs) : QMCJastrowElectronElectron(jee)
00046 {
00047 getFactors(max_calcs,nRows,nCols);
00048 allocatedRows = nRows;
00049 allocatedCols = nCols;
00050
00051 numA = Input->WF.getNumberElectrons(true);
00052 numB = Input->WF.getNumberElectrons(false);
00053 numE = numA + numB;
00054 if(numA > numB) numLarger = numA;
00055 else numLarger = numB;
00056
00057 r1r2FB = new GPUQMCFramebuffer[3];
00058 r1r2FB[aa].initialize( nCols*numA, nRows*numA, 1, 1);
00059 r1r2FB[bb].initialize( nCols*numB, nRows*numB, 1, 1);
00060 r1r2FB[ab].initialize( nCols*numA, nRows*numB, 1, 1);
00061
00062 polynomialFB = new GPUQMCFramebuffer[3];
00063 polynomialFB[aa].initialize( nCols*numA, nRows*numA, 1, 2);
00064 polynomialFB[bb].initialize( nCols*numB, nRows*numB, 1, 2);
00065 polynomialFB[ab].initialize( nCols*numA, nRows*numB, 1, 2);
00066
00067 finalUandLapUFB = new GPUQMCFramebuffer( nCols*1, nRows*numLarger, 1, 1);
00068 finalGradUFB = new GPUQMCFramebuffer( nCols*1, nRows*numE, 1, 1);
00069
00070 array_sum.allocate(max_calcs);
00071 array_grad_sum.allocate(max_calcs);
00072 array_lap_sum.allocate(max_calcs);
00073 for(int i=0; i<max_calcs; i++)
00074 {
00075 array_grad_sum(i).allocate(numE,3);
00076 }
00077
00078 GET_GLERROR("Error setting up framebuffer");
00079
00080 cpuData = (GLfloat *) calloc( nCols*numLarger * nRows*numLarger * 4 , sizeof(GLfloat) );
00081
00082 if(mapElectronsCG.empty())
00083 loadShaders();
00084 }
00085
00086 GPUQMCJastrowElectronElectron::~GPUQMCJastrowElectronElectron()
00087 {
00088 delete [] cpuData;
00089
00090 for(int i=0; i<array_grad_sum.dim1(); i++)
00091 {
00092 array_grad_sum(i).deallocate();
00093 }
00094 array_sum.deallocate();
00095 array_grad_sum.deallocate();
00096 array_lap_sum.deallocate();
00097
00098 delete finalUandLapUFB;
00099 delete finalGradUFB;
00100 delete [] r1r2FB;
00101 delete [] polynomialFB;
00102 }
00103
00104 GLuint GPUQMCJastrowElectronElectron::runCalculation(GLuint aElectronsTexID, GLuint bElectronsTexID, int num)
00105 {
00106 getFactors(num,nRows,nCols);
00107
00108 #ifdef PRINT_TIMINGS
00109 Stopwatch sw = Stopwatch();
00110 sw.reset(); sw.start();
00111 for(int numReps=0; numReps<TIMING_REPS; numReps++)
00112 #endif
00113
00114 translateElectronPositions(aElectronsTexID, bElectronsTexID);
00115
00116 #ifdef PRINT_TIMINGS
00117 sw.stop();
00118 double temp = (double)sw.timeUS()/TIMING_REPS;
00119 printf(" jee_trans: %7.2f", temp );
00120
00121 sw.reset(); sw.start();
00122 for(int numReps=0; numReps<TIMING_REPS; numReps++){
00123 #endif
00124
00125 for(int i=0; i<3; i++){
00126 int maxs = polynomialFB[i].getWidth();
00127 int maxt = polynomialFB[i].getHeight();
00128 polynomialFB[i].cleanAllBuffers();
00129 polynomialFB[i].drawTo(0);
00130
00131 cgGLSetTextureParameter(polyInputCGP[i] , r1r2FB[i].getTextureID(0,0));
00132
00133 cgGLEnableProfile(g_cgProfile);
00134
00135 cgGLEnableTextureParameter(polyInputCGP[i]);
00136 cgGLBindProgram(polynomialCG[i]);
00137
00138 if(i == ab){
00139 drawPrimative(maxs,maxt,0,0);
00140 } else {
00141 drawTriangles(maxs,maxt,nCols,nRows);
00142 }
00143
00144 cgGLDisableProfile(g_cgProfile);
00145
00146 cgGLDisableTextureParameter(polyInputCGP[i]);
00147 }
00148
00149 if(INT_FINISHES) glFinish();
00150
00151
00152
00153
00154
00155
00156
00157
00158
00159
00160 #ifdef PRINT_TIMINGS
00161 }
00162 sw.stop();
00163 temp = (double)sw.timeUS()/TIMING_REPS;
00164 printf(" jee_poly: %7.2f\n", temp );
00165
00166 sw.reset(); sw.start();
00167 for(int numReps=0; numReps<TIMING_REPS; numReps++)
00168 #endif
00169
00170 sumAllJastrowValues();
00171
00172 #ifdef PRINT_TIMINGS
00173 sw.stop();
00174 temp = (double)sw.timeUS()/TIMING_REPS;
00175 printf(" jee_sum: %7.2f", temp );
00176
00177 sw.reset(); sw.start();
00178 for(int numReps=0; numReps<TIMING_REPS; numReps++)
00179 #endif
00180
00181 sumGradJastrowValues();
00182
00183 #ifdef PRINT_TIMINGS
00184 sw.stop();
00185 temp = (double)sw.timeUS()/TIMING_REPS;
00186 printf(" jee_grad: %7.2f\n", temp );
00187 #endif
00188
00189 glMatrixMode(GL_PROJECTION);
00190 glLoadIdentity();
00191 gluOrtho2D(-1, 1, -1, 1);
00192 glMatrixMode(GL_MODELVIEW);
00193 glLoadIdentity();
00194 glFlush();
00195 getOpenGLError("Error in Jastrow Electron-Electron calculation");
00196 return 0;
00197 }
00198
00199 void GPUQMCJastrowElectronElectron::unloadResults()
00200 {
00201 int index = 0;
00202 finalUandLapUFB->readFrom(0,0);
00203 glReadPixels(0,0,finalUandLapUFB->getWidth(),finalUandLapUFB->getHeight(),GL_RGB,GL_FLOAT,cpuData);
00204
00205 array_sum = 0;
00206 array_lap_sum = 0;
00207 for(int r=0; r<nRows; r++)
00208 {
00209 for(int c=0; c<nCols; c++)
00210 {
00211 for(int i=0; i<numLarger; i++)
00212 {
00213 index = 3*( (r*numLarger + i)*nCols + c );
00214 array_sum( c*nRows + r ) += cpuData[index];
00215 array_lap_sum( c*nRows + r ) += cpuData[index+1];
00216 }
00217 }
00218 }
00219
00220 finalGradUFB->readFrom(0,0);
00221 glReadPixels(0,0,finalGradUFB->getWidth(),finalGradUFB->getHeight(),GL_RGB,GL_FLOAT,cpuData);
00222 for(int r=0; r<nRows; r++)
00223 {
00224 for(int c=0; c<nCols; c++)
00225 {
00226 for(int i=0; i<numE; i++)
00227 {
00228 index = 3*( (r*numE + i)*nCols + c );
00229 (array_grad_sum(c*nRows + r))(i,0) = cpuData[index];
00230 (array_grad_sum(c*nRows + r))(i,1) = cpuData[index+1];
00231 (array_grad_sum(c*nRows + r))(i,2) = cpuData[index+2];
00232 }
00233 }
00234 }
00235 }
00236
00237 void GPUQMCJastrowElectronElectron::translateElectronPositions(GLuint aElectronsTexID, GLuint bElectronsTexID)
00238 {
00239 for(int i=0; i<3; i++){
00240 int maxs = r1r2FB[i].getWidth();
00241 int maxt = r1r2FB[i].getHeight();
00242 r1r2FB[i].cleanBuffer(0);
00243
00244 if(i == aa){
00245 cgGLSetTextureParameter(inputCGP[i] , aElectronsTexID);
00246 } else if(i == bb){
00247 cgGLSetTextureParameter(inputCGP[i], bElectronsTexID);
00248 } else {
00249 cgGLSetTextureParameter(inputCGP[i], aElectronsTexID);
00250 cgGLSetTextureParameter(mixedInputCGP, bElectronsTexID);
00251 cgGLEnableTextureParameter(mixedInputCGP);
00252 }
00253
00254 cgGLEnableProfile(g_cgProfile);
00255
00256 cgGLEnableTextureParameter(inputCGP[i]);
00257 cgGLBindProgram(mapElectronsCG[i]);
00258
00259 if(i == ab){
00260 drawPrimative(maxs,maxt,0,0);
00261 } else {
00262 drawTriangles(maxs,maxt,nCols,nRows);
00263 }
00264
00265 cgGLDisableProfile(g_cgProfile);
00266
00267 cgGLDisableTextureParameter(inputCGP[i]);
00268 if(i==ab)
00269 cgGLDisableTextureParameter(mixedInputCGP);
00270 }
00271
00272 if(INT_FINISHES) glFinish();
00273 glFlush();
00274
00275
00276
00277
00278 getOpenGLError("Error in Jastrow Electron-Electron translation");
00279 }
00280
00281 void GPUQMCJastrowElectronElectron::sumAllJastrowValues()
00282 {
00283 int maxs = finalUandLapUFB->getWidth();
00284 int maxt = finalUandLapUFB->getHeight();
00285 finalUandLapUFB->cleanBuffer(0);
00286
00287 for(int i=0; i<3; i++){
00288 cgGLSetTextureParameter(sumReductionCGP[i], polynomialFB[i].getTextureID(0,0));
00289 cgGLEnableTextureParameter(sumReductionCGP[i]);
00290 }
00291
00292 cgGLEnableProfile(g_cgProfile);
00293 cgGLBindProgram(sumReductionCG);
00294 drawPrimative(maxs,maxt,0,0);
00295 cgGLDisableProfile(g_cgProfile);
00296
00297 for(int i=0; i<3; i++){
00298 cgGLDisableTextureParameter(sumReductionCGP[i]);
00299 }
00300
00301 if(INT_FINISHES) glFinish();
00302 glFlush();
00303
00304
00305
00306 getOpenGLError("Error in Jastrow Electron-Electron translation");
00307 }
00308
00309 void GPUQMCJastrowElectronElectron::sumGradJastrowValues()
00310 {
00311 int maxs = finalGradUFB->getWidth();
00312 int maxt = finalGradUFB->getHeight();
00313 finalGradUFB->cleanBuffer(0);
00314
00315 int tShift;
00316 for(int i=0; i<2; i++){
00317 cgGLSetTextureParameter(gradientReductionCGP[2*i], polynomialFB[i==0?aa:bb].getTextureID(0,1));
00318 cgGLSetTextureParameter(gradientReductionCGP[2*i+1], polynomialFB[ab].getTextureID(0,1));
00319 cgGLEnableTextureParameter(gradientReductionCGP[2*i]);
00320 cgGLEnableTextureParameter(gradientReductionCGP[2*i+1]);
00321
00322 cgGLEnableProfile(g_cgProfile);
00323 cgGLBindProgram(gradientReductionCG[i]);
00324 glEnable(GL_SCISSOR_TEST);
00325 int opp = (i+1)%2;
00326 for(int r=0; r<nRows; r++){
00327 tShift = r*numE + i*numA;
00328 glScissor(0,tShift,maxs,i==0?numA:numB);
00329
00330
00331
00332 drawPrimative(maxs,maxt,0,-numA*(r+i)+r*opp*(numA-numB));
00333 }
00334
00335 glDisable(GL_SCISSOR_TEST);
00336 cgGLDisableProfile(g_cgProfile);
00337
00338 for(int j=0; j<2; j++){
00339 cgGLDisableTextureParameter(gradientReductionCGP[2*i+j]);
00340 }
00341 }
00342
00343 if(INT_FINISHES) glFinish();
00344 glFlush();
00345
00346
00347
00348 getOpenGLError("Error in Jastrow Electron-Electron translation");
00349 }
00350
00351 void GPUQMCJastrowElectronElectron::unloadData(GPUQMCFramebuffer & fb, int w, int h)
00352 {
00353 int num = 10;
00354 fb.readFrom(0,0);
00355 glReadPixels(0,0,w,h,GL_RGBA,GL_FLOAT,cpuData);
00356 cout << "unloaded by jee\n";
00357 PrintRGBAPixelsBoxE(cpuData,w,h,num,num,-1,-1,true);
00358 }
00359
00360 void GPUQMCJastrowElectronElectron::drawPrimative(GLfloat maxs, GLfloat maxt, GLfloat sShift, GLfloat tShift)
00361 {
00362 glMatrixMode(GL_PROJECTION);
00363 glLoadIdentity();
00364 gluOrtho2D(-1, 1, -1, 1);
00365 glMatrixMode(GL_MODELVIEW);
00366 glLoadIdentity();
00367 if(USE_TRIANGLES)
00368 {
00369 glBegin(GL_TRIANGLES);
00370 glTexCoord2f( sShift , tShift ); glVertex2f(-1.0f, -1.0f);
00371 glTexCoord2f( sShift , maxt*2+tShift ); glVertex2f(-1.0f, 3.0f);
00372 glTexCoord2f(maxs*2+sShift , tShift ); glVertex2f( 3.0f, -1.0f);
00373 glEnd();
00374 }
00375 else
00376 {
00377 glBegin(GL_QUADS);
00378 glTexCoord2f( sShift , tShift ); glVertex2f(-1.0, -1.0);
00379 glTexCoord2f( sShift , maxt+tShift ); glVertex2f(-1.0, 1.0);
00380 glTexCoord2f(maxs+sShift , maxt+tShift ); glVertex2f( 1.0, 1.0);
00381 glTexCoord2f(maxs+sShift , tShift ); glVertex2f( 1.0, -1.0);
00382 glEnd();
00383 }
00384 }
00385
00386 void GPUQMCJastrowElectronElectron::drawTriangles(GLfloat maxs, GLfloat maxt, int nCols, int nRows)
00387 {
00388 int tShift, sShift;
00389 int deltaW = (int)(maxs/nCols);
00390 int deltaH = (int)(maxt/nRows);
00391
00392
00393 glMatrixMode(GL_PROJECTION);
00394 glLoadIdentity();
00395 glOrtho(0, maxs, 0, maxt, 0, 100);
00396 glMatrixMode(GL_MODELVIEW);
00397 glLoadIdentity();
00398
00399 glEnable(GL_SCISSOR_TEST);
00400 for(int r=0; r<nRows; r++){
00401 for(int c=0; c<nCols; c++){
00402 tShift = r*deltaH;
00403 sShift = c*deltaW;
00404 glScissor( sShift, tShift, deltaW, deltaH);
00405
00406 glBegin(GL_TRIANGLES);
00407 glTexCoord2f( sShift, tShift);
00408 glVertex2f ( sShift, tShift);
00409 glTexCoord2f( sShift, tShift + deltaH );
00410 glVertex2f ( sShift, tShift + deltaH );
00411 glTexCoord2f( sShift + deltaW, tShift + deltaH );
00412 glVertex2f ( sShift + deltaW, tShift + deltaH );
00413 glEnd();
00414 }
00415 }
00416 glDisable(GL_SCISSOR_TEST);
00417 }
00418
00419
00420 string GPUQMCJastrowElectronElectron::generateTranslationShader(int which)
00421 {
00422 string shader;
00423 shader +=
00424 "float4 main(in float2 coords : TEX0, \n"
00425 " MIXED_PARAM \n"
00426 " uniform samplerRECT input \n"
00427 " ) : COLOR \n"
00428 "{ \n"
00429 " int2 rc = coords/float2(WIDTH,HEIGHT); \n"
00430 " float2 pos = fmod(coords,float2(WIDTH,HEIGHT)); \n"
00431 " int2 eposY = pos/TEXW; \n"
00432 " float2 eposX = fmod(pos,TEXW); \n"
00433 " float3 electron1 = texRECT(input,float2(TEXW*rc.x + eposX.x,TEXH_I*rc.y + eposY.x)).xyz; \n"
00434 " float3 electron2 = texRECT(MIXIN,float2(TEXW*rc.x + eposX.y,TEXH_M*rc.y + eposY.y)).xyz; \n"
00435 " float4 output; \n"
00436 " output.xyz = normalize(electron2 - electron1); \n"
00437 " output.w = length(electron2 - electron1); \n"
00438 " return output; \n"
00439
00440
00441
00442
00443 "} \n";
00444
00445 int elecW = 4;
00446 int elecHA = (int)(numA/4.0);
00447 if(numA%4 != 0) elecHA += 1;
00448 int elecHB = (int)(numB/4.0);
00449 if(numB%4 != 0) elecHB += 1;
00450 findandreplace(shader,"TEXW",elecW);
00451
00452 switch(which)
00453 {
00454 case aa:
00455 {
00456 findandreplace(shader,"MIXIN","input");
00457 findandreplace(shader,"MIXED_PARAM", "");
00458 findandreplace(shader,"WIDTH",numA);
00459 findandreplace(shader,"HEIGHT",numA);
00460 findandreplace(shader,"TEXH_I",elecHA);
00461 findandreplace(shader,"TEXH_M",elecHA);
00462 break;
00463 }
00464 case bb:
00465 {
00466 findandreplace(shader,"MIXIN","input");
00467 findandreplace(shader,"MIXED_PARAM", "");
00468 findandreplace(shader,"WIDTH",numB);
00469 findandreplace(shader,"HEIGHT",numB);
00470 findandreplace(shader,"TEXH_I",elecHB);
00471 findandreplace(shader,"TEXH_M",elecHB);
00472 break;
00473 }
00474 case ab:
00475 {
00476 findandreplace(shader,"MIXIN","inputMix");
00477 findandreplace(shader,"MIXED_PARAM", "uniform samplerRECT inputMix,");
00478 findandreplace(shader,"WIDTH",numA);
00479 findandreplace(shader,"HEIGHT",numB);
00480 findandreplace(shader,"TEXH_I",elecHA);
00481 findandreplace(shader,"TEXH_M",elecHB);
00482 break;
00483 }
00484 }
00485 return shader;
00486 }
00487
00488 string GPUQMCJastrowElectronElectron::coeffToCgString(Array1D<double> & input, string name)
00489 {
00490 string lines =
00491 " float NAME[NUM] = {";
00492 findandreplace(lines,"NAME",name);
00493 findandreplace(lines,"NUM",input.dim1());
00494 for(int i=0; i<input.dim1(); i++){
00495 if(i==0){
00496 lines += "ENTRY";
00497 } else {
00498 lines += ", ENTRY";
00499 }
00500 findandreplace(lines,"ENTRY",input(i));
00501 }
00502 lines += "};\n";
00503 return lines;
00504 }
00505
00506 string GPUQMCJastrowElectronElectron::generatePolynomialShader(int which)
00507 {
00508 Array1D<double> num, den;
00509 string shader;
00510 shader +=
00511 "struct outputType { \n"
00512 " float4 o1 : COLOR0; \n"
00513 " float4 o2 : COLOR1; \n"
00514 "}; \n"
00515 "outputType main(in float2 coords : TEX0, \n"
00516 " uniform samplerRECT input \n"
00517 " ) : COLOR \n"
00518 "{ \n"
00519 " float4 r = texRECT(input,coords); \n"
00520 " outputType output; \n";
00521
00522 switch(which)
00523 {
00524 case aa:
00525 {
00526 num = Input->JP.getElectronUpElectronUpParameters()->getCorrelationFunction()->getNumeratorCoeffs();
00527 den = Input->JP.getElectronUpElectronUpParameters()->getCorrelationFunction()->getDenominatorCoeffs();
00528 break;
00529 }
00530 case bb:
00531 {
00532 num = Input->JP.getElectronDownElectronDownParameters()->getCorrelationFunction()->getNumeratorCoeffs();
00533 den = Input->JP.getElectronDownElectronDownParameters()->getCorrelationFunction()->getDenominatorCoeffs();
00534 break;
00535 }
00536 case ab:
00537 {
00538 num = Input->JP.getElectronUpElectronDownParameters()->getCorrelationFunction()->getNumeratorCoeffs();
00539 den = Input->JP.getElectronUpElectronDownParameters()->getCorrelationFunction()->getDenominatorCoeffs();
00540 break;
00541 }
00542 }
00543
00544 shader += coeffToCgString(num,"num");
00545 shader += coeffToCgString(den,"den");
00546
00547
00548
00549
00550
00551
00552
00553
00554
00555
00556
00557
00558
00559
00560 shader +=
00561 " float4 dfd2f = 0; \n"
00562 " float2 f = float2(num[NN],den[NN]); \n"
00563 " for(int i=NN-1; i >= 0; i--){ \n"
00564 " dfd2f = dfd2f*r.w + float4(f.x, dfd2f.x, f.y, dfd2f.z); \n"
00565 " f = f*r.w + float2(num[i],den[i]); \n"
00566 " } \n"
00567 " dfd2f.yw *= 2.0f; \n";
00568
00569
00570
00571 assert(num.dim1() == den.dim1());
00572 findandreplace(shader,"NN",num.dim1()-1);
00573
00574
00575
00576 shader +=
00577 " dfd2f /= f.y; \n"
00578 " output.o1.x = f.x/f.y; \n"
00579 " output.o1.y = dfd2f.x - dfd2f.z*output.o1.x; \n"
00580 " output.o1.z = dfd2f.y - dfd2f.w*output.o1.x - 2*dfd2f.z*output.o1.y;\n"
00581 " output.o1.z = 2.0f*(2.0f * output.o1.y / r.w + output.o1.z); \n"
00582
00583 " output.o2.xyz = r.xyz * output.o1.y; \n";
00584
00585
00586
00587
00588
00589
00590
00591
00592
00593
00594
00595
00596
00597
00598
00599
00600
00601
00602
00603 shader +=
00604 " return output; \n"
00605 "} \n";
00606 return shader;
00607 }
00608
00609 string GPUQMCJastrowElectronElectron::generateReductionShader()
00610 {
00611 string shader;
00612 shader +=
00613 "float4 main(in float2 coords : TEX0, \n"
00614 " uniform samplerRECT inputAA, \n"
00615 " uniform samplerRECT inputAB, \n"
00616 " uniform samplerRECT inputBB \n"
00617 " ) : COLOR \n"
00618 "{ \n"
00619 " float2 sum = 0; \n"
00620 " int2 rc = coords/float2(WIDTH,HEIGHT); \n"
00621 " int2 pos = fmod(coords,float2(WIDTH,HEIGHT)); \n";
00622
00623 if(numA > numB)
00624 shader +=
00625 " if(pos.y < NUMB) \n";
00626
00627 shader +=
00628 " for(int i=0; i<NUMA; i++){ \n"
00629 " sum += texRECT(inputAB,float2(i+rc.x*NUMA,coords.y-rc.y*ALARGER)).xz; \n"
00630 " } \n";
00631 if(numA < numB)
00632 shader +=
00633 " if(pos.y < NUMA) \n";
00634
00635 shader +=
00636 " for(int i=0; i<pos.y; i++){ \n"
00637 " sum += texRECT(inputAA,float2(i+rc.x*NUMA,coords.y-rc.y*BLARGER)).xz; \n"
00638 " } \n";
00639
00640 if(numA > numB)
00641 shader +=
00642 " if(pos.y < NUMB) \n";
00643
00644 shader +=
00645 " for(int i=0; i<pos.y; i++){ \n"
00646 " sum += texRECT(inputBB,float2(i+rc.x*NUMB,coords.y-rc.y*ALARGER)).xz; \n"
00647 " } \n"
00648 " return float4(sum,0,0); \n"
00649 "} \n";
00650
00651 findandreplace(shader,"NUMA",numA);
00652 findandreplace(shader,"NUMB",numB);
00653 findandreplace(shader,"WIDTH",1);
00654 findandreplace(shader,"HEIGHT",numLarger);
00655
00656
00657
00658
00659
00660
00661 findandreplace(shader,"ALARGER",numLarger-numB);
00662 findandreplace(shader,"BLARGER",numLarger-numA);
00663 return shader;
00664 }
00665
00666 string GPUQMCJastrowElectronElectron::generateGradientReductionShader(int which)
00667 {
00668 string shader;
00669 shader +=
00670 "float4 main(in float2 coords : TEX0, \n"
00671 " uniform samplerRECT inputParallel, \n"
00672 " uniform samplerRECT inputOpposite \n"
00673 " ) : COLOR \n"
00674 "{ \n"
00675 " float3 sum = 0; \n"
00676 " int2 rc = coords/float2(WIDTH,HEIGHT); \n"
00677 " int2 pos = fmod(coords,float2(WIDTH,HEIGHT)); \n"
00678 " for(float i=0; i<NUMO; i++){ \n"
00679 " LINE \n"
00680 " } \n"
00681 " for(int i=0; i < pos.y; i++){ \n"
00682 " sum += texRECT(inputParallel,float2(i+rc.x*NUMP,coords.y)).xyz; \n"
00683 " } \n"
00684 " for(int i=(rc.y)*NUMP+pos.y+1; i <(rc.y+1)*NUMP; i++){ \n"
00685 " sum -= texRECT(inputParallel,float2(pos.y+rc.x*NUMP,i)).xyz; \n"
00686 " } \n"
00687 " return float4(sum,0); \n"
00688
00689 "} \n";
00690
00691 if(which == 0){
00692 findandreplace(shader,"WIDTH",1);
00693 findandreplace(shader,"HEIGHT",numA);
00694 findandreplace(shader,"NUMP",numA);
00695 findandreplace(shader,"NUMO",numB);
00696 findandreplace(shader,"LINE","sum -= texRECT(inputOpposite,float2(pos.y+rc.x*NUMA,i+rc.y*NUMB)).xyz;");
00697 } else {
00698 findandreplace(shader,"WIDTH",1);
00699 findandreplace(shader,"HEIGHT",numB);
00700 findandreplace(shader,"NUMP",numB);
00701 findandreplace(shader,"NUMO",numA);
00702 findandreplace(shader,"LINE","sum += texRECT(inputOpposite,float2(i+rc.x*NUMA,pos.y+rc.y*NUMB)).xyz;");
00703 }
00704 findandreplace(shader,"NUMA",numA);
00705 findandreplace(shader,"NUMB",numB);
00706
00707 return shader;
00708 }
00709
00710 int GPUQMCJastrowElectronElectron::getNumIterations()
00711 {
00712 #ifdef PRINT_TIMINGS
00713 return TIMING_REPS;
00714 #else
00715 return 1;
00716 #endif
00717 }
00718
00719 void GPUQMCJastrowElectronElectron::operator=(const GPUQMCJastrowElectronElectron & rhs)
00720 {
00721 nRows = rhs.nRows;
00722 nCols = rhs.nCols;
00723 allocatedRows = rhs.allocatedRows;
00724 allocatedCols = rhs.allocatedCols;
00725
00726 numE = rhs.numE;
00727 numA = rhs.numA;
00728 numB = rhs.numB;
00729 numLarger = rhs.numLarger;
00730
00731 elecW = rhs.elecW;
00732 elecH = rhs.elecH;
00733
00734
00735
00736 r1r2FB = new GPUQMCFramebuffer[3];
00737 r1r2FB[aa].initialize( nCols*numA, nRows*numA, 1, 1);
00738 r1r2FB[bb].initialize( nCols*numB, nRows*numB, 1, 1);
00739 r1r2FB[ab].initialize( nCols*numA, nRows*numB, 1, 1);
00740
00741 polynomialFB = new GPUQMCFramebuffer[3];
00742 polynomialFB[aa].initialize( nCols*numA, nRows*numA, 1, 2);
00743 polynomialFB[bb].initialize( nCols*numB, nRows*numB, 1, 2);
00744 polynomialFB[ab].initialize( nCols*numA, nRows*numB, 1, 2);
00745
00746 finalUandLapUFB = new GPUQMCFramebuffer( nCols*1, nRows*numLarger, 1, 1);
00747 finalGradUFB = new GPUQMCFramebuffer( nCols*1, nRows*numE, 1, 1);
00748
00749
00750
00751
00752 array_sum = rhs.array_sum;
00753 array_grad_sum = rhs.array_grad_sum;
00754 array_lap_sum = rhs.array_lap_sum;
00755
00756 cpuData = (GLfloat *) calloc( nCols*numLarger * nRows*numLarger * 4 , sizeof(GLfloat) );
00757 }
00758
00759 double GPUQMCJastrowElectronElectron::getLaplacianLnJastrow(int which)
00760 {
00761 return array_lap_sum(which);
00762 }
00763
00764 Array2D<double> * GPUQMCJastrowElectronElectron::getGradientLnJastrow(int which)
00765 {
00766 return &array_grad_sum(which);
00767 }
00768
00769 double GPUQMCJastrowElectronElectron::getLnJastrow(int which)
00770 {
00771 return array_sum(which);
00772 }
00773
00774 void GPUQMCJastrowElectronElectron::loadShaders()
00775 {
00776 for(int i=0; i<3; i++)
00777 {
00778 char shaderName[256];
00779 sprintf(shaderName,"shader_jeet.%d.type%d.cg-asm",numE,i);
00780 if(!shadersCreated && !REUSE_SHADERS)
00781 {
00782
00783 string generatedShader = generateTranslationShader(i);
00784
00785 char cgName[256];
00786 sprintf(cgName,"shader_jeet.%d.type%d.cg",numE,i);
00787 writeShader(generatedShader.c_str(),cgName);
00788
00789 mapElectronsCG.push_back(cgCreateProgram(g_cgContext, CG_SOURCE,
00790 generatedShader.c_str(),
00791 g_cgProfile, "main", NULL));
00792
00793 if(mapElectronsCG[i])
00794 writeShader(cgGetProgramString(mapElectronsCG[i],
00795 CG_COMPILED_PROGRAM),shaderName);
00796 }
00797 else
00798 {
00799
00800 mapElectronsCG.push_back(cgCreateProgramFromFile(g_cgContext, CG_OBJECT,
00801 shaderName,
00802 g_cgProfile, "main", NULL));
00803 }
00804
00805 if(!mapElectronsCG[i])
00806 {
00807 cerr << "ERROR: Jastrow Electron-Electron translation shader " << i << " did not compile.\n";
00808 exit(1);
00809 }
00810
00811 inputCGP.push_back(cgGetNamedParameter(mapElectronsCG[i], "input"));
00812 if(i == ab)
00813 mixedInputCGP = cgGetNamedParameter(mapElectronsCG[ab], "inputMix");
00814
00815 cgGLLoadProgram(mapElectronsCG[i]);
00816 }
00817
00818 for(int i=0; i<3; i++)
00819 {
00820 char shaderName[256];
00821 sprintf(shaderName,"shader_jee.%d.type%d.cg-asm",numE,i);
00822 if(!shadersCreated && !REUSE_SHADERS)
00823 {
00824
00825 string generatedShader = generatePolynomialShader(i);
00826
00827 char cgName[256];
00828 sprintf(cgName,"shader_jee.%d.type%d.cg",numE,i);
00829 writeShader(generatedShader.c_str(),cgName);
00830
00831 polynomialCG.push_back(cgCreateProgram(g_cgContext, CG_SOURCE,
00832 generatedShader.c_str(),
00833 g_cgProfile, "main", NULL));
00834
00835 if(polynomialCG[i])
00836 writeShader(cgGetProgramString(polynomialCG[i],
00837 CG_COMPILED_PROGRAM),shaderName);
00838 }
00839 else
00840 {
00841
00842 polynomialCG.push_back(cgCreateProgramFromFile(g_cgContext, CG_OBJECT,
00843 shaderName,
00844 g_cgProfile, "main", NULL));
00845 }
00846
00847 if(!polynomialCG[i])
00848 {
00849 cerr << "ERROR: Jastrow Electron-Electron translation shader " << i << " did not compile.\n";
00850 exit(1);
00851 }
00852
00853 polyInputCGP.push_back(cgGetNamedParameter(polynomialCG[i], "input"));
00854
00855 cgGLLoadProgram(polynomialCG[i]);
00856 }
00857
00858
00859 {
00860 char shaderName[256];
00861 sprintf(shaderName,"shader_jeer.%d.cg-asm",numLarger);
00862 if(!shadersCreated && !REUSE_SHADERS)
00863 {
00864
00865 string generatedShader = generateReductionShader();
00866
00867 char cgName[256];
00868 sprintf(cgName,"shader_jeer.%d.cg",numLarger);
00869 writeShader(generatedShader.c_str(),cgName);
00870
00871 sumReductionCG = cgCreateProgram(g_cgContext, CG_SOURCE,
00872 generatedShader.c_str(),
00873 g_cgProfile, "main", NULL);
00874
00875 if(sumReductionCG)
00876 writeShader(cgGetProgramString(sumReductionCG,
00877 CG_COMPILED_PROGRAM),shaderName);
00878 }
00879 else
00880 {
00881
00882 sumReductionCG = cgCreateProgramFromFile(g_cgContext, CG_OBJECT,
00883 shaderName, g_cgProfile, "main", NULL);
00884 }
00885
00886 if(!sumReductionCG)
00887 {
00888 cerr << "ERROR: Jastrow Electron-Electron reduction shader did not compile.\n";
00889 exit(1);
00890 }
00891
00892 sumReductionCGP.push_back(cgGetNamedParameter(sumReductionCG, "inputAA"));
00893 sumReductionCGP.push_back(cgGetNamedParameter(sumReductionCG, "inputBB"));
00894 sumReductionCGP.push_back(cgGetNamedParameter(sumReductionCG, "inputAB"));
00895
00896 cgGLLoadProgram(sumReductionCG);
00897 }
00898
00899 for(int i=0; i<2; i++)
00900 {
00901 char shaderName[256];
00902 sprintf(shaderName,"shader_jeeg.%d.type%d.cg-asm",numE,i);
00903 if(!shadersCreated && !REUSE_SHADERS)
00904 {
00905
00906 string generatedShader = generateGradientReductionShader(i);
00907
00908 char cgName[256];
00909 sprintf(cgName,"shader_jeeg.%d.type%d.cg",numE,i);
00910 writeShader(generatedShader.c_str(),cgName);
00911
00912 gradientReductionCG.push_back(cgCreateProgram(g_cgContext, CG_SOURCE,
00913 generatedShader.c_str(),
00914 g_cgProfile, "main", NULL));
00915
00916 if(gradientReductionCG[i])
00917 writeShader(cgGetProgramString(gradientReductionCG[i],
00918 CG_COMPILED_PROGRAM),shaderName);
00919 }
00920 else
00921 {
00922
00923 gradientReductionCG.push_back(cgCreateProgramFromFile(g_cgContext, CG_OBJECT,
00924 shaderName,
00925 g_cgProfile, "main", NULL));
00926 }
00927
00928 if(!gradientReductionCG[i])
00929 {
00930 cerr << "ERROR: Jastrow Electron-Electron gradient reduction shader " << i << " did not compile.\n";
00931 exit(1);
00932 }
00933
00934 gradientReductionCGP.push_back(cgGetNamedParameter(gradientReductionCG[i], "inputParallel"));
00935 gradientReductionCGP.push_back(cgGetNamedParameter(gradientReductionCG[i], "inputOpposite"));
00936
00937 cgGLLoadProgram(gradientReductionCG[i]);
00938 }
00939
00940
00941
00942 shadersCreated = true;
00943 }
00944 #endif