00001
00002
00003
00004
00005
00006
00007
00008 #include "GPUQMCBasisFunction.h"
00009 #ifdef QMC_GPU
00010
00011
00012
// File-local switches and bookkeeping flags for the GPU basis-function code.
// Everything here has internal linkage and is only used inside this file.
namespace
{
  // --- debugging / timing ---------------------------------------------------
  // Print every generated shader to stdout and wait for a key press.
  const bool PRINT_SHADER = false;
  // Number of repetitions of each stage when PRINT_TIMINGS is defined.
  const int TIMING_REPS = 10;
  // Call glFinish() after each GPU stage.
  const bool INT_FINISHES = false;

  // --- values substituted into the generated shader source --------------------
  // Added to every exponent argument; the result is rescaled by EXP_BASE(-shift).
  const int EXP_SHIFT = 35;
  // xyz_term is multiplied by exp2(MULTIPLIER) in the generated shader.
  const int MULTIPLIER = 0;
  // Use exp2() with a -1/ln(2) scaling instead of exp() in the shader.
  const bool USE_BASE_2 = true;
  // Append isnan() clamps to the translation shader.
  const bool NAN_CHECKS = false;

  // --- rendering --------------------------------------------------------------
  // Draw one oversized triangle rather than a quad for each pass.
  const bool USE_TRIANGLES = true;
  // Load previously compiled shader files instead of generating new ones.
  const bool REUSE_SHADERS = false;

  // --- run-time state ---------------------------------------------------------
  // Set once loadShaders() has completed.
  bool shadersCreated = false;
  // One-shot latches for the "too large for the GPU" warnings.
  bool warnedAboutSizeR = false;
  bool warnedAboutSizeC = false;
}
00037
00038
00039 #define TEXTURE_INTERNAL_FORMAT GL_FLOAT_RGBA32_NV
00040 #define TEXTURE_TARGET GL_TEXTURE_RECTANGLE_NV
00041
00042
00043 vector<CGprogram> GPUQMCBasisFunction::fragProg;
00044 vector<CGparameter> GPUQMCBasisFunction::electronsCGP;
00045 vector<CGparameter> GPUQMCBasisFunction::paramsCGP;
00046 CGprogram GPUQMCBasisFunction::fxo_to_txt_CG;
00047 CGparameter GPUQMCBasisFunction::fxo_to_txt_CGP;
00048
00049 GPUQMCBasisFunction::GPUQMCBasisFunction(QMCBasisFunction bf, int numElectrons, int max_calcs) : QMCBasisFunction(bf)
00050 {
00051 getFactors(max_calcs,nRows,nCols);
00052 allocatedRows = nRows;
00053 allocatedCols = nCols;
00054
00055 nElectrons = numElectrons;
00056 nBasisF = N_BasisFunctions;
00057
00058 fxo_deltaBF = nBasisF;
00059 fxo_deltaOE = (int)(nElectrons/4.0);
00060 if(nElectrons%4 != 0) fxo_deltaOE += 1;
00061
00062 elecW = 4;
00063 elecH = (int)(nElectrons/4.0);
00064 if(nElectrons%4 != 0) elecH += 1;
00065
00066 txt_deltaBF = (int)(nBasisF/2.0);
00067 txt_deltaOE = (int)(nElectrons/2.0);
00068 if(nBasisF%2 != 0) txt_deltaBF += 1;
00069 if(nElectrons%2 != 0) txt_deltaOE += 1;
00070
00071
00072 maxGaussians = 0;
00073 for (int atom=0; atom<flags->Natoms; atom++)
00074 {
00075 for (int j=0; j<BFCoeffs(atom).getNumberBasisFunctions(); j++)
00076 {
00077 if(BFCoeffs(atom).N_Gauss(j) > maxGaussians) maxGaussians = BFCoeffs(atom).N_Gauss(j);
00078 }
00079 }
00080 basisfunctionParamsH = 2 + (int)(maxGaussians/2.0 + 0.5);
00081
00082 basisFunctionsFB.initialize(nCols*fxo_deltaBF,nRows*nMats*fxo_deltaOE,1,1);
00083 outputFB.initialize(nCols*txt_deltaBF,nRows*nMats*txt_deltaOE,1,1);
00084 basisFunctionsFB.checkFramebufferStatus();
00085 outputFB.checkFramebufferStatus();
00086 GET_GLERROR("Error setting up framebuffer");
00087
00088
00089 glGenTextures(1, &electronsTexID);
00090 glGenTextures(1, &bfParametersTexID);
00091
00092 cpuData = (GLfloat *) calloc( nCols*fxo_deltaBF * nRows*nMats*max(fxo_deltaOE,basisfunctionParamsH) * 4 , sizeof(GLfloat) );
00093
00094 setUpInputs();
00095
00096 if(fragProg.empty())
00097 loadShaders();
00098 }
00099
00100 void GPUQMCBasisFunction::loadShaders()
00101 {
00102
00103 for(int i=0; i<nMats; i++)
00104 {
00105 char shaderName[256];
00106 sprintf(shaderName,"shader_bf.%d.%d_type%d.cg-asm",nElectrons,nBasisF,i);
00107 if(!shadersCreated && !REUSE_SHADERS)
00108 {
00109
00110 string generatedShader = generateShader(i);
00111
00112 char cgName[256];
00113 sprintf(cgName,"shader_bf.%d.%d_type%d.cg",nElectrons,nBasisF,i);
00114 writeShader(generatedShader.c_str(),cgName);
00115
00116 fragProg.push_back(cgCreateProgram(g_cgContext, CG_SOURCE,
00117 generatedShader.c_str(),
00118 g_cgProfile, "main", NULL));
00119
00120 if(fragProg[i])
00121 writeShader(cgGetProgramString(fragProg[i],
00122 CG_COMPILED_PROGRAM),shaderName);
00123
00124 }
00125 else
00126 {
00127
00128 fragProg.push_back(cgCreateProgramFromFile(g_cgContext, CG_OBJECT,
00129 shaderName,
00130 g_cgProfile, "main", NULL));
00131 }
00132
00133 if(!fragProg[i])
00134 {
00135 cerr << "ERROR: Basisfunction shader did not compile.\n";
00136 exit(1);
00137 }
00138
00139 electronsCGP.push_back(cgGetNamedParameter(fragProg[i], "epos"));
00140 paramsCGP.push_back(cgGetNamedParameter(fragProg[i], "params"));
00141
00142 cgGLLoadProgram(fragProg[i]);
00143 }
00144
00145
00146 char shaderName[256];
00147 sprintf(shaderName,"shader_trans.%d.%d.cg-asm",nElectrons,nBasisF);
00148 if(!shadersCreated && !REUSE_SHADERS)
00149 {
00150 string generatedShader = generateTranslationShader(nElectrons%4==1 || nElectrons%4 ==2);
00151
00152 char cgName[256];
00153 sprintf(cgName,"shader_trans.%d.%d.cg",nElectrons,nBasisF);
00154 writeShader(generatedShader.c_str(),cgName);
00155
00156 fxo_to_txt_CG = cgCreateProgram(g_cgContext, CG_SOURCE,
00157 generatedShader.c_str(),
00158 g_cgProfile, "main", NULL);
00159
00160 writeShader(cgGetProgramString(fxo_to_txt_CG,CG_COMPILED_PROGRAM),shaderName);
00161
00162 }
00163 else
00164 {
00165 fxo_to_txt_CG = cgCreateProgramFromFile(g_cgContext, CG_OBJECT,
00166 shaderName,
00167 g_cgProfile, "main", NULL);
00168 }
00169
00170 if(!fxo_to_txt_CG)
00171 {
00172 cerr << "ERROR: Translation shader did not compile.\n";
00173 exit(1);
00174 }
00175
00176 fxo_to_txt_CGP = cgGetNamedParameter(fxo_to_txt_CG, "input");
00177 cgGLLoadProgram(fxo_to_txt_CG);
00178
00179
00180
00181 shadersCreated = true;
00182 }
00183
00184 GPUQMCBasisFunction::~GPUQMCBasisFunction()
00185 {
00186
00187 delete [] cpuData;
00188 glDeleteTextures(1, &electronsTexID);
00189
00190 }
00191
00192 GLuint GPUQMCBasisFunction::runCalculation(Array1D<Array2D<double>*> &X, int num, int start, int stop)
00193 {
00194 getFactors(num,nRows,nCols);
00195 if(nRows > allocatedRows || nCols > allocatedCols)
00196 {
00197 cerr << "Error: remainder walkers chose bad dimensions.\n";
00198 exit(-1);
00199 }
00200
00201 if(nCols*fxo_deltaBF >= 3500 && !warnedAboutSizeC)
00202 {
00203 cerr << "Warning: nCols (" << nCols << ") may be too high for the GPU.\n";
00204 warnedAboutSizeC = true;
00205 }
00206 if(nRows*nMats*txt_deltaOE >= 3500 && !warnedAboutSizeR)
00207 {
00208 cerr << "Warning: nRows (" << nRows << ") may be too high for the GPU.\n";
00209 warnedAboutSizeR = true;
00210 }
00211
00212 #ifdef PRINT_TIMINGS
00213 Stopwatch sw = Stopwatch();
00214 sw.reset(); sw.start();
00215 for(int numReps=0; numReps<TIMING_REPS; numReps++)
00216 #endif
00217
00218 loadElectronPositions(X,start,stop);
00219
00220 #ifdef PRINT_TIMINGS
00221 sw.stop();
00222 double temp = (double)sw.timeUS()/TIMING_REPS;
00223 printf(" bf_loading: %7.2f", temp );
00224
00225 sw.reset(); sw.start();
00226 for(int numReps=0; numReps<TIMING_REPS; numReps++)
00227 {
00228 #endif
00229
00230 int tShift;
00231
00232 int maxs = basisFunctionsFB.getWidth();
00233 int maxt = basisFunctionsFB.getHeight();
00234 basisFunctionsFB.cleanBuffer(0);
00235
00236 cgGLEnableProfile(g_cgProfile);
00237
00238 glEnable(GL_SCISSOR_TEST);
00239 for(int program=0; program<nMats; program++)
00240 {
00241 cgGLSetTextureParameter(electronsCGP[program], electronsTexID);
00242 cgGLSetTextureParameter(paramsCGP[program], bfParametersTexID);
00243 cgGLEnableTextureParameter(electronsCGP[program]);
00244 cgGLEnableTextureParameter(paramsCGP[program]);
00245
00246 cgGLBindProgram(fragProg[program]);
00247
00248 for(int row=0; row<nRows; row++)
00249 {
00250 tShift = (program + row*nMats)*fxo_deltaOE;
00251 glScissor( 0, tShift, maxs, fxo_deltaOE);
00252 tShift -= row*fxo_deltaOE;
00253 drawPrimative(maxs,maxt,0,-tShift);
00254 }
00255
00256 }
00257 glDisable(GL_SCISSOR_TEST);
00258
00259 cgGLDisableProfile(g_cgProfile);
00260 for(int whichType=0; whichType<nMats; whichType++)
00261 {
00262 cgGLDisableTextureParameter(electronsCGP[whichType]);
00263 cgGLDisableTextureParameter(paramsCGP[whichType]);
00264 }
00265
00266 if(INT_FINISHES)
00267 {
00268 glFinish();
00269 }
00270
00271 GET_GLERROR("Error in QMC basis function calculation");
00272
00273 #ifdef PRINT_TIMINGS
00274
00275 }
00276 sw.stop();
00277 temp = (double)sw.timeUS()/TIMING_REPS;
00278 printf(" bf_cg: %7.2f", temp );
00279
00280 sw.reset(); sw.start();
00281 for(int numReps=0; numReps<TIMING_REPS; numReps++)
00282 {
00283 #endif
00284
00285 translate();
00286
00287 #ifdef PRINT_TIMINGS
00288
00289 }
00290 sw.stop();
00291 temp = (double)sw.timeUS()/TIMING_REPS;
00292 printf(" bf_translate: %7.2f\n", temp );
00293 #endif
00294
00295 if(num > 1 && !true)
00296 {
00297 unloadData(basisFunctionsFB, nCols*fxo_deltaBF, nRows*nMats*fxo_deltaOE);
00298 unloadData(outputFB, nCols*txt_deltaBF, nRows*nMats*txt_deltaOE);
00299 }
00300
00301 glFlush();
00302
00303 return outputFB.getTextureID(0,0);
00304 }
00305
00306 void GPUQMCBasisFunction::loadElectronPositions(Array1D<Array2D<double>*> &X, int start, int stop)
00307 {
00308 int index, i, j;
00309 for(int c = 0; c < nCols; c++)
00310 {
00311 for(int r = 0; r < nRows; r++)
00312 {
00313 for(int electron=0; electron<nElectrons; electron++)
00314 {
00315 i = electron/4;
00316 j = electron%4;
00317
00318 index = 3*( (r*elecH + i)*nCols*elecW + (c*elecW + j) );
00319 cpuData[index ] = (GLfloat) X(c*nRows + r)->get(electron+start, 0);
00320 cpuData[index + 1] = (GLfloat) X(c*nRows + r)->get(electron+start, 1);
00321 cpuData[index + 2] = (GLfloat) X(c*nRows + r)->get(electron+start, 2);
00322 }
00323 }
00324 }
00325 glBindTexture(TEXTURE_TARGET, electronsTexID);
00326 glTexImage2D(TEXTURE_TARGET, 0, TEXTURE_INTERNAL_FORMAT,
00327 elecW*nCols, elecH*nRows, 0, GL_RGB, GL_FLOAT, cpuData);
00328 }
00329
00330 GLuint GPUQMCBasisFunction::getElectronicTexture()
00331 {
00332 return electronsTexID;
00333 }
00334
00335 void GPUQMCBasisFunction::translate()
00336 {
00337
00338 int maxs = outputFB.getWidth();
00339 int maxt = outputFB.getHeight();
00340 outputFB.cleanBuffer(0);
00341 cgGLSetTextureParameter(fxo_to_txt_CGP, basisFunctionsFB.getTextureID(0,0));
00342
00343 cgGLEnableProfile(g_cgProfile);
00344
00345 cgGLEnableTextureParameter(fxo_to_txt_CGP);
00346 cgGLBindProgram(fxo_to_txt_CG);
00347
00348
00349
00350
00351
00352 if(fxo_deltaBF%2==0)
00353 {
00354 drawPrimative(maxs,maxt,0,0);
00355 }
00356 else
00357 {
00358 glEnable(GL_SCISSOR_TEST);
00359 for(int col=0; col<nCols; col++)
00360 {
00361 glScissor( col*txt_deltaBF, 0, txt_deltaBF, maxt);
00362 drawPrimative(maxs,maxt,-0.5*col,0);
00363 }
00364 glDisable(GL_SCISSOR_TEST);
00365 }
00366 cgGLDisableProfile(g_cgProfile);
00367
00368 cgGLDisableTextureParameter(fxo_to_txt_CGP);
00369
00370 if(INT_FINISHES)
00371 {
00372 glFinish();
00373 }
00374
00375 getOpenGLError("Error in QMC basis function translation");
00376 }
00377
00378 void GPUQMCBasisFunction::drawPrimative(GLfloat maxs, GLfloat maxt, GLfloat sShift, GLfloat tShift)
00379 {
00380 if(USE_TRIANGLES)
00381 {
00382 glBegin(GL_TRIANGLES);
00383 glTexCoord2f( sShift , tShift ); glVertex2f(-1.0f, -1.0f);
00384 glTexCoord2f( sShift , maxt*2+tShift ); glVertex2f(-1.0f, 3.0f);
00385 glTexCoord2f(maxs*2+sShift , tShift ); glVertex2f( 3.0f, -1.0f);
00386 glEnd();
00387 }
00388 else
00389 {
00390 glBegin(GL_QUADS);
00391 glTexCoord2f( sShift , tShift ); glVertex2f(-1.0, -1.0);
00392 glTexCoord2f( sShift , maxt+tShift ); glVertex2f(-1.0, 1.0);
00393 glTexCoord2f(maxs+sShift , maxt+tShift ); glVertex2f( 1.0, 1.0);
00394 glTexCoord2f(maxs+sShift , tShift ); glVertex2f( 1.0, -1.0);
00395 glEnd();
00396 }
00397 }
00398
00399 string GPUQMCBasisFunction::generateTranslationShader(bool is12)
00400 {
00401 string testVarIsNaN;
00402
00403
00404
00405
00406
00407
00408
00409 testVarIsNaN +=
00410 " VAR.x = isnan(VAR.x)? 0 : VAR.x; \n"
00411 " VAR.y = isnan(VAR.y)? 0 : VAR.y; \n"
00412 " VAR.z = isnan(VAR.z)? 0 : VAR.z; \n"
00413 " VAR.w = isnan(VAR.w)? 0 : VAR.w; \n";
00414
00415 string shader;
00416 shader +=
00417 "float4 main(in float2 coords : TEX0, \n"
00418 " uniform samplerRECT input) : COLOR \n"
00419 "{ \n";
00420
00421
00422
00423
00424
00425 if(is12)
00426 {
00427 shader +=
00428 " int s = coords.y/ROW_HEIGHT; \n"
00429 " int t = fmod(coords.y,ROW_HEIGHT); \n"
00430 " coords.y += s; \n";
00431 }
00432
00433
00434
00435
00436
00437
00438
00439
00440
00441 shader +=
00442 " float4 left = texRECT(input, float2(2*coords.x-0.99, coords.y/2)); \n"
00443 " float4 right = texRECT(input, float2(2*coords.x+0.01, coords.y/2)); \n";
00444
00445 if(NAN_CHECKS)
00446 {
00447 shader += testVarIsNaN;
00448 findandreplace(shader,"VAR", "right");
00449 shader += testVarIsNaN;
00450 findandreplace(shader,"VAR", "left");
00451 }
00452
00453
00454
00455 shader +=
00456 " int switcher = fmod(coords.y,2); \n"
00457 " return (switcher==0 EXTRA_CHECK) ? \n"
00458 " float4(left.x, right.x, left.y, right.y): \n"
00459 " float4(left.z, right.z, left.w, right.w); \n"
00460 "} \n";
00461 findandreplace(shader,"ROW_HEIGHT", txt_deltaOE);
00462 if(is12)
00463 {
00464 findandreplace(shader,"EXTRA_CHECK", "|| t==0");
00465 }
00466 else
00467 {
00468 findandreplace(shader,"EXTRA_CHECK", "");
00469 }
00470 if(PRINT_SHADER)
00471 {
00472 cout << shader << endl;
00473 getchar();
00474 }
00475 return shader;
00476 }
00477
00478 string GPUQMCBasisFunction::generateShader(int which)
00479 {
00480 string shader;
00481 shader +=
00482 "float4 main(in float2 coords : TEX0, \n"
00483 " uniform samplerRECT params, \n"
00484 " uniform samplerRECT epos) : COLOR \n"
00485 "{ \n"
00486 " int bfX = fmod(coords.x,WIDTH); \n"
00487 " int eposX = coords.x/WIDTH; \n"
00488 " int eposY = coords.y; \n"
00489 " eposX *= 4; \n"
00490 " float3 n_center = texRECT(params,float2(bfX,0)).xyz; \n"
00491 " float4 klm = texRECT(params,float2(bfX,1)); \n"
00492 " float ntexs = klm.w; \n"
00493 " float4 output = 0; \n"
00494 " float4 r_sq = 0; \n"
00495 " float4 xyz_term = 1; \n"
00496 " float3 r = 0; \n";
00497
00498 if(which != psi)
00499 {
00500 shader +=
00501 " float4 r_extra = 0; \n";
00502 }
00503
00504
00505
00506
00507 shader +=
00508 " \n"
00509 " r = texRECT(epos, float2(eposX,eposY)).xyz; \n"
00510 " r = r + n_center; \n"
00511 " r_sq.x = dot(r,r); \n"
00512 " xyz_term.x = klm.x>0 ? xyz_term.x*r.x : xyz_term.x; \n"
00513 " xyz_term.x = klm.x>1 ? xyz_term.x*r.x : xyz_term.x; \n"
00514 " xyz_term.x = klm.y>0 ? xyz_term.x*r.y : xyz_term.x; \n"
00515 " xyz_term.x = klm.y>1 ? xyz_term.x*r.y : xyz_term.x; \n"
00516 " xyz_term.x = klm.z>0 ? xyz_term.x*r.z : xyz_term.x; \n"
00517 " xyz_term.x = klm.z>1 ? xyz_term.x*r.z : xyz_term.x; \n"
00518 " VARIABLE.x = EQUATION; \n"
00519 " \n"
00520 " r = texRECT(epos, float2(eposX + 1,eposY)).xyz; \n"
00521 " r = r + n_center; \n"
00522 " r_sq.y = dot(r,r); \n"
00523 " xyz_term.y = klm.x>0 ? xyz_term.y*r.x : xyz_term.y; \n"
00524 " xyz_term.y = klm.x>1 ? xyz_term.y*r.x : xyz_term.y; \n"
00525 " xyz_term.y = klm.y>0 ? xyz_term.y*r.y : xyz_term.y; \n"
00526 " xyz_term.y = klm.y>1 ? xyz_term.y*r.y : xyz_term.y; \n"
00527 " xyz_term.y = klm.z>0 ? xyz_term.y*r.z : xyz_term.y; \n"
00528 " xyz_term.y = klm.z>1 ? xyz_term.y*r.z : xyz_term.y; \n"
00529 " VARIABLE.y = EQUATION; \n"
00530 " \n"
00531 " r = texRECT(epos, float2(eposX + 2,eposY)).xyz; \n"
00532 " r = r + n_center; \n"
00533 " r_sq.z = dot(r,r); \n"
00534 " xyz_term.z = klm.x>0 ? xyz_term.z*r.x : xyz_term.z; \n"
00535 " xyz_term.z = klm.x>1 ? xyz_term.z*r.x : xyz_term.z; \n"
00536 " xyz_term.z = klm.y>0 ? xyz_term.z*r.y : xyz_term.z; \n"
00537 " xyz_term.z = klm.y>1 ? xyz_term.z*r.y : xyz_term.z; \n"
00538 " xyz_term.z = klm.z>0 ? xyz_term.z*r.z : xyz_term.z; \n"
00539 " xyz_term.z = klm.z>1 ? xyz_term.z*r.z : xyz_term.z; \n"
00540 " VARIABLE.z = EQUATION; \n"
00541 " \n"
00542 " r = texRECT(epos, float2(eposX + 3,eposY)).xyz; \n"
00543 " r = r + n_center; \n"
00544 " r_sq.w = dot(r,r); \n"
00545 " xyz_term.w = klm.x>0 ? xyz_term.w*r.x : xyz_term.w; \n"
00546 " xyz_term.w = klm.x>1 ? xyz_term.w*r.x : xyz_term.w; \n"
00547 " xyz_term.w = klm.y>0 ? xyz_term.w*r.y : xyz_term.w; \n"
00548 " xyz_term.w = klm.y>1 ? xyz_term.w*r.y : xyz_term.w; \n"
00549 " xyz_term.w = klm.z>0 ? xyz_term.w*r.z : xyz_term.w; \n"
00550 " xyz_term.w = klm.z>1 ? xyz_term.w*r.z : xyz_term.w; \n"
00551 " VARIABLE.w = EQUATION; \n"
00552 " xyz_term *= LARGE_MULTIPLIER; \n"
00553 " \n"
00554 " float4 coeff = 0; \n"
00555 " for(int j=0; j<ntexs; j++){ \n"
00556 " coeff = texRECT(params,float2(bfX,2+j)); \n"
00557 " output += PREFACTOR1coeff.y*EXP_BASE(N_ONE_DIV_LN2*coeff.x*r_sq+SHIFT); \n"
00558 " if(coeff.z != 0){ \n"
00559 " output += PREFACTOR2coeff.w*EXP_BASE(N_ONE_DIV_LN2*coeff.z*r_sq+SHIFT); \n"
00560 " } \n"
00561 " } \n"
00562 " output *= xyz_term; \n"
00563
00564 " return output*EXP_SHIFT; \n"
00565
00566
00567
00568
00569
00570
00571
00572
00573
00574 "} \n";
00575
00576 switch(which)
00577 {
00578 case psi:
00579 {
00580 findandreplace(shader,"VARIABLE", "//");
00581 findandreplace(shader,"PREFACTOR1", "");
00582 findandreplace(shader,"PREFACTOR2", "");
00583 break;
00584 }
00585 case grx:
00586 {
00587 findandreplace(shader,"VARIABLE", "r_extra");
00588 findandreplace(shader,"EQUATION", "r.x");
00589 findandreplace(shader,"PREFACTOR1", "(klm.x/r_extra - 2*coeff.x*r_extra)*");
00590 findandreplace(shader,"PREFACTOR2", "(klm.x/r_extra - 2*coeff.z*r_extra)*");
00591 break;
00592 }
00593 case gry:
00594 {
00595 findandreplace(shader,"VARIABLE", "r_extra");
00596 findandreplace(shader,"EQUATION", "r.y");
00597 findandreplace(shader,"PREFACTOR1", "(klm.y/r_extra - 2*coeff.x*r_extra)*");
00598 findandreplace(shader,"PREFACTOR2", "(klm.y/r_extra - 2*coeff.z*r_extra)*");
00599 break;
00600 }
00601 case grz:
00602 {
00603 findandreplace(shader,"VARIABLE", "r_extra");
00604 findandreplace(shader,"EQUATION", "r.z");
00605 findandreplace(shader,"PREFACTOR1", "(klm.z/r_extra - 2*coeff.x*r_extra)*");
00606 findandreplace(shader,"PREFACTOR2", "(klm.z/r_extra - 2*coeff.z*r_extra)*");
00607 break;
00608 }
00609 case lap:
00610 {
00611 findandreplace(shader,"VARIABLE", "r_extra");
00612 findandreplace(shader,"EQUATION", "dot( klm.xyz/r, (klm.xyz-1.0)/r )");
00613
00614 findandreplace(shader,"PREFACTOR1",
00615 "(r_extra + (-4.0*(klm.x + klm.y + klm.z - r_sq*coeff.x) - 6.0)*coeff.x)*");
00616 findandreplace(shader,"PREFACTOR2",
00617 "(r_extra + (-4.0*(klm.x + klm.y + klm.z - r_sq*coeff.z) - 6.0)*coeff.z)*");
00618 break;
00619 }
00620 }
00621 findandreplace(shader,"WIDTH",fxo_deltaBF);
00622 findandreplace(shader,"HEIGHT",fxo_deltaOE);
00623 findandreplace(shader,"NELECTRONS",nElectrons);
00624 findandreplace(shader,"LARGE_MULTIPLIER","exp2((float)MULTIPLIER)");
00625 findandreplace(shader,"MULTIPLIER",MULTIPLIER);
00626 findandreplace(shader,"EXP_SHIFT","EXP_BASE((float)-SHIFT)");
00627 findandreplace(shader,"SHIFT",EXP_SHIFT);
00628
00629 if(USE_BASE_2)
00630 {
00631 findandreplace(shader,"N_ONE_DIV_LN2","-1.0/log(2.0)");
00632 findandreplace(shader,"EXP_BASE","exp2");
00633 }
00634 else
00635 {
00636 findandreplace(shader,"N_ONE_DIV_LN2","-1.0");
00637 findandreplace(shader,"EXP_BASE","exp");
00638 }
00639
00640 if(PRINT_SHADER)
00641 {
00642 cout << "which: " << which << endl << shader << endl;
00643 getchar();
00644 }
00645 return shader;
00646 }
00647
00648 void GPUQMCBasisFunction::unloadData(GPUQMCFramebuffer & fb, int w, int h)
00649 {
00650 fb.readFrom(0,0);
00651 glReadPixels(0,0,w,h,GL_RGBA,GL_FLOAT,cpuData);
00652 cout << "unloaded by bf\n";
00653 PrintRGBAPixelsBoxE(cpuData,w,h,20,20,-1,-1,true);
00654 }
00655
00656 int GPUQMCBasisFunction::mapping(int i, int j, int h, int w)
00657 {
00658 return 4*(i*w + j);
00659 }
00660
00661 void GPUQMCBasisFunction::setUpInputs()
00662 {
00663 int nGaussians, bf = 0, index;
00664 int count1=0, count2=0;
00665 for (int atom=0; atom<flags->Natoms; atom++)
00666 {
00667 for (int j=0; j<BFCoeffs(atom).getNumberBasisFunctions(); j++)
00668 {
00669
00670
00671 index = mapping(0,bf,basisfunctionParamsH,fxo_deltaBF);
00672 for (int translate=0; translate<3; translate++)
00673 cpuData[index + translate] = -1.0*Molecule->Atom_Positions(atom,translate);
00674
00675 nGaussians = BFCoeffs(atom).N_Gauss(j);
00676
00677
00678 index = mapping(1,bf,basisfunctionParamsH,fxo_deltaBF);
00679 for (int translate=0; translate<3; translate++)
00680 cpuData[index + translate] = BFCoeffs(atom).xyz_powers(j,translate);
00681
00682
00683 cpuData[index + 3] = (int)(nGaussians/2);
00684 if(nGaussians%2 != 0) cpuData[index + 3]++;
00685
00686
00687 for (int i=0; i<nGaussians; i++)
00688 {
00689 if(2 + i/2 >= basisfunctionParamsH) cerr << "ERROR: cpuData not big enough for all coefficients\n";
00690 index = mapping(2 + i/2,bf,basisfunctionParamsH,fxo_deltaBF);
00691 int add1 = (i%2)?2:0;
00692 int add2 = (i%2)?3:1;
00693 cpuData[index + add1] = BFCoeffs(atom).Coeffs.array()[j][i][0];
00694 cpuData[index + add2] = BFCoeffs(atom).Coeffs.array()[j][i][1];
00695 }
00696 bf++;
00697 }
00698 }
00699
00700 glBindTexture(TEXTURE_TARGET, bfParametersTexID);
00701 glTexImage2D(TEXTURE_TARGET, 0, TEXTURE_INTERNAL_FORMAT,
00702 fxo_deltaBF, basisfunctionParamsH, 0, GL_RGBA, GL_FLOAT, cpuData);
00703 glTexParameterf(TEXTURE_TARGET, GL_TEXTURE_WRAP_S, GL_CLAMP);
00704 glTexParameterf(TEXTURE_TARGET, GL_TEXTURE_WRAP_T, GL_CLAMP);
00705 glTexParameterf(TEXTURE_TARGET, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
00706 glTexParameterf(TEXTURE_TARGET, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
00707
00708 glBindTexture(GL_TEXTURE_RECTANGLE_NV, electronsTexID);
00709 glTexParameterf(GL_TEXTURE_RECTANGLE_NV, GL_TEXTURE_WRAP_S, GL_CLAMP);
00710 glTexParameterf(GL_TEXTURE_RECTANGLE_NV, GL_TEXTURE_WRAP_T, GL_CLAMP);
00711 glTexParameterf(GL_TEXTURE_RECTANGLE_NV, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
00712 glTexParameterf(GL_TEXTURE_RECTANGLE_NV, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
00713 }
00714
00715 int GPUQMCBasisFunction::getNumIterations()
00716 {
00717 #ifdef PRINT_TIMINGS
00718 return TIMING_REPS;
00719 #else
00720 return 1;
00721 #endif
00722 }
00723
00724 void GPUQMCBasisFunction::operator=(GPUQMCBasisFunction & rhs)
00725 {
00726 nRows = rhs.nRows;
00727 nCols = rhs.nCols;
00728 nElectrons = rhs.nElectrons;
00729 nBasisF = rhs.nBasisF;
00730 allocatedRows = rhs.allocatedRows;
00731 allocatedCols = rhs.allocatedCols;
00732 fxo_deltaOE = rhs.fxo_deltaOE;
00733 fxo_deltaBF = rhs.fxo_deltaBF;
00734 txt_deltaBF = rhs.txt_deltaBF;
00735 txt_deltaOE = rhs.txt_deltaOE;
00736 elecW = rhs.elecW;
00737 elecH = rhs.elecH;
00738 maxGaussians = rhs.maxGaussians;
00739 basisfunctionParamsH = rhs.basisfunctionParamsH;
00740
00741 fragProg = rhs.fragProg;
00742 electronsCGP = rhs.electronsCGP;
00743 paramsCGP = rhs.paramsCGP;
00744 fxo_to_txt_CG = rhs.fxo_to_txt_CG;
00745 fxo_to_txt_CGP = rhs.fxo_to_txt_CGP;
00746
00747 basisFunctionsFB = rhs.basisFunctionsFB;
00748 outputFB = rhs.outputFB;
00749
00750 glGenTextures(1, &electronsTexID);
00751 cpuData = (GLfloat *) calloc( nCols*fxo_deltaBF * nRows*nMats*max(fxo_deltaOE,basisfunctionParamsH) * 4 , sizeof(GLfloat) );
00752 }
00753
00754 GPUQMCBasisFunction::GPUQMCBasisFunction()
00755 {
00756 nRows = 0; nCols = 0;
00757 cpuData = (GLfloat *) calloc( 1 , sizeof(GLfloat) );
00758 }
00759
00760 #endif