00001
00022 #include <iostream>
00023 #include <fstream>
00024 #include <string>
00025 #include <cmath>
00026 #include <assert.h>
00027 #include <cstdlib>
00028 #include <limits>
00029 #include <cuda.h>
00030 #include <ctime>
00031 #include "gpu_forces.cuh"
00032 #include "gpu_integrator.cuh"
00033 #include "gpu_tools.cuh"
00034 #include "define.h"
00035 #include "data.h"
00036 #include <cstring>
00037 #include "tests.cuh"
00038
00039
// Entry point for the GPU molecular-dynamics driver: Lennard-Jones particles
// with harmonic bonds, leap-frog integration, Verlet neighbour lists, periodic
// temperature rescaling / drift removal, and both linear (backup) and
// logarithmically spaced trajectory output.
//
// Expects exactly 14 user arguments (argc == 15):
//   1  inputfile  - initial configuration file
//   2  basename   - base name for output files
//   3  dim        - spatial dimensionality (used in temperature rescaling)
//   4  N          - number of particles
//   5  numNN      - bonded nearest neighbours per particle
//   6  T          - target temperature
//   7  k          - harmonic bond spring constant
//   8  boxSize    - periodic box edge length
//   9  dt         - integration time step
//   10 NOffset    - step index of the initial configuration
//   11 NSim       - last step to simulate
//   12 NOut       - interval between linear (backup) outputs
//   13 NAdjust    - interval between thermostat/drift adjustments
//   14 gpuDevNo   - CUDA device index to run on
//
// Returns 0 on success, 1 on argument/device/input errors.
int main(int argc, char *argv[])
{
    // ---- command-line parsing --------------------------------------------
    if (argc != 15)
    {
        std::cout << "Argument parsing failed!\nPlease edit 'example.sh' for your needs!\n";
        std::string exeName = argv[0];
        std::cout << exeName << "\n";
        std::cout << "Number of given arguments: " << argc << "\n";
        // Fix: dropped the unused 'tmp' local; only the side effect of
        // emitting a template start script is wanted here.
        (void)generateStartScript(exeName);
        return 1;
    }
    std::string inputfile = argv[1];
    std::string basename = argv[2];
    int dim = atoi(argv[3]);
    int N = atoi(argv[4]);
    int numNN = atoi(argv[5]);
    float T = atof(argv[6]);
    float k = atof(argv[7]);
    float boxSize = atof(argv[8]);
    float dt = atof(argv[9]);
    int NOffset = atoi(argv[10]);
    int NSim = atoi(argv[11]);
    int NOut = atoi(argv[12]);
    int NAdjust = atoi(argv[13]);
    int gpuDevNo = atoi(argv[14]);

    bool success = true;
    bool linLogEqual = false;  // set when a log-scale output coincides with a linear one
    bool NthreadEqN = true;    // true iff blocks*threads == N (enables the smem kernel)

    success = setCudaDevice(gpuDevNo);
    if (!success)
    {
        return 1;
    }

    // ---- interaction and neighbour-list radii ----------------------------
    float rCut = 3. * pow(2., 1. / 6.);  // LJ cutoff: 3 * 2^(1/6)
    float rCutSq = rCut * rCut;
    float rVer = 1.5 * rCut;             // Verlet radius = cutoff + skin
    float rVerSq = rVer * rVer;
    int numPointsPerDecade = 20;         // density of logarithmic output points

    // ---- kernel launch configuration -------------------------------------
    int threads = 128;
    int blocks = N / threads + (N % threads == 0 ? 0 : 1);  // ceil(N/threads)
    if (blocks >= 65536)
    {
        // Fix: corrected "exeeded" -> "exceeded" in the error message.
        std::cout << "Maximum block number (65536) exceeded: " << blocks << "\n";
        return 1;
    }
    if (blocks * threads != N)
    {
        NthreadEqN = false;  // tail block is partially filled; use guarded kernel variant
    }
    dim3 dimGrid(blocks);
    dim3 dimBlock(threads);

    // dimensions of the two-stage reduction scratch buffers
    int redM = 128;
    int redN = 128;

    // ---- logarithmic output schedule -------------------------------------
    // Output steps are spaced at equal ratios: numPointsPerDecade points per
    // factor of 10 in step number, starting from NOffset (or 1 if NOffset==0).
    // Fix: removed the unused 'numLogOutputPoints' local, whose computation
    // evaluated log(NOffset) = -inf for NOffset == 0 (UB on the int cast).
    double logInc = (log(10) - log(1)) / static_cast<double>(numPointsPerDecade);
    int LogOutputCounter = 1;
    int LogOutputOffset = NOffset == 0 ? 1 : NOffset;
    int nextLogOutput = static_cast<int>(exp(logInc * LogOutputCounter) * LogOutputOffset);
    std::string backup = "_bkp_" + basename;

    // ---- buffers ----------------------------------------------------------
    FLOAT_ARRAY_TYPE *hPos;        // host positions
    FLOAT_ARRAY_TYPE *hVel;        // host velocities
    FLOAT_ARRAY_TYPE *dPos;        // device positions
    FLOAT_ARRAY_TYPE *dVel;        // device velocities (leap-frog, half-step offset)
    FLOAT_ARRAY_TYPE *velOnDt;     // device velocities synchronized to on-step time
    FLOAT_ARRAY_TYPE *dForce;      // device forces
    float *dEPot;                  // potential-energy scratch (allocated but not used below)
    float *reduction;              // scalar-reduction scratch (temperature)
    FLOAT_ARRAY_TYPE *compReduction;  // component-wise reduction scratch (drift)
    int *hCnn;                     // host bond table: numNN rows of N entries
    int *dCnn;                     // device bond table (pitched)

    // Verlet neighbour lists
    int *vCount;                   // neighbour count per particle
    int *vLists;                   // pitched neighbour lists: NVList rows of N
    size_t vListsPitch;
    // Capacity estimate: particles in a cube of edge 2*rVer at density 0.75.
    int NVList = static_cast<int>(0.75 * pow(2.0 * rVer, 3.));

    float *dMax;                   // per-particle displacement since last list build
    bool *dListUpdate;             // device flag: neighbour-list rebuild required
    bool hListUpdate = false;
    // Rebuild is triggered before any particle can have crossed half the skin.
    float maxDisplacement = (rVer - rCut) * 0.5;

    hPos = new FLOAT_ARRAY_TYPE[N];
    hVel = new FLOAT_ARRAY_TYPE[N];
    hCnn = new int[numNN * N];

    // ---- initial configuration -------------------------------------------
    success = loadInitData(inputfile, hPos, hVel, hCnn, N, numNN);
    if (!success)
    {
        std::cout << "Data could not be read!\n";
        delete[] hPos;
        delete[] hVel;
        delete[] hCnn;
        return 1;
    }

    // Echo the initial configuration as the first output frame.
    success = writeData(basename, hPos, hVel, hCnn, N, numNN, NOffset);
    // Fix: report (previously ignored) failure to write the initial frame.
    if (!success)
    {
        std::cout << "Warning: writing the initial configuration failed!\n";
    }

    // ---- device allocations ------------------------------------------------
    size_t size = N * sizeof(FLOAT_ARRAY_TYPE);

    cudaMalloc((void **)&dPos, size);
    checkCudaError("Mem Alloc dPos");
    success = memInfo();
    cudaMalloc((void **)&dVel, size);
    checkCudaError("Mem Alloc dVel");
    cudaMalloc((void **)&velOnDt, size);
    checkCudaError("Mem Alloc velOnDt");
    cudaMalloc((void **)&dForce, size);
    checkCudaError("Mem Alloc dForce");
    cudaMalloc((void **)&dEPot, N * sizeof(float));
    checkCudaError("Mem Alloc dEPot");
    cudaMalloc((void **)&reduction, redM * sizeof(float));
    checkCudaError("Mem Alloc reduction");
    cudaMalloc((void **)&compReduction, redM * sizeof(float3));
    checkCudaError("Mem Alloc compReduction");

    size_t hCnn_pitch = N * sizeof(int);  // host rows are tightly packed
    size_t dCnnPitch = 0;
    cudaMallocPitch((void **)&dCnn, &dCnnPitch, N * sizeof(int), numNN);
    checkCudaError("Pitch Mem Alloc dCnn");

    cudaMalloc((void **)&vCount, N * sizeof(int));
    checkCudaError("Mem Alloc vCount");
    cudaMallocPitch((void **)&vLists, &vListsPitch, N * sizeof(int), NVList);
    checkCudaError("Pitch Mem Alloc vLists");
    cudaMalloc((void **)&dListUpdate, sizeof(bool));
    checkCudaError("Mem Alloc dListUpdate");
    cudaMalloc((void **)&dMax, N * sizeof(float));
    checkCudaError("Mem Alloc dMax");
    success = memInfo();

    // ---- host -> device upload ---------------------------------------------
    cudaMemcpy(dPos, hPos, sizeof(FLOAT_ARRAY_TYPE) * N, cudaMemcpyHostToDevice);
    checkCudaError("Mem Cpy: dPos, hPos");
    cudaMemcpy(dVel, hVel, sizeof(FLOAT_ARRAY_TYPE) * N, cudaMemcpyHostToDevice);
    checkCudaError("Mem Cpy: dVel, hVel");
    cudaMemcpy2D(dCnn, dCnnPitch, hCnn, hCnn_pitch, N * sizeof(int), numNN, cudaMemcpyHostToDevice);
    checkCudaError("Mem Cpy 2D: dCnn, hCnn");
    cudaMemcpy(dListUpdate, &hListUpdate, sizeof(bool), cudaMemcpyHostToDevice);
    // Fix: this copy was the only one missing an error check.
    checkCudaError("Mem Cpy: dListUpdate, &hListUpdate");

    gpuResetForces<<<dimGrid, dimBlock>>>(dForce, N);
    checkCudaError("Kernel error: gpuResetForces");

    // ---- timing -------------------------------------------------------------
    cudaEvent_t start, stop;
    cudaEventCreate(&start);
    cudaEventCreate(&stop);
    float time;

    clock_t cStart, cEnd;
    cudaEventRecord(start, 0);
    cStart = clock();

    // Initial neighbour-list build. The smem variant assumes a full last block,
    // so it is only valid when N is an exact multiple of the block size.
    if (NthreadEqN)
    {
        gpuGenerateVerletListSmem<<<dimGrid, dimBlock, dimBlock.x * sizeof(FLOAT_ARRAY_TYPE)>>>(dPos, vLists, vListsPitch, vCount, dMax, rVerSq, N, boxSize, dListUpdate);
        checkCudaError("Kernel error: gpuGenerateVerletListSmem");
    }
    else
    {
        gpuGenerateVerletListVar1<<<dimGrid, dimBlock>>>(dPos, vLists, vListsPitch, vCount, dMax, rVerSq, N, boxSize, dListUpdate);
        checkCudaError("Kernel error: gpuGenerateVerletList");
    }

    // Bring the start configuration to the target temperature and zero net drift.
    rescaleT(N, dim, redM, redN, T, threads, blocks, dVel, reduction);
    removeDrift(N, redM, redN, threads, blocks, dVel, compReduction);

    gpuResetForces<<<dimGrid, dimBlock>>>(dForce, N);
    checkCudaError("Kernel error: gpuResetForces");

    gpuLJForcesVerletList<<<dimGrid, dimBlock>>>(dPos, dForce, N, vLists, vListsPitch, vCount, boxSize, rCutSq);
    checkCudaError("Kernel error: gpuLJForcesVerletList");

    gpuHarmonicBondForces<<<dimGrid, dimBlock>>>(dPos, dForce, dCnn, dCnnPitch, N, numNN, k, boxSize);
    checkCudaError("Kernel error: gpuHarmonicBondForces");
    // Step velocities back half a step to set up the leap-frog scheme.
    gpuInitialBackstep<<<dimGrid, dimBlock>>>(dVel, dForce, N, dt);
    checkCudaError("Kernel error: gpuInitialBackstep");

    std::cout.setf(std::ios_base::scientific);
    std::cout.precision(std::numeric_limits<float>::digits10);

    // ---- main simulation loop ----------------------------------------------
    // Outer loop: one iteration per linear-output (backup) interval.
    // Middle loop: one iteration per thermostat interval.
    // Inner loop: individual integration steps.
    int counter = NOffset + 1;
    for (int i = NOffset + NOut; i <= NSim; i = i + NOut)
    {
        for (int j = NAdjust; j <= NOut; j = j + NAdjust)
        {
            rescaleT(N, dim, redM, redN, T, threads, blocks, dVel, reduction);
            removeDrift(N, redM, redN, threads, blocks, dVel, compReduction);

            for (int mAdj = 1; mAdj <= NAdjust; ++mAdj)
            {
                // Rebuild the Verlet lists when the integrator flagged that a
                // particle may have moved more than half the skin distance.
                cudaMemcpy(&hListUpdate, dListUpdate, sizeof(bool), cudaMemcpyDeviceToHost);
                checkCudaError("Mem Cpy: &hListUpdate, dListUpdate");
                if (hListUpdate == true)
                {
                    if (NthreadEqN)
                    {
                        gpuGenerateVerletListSmem<<<dimGrid, dimBlock, dimBlock.x * sizeof(FLOAT_ARRAY_TYPE)>>>(dPos, vLists, vListsPitch, vCount, dMax, rVerSq, N, boxSize, dListUpdate);
                        checkCudaError("Kernel error: gpuGenerateVerletListSmem");
                    }
                    else
                    {
                        gpuGenerateVerletListVar1<<<dimGrid, dimBlock>>>(dPos, vLists, vListsPitch, vCount, dMax, rVerSq, N, boxSize, dListUpdate);
                        checkCudaError("Kernel error: gpuGenerateVerletList");
                    }
                    hListUpdate = false;
                }

                // One leap-frog step: recompute forces, then advance.
                gpuResetForces<<<dimGrid, dimBlock>>>(dForce, N);
                checkCudaError("Kernel error: gpuResetForces");

                gpuLJForcesVerletList<<<dimGrid, dimBlock>>>(dPos, dForce, N, vLists, vListsPitch, vCount, boxSize, rCutSq);
                checkCudaError("Kernel error: gpuLJForcesVerletList");
                gpuHarmonicBondForces<<<dimGrid, dimBlock>>>(dPos, dForce, dCnn, dCnnPitch, N, numNN, k, boxSize);
                checkCudaError("Kernel error: gpuHarmonicBondForces");

                gpuIntegratorLeapFrog<<<dimGrid, dimBlock>>>(dPos, dVel, dForce, N, dt, boxSize, dMax, maxDisplacement, dListUpdate);
                checkCudaError("Kernel error: gpuIntegratorLeapFrog");

                // Logarithmically spaced trajectory output.
                if (nextLogOutput == counter)
                {
                    if (i == nextLogOutput)
                    {
                        // Coincides with the linear output below; skip the
                        // duplicate velocity synchronization there.
                        linLogEqual = true;
                    }
                    // Recompute forces so velocities can be synchronized to
                    // the on-step time for output.
                    gpuResetForces<<<dimGrid, dimBlock>>>(dForce, N);
                    checkCudaError("Kernel error: gpuResetForces");

                    gpuLJForcesVerletList<<<dimGrid, dimBlock>>>(dPos, dForce, N, vLists, vListsPitch, vCount, boxSize, rCutSq);
                    checkCudaError("Kernel error: gpuLJForcesVerletList");
                    gpuHarmonicBondForces<<<dimGrid, dimBlock>>>(dPos, dForce, dCnn, dCnnPitch, N, numNN, k, boxSize);
                    checkCudaError("Kernel error: gpuHarmonicBondForces");
                    gpuGetVelOnTime<<<dimGrid, dimBlock>>>(dVel, velOnDt, dForce, N, dt);
                    checkCudaError("Kernel error: gpuGetVelOnTime");

                    cudaMemcpy(hPos, dPos, sizeof(FLOAT_ARRAY_TYPE) * N, cudaMemcpyDeviceToHost);
                    checkCudaError("Mem Cpy: hPos, dPos");
                    cudaMemcpy(hVel, velOnDt, sizeof(FLOAT_ARRAY_TYPE) * N, cudaMemcpyDeviceToHost);
                    checkCudaError("Mem Cpy: hVel, velOnDt");

                    success = writeData(basename, hPos, hVel, hCnn, N, numNN, counter);
                    // Advance the log schedule, skipping duplicate integer
                    // step numbers produced by rounding at small counts.
                    int tmp;
                    do
                    {
                        LogOutputCounter += 1;
                        tmp = static_cast<int>(exp(logInc * LogOutputCounter) * LogOutputOffset);
                    } while (tmp == nextLogOutput);
                    nextLogOutput = tmp;
                }
                counter++;
            }
        }

        // Linear-interval backup output (skip the velocity sync if the log
        // output just above already produced it for this very step).
        if (linLogEqual == false)
        {
            gpuResetForces<<<dimGrid, dimBlock>>>(dForce, N);
            checkCudaError("Kernel error: gpuResetForces");

            gpuLJForcesVerletList<<<dimGrid, dimBlock>>>(dPos, dForce, N, vLists, vListsPitch, vCount, boxSize, rCutSq);
            checkCudaError("Kernel error: gpuLJForcesVerletList");
            gpuHarmonicBondForces<<<dimGrid, dimBlock>>>(dPos, dForce, dCnn, dCnnPitch, N, numNN, k, boxSize);
            checkCudaError("Kernel error: gpuHarmonicBondForces");
            gpuGetVelOnTime<<<dimGrid, dimBlock>>>(dVel, velOnDt, dForce, N, dt);
            checkCudaError("Kernel error: gpuGetVelOnTime");

            cudaMemcpy(hPos, dPos, sizeof(FLOAT_ARRAY_TYPE) * N, cudaMemcpyDeviceToHost);
            checkCudaError("Mem Cpy: hPos, dPos");
            cudaMemcpy(hVel, velOnDt, sizeof(FLOAT_ARRAY_TYPE) * N, cudaMemcpyDeviceToHost);
            checkCudaError("Mem Cpy: hVel, velOnDt");
        }

        success = writeData(backup, hPos, hVel, hCnn, N, numNN, i);
        linLogEqual = false;
    }

    // ---- timing report -------------------------------------------------------
    // Fix: cudaThreadSynchronize() is deprecated (and removed in CUDA 12);
    // cudaDeviceSynchronize() is the direct replacement.
    cudaDeviceSynchronize();
    cudaEventRecord(stop, 0);
    cudaEventSynchronize(stop);
    cEnd = clock();
    std::cout << "########### CPU TIME ###########\n";
    std::cout << "start: " << cStart << "\nend: " << cEnd << "\n";
    std::cout << "clocks per second: " << CLOCKS_PER_SEC << "\n";
    std::cout << "cpu time: " << (cEnd - cStart) / static_cast<double>(CLOCKS_PER_SEC) << "s\n\n";
    cudaEventElapsedTime(&time, start, stop);
    std::cout << "########## CUDA TIMER #########\n";
    std::cout << "elapsed time: " << time << "ms\n";

    // ---- cleanup --------------------------------------------------------------
    // Fix: events were created but never destroyed.
    cudaEventDestroy(start);
    cudaEventDestroy(stop);
    delete[] hPos;
    delete[] hVel;
    delete[] hCnn;
    cudaFree(dPos);
    cudaFree(dVel);
    cudaFree(velOnDt);
    cudaFree(dForce);
    cudaFree(dEPot);
    cudaFree(reduction);
    cudaFree(compReduction);
    cudaFree(dCnn);
    cudaFree(vCount);
    cudaFree(vLists);
    cudaFree(dMax);
    cudaFree(dListUpdate);
    success = memInfo();

    return 0;
}