00001
00022 #ifndef GPU_TOOLS_CUH
00023 #define GPU_TOOLS_CUH
00024
00025 #include <cuda.h>
00026 #include "define.h"
00027 #include <iostream>
00028 #include <fstream>
00029 #include <iomanip>
00030 #include <string>
00031 #include <sstream>
00032 #include <limits>
00033
00034
00083
00084
00085
00086
00087
/**
 * Rounds a float to the nearest integer; halfway cases are rounded away
 * from zero (2.5f -> 3, -2.5f -> -3).
 *
 * roundf() is used instead of the "add 0.5f and truncate" idiom. In single
 * precision the intermediate sum b + 0.5f is itself rounded, so the idiom
 * returns a wrong result whenever that sum is not representable, e.g.
 *   0.49999997f + 0.5f == 1.0f        -> 1 instead of 0
 *   8388609.0f  + 0.5f == 8388610.0f  -> 8388610 instead of 8388609
 * roundf() is part of the CUDA math API, which nvcc makes available in
 * device code without an additional include.
 *
 * @param b Value to round. The rounded value must fit into an int; the
 *          float-to-int conversion is undefined otherwise.
 * @return  The integer nearest to b.
 */
__device__ inline int gpuRnd(float b)
{
    return static_cast<int>(roundf(b));
}
00102
/**
 * Accumulates one vector onto another, component by component.
 *
 * Only the x, y and z members are touched; any further members of
 * FLOAT_ARRAY_TYPE are left unchanged.
 *
 * @param dVec      Vector that is updated in place (dVec += dVecToAdd).
 * @param dVecToAdd Vector whose x, y and z components are added; it is only read.
 */
__device__ inline void gpuCumulateVec(FLOAT_ARRAY_TYPE *dVec, FLOAT_ARRAY_TYPE *dVecToAdd)
{
    FLOAT_ARRAY_TYPE &target = *dVec;
    const FLOAT_ARRAY_TYPE &increment = *dVecToAdd;

    target.x += increment.x;
    target.y += increment.y;
    target.z += increment.z;
}
00118
/**
 * Computes the separation vector A - B in a cubic periodic box using the
 * minimum-image convention.
 *
 * For every component the raw difference d is folded back by subtracting
 * gpuRnd(d / box) * box, i.e. the nearest whole number of box lengths, so
 * each result component has a magnitude of about box / 2 at most.
 *
 * The function is defined in a header and is therefore marked inline, like
 * the other helpers defined here, so that including the header from several
 * translation units does not produce duplicate definitions.
 *
 * @param dDist   Output: receives the folded difference (x, y, z members).
 * @param dA      First position (minuend); only read.
 * @param dB      Second position (subtrahend); only read.
 * @param boxSize Pointer to the edge length of the cubic box; must be
 *                non-zero because each difference is divided by it.
 */
__device__ inline void gpuDistPBC(FLOAT_ARRAY_TYPE *dDist, FLOAT_ARRAY_TYPE *dA, FLOAT_ARRAY_TYPE *dB, float *boxSize)
{
    // Load the box length once instead of dereferencing it for every use.
    const float box = *boxSize;

    // Plain component-wise difference.
    (*dDist).x = (*dA).x - (*dB).x;
    (*dDist).y = (*dA).y - (*dB).y;
    (*dDist).z = (*dA).z - (*dB).z;

    // Shift each component by the nearest integer multiple of the box length.
    (*dDist).x = (*dDist).x - gpuRnd((*dDist).x / box) * box;
    (*dDist).y = (*dDist).y - gpuRnd((*dDist).y / box) * box;
    (*dDist).z = (*dDist).z - gpuRnd((*dDist).z / box) * box;
}
00140
00141
00142
00143
00144
/**
 * Wraps a position back into the primary cell [0, boxSize) of a cubic
 * periodic box, in place.
 *
 * Per component: the whole number of box lengths (truncated towards zero)
 * is removed, a negative remainder is shifted up by one box length, and a
 * result that lands on boxSize is folded back to the lower edge. The last
 * step matters for tiny negative remainders: in single precision
 * (-1e-8f + 10.0f) == 10.0f, which without the fold-back would leave the
 * coordinate exactly on the excluded upper boundary.
 *
 * @param pos     Position to wrap; its x, y and z members are overwritten.
 * @param boxSize Pointer to the edge length of the cubic box; expected to be
 *                positive. (coordinate / boxSize) must fit into an int.
 */
__host__ __device__ inline void checkPBC(FLOAT_ARRAY_TYPE *pos, float *boxSize)
{
    // Load the box length once instead of dereferencing it for every use.
    const float box = *boxSize;

    (*pos).x = (*pos).x - int((*pos).x / box) * box;
    if ((*pos).x < 0.0f) (*pos).x = (*pos).x + box;
    if ((*pos).x >= box) (*pos).x = (*pos).x - box;  // rounding put it on the upper edge

    (*pos).y = (*pos).y - int((*pos).y / box) * box;
    if ((*pos).y < 0.0f) (*pos).y = (*pos).y + box;
    if ((*pos).y >= box) (*pos).y = (*pos).y - box;

    (*pos).z = (*pos).z - int((*pos).z / box) * box;
    if ((*pos).z < 0.0f) (*pos).z = (*pos).z + box;
    if ((*pos).z >= box) (*pos).z = (*pos).z - box;
}
00166
00167
00168
00169
00170
00171
// Kernel declarations (__global__) for four Verlet-list generation variants.
// All four share the same parameter list; the definitions are not part of
// this header section.
// NOTE(review): the parameter names suggest dPos = N particle positions,
// dVlists/pitchDVlists = pitched 2-D array of neighbour indices,
// dVcount = neighbour count per particle, rListSq = squared list radius,
// boxSize = periodic box length - confirm against the definitions.
// NOTE(review): the roles of dmax and doUpdate cannot be determined from
// the declarations alone.
00193 __global__ void gpuGenerateVerletList(FLOAT_ARRAY_TYPE *dPos, int *dVlists, size_t pitchDVlists, int *dVcount, float *dmax, float rListSq, int N, float boxSize, bool *doUpdate);
00194
00195
// Variant "Var1" of the kernel above; how it differs is not visible here.
00214 __global__ void gpuGenerateVerletListVar1(FLOAT_ARRAY_TYPE *dPos, int *dVlists, size_t pitchDVlists, int *dVcount, float *dmax, float rListSq, int N, float boxSize, bool *doUpdate);
00215
00216
// Variant "Smem"; presumably a shared-memory implementation - TODO confirm,
// including any dynamic shared-memory size the launch must supply.
00236 __global__ void gpuGenerateVerletListSmem(FLOAT_ARRAY_TYPE *dPos, int *dVlists, size_t pitchDVlists, int *dVcount, float *dmax, float rListSq, int N, float boxSize, bool *doUpdate);
00237
00238
// Variant "SmemVar1"; how it differs from the other variants is not visible here.
00259 __global__ void gpuGenerateVerletListSmemVar1(FLOAT_ARRAY_TYPE *dPos, int *dVlists, size_t pitchDVlists, int *dVcount, float *dmax, float rListSq, int N, float boxSize, bool *doUpdate);
00261
// Host function declaration; the definition is not part of this header section.
// NOTE(review): judging by the names this rescales the velocities in dVel
// towards a target temperature T, using sumSq as scratch space for a
// reduction configured by numInterData/numThreads/maxThreads/maxBlocks -
// confirm against the definition.
00282 void rescaleT(int N, int dim, int numInterData, int numThreads, float T, int maxThreads, int maxBlocks, FLOAT_ARRAY_TYPE *dVel, float *sumSq);
00283
00284
// Kernel declaration (__global__).
// NOTE(review): presumably applies the rescaling factor derived from
// *vSumSq to the N entries of dVel - confirm against the definition.
00298 __global__ void gpuRescaleTKernel(int N, int dim, float T, FLOAT_ARRAY_TYPE *dVel, float *vSumSq);
00300
00301
00302
00303
00304
00305
00306
// Host function declaration; the definition is not part of this header section.
// NOTE(review): presumably removes the net (centre-of-mass) velocity from
// dVel, with compSum holding the component-wise velocity sum - confirm
// against the definition.
00323 void removeDrift(int N, int numInterData, int numThreads, int maxThreads, int maxBlocks, FLOAT_ARRAY_TYPE *dVel, FLOAT_ARRAY_TYPE *compSum);
00324
00325
// Kernel declaration (__global__); note that the qualifier follows the
// return type here, unlike the other kernel declarations in this header.
// NOTE(review): presumably the device-side part of removeDrift - confirm.
00335 void __global__ gpuRemoveDriftKernel(int N, FLOAT_ARRAY_TYPE *dVel, FLOAT_ARRAY_TYPE *compSum);
// Kernel declaration (__global__).
// NOTE(review): the names suggest it derives on-time velocities dVt from
// dVel and dForce with time step dt - confirm the exact formula and which
// arrays are written against the definition.
00352 __global__ void gpuGetVelOnTime(FLOAT_ARRAY_TYPE *dVel, FLOAT_ARRAY_TYPE *dVt, FLOAT_ARRAY_TYPE *dForce, int N, float dt);
00354
00355
00356
00357
// Host function declaration returning a float; the definition is not part
// of this header section.
// N, maxThreads and maxBlocks are taken by non-const reference; whether
// they are modified cannot be determined from the declaration.
// NOTE(review): presumably returns the kinetic energy computed from dVel,
// using sumSq as reduction scratch space - confirm.
00373 float getKineticEnergy(FLOAT_ARRAY_TYPE *dVel, int &N, float *sumSq, int &maxThreads, int &maxBlocks);
00374
00375
// Host function declaration returning a float. All scalar arguments and
// the launch dimensions are taken by non-const reference; whether they are
// modified cannot be determined from the declaration.
// NOTE(review): presumably launches gpuGetHarmonicPotentialEnergyKernel and
// reduces dEpot into the returned total - confirm. The meaning of cnn
// (together with pitch and numNN) is assumed to be a pitched neighbour
// table - TODO verify.
00395 float getHarmonicPotentialEnergy(float *dEpot, FLOAT_ARRAY_TYPE *dPos, float *sum, int *cnn, size_t &pitch, int &N, int &numNN, float &k, float &boxSize, dim3 &dimGrid, dim3 &dimBlock, int &maxThreads, int &maxBlocks);
00396
00397
// Kernel declaration (__global__).
// NOTE(review): presumably writes per-particle harmonic potential energy
// into dEpot - confirm against the definition.
00412 __global__ void gpuGetHarmonicPotentialEnergyKernel(float *dEpot, FLOAT_ARRAY_TYPE *dPos, int *cnn, size_t pitch, int N, int numNN, float k, float boxSize);
00413
00414
// Host function declaration returning a float; reference parameters as above.
// NOTE(review): presumably returns the total cut-off Lennard-Jones
// potential energy, with rCutSq the squared cut-off radius - confirm.
00431 float getLJCutPotentialEnergy(FLOAT_ARRAY_TYPE *dPos, float *dEpot, float *sum, int &N, float &boxSize, float &rCutSq, dim3 &dimGrid, dim3 &dimBlock, int &maxThreads, int &maxBlocks);
00432
00433
// Kernel declaration (__global__).
// NOTE(review): presumably writes per-particle cut-off Lennard-Jones energy
// into dEpot - confirm against the definition.
00444 __global__ void gpuLJCutPotentialEnergyKernel(FLOAT_ARRAY_TYPE *dPos, float *dEpot, int N, float boxSize, float rCutSq);
00447
00448
// Host helper declaration taking and returning unsigned long.
// NOTE(review): presumably converts a byte count to kilobytes - confirm the
// divisor (1000 vs 1024) against the definition.
// NOTE(review): declared `inline static`, but no definition appears in this
// part of the header; every translation unit that calls it needs a
// definition of its own - confirm where it is defined.
00460 inline static unsigned long inKB(unsigned long bytes);
00461
00462
// Host helper declaration; same remarks as for inKB.
// NOTE(review): presumably converts a byte count to megabytes - confirm.
00470 inline static unsigned long inMB(unsigned long bytes);
00471
00472
00473
// Kernel declaration (__global__).
// NOTE(review): presumably copies N elements between the device arrays dA
// and dB; the copy direction is not visible from the declaration - confirm.
00482 __global__ void copyDevToDev(FLOAT_ARRAY_TYPE *dA, FLOAT_ARRAY_TYPE *dB, int N);
00483
00484
// Kernel declaration (__global__).
// NOTE(review): presumably accumulates the squared magnitudes of the N
// entries of dVal into *sqSum - confirm, including how concurrent writes
// to sqSum are handled.
00495 __global__ void gpuSimpleSqSum(FLOAT_ARRAY_TYPE *dVal, float*sqSum, int N);
00496
// Kernel declaration (__global__).
// NOTE(review): presumably accumulates the component-wise sum of the N
// entries of dVal into *sum - confirm against the definition.
00507 __global__ void gpuSimpleComponentSum(FLOAT_ARRAY_TYPE *dVal, FLOAT_ARRAY_TYPE *sum, int N);
00511
00512
// Host function declaration returning bool; takes no arguments.
// NOTE(review): presumably reports device memory usage and returns a
// success flag - confirm what the return value signals.
00522 bool memInfo();
00523
00524
// Host function declaration.
// NOTE(review): presumably checks the last CUDA error and reports it
// together with the given identifier string - confirm whether it aborts
// or only logs.
00531 void checkCudaError(const char *identifier);
00532
// Host function declarations for debug output; the definitions are not
// part of this header section.
// NOTE(review): the h/d prefixes suggest host vs. device pointers
// (hPos, hVel, hForce vs. dArray, dNNListArray, dVlists) - confirm before
// passing a pointer from the other memory space.
00541 void showPositions(FLOAT_ARRAY_TYPE *hPos, int N);
00542
00543
// N is taken by non-const reference here (by value in showPositions);
// whether it is modified cannot be determined from the declaration.
00552 void showVelocities(FLOAT_ARRAY_TYPE *hVel, int &N);
00553
00554
00555
// Same signature shape as showVelocities.
00565 void showForces(FLOAT_ARRAY_TYPE *hForce, int &N);
00566
00567
00568
// NOTE(review): presumably copies dArray to the host before printing - confirm.
00577 void showDeviceArray(FLOAT_ARRAY_TYPE *dArray, int N);
00578
00579
// The identifier is spelled "Neareast" in the source; callers must use that
// exact spelling.
00589 void showNeareastNeighborList(int *dNNListArray, int &N, int &numNN);
00590
00591
// NOTE(review): pitchVlists is presumably the row pitch in bytes of dVlists
// and NList the list capacity per particle - confirm.
00602 void showVerletList(int *dVlists, size_t pitchVlists, int *dVcount, int N, int NList);
00605
// Host function declaration returning bool.
// NOTE(review): presumably selects the CUDA device with index gpuDevNo and
// returns whether that succeeded - confirm against the definition.
00617 bool setCudaDevice(int gpuDevNo);
00618
// Host function declarations returning bool; all std::string arguments are
// passed by value. The definitions are not part of this header section.
// NOTE(review): presumably each writes a shell script that (re)launches the
// executable exeName and returns whether writing succeeded - confirm.
00626 bool generateStartScript(std::string exeName);
00627
00628
// NOTE(review): the scalar parameters look like the simulation settings to
// be written into the restart script - confirm their meaning and units.
00649 bool generateRestartScript(std::string exeName, std::string basename, int dim, int N, int numNN, float T, float k, float boxSize, float dt, int NOffset, int NSim, int NOut, int NAdjust, int gpuDevNo);
00650
00651
// Takes the same settings as generateRestartScript plus scriptname and
// inputfile.
// NOTE(review): presumably the common implementation behind the two
// functions above - confirm.
00674 bool generateScript(std::string scriptname, std::string exeName, std::string inputfile, std::string basename, int dim, int N, int numNN, float T, float k, float boxSize, float dt, int NOffset, int NSim, int NOut, int NAdjust, int gpuDevNo);
00677
00678
00679
00680
// Kernel declarations (__global__) for reductions; the definitions are not
// part of this header section.
// NOTE(review): the names suggest a two-stage scheme - a "...Reduce1"
// kernel condenses N input values into numInterData intermediate values,
// and a "...Reduce2" kernel combines those intermediates - confirm, along
// with the expected launch configuration and the role of numThreads.
00695 __global__ void gpuReduce1(float *dInData, float *dInterData, int numInterData, int numThreads, int N);
00696
00697
// Takes only the intermediate buffer; no separate input array.
00707 __global__ void gpuReduce2(float *dInterData, int numInterData, int numThreads);
00708
00709
// Vector input, scalar (float) intermediates.
// NOTE(review): presumably sums squared magnitudes - confirm.
00721 __global__ void gpuSqSumReduce1(FLOAT_ARRAY_TYPE *dInData, float *dInterData, int numInterData, int numThreads, int N);
00722
00723
// Vector input and vector intermediates.
// NOTE(review): presumably a component-wise sum - confirm.
00735 __global__ void gpuComponentSumReduce1(FLOAT_ARRAY_TYPE *dInData, FLOAT_ARRAY_TYPE *dInterData, int numInterData, int numThreads, int N);
00736
00737
// Vector counterpart of gpuReduce2 by signature.
00747 __global__ void gpuComponentSumReduce2(FLOAT_ARRAY_TYPE *dInterData, int numInterData, int numThreads);
00748
00749
// Host function declarations; the definitions are not part of this header
// section. None of them returns a value.
// NOTE(review): presumably each launches the matching pair of reduction
// kernels and leaves the result in dInterData - confirm where the final
// value ends up and whether the call synchronizes.
00761 void sumReduction(float *dInData, float *dInterData, int numInterData, int numThreads, int N);
00762
00763
// Vector input, float intermediates; note the capitalised name, unlike its
// two siblings.
00775 void SqVecSumReduction(FLOAT_ARRAY_TYPE *dInData, float *dInterData, int numInterData, int numThreads, int N);
00776
00777
// Vector input and vector intermediates.
00789 void componentSumReduction(FLOAT_ARRAY_TYPE *dInData, FLOAT_ARRAY_TYPE *dInterData, int numInterData, int numThreads, int N);
00792 #endif //GPU_TOOLS_CUH
00793