Functions | |
__global__ void | copyDevToDev (FLOAT_ARRAY_TYPE *dA, FLOAT_ARRAY_TYPE *dB, int N) |
Copy an array of vectors on the device. | |
__global__ void | gpuSimpleComponentSum (FLOAT_ARRAY_TYPE *dVal, FLOAT_ARRAY_TYPE *sum, int N) |
One-thread kernel for summation. | |
__global__ void | gpuSimpleSqSum (FLOAT_ARRAY_TYPE *dVal, float *sqSum, int N) |
One-thread kernel for squared summation. |
This module contains tools for everyday CUDA tasks.
__global__ void copyDevToDev (FLOAT_ARRAY_TYPE *dA, FLOAT_ARRAY_TYPE *dB, int N)
Copy an array of vectors on the device.
[in] | dA | Device vector array a |
[out] | dB | Device vector array b |
[in] | N | Number of vectors |
Definition at line 391 of file gpu_tools.cu.
__global__ void gpuSimpleComponentSum (FLOAT_ARRAY_TYPE *dVal, FLOAT_ARRAY_TYPE *sum, int N)
One-thread kernel for summation.
[in] | dVal | Input data |
[out] | sum | Output of sum |
[in] | N | Number of elements to be summed |
Definition at line 416 of file gpu_tools.cu.
__global__ void gpuSimpleSqSum (FLOAT_ARRAY_TYPE *dVal, float *sqSum, int N)
One-thread kernel for squared summation.
[in] | dVal | Input data |
[out] | sqSum | Output of the squared sum |
[in] | N | Number of elements to be summed |
Definition at line 403 of file gpu_tools.cu.