CUDA Kernels
/*
 * ------------------------------------------------------------------------------
 * Bootstrap kernels
 * ------------------------------------------------------------------------------
 */
__global__
void kernel_bs_normalizeRandoms(
    unsigned int* data,
    unsigned int* randoms,
    unsigned int nbElements )
{
    // Compute the flattened global thread index from the 2D launch grid
    unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
    unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;
    unsigned int index = y * gridDim.x * blockDim.x + x;
    unsigned int randIndex = index * nbElements;

    // Fold every random value into [0, nbElements) so it can later be
    // used as an index into the data series
    for( unsigned int i = 0; i < nbElements; ++i )
    {
        // A single thread also normalizes the shared data array
        if( index == 0 )
        {
            data[i] = data[i] % nbElements;
        }
        randoms[randIndex + i] = randoms[randIndex + i] % nbElements;
    }
}
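
For context, here is a minimal host-side sketch of how this kernel might be driven. The helper name bs_prepareRandoms, the seed, and the launch dimensions are assumptions for illustration, not part of the original source; the only firm dependency is that the randoms buffer holds one raw 32-bit value per element of every series, here produced with cuRAND, before the kernel folds them into [0, nbElements).

#include <curand.h>

// Hypothetical host-side helper (assumed name and launch shape): fill
// the device buffer with raw 32-bit random values, then normalize them
// on the device with kernel_bs_normalizeRandoms.
void bs_prepareRandoms( unsigned int* d_data, unsigned int* d_randoms,
                        unsigned int nbSeries, unsigned int nbElements )
{
    // Assumed launch: 1D grid, one thread per series. The kernel has no
    // bounds guard, so d_randoms must be allocated for the full launch,
    // i.e. grid.x * block.x series (see the driver sketch further down).
    dim3 block( 256 );
    dim3 grid( ( nbSeries + block.x - 1 ) / block.x );
    size_t paddedSeries = (size_t)grid.x * block.x;

    curandGenerator_t gen;
    curandCreateGenerator( &gen, CURAND_RNG_PSEUDO_DEFAULT );
    curandSetPseudoRandomGeneratorSeed( gen, 1234ULL ); // assumed seed
    curandGenerate( gen, d_randoms, paddedSeries * nbElements );
    curandDestroyGenerator( gen );

    kernel_bs_normalizeRandoms<<<grid, block>>>( d_data, d_randoms, nbElements );
}
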
__global__
void kernel_bs_resample(
    unsigned int* data,
    unsigned int* randoms,
    float* series,
    unsigned int nbSeries,
    unsigned int nbElements )
{
    // Compute the flattened global thread index from the 2D launch grid
    unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
    unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;
    unsigned int index = y * gridDim.x * blockDim.x + x;
    unsigned int randIndex = index * nbElements;

    // Threads beyond the number of series must not read the randoms array
    if( index >= nbSeries ) return;

    // Build one bootstrap replicate: sum nbElements values of data
    // selected by this thread's slice of random indices
    unsigned int sum = 0;
    for( unsigned int i = 0; i < nbElements; ++i )
    {
        unsigned int r = randoms[randIndex + i];
        if( r < nbElements ) // r is unsigned, so r >= 0 always holds
        {
            sum += data[r];
        }
    }

    // Store the average; divide in floating point so the fractional
    // part is not truncated
    series[index] = static_cast<float>( sum ) / nbElements;
}
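
Each thread of kernel_bs_resample thus produces one bootstrap replicate: the mean of nbElements values of data picked by its own slice of randoms. A possible launch, matching the sketch above (the 1D grid and the 256-thread blocks are assumptions):

// Illustrative launch: one thread per bootstrap replicate; with a
// 1D grid the flattened index equals the series number.
dim3 block( 256 );
dim3 grid( ( nbSeries + block.x - 1 ) / block.x );
kernel_bs_resample<<<grid, block>>>( d_data, d_randoms, d_series,
                                     nbSeries, nbElements );
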
__global__
void kernel_bs_distribute(
    float* series,
    float scale,
    unsigned int nbElements, // unused in this kernel
    unsigned int* distribution,
    unsigned int distributionSize )
{
    // Compute the flattened global thread index from the 2D launch grid
    unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
    unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;
    unsigned int index = y * gridDim.x * blockDim.x + x;

    // Map the series value onto a histogram bucket; non-positive values
    // and values beyond the last bucket are ignored
    float position = series[index] * scale;
    if( position > 0 && position < distributionSize )
    {
        // TODO: Can be optimized with shared memory within each block
        atomicAdd( &distribution[(unsigned int)position], 1 );
    }
}
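
Putting the three kernels together, a hedged end-to-end driver might look as follows. All host-side names, the padding scheme, and the scale factor are assumptions, not part of the original source; the padding keeps the unguarded kernels inside their allocations, and zeroed padding in d_series is skipped by the position > 0 test in kernel_bs_distribute.

#include <cuda_runtime.h>

// Hypothetical driver for the bootstrap pipeline: copy the data to the
// device, draw and normalize random indices, resample, then histogram.
void bs_run( const unsigned int* h_data, unsigned int nbElements,
             unsigned int nbSeries, unsigned int* h_distribution,
             unsigned int distributionSize, float scale )
{
    // Pad buffers to the full launch size so the unguarded kernels
    // never touch memory outside their allocations
    dim3 block( 256 );
    dim3 grid( ( nbSeries + block.x - 1 ) / block.x );
    size_t paddedSeries = (size_t)grid.x * block.x;

    unsigned int *d_data, *d_randoms, *d_distribution;
    float* d_series;
    cudaMalloc( &d_data, nbElements * sizeof( unsigned int ) );
    cudaMalloc( &d_randoms, paddedSeries * nbElements * sizeof( unsigned int ) );
    cudaMalloc( &d_series, paddedSeries * sizeof( float ) );
    cudaMalloc( &d_distribution, distributionSize * sizeof( unsigned int ) );

    cudaMemcpy( d_data, h_data, nbElements * sizeof( unsigned int ),
                cudaMemcpyHostToDevice );
    cudaMemset( d_series, 0, paddedSeries * sizeof( float ) );
    cudaMemset( d_distribution, 0, distributionSize * sizeof( unsigned int ) );

    // Draw and normalize random indices (see the sketch above)
    bs_prepareRandoms( d_data, d_randoms, nbSeries, nbElements );

    // One thread per bootstrap replicate
    kernel_bs_resample<<<grid, block>>>( d_data, d_randoms, d_series,
                                         nbSeries, nbElements );
    kernel_bs_distribute<<<grid, block>>>( d_series, scale, nbElements,
                                           d_distribution, distributionSize );

    cudaMemcpy( h_distribution, d_distribution,
                distributionSize * sizeof( unsigned int ),
                cudaMemcpyDeviceToHost );

    cudaFree( d_data );
    cudaFree( d_randoms );
    cudaFree( d_series );
    cudaFree( d_distribution );
}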