|
Packit |
a7d494 |
#include "cuMatrix.h"

#include <cstdio>
|
|
Packit |
a7d494 |
|
|
Packit |
a7d494 |
/**
 * Thresholds a square N x N image in place: every pixel strictly greater
 * than 128 becomes 255, every other pixel becomes 0.
 *
 * The image is addressed as image[y * N + x] (row stride N).
 *
 * Launch layout: 2D grid of 2D blocks; the thread with global coordinates
 * (x, y) handles the pixel at column x, row y. The grid may overhang the
 * image (N need not be a multiple of the block dimensions) -- threads that
 * fall outside the N x N area exit without touching memory.
 * Shared memory: none.
 *
 * @param image  pointer dereferenced by the kernel; must hold at least
 *               N * N ints
 * @param N      side length of the image in pixels; values <= 0 make the
 *               kernel a no-op
 */
__global__ void make_BlackWhite(int *image, int N){
    // A non-positive size describes an empty image: nothing to do.
    if (N <= 0)
        return;

    const unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
    const unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;
    const unsigned int side = static_cast<unsigned int>(N);

    // Guard the grid tail: without this, threads past the last row/column
    // would read and write outside the image buffer.
    if (x >= side || y >= side)
        return;

    // Widen before multiplying so y * N cannot wrap for large images.
    const size_t idx = static_cast<size_t>(y) * side + x;
    image[idx] = image[idx] > 128 ? 255 : 0;
}
|
|
Packit |
a7d494 |
|
|
Packit |
a7d494 |
/**
 * Flattens a square N x N matrix stored as an array of row pointers into a
 * contiguous buffer, so that array[i * N + j] == matrix[i][j].
 *
 * @param matrix  N row pointers, each pointing at least N readable ints
 * @param array   destination buffer with room for at least N * N ints
 * @param N       side length of the matrix; when N <= 0 (or either pointer
 *                is null) the function returns without copying anything
 */
void convertToArray(int **matrix, int *array, int N){
    // A negative N compared against an unsigned counter would turn into a
    // huge loop bound, so reject empty/invalid input up front and keep the
    // counters signed.
    if (N <= 0 || !matrix || !array)
        return;

    for (int row = 0; row < N; ++row) {
        const int *src = matrix[row];
        // Widen before multiplying so row * N cannot wrap for large N.
        int *dst = array + static_cast<size_t>(row) * N;
        for (int col = 0; col < N; ++col)
            dst[col] = src[col];
    }
}
|
|
Packit |
a7d494 |
|
|
Packit |
a7d494 |
// Wrapper function for kernel launch (not the complete function, just an example).
//
// Launches the reduction kernel variant selected by whichKernel on a 1D grid
// of `blocks` blocks with `threads` threads each, passing d_idata, d_odata
// and size straight through to the kernel.
//
//   size        - forwarded to the kernel as its third argument
//   threads     - threads per block (x dimension; y and z are 1)
//   blocks      - blocks in the grid (x dimension; y and z are 1)
//   whichKernel - 0 selects reduce0<T>, 1 selects reduce1<T>; any other value
//                 is reported on stderr and nothing is launched
//   d_idata     - forwarded to the kernel as its first argument
//   d_odata     - forwarded to the kernel as its second argument
//
// Dynamic shared memory: threads * sizeof(T) bytes, doubled when
// threads <= 32 (see the comment at smemSize).
// The launch is not synchronized here. A pending launch error is reported on
// stderr but left in place (peek, not get), so callers that check the CUDA
// error state themselves still observe it.
template <class T>
void
reduce(int size, int threads, int blocks,
       int whichKernel, T *d_idata, T *d_odata)
{
    dim3 dimBlock(threads, 1, 1);
    dim3 dimGrid(blocks, 1, 1);

    // when there is only one warp per block, we need to allocate two warps
    // worth of shared memory so that we don't index shared memory out of bounds
    const size_t smemSize = (threads <= 32) ? 2 * threads * sizeof(T) : threads * sizeof(T);

    // choose which of the optimized versions of reduction to launch
    switch (whichKernel)
    {
        case 0:
            reduce0<T><<< dimGrid, dimBlock, smemSize >>>(d_idata, d_odata, size);
            break;

        case 1:
            reduce1<T><<< dimGrid, dimBlock, smemSize >>>(d_idata, d_odata, size);
            break;

        default:
            // Previously an unknown index silently launched nothing.
            std::fprintf(stderr, "reduce: unsupported kernel index %d\n", whichKernel);
            return;
    }

    // Kernel launches do not return a status; configuration errors only
    // surface through the runtime's last-error state. Peek so the error is
    // not cleared for the caller. Note this also reports an earlier,
    // still-pending error if one exists.
    const cudaError_t launchStatus = cudaPeekAtLastError();
    if (launchStatus != cudaSuccess)
    {
        std::fprintf(stderr, "reduce: launch of kernel %d failed: %s\n",
                     whichKernel, cudaGetErrorString(launchStatus));
    }
}
|