Blame tests/syntax-highlighting/file.cu

Packit a7d494
#include "cuMatrix.h"
Packit a7d494
Packit a7d494
// Thresholds an image in place: every element greater than 128 becomes 255,
// every other element becomes 0.
//
// Each thread handles the single element at column x / row y of its global
// 2D thread index; consecutive rows are stored N elements apart.
//
//   image  buffer updated in place (a __global__ kernel dereferences it on the
//          device, so it is presumably device-accessible memory -- confirm
//          against the caller)
//   N      row length in elements; the image is assumed to be square (N x N),
//          matching convertToArray in this file -- TODO confirm
//
// Threads whose (x, y) lies outside the N x N extent do nothing, so the launch
// grid may be rounded up past the image size without touching memory outside
// the image or a neighbouring row.
__global__ void make_BlackWhite(int *image, int N){
	// A non-positive N describes an empty image; it must not be converted to
	// unsigned below, where it would turn into a huge bound.
	if (N <= 0)
		return;

	const unsigned int side = static_cast<unsigned int>(N);
	const unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
	const unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;

	// Tail guard: grids rarely divide the image evenly.
	if (x >= side || y >= side)
		return;

	// Widen before multiplying so y*N + x cannot wrap in 32-bit arithmetic.
	const size_t idx = static_cast<size_t>(y) * side + x;
	image[idx] = image[idx] > 128 ? 255 : 0;
}
Packit a7d494
Packit a7d494
// Flattens an N x N matrix, given as an array of row pointers, into a
// contiguous row-major array: array[i*N + j] receives matrix[i][j].
//
//   matrix  N row pointers, each pointing at no fewer than N ints
//   array   destination with room for N*N ints
//   N       number of rows and columns; when N <= 0 nothing is read or written
void convertToArray(int **matrix, int *array, int N){
	// Signed counters: comparing an unsigned counter against a negative N would
	// convert N to a huge unsigned bound and run far out of bounds.
	// The running output pointer avoids evaluating i*N + j in 32-bit arithmetic.
	int *out = array;
	for(int i = 0; i < N; i++){
		const int *row = matrix[i];
		for(int j = 0; j < N; j++)
			*out++ = row[j];
	}
}
Packit a7d494
Packit a7d494
// Host-side launcher for the reduction kernels (an example excerpt, not the
// complete function).
//
//   size         element count forwarded to the kernel
//   threads      threads per block (x extent of the block)
//   blocks       blocks in the grid (x extent of the grid)
//   whichKernel  0 launches reduce0<T>, 1 launches reduce1<T>; any other
//                value launches nothing
//   d_idata      input pointer forwarded to the kernel
//   d_odata      output pointer forwarded to the kernel
template <class T>
void
reduce(int size, int threads, int blocks,
       int whichKernel, T *d_idata, T *d_odata)
{
	// One-dimensional launch: only the x extents are set.
	dim3 dimGrid(blocks, 1, 1);
	dim3 dimBlock(threads, 1, 1);

	// A block of at most one warp (32 threads) gets twice its thread count's
	// worth of shared memory so that shared memory is not indexed out of
	// bounds; larger blocks get one element per thread.
	const int smemFactor = (threads <= 32) ? 2 : 1;
	int smemSize = smemFactor * threads * sizeof(T);

	// Dispatch to the requested version of the reduction kernel.
	if (whichKernel == 0)
	{
		reduce0<T><<< dimGrid, dimBlock, smemSize >>>(d_idata, d_odata, size);
	}
	else if (whichKernel == 1)
	{
		reduce1<T><<< dimGrid, dimBlock, smemSize >>>(d_idata, d_odata, size);
	}
}