Paste von Anonym am 2012-01-27 19:32:59 Syntax:

#include "filterImage.h"

#include "CudaTimer.h"
#include <iostream>

using namespace std;


//von wegen 10 minuten... feeeeeeeeeeeelix

//threadIdx.x is the channel, i.e. 3 threads per block -> 3 threads per pixel
//blockIdx.x is the position along the width
//blockIdx.y is the position along the height
//gridDim.x is the width

//blockIdx in which block am I?
//blockDim how big is a block

//gridDim how many blocks

//threadIdx what thread in the block


 

// Box (mean) filter over a (2 * 15 + 1) x (2 * 15 + 1) pixel neighbourhood.
//
// Expected launch layout (this is how filterImage launches it):
//   grid  = dim3(width, height)  -> one block per pixel
//   block = channels             -> one thread per colour channel
// The image size is therefore taken from gridDim / blockDim and no explicit
// size parameters or bounds guard are needed: every launched thread maps to
// exactly one valid element.
//
// Memory layout: channels are interleaved per pixel, rows are stored top to
// bottom, i.e. element (x, y, c) lives at ((y * width + x) * channels + c).
//
// data   - input image on the device, width * height * channels floats
// result - output image on the device, same size; must not alias `data`
//          because neighbouring blocks still read the input while this one
//          writes its output.
//
// At the image border the window is clamped to the image and the mean is
// taken over the pixels that actually exist, so edges are not darkened.
// No shared memory is used.
__global__ void box_filter(float *data, float *result) {
  const int radius = 15;

  // Signed copies: "x - radius" must be allowed to go negative before it is
  // clamped (with the unsigned built-ins it would wrap around instead).
  const int width = static_cast<int>(gridDim.x);
  const int height = static_cast<int>(gridDim.y);
  const int channels = static_cast<int>(blockDim.x);
  const int x = static_cast<int>(blockIdx.x);
  const int y = static_cast<int>(blockIdx.y);
  const int channel = static_cast<int>(threadIdx.x);

  // Inclusive window bounds, clamped to the image.
  const int xBegin = (x > radius) ? x - radius : 0;
  const int yBegin = (y > radius) ? y - radius : 0;
  const int xEnd = (x + radius < width) ? x + radius : width - 1;
  const int yEnd = (y + radius < height) ? y + radius : height - 1;

  float sum = 0.0f;
  for (int wy = yBegin; wy <= yEnd; ++wy) {
    // size_t keeps the flat index from overflowing on large images.
    const size_t rowStart = static_cast<size_t>(wy) * width;
    for (int wx = xBegin; wx <= xEnd; ++wx) {
      sum += data[(rowStart + wx) * channels + channel];
    }
  }

  // The window always contains at least the pixel itself, so count >= 1.
  const int count = (xEnd - xBegin + 1) * (yEnd - yBegin + 1);
  const size_t idx = (static_cast<size_t>(y) * width + x) * channels + channel;
  result[idx] = sum / static_cast<float>(count);
}

// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaOk(cudaError_t status, const char *what) {
  if (status == cudaSuccess) {
    return true;
  }
  cerr << "CUDA error in " << what << ": " << cudaGetErrorString(status) << endl;
  return false;
}

// Runs the box_filter kernel over an image and returns the filtered copy.
//
// width, height - image size in pixels
// channels      - number of interleaved channels per pixel
// data          - input image; must hold exactly width * height * channels
//                 floats
//
// Returns a vector of width * height * channels floats. If the input size
// does not match, the image is empty, or any CUDA call fails, the problem is
// reported on stderr and a zero-filled vector of that size is returned; no
// exception is thrown for CUDA failures.
//
// The kernel is launched with one block per pixel and one thread per channel,
// so width/height/channels must fit the device's grid and block limits; a
// configuration that does not is reported as a launch error.
vector<float> filterImage(const unsigned int &width, 
    const unsigned int &height,
    const unsigned int &channels,
    const vector<float> &data) {
  cout << "filter image " << width << "x" << height << "x" << channels << endl;

  // Computed in size_t so large images do not wrap around in unsigned int.
  const size_t elementCount = static_cast<size_t>(width) * height * channels;

  //Result data (value-initialised, i.e. all zeros)
  vector<float> result(elementCount);

  // Nothing to filter; also avoids &data[0] on an empty vector and a
  // zero-sized grid, which is an invalid launch configuration.
  if (elementCount == 0) {
    return result;
  }

  // The copies below use one byte count for both buffers; a mismatching
  // input would read or write out of bounds.
  if (data.size() != elementCount) {
    cerr << "filterImage: data holds " << data.size() << " values, expected "
         << elementCount << endl;
    return result;
  }

  CudaTimer filterTimer;
  CudaTimer kernelTimer;
  filterTimer.start();

  //Pointer to data on device
  float *d_data = 0;
  float *d_result = 0;
  const size_t bytesize = elementCount * sizeof(float);

  //Allocate memory on graphics card
  bool ok = cudaOk(cudaMalloc(reinterpret_cast<void **>(&d_data), bytesize),
                   "cudaMalloc(d_data)") &&
            cudaOk(cudaMalloc(reinterpret_cast<void **>(&d_result), bytesize),
                   "cudaMalloc(d_result)");

  //Copy data to graphics memory
  if (ok) {
    cout << "Copy data to device" << endl;
    ok = cudaOk(cudaMemcpy(d_data, &data[0], bytesize, cudaMemcpyHostToDevice),
                "cudaMemcpy(host to device)");
  }

  bool kernelTimed = false;
  if (ok) {
    kernelTimer.start();
    //Call Kernel
    dim3 grid(width, height);
    box_filter<<<grid, channels>>>(d_data, d_result);
    // Launches are asynchronous: pick up configuration errors first, then
    // wait for completion so the timer covers the kernel's execution and
    // execution errors surface here instead of in the copy below.
    ok = cudaOk(cudaGetLastError(), "box_filter launch") &&
         cudaOk(cudaDeviceSynchronize(), "box_filter execution");
    kernelTimer.stop();
    kernelTimed = true;
  }

  //Copy results back to host (cpu) memory
  if (ok) {
    cout << "Copy result from device" << endl;
    ok = cudaOk(cudaMemcpy(&result[0], d_result, bytesize, cudaMemcpyDeviceToHost),
                "cudaMemcpy(device to host)");
  }

  //Free graphics memory (cudaFree accepts a null pointer)
  cudaOk(cudaFree(d_data), "cudaFree(d_data)");
  cudaOk(cudaFree(d_result), "cudaFree(d_result)");

  filterTimer.stop();

  if (!ok) {
    // Do not hand back a possibly half-written buffer.
    result.assign(elementCount, 0.0f);
  }

  //Evaluate Timers.
  if (kernelTimed) {
    cout << "Kernel time " << kernelTimer.getMilliseconds() << "ms" << endl;
  }
  cout << "Filter time " << filterTimer.getMilliseconds() << "ms" << endl;

  return result;
}


» ohne Titel
« ohne Titel