// Paste by Anonymous on 2012-01-27 19:32:59
#include "filterImage.h"
#include "CudaTimer.h"
#include <iostream>
using namespace std;
//von wegen 10 minuten... feeeeeeeeeeeelix
//threadIdx.x is the channel, so 3 threads per block -> 3 threads per pixel
//blockIdx.x is the pixel position along the width
//blockIdx.y is the pixel position along the height
//gridDim.x is the image width
//blockIdx in which block am I?
//blockDim how big is a block
//gridDim how many blocks
//threadIdx what thread in the block
// Box (mean) filter over an interleaved multi-channel image.
//
// Expected launch configuration (one thread per pixel channel):
//   gridDim.x  = image width,  blockIdx.x  = pixel column
//   gridDim.y  = image height, blockIdx.y  = pixel row
//   blockDim.x = channel count, threadIdx.x = channel of this thread
// No shared memory is used.
//
// data   : input image, element (x, y, c) at (y * width + x) * channels + c.
//          Must hold at least width * height * channels floats.
// result : output image with the same layout and size as data.
//
// Every output value is the mean of the same channel over a window of
// (2 * radius + 1) x (2 * radius + 1) pixels centred on the pixel; the window
// is clamped to the image, so border pixels average over fewer samples.
__global__ void box_filter(float *data, float *result) {
    const int radius = 15;  // half window size -> 31x31 box away from borders

    const int width = static_cast<int>(gridDim.x);
    const int height = static_cast<int>(gridDim.y);
    const int channels = static_cast<int>(blockDim.x);

    const int x = static_cast<int>(blockIdx.x);
    const int y = static_cast<int>(blockIdx.y);
    const int channel = static_cast<int>(threadIdx.x);

    // Clamp the window to the image instead of reading out of bounds.
    const int xBegin = max(x - radius, 0);
    const int xEnd = min(x + radius, width - 1);
    const int yBegin = max(y - radius, 0);
    const int yEnd = min(y + radius, height - 1);

    float sum = 0.0f;
    for (int wy = yBegin; wy <= yEnd; ++wy) {
        // size_t keeps the flat index from overflowing on large images.
        const size_t rowStart = static_cast<size_t>(wy) * width;
        for (int wx = xBegin; wx <= xEnd; ++wx) {
            sum += data[(rowStart + wx) * channels + channel];
        }
    }

    const int sampleCount = (xEnd - xBegin + 1) * (yEnd - yBegin + 1);

    // The pixel offset (y * width + x) is scaled by the channel count as a
    // whole: the channels of one pixel are stored next to each other.
    const size_t pixel = static_cast<size_t>(y) * width + x;
    result[pixel * channels + channel] = sum / static_cast<float>(sampleCount);
}
// Prints a diagnostic for a failed CUDA runtime call.
// Returns true when status is cudaSuccess, false otherwise.
static bool cudaCallSucceeded(cudaError_t status, const char *what) {
    if (status == cudaSuccess) {
        return true;
    }
    cerr << "CUDA error during " << what << ": "
         << cudaGetErrorString(status) << endl;
    return false;
}

// Runs box_filter on the GPU over an interleaved image and returns the result.
//
// width, height, channels : image dimensions; the kernel is launched with a
//                           width x height grid and `channels` threads per
//                           block.
// data                    : input pixels; must hold at least
//                           width * height * channels floats. Only that many
//                           elements are transferred and filtered.
//
// Returns a vector of width * height * channels floats. If the input is too
// small or any CUDA call fails, a message is written to cerr and the returned
// vector does not contain a valid filtered image.
vector<float> filterImage(const unsigned int &width,
                          const unsigned int &height,
                          const unsigned int &channels,
                          const vector<float> &data) {
    cout << "filter image " << width << "x" << height << "x" << channels << endl;

    // Multiply in size_t so large images do not wrap around in unsigned int.
    const size_t elementCount = static_cast<size_t>(width) * height * channels;

    //Result data (value-initialised to zero)
    vector<float> result(elementCount);

    if (elementCount == 0) {
        // Nothing to filter; a zero-sized grid would be an invalid launch.
        return result;
    }
    if (data.size() < elementCount) {
        cerr << "filterImage: input holds " << data.size()
             << " values but " << elementCount << " are required" << endl;
        return result;
    }

    CudaTimer filterTimer;
    CudaTimer kernelTimer;
    filterTimer.start();

    //Pointers to data on device; 0 so cudaFree is a no-op if allocation fails
    float *d_data = 0;
    float *d_result = 0;
    // Sized from the image dimensions so host and device buffers always agree.
    const size_t bytesize = elementCount * sizeof(float);

    //Allocate memory on graphics card
    bool ok = cudaCallSucceeded(
        cudaMalloc(reinterpret_cast<void **>(&d_data), bytesize),
        "cudaMalloc(d_data)");
    ok = ok && cudaCallSucceeded(
        cudaMalloc(reinterpret_cast<void **>(&d_result), bytesize),
        "cudaMalloc(d_result)");

    //Copy data to graphics memory
    if (ok) {
        cout << "Copy data to device" << endl;
        ok = cudaCallSucceeded(
            cudaMemcpy(d_data, &data[0], bytesize, cudaMemcpyHostToDevice),
            "cudaMemcpy(host to device)");
    }

    // The timer is started and stopped on every path so that reading it below
    // never happens on a timer that was not run.
    kernelTimer.start();
    if (ok) {
        //Call Kernel
        dim3 grid(width, height);
        box_filter<<<grid, channels>>>(d_data, d_result);
        // Launch-configuration errors only show up via cudaGetLastError();
        // the launch is asynchronous, so synchronise before stopping the
        // timer and to surface faults raised while the kernel was running.
        ok = cudaCallSucceeded(cudaGetLastError(), "box_filter launch") &&
             cudaCallSucceeded(cudaDeviceSynchronize(), "box_filter execution");
    }
    kernelTimer.stop();

    //Copy results back to host (cpu) memory
    if (ok) {
        cout << "Copy result from device" << endl;
        ok = cudaCallSucceeded(
            cudaMemcpy(&result[0], d_result, bytesize, cudaMemcpyDeviceToHost),
            "cudaMemcpy(device to host)");
    }

    //Free graphics memory (also on the error paths, to avoid leaking it)
    cudaFree(d_data);
    cudaFree(d_result);
    filterTimer.stop();

    //Evaluate Timers.
    cout << "Kernel time " << kernelTimer.getMilliseconds() << "ms" << endl;
    cout << "Filter time " << filterTimer.getMilliseconds() << "ms" << endl;

    if (!ok) {
        cerr << "filterImage: filtering failed, result is not valid" << endl;
    }
    return result;
}
// (paste-site navigation: "» untitled" / "« untitled")

