/*
 * kernel for computational device, to be used in heat-eqn-par.c
 *
 * compute next values, maximum difference
 */

/*
 * do_step: advance one contiguous slice of points per work item and record
 * the largest change seen by each work group.
 *
 * num_points           index of the last point that is updated; the points
 *                      updated overall are 1 .. num_points
 * points_per_workitem  number of consecutive points handled by a work item
 * dx, dt               enter the update only through the factor dt/(dx*dx)
 * uk                   current values; elements i-1, i and i+1 are read for
 *                      every updated i, so indices 0 .. num_points+1 may be
 *                      accessed
 * ukp1                 receives the new value of every updated point
 * local_maxdiff        scratch space indexed by local id; needs one float
 *                      per work item of the group
 * partial_maxdiff      output indexed by group id; receives the largest
 *                      |uk[i] - ukp1[i]| found by that group
 */
__kernel void do_step(
        const int num_points,
        const int points_per_workitem,
        const float dx,
        const float dt,
        __global float* uk,
        __global float* ukp1,
        __local float* local_maxdiff,
        __global float* partial_maxdiff)
{
    const int gid = (int) get_global_id(0);
    const int lid = (int) get_local_id(0);
    const int group = (int) get_group_id(0);
    const int group_size = (int) get_local_size(0);

    /* slice [first, last) owned by this work item, cut off after num_points */
    const int upper = num_points + 1;
    const int first = 1 + gid * points_per_workitem;
    const int unclamped = first + points_per_workitem;
    const int last = (unclamped > upper) ? upper : unclamped;

    /* same factor for every point, so evaluate it once */
    const float coeff = dt / (dx * dx);

    /* update the slice, remembering the largest change */
    float biggest = 0.0f;
    for (int p = first; p < last; ++p) {
        const float center = uk[p];
        ukp1[p] = center + coeff * (uk[p + 1] - 2 * center + uk[p - 1]);

        const float change = fabs(uk[p] - ukp1[p]);
        if (change > biggest) {
            biggest = change;
        }
    }
    local_maxdiff[lid] = biggest;

    /* every work item's entry must be stored before it is read below */
    barrier(CLK_LOCAL_MEM_FENCE);

    /* only the first work item of the group does the combining */
    if (lid != 0) {
        return;
    }

    float group_max = 0.0f;
    for (int w = 0; w < group_size; ++w) {
        const float candidate = local_maxdiff[w];
        group_max = (candidate > group_max) ? candidate : group_max;
    }
    partial_maxdiff[group] = group_max;
}