44 void GaussianXBlurOperation::update_gauss()
61 float ATTR_ALIGN(16) color_accum[4] = {0.0f, 0.0f, 0.0f, 0.0f};
62 float multiplier_accum = 0.0f;
66 int bufferwidth = input_buffer->
get_width();
67 int bufferstartx = input_rect.
xmin;
68 int bufferstarty = input_rect.
ymin;
76 int bufferindex = ((xmin - bufferstartx) * 4) + ((ymin - bufferstarty) * 4 * bufferwidth);
79 __m128 accum_r = _mm_load_ps(color_accum);
80 for (
int nx = xmin, index = (xmin -
x) +
filtersize_; nx < xmax; nx += step, index += step) {
81 __m128 reg_a = _mm_load_ps(&
buffer[bufferindex]);
82 reg_a = _mm_mul_ps(reg_a, gausstab_sse_[index]);
83 accum_r = _mm_add_ps(accum_r, reg_a);
85 bufferindex += offsetadd;
87 _mm_store_ps(color_accum, accum_r);
89 for (
int nx = xmin, index = (xmin -
x) +
filtersize_; nx < xmax; nx += step, index += step) {
90 const float multiplier =
gausstab_[index];
92 multiplier_accum += multiplier;
93 bufferindex += offsetadd;
101 cl_mem cl_output_buffer,
103 std::list<cl_mem> *cl_mem_to_clean_up,
104 std::list<cl_kernel> * )
107 "gaussian_xblur_operation_kernel",
nullptr);
110 cl_mem gausstab = clCreateBuffer(device->
get_context(),
111 CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
120 input_memory_buffers,
123 gaussian_xblur_operation_kernel, 2, cl_output_buffer);
125 gaussian_xblur_operation_kernel, 3, output_memory_buffer);
126 clSetKernelArg(gaussian_xblur_operation_kernel, 4,
sizeof(cl_int), &filter_size);
128 clSetKernelArg(gaussian_xblur_operation_kernel, 6,
sizeof(cl_mem), &gausstab);
132 clReleaseMemObject(gausstab);
146 gausstab_sse_ =
nullptr;
MINLINE float max_ff(float a, float b)
MINLINE int min_ii(int a, int b)
MINLINE float min_ff(float a, float b)
MINLINE int max_ii(int a, int b)
MINLINE void mul_v4_v4fl(float r[4], const float a[4], float f)
MINLINE void madd_v4_v4fl(float r[4], const float a[4], float f)
#define MAX_GAUSSTAB_RADIUS
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum const void *lists _GL_VOID_RET _GL_VOID const GLdouble *equation _GL_VOID_RET _GL_VOID GLdouble GLdouble blue _GL_VOID_RET _GL_VOID GLfloat GLfloat blue _GL_VOID_RET _GL_VOID GLint GLint blue _GL_VOID_RET _GL_VOID GLshort GLshort blue _GL_VOID_RET _GL_VOID GLubyte GLubyte blue _GL_VOID_RET _GL_VOID GLuint GLuint blue _GL_VOID_RET _GL_VOID GLushort GLushort blue _GL_VOID_RET _GL_VOID GLbyte GLbyte GLbyte alpha _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble alpha _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat alpha _GL_VOID_RET _GL_VOID GLint GLint GLint alpha _GL_VOID_RET _GL_VOID GLshort GLshort GLshort alpha _GL_VOID_RET _GL_VOID GLubyte GLubyte GLubyte alpha _GL_VOID_RET _GL_VOID GLuint GLuint GLuint alpha _GL_VOID_RET _GL_VOID GLushort GLushort GLushort alpha _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLint y
SocketReader * input_program_
float * make_gausstab(float rad, int size)
virtual void init_execution() override
virtual void deinit_execution() override
void execute_pixel(float output[4], int x, int y, void *data) override
The inner loop of this operation.
void execute_opencl(OpenCLDevice *device, MemoryBuffer *output_memory_buffer, cl_mem cl_output_buffer, MemoryBuffer **input_memory_buffers, std::list< cl_mem > *cl_mem_to_clean_up, std::list< cl_kernel > *cl_kernels_to_clean_up) override
Custom handle to add new tasks to the OpenCL command queue in order to execute a chunk on a GPUDevice.
void deinit_execution() override
Deinitialize the execution.
bool determine_depending_area_of_interest(rcti *input, ReadBufferOperation *read_operation, rcti *output) override
void init_execution() override
Initialize the execution.
void * initialize_tile_data(rcti *rect) override
A MemoryBuffer provides access to the data of a chunk.
const rcti & get_rect() const
get the rect of this MemoryBuffer
const int get_width() const
get the width of this MemoryBuffer
float * get_buffer()
get the data of this MemoryBuffer
NodeOperation contains calculation logic.
unsigned int get_height() const
unsigned int get_width() const
eExecutionModel execution_model_
NodeOperation * get_input_operation(int index)
virtual bool determine_depending_area_of_interest(rcti *input, ReadBufferOperation *read_operation, rcti *output)
virtual void * initialize_tile_data(rcti *)
Device representing a GPU OpenCL device. An instance of this class represents a single cl_device.
void COM_cl_attach_size_to_kernel_parameter(cl_kernel kernel, int offset_index, NodeOperation *operation)
cl_mem COM_cl_attach_memory_buffer_to_kernel_parameter(cl_kernel kernel, int parameter_index, int offset_index, std::list< cl_mem > *cleanup, MemoryBuffer **input_memory_buffers, SocketReader *reader)
void COM_cl_attach_output_memory_buffer_to_kernel_parameter(cl_kernel kernel, int parameter_index, cl_mem cl_output_memory_buffer)
void COM_cl_enqueue_range(cl_kernel kernel, MemoryBuffer *output_memory_buffer)
cl_kernel COM_cl_create_kernel(const char *kernelname, std::list< cl_kernel > *cl_kernels_to_clean_up)
void COM_cl_attach_memory_buffer_offset_to_kernel_parameter(cl_kernel kernel, int offset_index, MemoryBuffer *memory_buffers)
int get_offset_add() const
ccl_global float * buffer
ccl_global KernelShaderEvalInput ccl_global float * output
ccl_global KernelShaderEvalInput * input
void(* MEM_freeN)(void *vmemh)
ccl_device_inline float3 ceil(const float3 &a)