Blender  V3.3
COM_GaussianXBlurOperation.cc
Go to the documentation of this file.
1 /* SPDX-License-Identifier: GPL-2.0-or-later
2  * Copyright 2011 Blender Foundation. */
3 
5 #include "COM_OpenCLDevice.h"
6 
7 namespace blender::compositor {
8 
10 {
11 }
12 
14 {
15  lock_mutex();
16  if (!sizeavailable_) {
17  update_gauss();
18  }
19  void *buffer = get_input_operation(0)->initialize_tile_data(nullptr);
20  unlock_mutex();
21  return buffer;
22 }
23 
24 /* TODO(manzanilla): to be removed with tiled implementation. */
26 {
28 
29  init_mutex();
30 
32  float rad = max_ff(size_ * data_.sizex, 0.0f);
34 
35  /* TODO(sergey): De-duplicate with the case below and Y blur. */
37 #ifdef BLI_HAVE_SSE2
38  gausstab_sse_ = BlurBaseOperation::convert_gausstab_sse(gausstab_, filtersize_);
39 #endif
40  }
41 }
42 
43 /* TODO(manzanilla): to be removed with tiled implementation. */
44 void GaussianXBlurOperation::update_gauss()
45 {
46  if (gausstab_ == nullptr) {
47  update_size();
48  float rad = max_ff(size_ * data_.sizex, 0.0f);
49  rad = min_ff(rad, MAX_GAUSSTAB_RADIUS);
51 
53 #ifdef BLI_HAVE_SSE2
54  gausstab_sse_ = BlurBaseOperation::convert_gausstab_sse(gausstab_, filtersize_);
55 #endif
56  }
57 }
58 
59 void GaussianXBlurOperation::execute_pixel(float output[4], int x, int y, void *data)
60 {
61  float ATTR_ALIGN(16) color_accum[4] = {0.0f, 0.0f, 0.0f, 0.0f};
62  float multiplier_accum = 0.0f;
63  MemoryBuffer *input_buffer = (MemoryBuffer *)data;
64  const rcti &input_rect = input_buffer->get_rect();
65  float *buffer = input_buffer->get_buffer();
66  int bufferwidth = input_buffer->get_width();
67  int bufferstartx = input_rect.xmin;
68  int bufferstarty = input_rect.ymin;
69 
70  int xmin = max_ii(x - filtersize_, input_rect.xmin);
71  int xmax = min_ii(x + filtersize_ + 1, input_rect.xmax);
72  int ymin = max_ii(y, input_rect.ymin);
73 
74  int step = get_step();
75  int offsetadd = get_offset_add();
76  int bufferindex = ((xmin - bufferstartx) * 4) + ((ymin - bufferstarty) * 4 * bufferwidth);
77 
78 #ifdef BLI_HAVE_SSE2
79  __m128 accum_r = _mm_load_ps(color_accum);
80  for (int nx = xmin, index = (xmin - x) + filtersize_; nx < xmax; nx += step, index += step) {
81  __m128 reg_a = _mm_load_ps(&buffer[bufferindex]);
82  reg_a = _mm_mul_ps(reg_a, gausstab_sse_[index]);
83  accum_r = _mm_add_ps(accum_r, reg_a);
84  multiplier_accum += gausstab_[index];
85  bufferindex += offsetadd;
86  }
87  _mm_store_ps(color_accum, accum_r);
88 #else
89  for (int nx = xmin, index = (xmin - x) + filtersize_; nx < xmax; nx += step, index += step) {
90  const float multiplier = gausstab_[index];
91  madd_v4_v4fl(color_accum, &buffer[bufferindex], multiplier);
92  multiplier_accum += multiplier;
93  bufferindex += offsetadd;
94  }
95 #endif
96  mul_v4_v4fl(output, color_accum, 1.0f / multiplier_accum);
97 }
98 
100  MemoryBuffer *output_memory_buffer,
101  cl_mem cl_output_buffer,
102  MemoryBuffer **input_memory_buffers,
103  std::list<cl_mem> *cl_mem_to_clean_up,
104  std::list<cl_kernel> * /*cl_kernels_to_clean_up*/)
105 {
106  cl_kernel gaussian_xblur_operation_kernel = device->COM_cl_create_kernel(
107  "gaussian_xblur_operation_kernel", nullptr);
108  cl_int filter_size = filtersize_;
109 
110  cl_mem gausstab = clCreateBuffer(device->get_context(),
111  CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
112  sizeof(float) * (filtersize_ * 2 + 1),
113  gausstab_,
114  nullptr);
115 
116  device->COM_cl_attach_memory_buffer_to_kernel_parameter(gaussian_xblur_operation_kernel,
117  0,
118  1,
119  cl_mem_to_clean_up,
120  input_memory_buffers,
123  gaussian_xblur_operation_kernel, 2, cl_output_buffer);
125  gaussian_xblur_operation_kernel, 3, output_memory_buffer);
126  clSetKernelArg(gaussian_xblur_operation_kernel, 4, sizeof(cl_int), &filter_size);
127  device->COM_cl_attach_size_to_kernel_parameter(gaussian_xblur_operation_kernel, 5, this);
128  clSetKernelArg(gaussian_xblur_operation_kernel, 6, sizeof(cl_mem), &gausstab);
129 
130  device->COM_cl_enqueue_range(gaussian_xblur_operation_kernel, output_memory_buffer, 7, this);
131 
132  clReleaseMemObject(gausstab);
133 }
134 
136 {
138 
139  if (gausstab_) {
141  gausstab_ = nullptr;
142  }
143 #ifdef BLI_HAVE_SSE2
144  if (gausstab_sse_) {
145  MEM_freeN(gausstab_sse_);
146  gausstab_sse_ = nullptr;
147  }
148 #endif
149 
150  deinit_mutex();
151 }
152 
154  rcti *input, ReadBufferOperation *read_operation, rcti *output)
155 {
156  rcti new_input;
157 
158  if (!sizeavailable_) {
159  rcti size_input;
160  size_input.xmin = 0;
161  size_input.ymin = 0;
162  size_input.xmax = 5;
163  size_input.ymax = 5;
164  NodeOperation *operation = this->get_input_operation(1);
165  if (operation->determine_depending_area_of_interest(&size_input, read_operation, output)) {
166  return true;
167  }
168  }
169  {
170  if (sizeavailable_ && gausstab_ != nullptr) {
171  new_input.xmax = input->xmax + filtersize_ + 1;
172  new_input.xmin = input->xmin - filtersize_ - 1;
173  new_input.ymax = input->ymax;
174  new_input.ymin = input->ymin;
175  }
176  else {
177  new_input.xmax = this->get_width();
178  new_input.xmin = 0;
179  new_input.ymax = this->get_height();
180  new_input.ymin = 0;
181  }
182  return NodeOperation::determine_depending_area_of_interest(&new_input, read_operation, output);
183  }
184 }
185 
186 } // namespace blender::compositor
#define ATTR_ALIGN(x)
MINLINE float max_ff(float a, float b)
MINLINE int min_ii(int a, int b)
MINLINE float min_ff(float a, float b)
MINLINE int max_ii(int a, int b)
MINLINE void mul_v4_v4fl(float r[4], const float a[4], float f)
MINLINE void madd_v4_v4fl(float r[4], const float a[4], float f)
#define MAX_GAUSSTAB_RADIUS
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum const void *lists _GL_VOID_RET _GL_VOID const GLdouble *equation _GL_VOID_RET _GL_VOID GLdouble GLdouble blue _GL_VOID_RET _GL_VOID GLfloat GLfloat blue _GL_VOID_RET _GL_VOID GLint GLint blue _GL_VOID_RET _GL_VOID GLshort GLshort blue _GL_VOID_RET _GL_VOID GLubyte GLubyte blue _GL_VOID_RET _GL_VOID GLuint GLuint blue _GL_VOID_RET _GL_VOID GLushort GLushort blue _GL_VOID_RET _GL_VOID GLbyte GLbyte GLbyte alpha _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble alpha _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat alpha _GL_VOID_RET _GL_VOID GLint GLint GLint alpha _GL_VOID_RET _GL_VOID GLshort GLshort GLshort alpha _GL_VOID_RET _GL_VOID GLubyte GLubyte GLubyte alpha _GL_VOID_RET _GL_VOID GLuint GLuint GLuint alpha _GL_VOID_RET _GL_VOID GLushort GLushort GLushort alpha _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLint y
#define X
Definition: GeomUtils.cpp:199
float * make_gausstab(float rad, int size)
void execute_pixel(float output[4], int x, int y, void *data) override
The inner loop of this operation.
void execute_opencl(OpenCLDevice *device, MemoryBuffer *output_memory_buffer, cl_mem cl_output_buffer, MemoryBuffer **input_memory_buffers, std::list< cl_mem > *cl_mem_to_clean_up, std::list< cl_kernel > *cl_kernels_to_clean_up) override
Custom handle to add new tasks to the OpenCL command queue in order to execute a chunk on a GPUDevice.
void deinit_execution() override
Deinitialize the execution.
bool determine_depending_area_of_interest(rcti *input, ReadBufferOperation *read_operation, rcti *output) override
void init_execution() override
initialize the execution
a MemoryBuffer contains access to the data of a chunk
const rcti & get_rect() const
get the rect of this MemoryBuffer
const int get_width() const
get the width of this MemoryBuffer
float * get_buffer()
get the data of this MemoryBuffer
NodeOperation contains calculation logic.
NodeOperation * get_input_operation(int index)
virtual bool determine_depending_area_of_interest(rcti *input, ReadBufferOperation *read_operation, rcti *output)
virtual void * initialize_tile_data(rcti *)
Device representing a GPU OpenCL device. An instance of this class represents a single cl_device.
void COM_cl_attach_size_to_kernel_parameter(cl_kernel kernel, int offset_index, NodeOperation *operation)
cl_mem COM_cl_attach_memory_buffer_to_kernel_parameter(cl_kernel kernel, int parameter_index, int offset_index, std::list< cl_mem > *cleanup, MemoryBuffer **input_memory_buffers, SocketReader *reader)
void COM_cl_attach_output_memory_buffer_to_kernel_parameter(cl_kernel kernel, int parameter_index, cl_mem cl_output_memory_buffer)
void COM_cl_enqueue_range(cl_kernel kernel, MemoryBuffer *output_memory_buffer)
cl_kernel COM_cl_create_kernel(const char *kernelname, std::list< cl_kernel > *cl_kernels_to_clean_up)
void COM_cl_attach_memory_buffer_offset_to_kernel_parameter(cl_kernel kernel, int offset_index, MemoryBuffer *memory_buffers)
ccl_global float * buffer
ccl_global KernelShaderEvalInput ccl_global float * output
ccl_global KernelShaderEvalInput * input
void(* MEM_freeN)(void *vmemh)
Definition: mallocn.c:27
ccl_device_inline float3 ceil(const float3 &a)
Definition: math_float3.h:363
int ymin
Definition: DNA_vec_types.h:64
int ymax
Definition: DNA_vec_types.h:64
int xmin
Definition: DNA_vec_types.h:63
int xmax
Definition: DNA_vec_types.h:63