Blender  V3.3
COM_OpenCLDevice.cc
Go to the documentation of this file.
1 /* SPDX-License-Identifier: GPL-2.0-or-later
2  * Copyright 2011 Blender Foundation. */
3 
4 #include "COM_OpenCLDevice.h"
5 
6 #include "COM_ExecutionGroup.h"
8 
9 namespace blender::compositor {
10 
11 enum COM_VendorID { NVIDIA = 0x10DE, AMD = 0x1002 };
12 const cl_image_format IMAGE_FORMAT_COLOR = {
13  CL_RGBA,
14  CL_FLOAT,
15 };
16 const cl_image_format IMAGE_FORMAT_VECTOR = {
17  CL_RGB,
18  CL_FLOAT,
19 };
20 const cl_image_format IMAGE_FORMAT_VALUE = {
21  CL_R,
22  CL_FLOAT,
23 };
24 
26  cl_device_id device,
27  cl_program program,
28  cl_int vendor_id)
29 {
30  device_ = device;
31  context_ = context;
32  program_ = program;
33  queue_ = nullptr;
34  vendor_id_ = vendor_id;
35 
36  cl_int error;
37  queue_ = clCreateCommandQueue(context_, device_, 0, &error);
38 }
39 
41  : context_(other.context_),
42  device_(other.device_),
43  program_(other.program_),
44  queue_(other.queue_),
45  vendor_id_(other.vendor_id_)
46 {
47  other.queue_ = nullptr;
48 }
49 
51 {
52  if (queue_) {
53  clReleaseCommandQueue(queue_);
54  }
55 }
56 
58 {
59  const unsigned int chunk_number = work_package->chunk_number;
60  ExecutionGroup *execution_group = work_package->execution_group;
61 
62  MemoryBuffer **input_buffers = execution_group->get_input_buffers_opencl(chunk_number);
63  MemoryBuffer *output_buffer = execution_group->allocate_output_buffer(work_package->rect);
64 
65  execution_group->get_output_operation()->execute_opencl_region(
66  this, &work_package->rect, chunk_number, input_buffers, output_buffer);
67 
68  delete output_buffer;
69 
70  execution_group->finalize_chunk_execution(chunk_number, input_buffers);
71 }
73  cl_kernel kernel,
74  int parameter_index,
75  int offset_index,
76  std::list<cl_mem> *cleanup,
77  MemoryBuffer **input_memory_buffers,
78  SocketReader *reader)
79 {
81  parameter_index,
82  offset_index,
83  cleanup,
84  input_memory_buffers,
85  (ReadBufferOperation *)reader);
86 }
87 
88 const cl_image_format *OpenCLDevice::determine_image_format(MemoryBuffer *memory_buffer)
89 {
90  switch (memory_buffer->get_num_channels()) {
91  case 1:
92  return &IMAGE_FORMAT_VALUE;
93  break;
94  case 3:
95  return &IMAGE_FORMAT_VECTOR;
96  break;
97  case 4:
98  return &IMAGE_FORMAT_COLOR;
99  break;
100  default:
101  BLI_assert_msg(0, "Unsupported num_channels.");
102  }
103 
104  return &IMAGE_FORMAT_COLOR;
105 }
106 
108  cl_kernel kernel,
109  int parameter_index,
110  int offset_index,
111  std::list<cl_mem> *cleanup,
112  MemoryBuffer **input_memory_buffers,
113  ReadBufferOperation *reader)
114 {
115  cl_int error;
116 
117  MemoryBuffer *result = reader->get_input_memory_buffer(input_memory_buffers);
118 
119  const cl_image_format *image_format = determine_image_format(result);
120 
121  cl_mem cl_buffer = clCreateImage2D(context_,
122  CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
123  image_format,
124  result->get_width(),
125  result->get_height(),
126  0,
127  result->get_buffer(),
128  &error);
129 
130  if (error != CL_SUCCESS) {
131  printf("CLERROR[%d]: %s\n", error, clewErrorString(error));
132  }
133  if (error == CL_SUCCESS) {
134  cleanup->push_back(cl_buffer);
135  }
136 
137  error = clSetKernelArg(kernel, parameter_index, sizeof(cl_mem), &cl_buffer);
138  if (error != CL_SUCCESS) {
139  printf("CLERROR[%d]: %s\n", error, clewErrorString(error));
140  }
141 
143  return cl_buffer;
144 }
145 
147  cl_kernel kernel, int offset_index, MemoryBuffer *memory_buffer)
148 {
149  if (offset_index != -1) {
150  cl_int error;
151  const rcti &rect = memory_buffer->get_rect();
152  cl_int2 offset = {{rect.xmin, rect.ymin}};
153 
154  error = clSetKernelArg(kernel, offset_index, sizeof(cl_int2), &offset);
155  if (error != CL_SUCCESS) {
156  printf("CLERROR[%d]: %s\n", error, clewErrorString(error));
157  }
158  }
159 }
160 
162  int offset_index,
163  NodeOperation *operation)
164 {
165  if (offset_index != -1) {
166  cl_int error;
167  cl_int2 offset = {{(cl_int)operation->get_width(), (cl_int)operation->get_height()}};
168 
169  error = clSetKernelArg(kernel, offset_index, sizeof(cl_int2), &offset);
170  if (error != CL_SUCCESS) {
171  printf("CLERROR[%d]: %s\n", error, clewErrorString(error));
172  }
173  }
174 }
175 
177  cl_kernel kernel, int parameter_index, cl_mem cl_output_memory_buffer)
178 {
179  cl_int error;
180  error = clSetKernelArg(kernel, parameter_index, sizeof(cl_mem), &cl_output_memory_buffer);
181  if (error != CL_SUCCESS) {
182  printf("CLERROR[%d]: %s\n", error, clewErrorString(error));
183  }
184 }
185 
186 void OpenCLDevice::COM_cl_enqueue_range(cl_kernel kernel, MemoryBuffer *output_memory_buffer)
187 {
188  cl_int error;
189  const size_t size[] = {
190  (size_t)output_memory_buffer->get_width(),
191  (size_t)output_memory_buffer->get_height(),
192  };
193 
194  error = clEnqueueNDRangeKernel(queue_, kernel, 2, nullptr, size, nullptr, 0, nullptr, nullptr);
195  if (error != CL_SUCCESS) {
196  printf("CLERROR[%d]: %s\n", error, clewErrorString(error));
197  }
198 }
199 
201  MemoryBuffer *output_memory_buffer,
202  int offset_index,
203  NodeOperation *operation)
204 {
205  cl_int error;
206  const int width = output_memory_buffer->get_width();
207  const int height = output_memory_buffer->get_height();
208  int offsetx;
209  int offsety;
210  int local_size = 1024;
211  size_t size[2];
212  cl_int2 offset;
213 
214  if (vendor_id_ == NVIDIA) {
215  local_size = 32;
216  }
217 
218  bool breaked = false;
219  for (offsety = 0; offsety < height && (!breaked); offsety += local_size) {
220  offset.s[1] = offsety;
221  if (offsety + local_size < height) {
222  size[1] = local_size;
223  }
224  else {
225  size[1] = height - offsety;
226  }
227 
228  for (offsetx = 0; offsetx < width && (!breaked); offsetx += local_size) {
229  if (offsetx + local_size < width) {
230  size[0] = local_size;
231  }
232  else {
233  size[0] = width - offsetx;
234  }
235  offset.s[0] = offsetx;
236 
237  error = clSetKernelArg(kernel, offset_index, sizeof(cl_int2), &offset);
238  if (error != CL_SUCCESS) {
239  printf("CLERROR[%d]: %s\n", error, clewErrorString(error));
240  }
241  error = clEnqueueNDRangeKernel(
242  queue_, kernel, 2, nullptr, size, nullptr, 0, nullptr, nullptr);
243  if (error != CL_SUCCESS) {
244  printf("CLERROR[%d]: %s\n", error, clewErrorString(error));
245  }
246  clFlush(queue_);
247  if (operation->is_braked()) {
248  breaked = false;
249  }
250  }
251  }
252 }
253 
254 cl_kernel OpenCLDevice::COM_cl_create_kernel(const char *kernelname,
255  std::list<cl_kernel> *cl_kernels_to_clean_up)
256 {
257  cl_int error;
258  cl_kernel kernel = clCreateKernel(program_, kernelname, &error);
259  if (error != CL_SUCCESS) {
260  printf("CLERROR[%d]: %s\n", error, clewErrorString(error));
261  }
262  else {
263  if (cl_kernels_to_clean_up) {
264  cl_kernels_to_clean_up->push_back(kernel);
265  }
266  }
267  return kernel;
268 }
269 
270 } // namespace blender::compositor
#define BLI_assert_msg(a, msg)
Definition: BLI_assert.h:53
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei height
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum const void *lists _GL_VOID_RET _GL_VOID const GLdouble *equation _GL_VOID_RET _GL_VOID GLdouble GLdouble blue _GL_VOID_RET _GL_VOID GLfloat GLfloat blue _GL_VOID_RET _GL_VOID GLint GLint blue _GL_VOID_RET _GL_VOID GLshort GLshort blue _GL_VOID_RET _GL_VOID GLubyte GLubyte blue _GL_VOID_RET _GL_VOID GLuint GLuint blue _GL_VOID_RET _GL_VOID GLushort GLushort blue _GL_VOID_RET _GL_VOID GLbyte GLbyte GLbyte alpha _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble alpha _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat alpha _GL_VOID_RET _GL_VOID GLint GLint GLint alpha _GL_VOID_RET _GL_VOID GLshort GLshort GLshort alpha _GL_VOID_RET _GL_VOID GLubyte GLubyte GLubyte alpha _GL_VOID_RET _GL_VOID GLuint GLuint GLuint alpha _GL_VOID_RET _GL_VOID GLushort GLushort GLushort alpha _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLint GLsizei width
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
Definition: btDbvt.cpp:52
Class ExecutionGroup is a group of Operations that are executed as one. This grouping is used to comb...
void finalize_chunk_execution(int chunk_number, MemoryBuffer **memory_buffers)
after a chunk is executed the needed resources can be freed or unlocked.
NodeOperation * get_output_operation() const
get the output operation of this ExecutionGroup
MemoryBuffer ** get_input_buffers_opencl(int chunk_number)
get all inputbuffers needed to calculate an chunk
MemoryBuffer * allocate_output_buffer(rcti &rect)
allocate the outputbuffer of a chunk
a MemoryBuffer contains access to the data of a chunk
const rcti & get_rect() const
get the rect of this MemoryBuffer
const int get_width() const
get the width of this MemoryBuffer
const int get_height() const
get the height of this MemoryBuffer
NodeOperation contains calculation logic.
virtual void execute_opencl_region(OpenCLDevice *, rcti *, unsigned int, MemoryBuffer **, MemoryBuffer *)
when a chunk is executed by an OpenCLDevice, this method is called
device representing an GPU OpenCL device. an instance of this class represents a single cl_device
void COM_cl_attach_size_to_kernel_parameter(cl_kernel kernel, int offset_index, NodeOperation *operation)
cl_mem COM_cl_attach_memory_buffer_to_kernel_parameter(cl_kernel kernel, int parameter_index, int offset_index, std::list< cl_mem > *cleanup, MemoryBuffer **input_memory_buffers, SocketReader *reader)
void COM_cl_attach_output_memory_buffer_to_kernel_parameter(cl_kernel kernel, int parameter_index, cl_mem cl_output_memory_buffer)
void COM_cl_enqueue_range(cl_kernel kernel, MemoryBuffer *output_memory_buffer)
cl_kernel COM_cl_create_kernel(const char *kernelname, std::list< cl_kernel > *cl_kernels_to_clean_up)
static const cl_image_format * determine_image_format(MemoryBuffer *memory_buffer)
determine an image format
void COM_cl_attach_memory_buffer_offset_to_kernel_parameter(cl_kernel kernel, int offset_index, MemoryBuffer *memory_buffers)
OpenCLDevice(cl_context context, cl_device_id device, cl_program program, cl_int vendor_id)
constructor with opencl device
void execute(WorkPackage *work) override
execute a WorkPackage
MemoryBuffer * get_input_memory_buffer(MemoryBuffer **memory_buffers) override
SyclQueue void void size_t num_bytes SyclQueue void const char void *memory_device_pointer KernelContext int kernel
ccl_gpu_kernel_postfix ccl_global float int int int int float bool int offset
static void error(const char *str)
Definition: meshlaplacian.c:51
const cl_image_format IMAGE_FORMAT_COLOR
const cl_image_format IMAGE_FORMAT_VECTOR
const cl_image_format IMAGE_FORMAT_VALUE
contains data about work that can be scheduled
unsigned int chunk_number
number of the chunk to be executed
ExecutionGroup * execution_group
execution_group with the operations-setup to be evaluated
int ymin
Definition: DNA_vec_types.h:64
int xmin
Definition: DNA_vec_types.h:63