34 vendor_id_ = vendor_id;
37 queue_ = clCreateCommandQueue(context_, device_, 0, &
error);
41 : context_(other.context_),
42 device_(other.device_),
43 program_(other.program_),
45 vendor_id_(other.vendor_id_)
47 other.queue_ =
nullptr;
53 clReleaseCommandQueue(queue_);
59 const unsigned int chunk_number = work_package->
chunk_number;
66 this, &work_package->
rect, chunk_number, input_buffers, output_buffer);
76 std::list<cl_mem> *cleanup,
111 std::list<cl_mem> *cleanup,
121 cl_mem cl_buffer = clCreateImage2D(context_,
122 CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
130 if (
error != CL_SUCCESS) {
131 printf(
"CLERROR[%d]: %s\n",
error, clewErrorString(
error));
133 if (
error == CL_SUCCESS) {
134 cleanup->push_back(cl_buffer);
137 error = clSetKernelArg(
kernel, parameter_index,
sizeof(cl_mem), &cl_buffer);
138 if (
error != CL_SUCCESS) {
139 printf(
"CLERROR[%d]: %s\n",
error, clewErrorString(
error));
149 if (offset_index != -1) {
155 if (
error != CL_SUCCESS) {
156 printf(
"CLERROR[%d]: %s\n",
error, clewErrorString(
error));
165 if (offset_index != -1) {
170 if (
error != CL_SUCCESS) {
171 printf(
"CLERROR[%d]: %s\n",
error, clewErrorString(
error));
177 cl_kernel
kernel,
int parameter_index, cl_mem cl_output_memory_buffer)
180 error = clSetKernelArg(
kernel, parameter_index,
sizeof(cl_mem), &cl_output_memory_buffer);
181 if (
error != CL_SUCCESS) {
182 printf(
"CLERROR[%d]: %s\n",
error, clewErrorString(
error));
189 const size_t size[] = {
190 (size_t)output_memory_buffer->
get_width(),
194 error = clEnqueueNDRangeKernel(queue_,
kernel, 2,
nullptr,
size,
nullptr, 0,
nullptr,
nullptr);
195 if (
error != CL_SUCCESS) {
196 printf(
"CLERROR[%d]: %s\n",
error, clewErrorString(
error));
210 int local_size = 1024;
214 if (vendor_id_ ==
NVIDIA) {
218 bool breaked =
false;
219 for (offsety = 0; offsety <
height && (!breaked); offsety += local_size) {
221 if (offsety + local_size <
height) {
222 size[1] = local_size;
228 for (offsetx = 0; offsetx <
width && (!breaked); offsetx += local_size) {
229 if (offsetx + local_size <
width) {
230 size[0] = local_size;
238 if (
error != CL_SUCCESS) {
239 printf(
"CLERROR[%d]: %s\n",
error, clewErrorString(
error));
241 error = clEnqueueNDRangeKernel(
242 queue_,
kernel, 2,
nullptr,
size,
nullptr, 0,
nullptr,
nullptr);
243 if (
error != CL_SUCCESS) {
244 printf(
"CLERROR[%d]: %s\n",
error, clewErrorString(
error));
255 std::list<cl_kernel> *cl_kernels_to_clean_up)
258 cl_kernel
kernel = clCreateKernel(program_, kernelname, &
error);
259 if (
error != CL_SUCCESS) {
260 printf(
"CLERROR[%d]: %s\n",
error, clewErrorString(
error));
263 if (cl_kernels_to_clean_up) {
264 cl_kernels_to_clean_up->push_back(
kernel);
#define BLI_assert_msg(a, msg)
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei height
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum const void *lists _GL_VOID_RET _GL_VOID const GLdouble *equation _GL_VOID_RET _GL_VOID GLdouble GLdouble blue _GL_VOID_RET _GL_VOID GLfloat GLfloat blue _GL_VOID_RET _GL_VOID GLint GLint blue _GL_VOID_RET _GL_VOID GLshort GLshort blue _GL_VOID_RET _GL_VOID GLubyte GLubyte blue _GL_VOID_RET _GL_VOID GLuint GLuint blue _GL_VOID_RET _GL_VOID GLushort GLushort blue _GL_VOID_RET _GL_VOID GLbyte GLbyte GLbyte alpha _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble alpha _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat alpha _GL_VOID_RET _GL_VOID GLint GLint GLint alpha _GL_VOID_RET _GL_VOID GLshort GLshort GLshort alpha _GL_VOID_RET _GL_VOID GLubyte GLubyte GLubyte alpha _GL_VOID_RET _GL_VOID GLuint GLuint GLuint alpha _GL_VOID_RET _GL_VOID GLushort GLushort GLushort alpha _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLint GLsizei width
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
Class ExecutionGroup is a group of Operations that are executed as one. This grouping is used to comb...
void finalize_chunk_execution(int chunk_number, MemoryBuffer **memory_buffers)
After a chunk is executed, the needed resources can be freed or unlocked.
NodeOperation * get_output_operation() const
get the output operation of this ExecutionGroup
MemoryBuffer ** get_input_buffers_opencl(int chunk_number)
get all input buffers needed to calculate a chunk
MemoryBuffer * allocate_output_buffer(rcti &rect)
allocate the output buffer of a chunk
a MemoryBuffer contains access to the data of a chunk
const rcti & get_rect() const
get the rect of this MemoryBuffer
uint8_t get_num_channels() const
const int get_width() const
get the width of this MemoryBuffer
const int get_height() const
get the height of this MemoryBuffer
NodeOperation contains calculation logic.
unsigned int get_height() const
virtual void execute_opencl_region(OpenCLDevice *, rcti *, unsigned int, MemoryBuffer **, MemoryBuffer *)
when a chunk is executed by an OpenCLDevice, this method is called
unsigned int get_width() const
Device representing a GPU OpenCL device. An instance of this class represents a single cl_device.
void COM_cl_attach_size_to_kernel_parameter(cl_kernel kernel, int offset_index, NodeOperation *operation)
cl_mem COM_cl_attach_memory_buffer_to_kernel_parameter(cl_kernel kernel, int parameter_index, int offset_index, std::list< cl_mem > *cleanup, MemoryBuffer **input_memory_buffers, SocketReader *reader)
void COM_cl_attach_output_memory_buffer_to_kernel_parameter(cl_kernel kernel, int parameter_index, cl_mem cl_output_memory_buffer)
void COM_cl_enqueue_range(cl_kernel kernel, MemoryBuffer *output_memory_buffer)
cl_kernel COM_cl_create_kernel(const char *kernelname, std::list< cl_kernel > *cl_kernels_to_clean_up)
static const cl_image_format * determine_image_format(MemoryBuffer *memory_buffer)
determine an image format
void COM_cl_attach_memory_buffer_offset_to_kernel_parameter(cl_kernel kernel, int offset_index, MemoryBuffer *memory_buffers)
OpenCLDevice(cl_context context, cl_device_id device, cl_program program, cl_int vendor_id)
constructor with opencl device
void execute(WorkPackage *work) override
execute a WorkPackage
MemoryBuffer * get_input_memory_buffer(MemoryBuffer **memory_buffers) override
SyclQueue void void size_t num_bytes SyclQueue void const char void *memory_device_pointer KernelContext int kernel
ccl_gpu_kernel_postfix ccl_global float int int int int float bool int offset
static void error(const char *str)
const cl_image_format IMAGE_FORMAT_COLOR
const cl_image_format IMAGE_FORMAT_VECTOR
const cl_image_format IMAGE_FORMAT_VALUE
contains data about work that can be scheduled
unsigned int chunk_number
number of the chunk to be executed
ExecutionGroup * execution_group
execution_group with the operations-setup to be evaluated