29 #include <opensubdiv/far/error.h>
30 #include <opensubdiv/far/patchDescriptor.h>
31 #include <opensubdiv/far/stencilTable.h>
32 #include <opensubdiv/osd/glslPatchShaderSource.h>
40 using OpenSubdiv::Far::LimitStencilTable;
41 using OpenSubdiv::Far::StencilTable;
42 using OpenSubdiv::Osd::BufferDescriptor;
43 using OpenSubdiv::Osd::PatchArray;
44 using OpenSubdiv::Osd::PatchArrayVector;
49 namespace opensubdiv {
// Fragment of a buffer-upload helper that copies the contents of `src` into a
// GL buffer object named by `devicePtr`.
// NOTE(review): presumably the body of createSSBO<T>(); the signature and the
// lines between the two upload paths are not part of this excerpt.
// NOTE(review): `src.at(0)` throws std::out_of_range for an empty vector --
// confirm that an emptiness guard precedes this code.
59 #if defined(GL_ARB_direct_state_access)
// Direct-state-access path: create the buffer and fill it by name.
60 if (GLEW_ARB_direct_state_access) {
61 glCreateBuffers(1, &devicePtr);
62 glNamedBufferData(devicePtr,
src.size() *
sizeof(
T), &
src.at(0), GL_STATIC_DRAW);
// Bind-to-edit path: record the current GL_SHADER_STORAGE_BUFFER binding in
// `prev`, upload through that target, then put the recorded binding back.
68 glGetIntegerv(GL_SHADER_STORAGE_BUFFER_BINDING, &
prev);
69 glGenBuffers(1, &devicePtr);
70 glBindBuffer(GL_SHADER_STORAGE_BUFFER, devicePtr);
71 glBufferData(GL_SHADER_STORAGE_BUFFER,
src.size() *
sizeof(
T), &
src.at(0), GL_STATIC_DRAW);
72 glBindBuffer(GL_SHADER_STORAGE_BUFFER,
prev);
// Fragment that initialises the buffer handles from a `stencilTable`.
// NOTE(review): presumably the GLStencilTableSSBO(StencilTable const *)
// constructor; its header, braces and some statements are not in this excerpt.
80 _numStencils = stencilTable->GetNumStencils();
// Non-empty table: upload offsets, control indices and weights.
81 if (_numStencils > 0) {
83 _offsets =
createSSBO(stencilTable->GetOffsets());
84 _indices =
createSSBO(stencilTable->GetControlIndices());
85 _weights =
createSSBO(stencilTable->GetWeights());
// No derivative weights are uploaded on this path, so those handles are zeroed.
86 _duWeights = _dvWeights = 0;
87 _duuWeights = _duvWeights = _dvvWeights = 0;
// Every handle is set to 0 here.
// NOTE(review): presumably the empty-table branch; the `else` is not visible.
90 _sizes = _offsets = _indices = _weights = 0;
91 _duWeights = _dvWeights = 0;
92 _duuWeights = _duvWeights = _dvvWeights = 0;
// Fragment that initialises the buffer handles from a `limitStencilTable`,
// including first- and second-derivative weights.
// NOTE(review): presumably the GLStencilTableSSBO(LimitStencilTable const *)
// constructor; its header and braces are not in this excerpt.
98 _numStencils = limitStencilTable->GetNumStencils();
// Non-empty table: one buffer per table array.
99 if (_numStencils > 0) {
100 _sizes =
createSSBO(limitStencilTable->GetSizes());
101 _offsets =
createSSBO(limitStencilTable->GetOffsets());
102 _indices =
createSSBO(limitStencilTable->GetControlIndices());
103 _weights =
createSSBO(limitStencilTable->GetWeights());
// NOTE(review): the derivative weight arrays may be empty even when the table
// has stencils -- confirm createSSBO tolerates an empty vector.
104 _duWeights =
createSSBO(limitStencilTable->GetDuWeights());
105 _dvWeights =
createSSBO(limitStencilTable->GetDvWeights());
106 _duuWeights =
createSSBO(limitStencilTable->GetDuuWeights());
107 _duvWeights =
createSSBO(limitStencilTable->GetDuvWeights());
108 _dvvWeights =
createSSBO(limitStencilTable->GetDvvWeights());
// Every handle is set to 0 here.
// NOTE(review): presumably the empty-table branch; the `else` is not visible.
111 _sizes = _offsets = _indices = _weights = 0;
112 _duWeights = _dvWeights = 0;
113 _duuWeights = _duvWeights = _dvvWeights = 0;
// Releases each of the nine buffer handles held by the table object.
// NOTE(review): presumably the GLStencilTableSSBO destructor; its header and
// any per-handle guards fall in lines that are not part of this excerpt.
120 glDeleteBuffers(1, &_sizes);
122 glDeleteBuffers(1, &_offsets);
124 glDeleteBuffers(1, &_indices);
126 glDeleteBuffers(1, &_weights);
128 glDeleteBuffers(1, &_duWeights);
130 glDeleteBuffers(1, &_dvWeights);
132 glDeleteBuffers(1, &_duuWeights);
134 glDeleteBuffers(1, &_duvWeights);
136 glDeleteBuffers(1, &_dvvWeights);
// Zero-fills both kernel structs byte-by-byte.
// NOTE(review): presumably part of the GLComputeEvaluator constructor (header
// not visible). Both kernel types declare constructors and destructors
// elsewhere in this file, so raw memset over them deserves a second look.
143 memset((
void *)&_stencilKernel, 0,
sizeof(_stencilKernel));
144 memset((
void *)&_patchKernel, 0,
sizeof(_patchKernel));
// Deletes the patch-array buffer only when a handle has been created.
// NOTE(review): presumably part of the GLComputeEvaluator destructor; the
// header and closing braces are not part of this excerpt.
149 if (_patchArraysSSBO) {
150 glDeleteBuffers(1, &_patchArraysSSBO);
// Builds a GLSL compute program from a generated #define preamble plus shader
// source strings.
// NOTE(review): presumably the static helper compileKernel(); the first
// parameter (srcDesc), the trailing workGroupSize parameter, the link call and
// the return statement fall in lines that are not part of this excerpt.
155 BufferDescriptor
const &dstDesc,
156 BufferDescriptor
const &duDesc,
157 BufferDescriptor
const &dvDesc,
158 BufferDescriptor
const &duuDesc,
159 BufferDescriptor
const &duvDesc,
160 BufferDescriptor
const &dvvDesc,
161 const char *kernelDefine,
// Create the program object and a single compute shader object.
164 GLuint
program = glCreateProgram();
166 GLuint shader = glCreateShader(GL_COMPUTE_SHADER);
// Patch-basis GLSL provided by OpenSubdiv, plus the define emitted alongside it.
168 std::string patchBasisShaderSource =
169 OpenSubdiv::Osd::GLSLPatchShaderSource::GetPatchBasisShaderSource();
170 const char *patchBasisShaderSourceDefine =
"#define OSD_PATCH_BASIS_GLSL\n";
// Preamble: element length, source/destination strides and work group size
// are baked into the shader as preprocessor constants, followed by the
// caller-supplied kernel selector and the patch-basis define.
172 std::ostringstream defines;
173 defines <<
"#define LENGTH " << srcDesc.length <<
"\n"
174 <<
"#define SRC_STRIDE " << srcDesc.stride <<
"\n"
175 <<
"#define DST_STRIDE " << dstDesc.stride <<
"\n"
176 <<
"#define WORK_GROUP_SIZE " << workGroupSize <<
"\n"
177 << kernelDefine <<
"\n"
178 << patchBasisShaderSourceDefine <<
"\n";
// A derivative order is considered requested when any of its descriptors has
// a positive length.
180 bool deriv1 = (duDesc.length > 0 || dvDesc.length > 0);
181 bool deriv2 = (duuDesc.length > 0 || duvDesc.length > 0 || dvvDesc.length > 0);
// NOTE(review): the two appends below are presumably guarded by deriv1 and
// deriv2 respectively; the `if` lines are not visible here.
183 defines <<
"#define OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES\n";
186 defines <<
"#define OPENSUBDIV_GLSL_COMPUTE_USE_2ND_DERIVATIVES\n";
// Keep the preamble alive in a named string so c_str() stays valid below.
189 std::string defineStr = defines.str();
// Source slots: [0] version line, [1] preamble, [2] patch-basis source.
// NOTE(review): slot [3] is not assigned in the visible lines (presumably the
// kernel source itself). glShaderSource is called with a count of 4 and NULL
// lengths, so every slot must be a non-null, NUL-terminated string by then.
191 const char *shaderSources[4] = {
"#version 430\n", 0, 0, 0};
193 shaderSources[1] = defineStr.c_str();
194 shaderSources[2] = patchBasisShaderSource.c_str();
196 glShaderSource(shader, 4, shaderSources,
NULL);
197 glCompileShader(shader);
198 glAttachShader(
program, shader);
// Read back the link status (the link call itself is not part of this excerpt).
202 glGetProgramiv(
program, GL_LINK_STATUS, &linked);
// On link failure the shader info log (at most 1024 bytes) is reported through
// the OpenSubdiv error callback.
// NOTE(review): the second Error call presumably reports a program info log
// fetched in a line that is not visible.
204 if (linked == GL_FALSE) {
206 glGetShaderInfoLog(shader, 1024,
NULL,
buffer);
207 OpenSubdiv::Far::Error(OpenSubdiv::Far::FAR_RUNTIME_ERROR,
buffer);
210 OpenSubdiv::Far::Error(OpenSubdiv::Far::FAR_RUNTIME_ERROR,
buffer);
// The shader object is no longer needed by this function once attached.
216 glDeleteShader(shader);
// Compiles the stencil kernel and the patch kernel with the same set of buffer
// descriptors and `_workGroupSize`, then makes sure the patch-array buffer
// handle exists.
// NOTE(review): presumably GLComputeEvaluator::Compile(); the header, the
// srcDesc parameter and the failure/return statements are not in this excerpt.
222 BufferDescriptor
const &dstDesc,
223 BufferDescriptor
const &duDesc,
224 BufferDescriptor
const &dvDesc,
225 BufferDescriptor
const &duuDesc,
226 BufferDescriptor
const &duvDesc,
227 BufferDescriptor
const &dvvDesc)
// Each kernel reports failure through its boolean result.
231 if (!_stencilKernel.Compile(
232 srcDesc, dstDesc, duDesc, dvDesc, duuDesc, duvDesc, dvvDesc, _workGroupSize)) {
237 if (!_patchKernel.Compile(
238 srcDesc, dstDesc, duDesc, dvDesc, duuDesc, duvDesc, dvvDesc, _workGroupSize)) {
// Create the patch-array buffer name only once; later calls reuse it.
243 if (!_patchArraysSSBO) {
244 glGenBuffers(1, &_patchArraysSSBO);
// Returns how many work groups of `_workGroupSize` items are needed to cover
// `count` items: the usual round-up integer division.
// Assumes `count` is non-negative and `_workGroupSize` is positive -- TODO confirm.
258 int GLComputeEvaluator::GetDispatchSize(
int count)
const
260 return (
count + _workGroupSize - 1) / _workGroupSize;
// Launches the currently bound compute program over enough work groups to
// cover `totalDispatchSize` items, spilling into the Y dimension when the X
// work group limit would be exceeded.
// NOTE(review): the statements that compute the 2D split are not part of this
// excerpt.
263 void GLComputeEvaluator::DispatchCompute(
int totalDispatchSize)
const
// Query the implementation limits for work group counts in X (index 0) and
// Y (index 1).
265 int maxWorkGroupCount[2] = {0, 0};
267 glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 0, &maxWorkGroupCount[0]);
268 glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 1, &maxWorkGroupCount[1]);
270 const GLuint maxResX =
static_cast<GLuint
>(maxWorkGroupCount[0]);
// Start with a 1D dispatch: all groups in X, a single row in Y.
272 const int dispatchSize = GetDispatchSize(totalDispatchSize);
273 GLuint dispatchRX =
static_cast<GLuint
>(dispatchSize);
274 GLuint dispatchRY = 1u;
275 if (dispatchRX > maxResX) {
// NOTE(review): this compares an unsigned product against the signed
// `dispatchSize`; the int operand is converted to unsigned for the comparison.
285 if ((dispatchRX * (dispatchRY - 1)) >= dispatchSize) {
// Debug-only check of the Y limit; it is compiled out when NDEBUG is defined.
293 assert(dispatchRY <
static_cast<GLuint
>(maxWorkGroupCount[1]));
295 glDispatchCompute(dispatchRX, dispatchRY, 1);
// Parameter-list fragment of an evaluation entry point that takes only
// first-derivative descriptors (du, dv) and the matching weight buffers.
// NOTE(review): presumably the first-derivative overload of EvalStencils; the
// function name, the buffer handle parameters and the body are not part of
// this excerpt.
299 BufferDescriptor
const &srcDesc,
301 BufferDescriptor
const &dstDesc,
303 BufferDescriptor
const &duDesc,
305 BufferDescriptor
const &dvDesc,
307 GLuint offsetsBuffer,
308 GLuint indicesBuffer,
309 GLuint weightsBuffer,
310 GLuint duWeightsBuffer,
311 GLuint dvWeightsBuffer,
// Runs the stencil kernel over the stencil range [start, end): binds the
// source/destination/derivative buffers and the stencil table buffers to
// shader storage binding points, sets the kernel uniforms, dispatches, and
// then restores the previous program and clears the binding points.
// NOTE(review): presumably the full (second-derivative) overload of
// EvalStencils; the function name, the data buffer handle parameters and the
// early-return statements are not part of this excerpt.
344 BufferDescriptor
const &srcDesc,
346 BufferDescriptor
const &dstDesc,
348 BufferDescriptor
const &duDesc,
350 BufferDescriptor
const &dvDesc,
352 BufferDescriptor
const &duuDesc,
354 BufferDescriptor
const &duvDesc,
356 BufferDescriptor
const &dvvDesc,
358 GLuint offsetsBuffer,
359 GLuint indicesBuffer,
360 GLuint weightsBuffer,
361 GLuint duWeightsBuffer,
362 GLuint dvWeightsBuffer,
363 GLuint duuWeightsBuffer,
364 GLuint duvWeightsBuffer,
365 GLuint dvvWeightsBuffer,
// A zero program handle means the stencil kernel was never compiled.
370 if (!_stencilKernel.program)
// Number of stencils to evaluate in this call.
372 int count = end - start;
// Binding points: 0 source, 1 destination, 2/3 first derivatives,
// 10/11/12 second derivatives.
377 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, srcBuffer);
378 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, dstBuffer);
379 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, duBuffer);
380 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, dvBuffer);
381 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 10, duuBuffer);
382 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 11, duvBuffer);
383 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 12, dvvBuffer);
// Binding points 4-7: stencil sizes, offsets, control indices and weights.
384 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, sizesBuffer);
385 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, offsetsBuffer);
386 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 6, indicesBuffer);
387 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 7, weightsBuffer);
// Binding points 8/9: first-derivative weights.
// NOTE(review): presumably guarded like the second-derivative weights below;
// the guards are not visible.
389 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 8, duWeightsBuffer);
391 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 9, dvWeightsBuffer);
// Binding points 13/14/15: second-derivative weights, bound only when a
// non-zero buffer handle was supplied.
392 if (duuWeightsBuffer)
393 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 13, duuWeightsBuffer);
394 if (duvWeightsBuffer)
395 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 14, duvWeightsBuffer);
396 if (dvvWeightsBuffer)
397 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 15, dvvWeightsBuffer);
// Remember the caller's program so it can be restored after the dispatch.
400 glGetIntegerv(GL_CURRENT_PROGRAM, &activeProgram);
401 glUseProgram(_stencilKernel.program);
403 glUniform1i(_stencilKernel.uniformStart, start);
404 glUniform1i(_stencilKernel.uniformEnd, end);
405 glUniform1i(_stencilKernel.uniformSrcOffset, srcDesc.offset);
406 glUniform1i(_stencilKernel.uniformDstOffset, dstDesc.offset);
// Each derivative descriptor is passed as (offset, length, stride).
// NOTE(review): glGetUniformLocation reports a missing uniform as -1 and 0 is
// a valid location, so the `> 0` tests below would skip a uniform that
// happened to be assigned location 0 -- consider `>= 0`.
407 if (_stencilKernel.uniformDuDesc > 0) {
408 glUniform3i(_stencilKernel.uniformDuDesc, duDesc.offset, duDesc.length, duDesc.stride);
410 if (_stencilKernel.uniformDvDesc > 0) {
411 glUniform3i(_stencilKernel.uniformDvDesc, dvDesc.offset, dvDesc.length, dvDesc.stride);
413 if (_stencilKernel.uniformDuuDesc > 0) {
414 glUniform3i(_stencilKernel.uniformDuuDesc, duuDesc.offset, duuDesc.length, duuDesc.stride);
416 if (_stencilKernel.uniformDuvDesc > 0) {
417 glUniform3i(_stencilKernel.uniformDuvDesc, duvDesc.offset, duvDesc.length, duvDesc.stride);
419 if (_stencilKernel.uniformDvvDesc > 0) {
420 glUniform3i(_stencilKernel.uniformDvvDesc, dvvDesc.offset, dvvDesc.length, dvvDesc.stride);
423 DispatchCompute(
count);
// Restore the caller's program.
425 glUseProgram(activeProgram);
// Barrier issued after the dispatch, then all sixteen binding points used
// above (0..15) are cleared.
427 glMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT);
428 for (
int i = 0; i < 16; ++i) {
429 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, i, 0);
// Parameter-list fragment of a const patch-evaluation entry point that takes
// only first-derivative descriptors (du, dv) together with the patch
// coordinate, patch array, patch index and patch param inputs.
// NOTE(review): presumably the first-derivative overload of EvalPatches; the
// function name, the data buffer handle parameters and the body are not part
// of this excerpt.
436 BufferDescriptor
const &srcDesc,
438 BufferDescriptor
const &dstDesc,
440 BufferDescriptor
const &duDesc,
442 BufferDescriptor
const &dvDesc,
444 GLuint patchCoordsBuffer,
445 const PatchArrayVector &patchArrays,
446 GLuint patchIndexBuffer,
447 GLuint patchParamsBuffer)
const
// Runs the patch kernel for `numPatchCoords` patch coordinates: binds the
// data buffers and patch table buffers to shader storage binding points,
// uploads the patch arrays, sets the kernel uniforms, dispatches, and then
// restores the previous program and clears binding points.
// NOTE(review): presumably the full (second-derivative) overload of
// EvalPatches; the function name, the data buffer handle parameters,
// `numPatchCoords` and the early-return statements are not in this excerpt.
472 BufferDescriptor
const &srcDesc,
474 BufferDescriptor
const &dstDesc,
476 BufferDescriptor
const &duDesc,
478 BufferDescriptor
const &dvDesc,
480 BufferDescriptor
const &duuDesc,
482 BufferDescriptor
const &duvDesc,
484 BufferDescriptor
const &dvvDesc,
486 GLuint patchCoordsBuffer,
487 const PatchArrayVector &patchArrays,
488 GLuint patchIndexBuffer,
489 GLuint patchParamsBuffer)
const
// A zero program handle means the patch kernel was never compiled.
492 if (!_patchKernel.program)
// Binding points: 0 source, 1 destination, 2/3 first derivatives,
// 10/11/12 second derivatives, 5 patch coordinates, 6 patch indices,
// 7 patch params.
495 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, srcBuffer);
496 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, dstBuffer);
497 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, duBuffer);
498 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, dvBuffer);
499 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 10, duuBuffer);
500 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 11, duvBuffer);
501 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 12, dvvBuffer);
502 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, patchCoordsBuffer);
503 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 6, patchIndexBuffer);
504 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 7, patchParamsBuffer);
// Remember the caller's program so it can be restored after the dispatch.
507 glGetIntegerv(GL_CURRENT_PROGRAM, &activeProgram);
508 glUseProgram(_patchKernel.program);
510 glUniform1i(_patchKernel.uniformSrcOffset, srcDesc.offset);
511 glUniform1i(_patchKernel.uniformDstOffset, dstDesc.offset);
// Upload the patch arrays into `_patchArraysSSBO`: storage for
// patchArrays.size() elements is requested with a NULL data pointer, then each
// element is written at its own byte offset, and the buffer is attached to
// binding point 4.
// NOTE(review): the names of the two GL upload calls fall in lines that are
// not visible here.
513 int patchArraySize =
sizeof(PatchArray);
514 glBindBuffer(GL_SHADER_STORAGE_BUFFER, _patchArraysSSBO);
516 GL_SHADER_STORAGE_BUFFER, patchArrays.size() * patchArraySize,
NULL, GL_STATIC_DRAW);
517 for (
int i = 0; i < (int)patchArrays.size(); ++i) {
519 GL_SHADER_STORAGE_BUFFER, i * patchArraySize,
sizeof(PatchArray), &patchArrays[i]);
521 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, _patchArraysSSBO);
// Each derivative descriptor is passed as (offset, length, stride).
// NOTE(review): glGetUniformLocation reports a missing uniform as -1 and 0 is
// a valid location, so the `> 0` tests below would skip a uniform that
// happened to be assigned location 0 -- consider `>= 0`.
523 if (_patchKernel.uniformDuDesc > 0) {
524 glUniform3i(_patchKernel.uniformDuDesc, duDesc.offset, duDesc.length, duDesc.stride);
526 if (_patchKernel.uniformDvDesc > 0) {
527 glUniform3i(_patchKernel.uniformDvDesc, dvDesc.offset, dvDesc.length, dvDesc.stride);
529 if (_patchKernel.uniformDuuDesc > 0) {
530 glUniform3i(_patchKernel.uniformDuuDesc, duuDesc.offset, duuDesc.length, duuDesc.stride);
532 if (_patchKernel.uniformDuvDesc > 0) {
533 glUniform3i(_patchKernel.uniformDuvDesc, duvDesc.offset, duvDesc.length, duvDesc.stride);
535 if (_patchKernel.uniformDvvDesc > 0) {
536 glUniform3i(_patchKernel.uniformDvvDesc, dvvDesc.offset, dvvDesc.length, dvvDesc.stride);
539 DispatchCompute(numPatchCoords);
// Restore the caller's program.
541 glUseProgram(activeProgram);
// Clear the binding points used above.
// NOTE(review): binding point 7 (patch params) is bound earlier but is not
// released in the visible lines -- confirm whether that is intended.
543 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, 0);
544 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, 0);
545 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, 0);
546 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, 0);
547 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, 0);
548 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, 0);
549 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 6, 0);
551 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 10, 0);
552 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 11, 0);
553 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 12, 0);
// Constructs a stencil kernel with no GL program attached (handle 0).
// The constructor body is not part of this excerpt.
559 GLComputeEvaluator::_StencilKernel::_StencilKernel() :
program(0)
// Destructor of the stencil kernel.
// NOTE(review): the body is not part of this excerpt; presumably it releases
// `program` -- confirm.
562 GLComputeEvaluator::_StencilKernel::~_StencilKernel()
// Builds the stencil-evaluation variant of the compute kernel for the given
// buffer descriptors and caches the locations of the uniforms it uses.
// NOTE(review): the workGroupSize parameter, the handling of a previously
// built program, the failure check and the return statement fall in lines
// that are not part of this excerpt.
569 bool GLComputeEvaluator::_StencilKernel::Compile(BufferDescriptor
const &srcDesc,
570 BufferDescriptor
const &dstDesc,
571 BufferDescriptor
const &duDesc,
572 BufferDescriptor
const &dvDesc,
573 BufferDescriptor
const &duuDesc,
574 BufferDescriptor
const &duvDesc,
575 BufferDescriptor
const &dvvDesc,
// Preprocessor switch handed to the shader build as its kernel define.
583 const char *kernelDefine =
"#define OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_STENCILS\n";
// Argument list of the shader build call.
// NOTE(review): the callee name is not visible; presumably compileKernel().
586 srcDesc, dstDesc, duDesc, dvDesc, duuDesc, duvDesc, dvvDesc, kernelDefine, workGroupSize);
// Cache uniform locations by name; glGetUniformLocation yields -1 for a name
// that is not an active uniform of the program.
591 uniformStart = glGetUniformLocation(
program,
"batchStart");
592 uniformEnd = glGetUniformLocation(
program,
"batchEnd");
593 uniformSrcOffset = glGetUniformLocation(
program,
"srcOffset");
594 uniformDstOffset = glGetUniformLocation(
program,
"dstOffset");
595 uniformDuDesc = glGetUniformLocation(
program,
"duDesc");
596 uniformDvDesc = glGetUniformLocation(
program,
"dvDesc");
597 uniformDuuDesc = glGetUniformLocation(
program,
"duuDesc");
598 uniformDuvDesc = glGetUniformLocation(
program,
"duvDesc");
599 uniformDvvDesc = glGetUniformLocation(
program,
"dvvDesc");
// Constructs a patch kernel with no GL program attached (handle 0).
// The constructor body is not part of this excerpt.
606 GLComputeEvaluator::_PatchKernel::_PatchKernel() :
program(0)
// Destructor of the patch kernel.
// NOTE(review): the body is not part of this excerpt; presumably it releases
// `program` -- confirm.
609 GLComputeEvaluator::_PatchKernel::~_PatchKernel()
// Builds the patch-evaluation variant of the compute kernel for the given
// buffer descriptors and caches the locations of the uniforms it uses.
// NOTE(review): the workGroupSize parameter, the handling of a previously
// built program, the failure check and the return statement fall in lines
// that are not part of this excerpt.
616 bool GLComputeEvaluator::_PatchKernel::Compile(BufferDescriptor
const &srcDesc,
617 BufferDescriptor
const &dstDesc,
618 BufferDescriptor
const &duDesc,
619 BufferDescriptor
const &dvDesc,
620 BufferDescriptor
const &duuDesc,
621 BufferDescriptor
const &duvDesc,
622 BufferDescriptor
const &dvvDesc,
// Preprocessor switch handed to the shader build as its kernel define.
630 const char *kernelDefine =
"#define OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_PATCHES\n";
// Argument list of the shader build call.
// NOTE(review): the callee name is not visible; presumably compileKernel().
633 srcDesc, dstDesc, duDesc, dvDesc, duuDesc, duvDesc, dvvDesc, kernelDefine, workGroupSize);
// Cache uniform locations by name; glGetUniformLocation yields -1 for a name
// that is not an active uniform of the program.
638 uniformSrcOffset = glGetUniformLocation(
program,
"srcOffset");
639 uniformDstOffset = glGetUniformLocation(
program,
"dstOffset");
640 uniformPatchArray = glGetUniformLocation(
program,
"patchArray");
641 uniformDuDesc = glGetUniformLocation(
program,
"duDesc");
642 uniformDvDesc = glGetUniformLocation(
program,
"dvDesc");
643 uniformDuuDesc = glGetUniformLocation(
program,
"duuDesc");
644 uniformDuvDesc = glGetUniformLocation(
program,
"duvDesc");
645 uniformDvvDesc = glGetUniformLocation(
program,
"dvvDesc");
bool Compile(OpenSubdiv::Osd::BufferDescriptor const &srcDesc, OpenSubdiv::Osd::BufferDescriptor const &dstDesc, OpenSubdiv::Osd::BufferDescriptor const &duDesc=OpenSubdiv::Osd::BufferDescriptor(), OpenSubdiv::Osd::BufferDescriptor const &dvDesc=OpenSubdiv::Osd::BufferDescriptor(), OpenSubdiv::Osd::BufferDescriptor const &duuDesc=OpenSubdiv::Osd::BufferDescriptor(), OpenSubdiv::Osd::BufferDescriptor const &duvDesc=OpenSubdiv::Osd::BufferDescriptor(), OpenSubdiv::Osd::BufferDescriptor const &dvvDesc=OpenSubdiv::Osd::BufferDescriptor())
static bool EvalPatches(SRC_BUFFER *srcBuffer, OpenSubdiv::Osd::BufferDescriptor const &srcDesc, DST_BUFFER *dstBuffer, OpenSubdiv::Osd::BufferDescriptor const &dstDesc, int numPatchCoords, PATCHCOORD_BUFFER *patchCoords, PATCH_TABLE *patchTable, GLComputeEvaluator const *instance, void *deviceContext=NULL)
Generic limit eval function. This function has the same signature as other device kernels so that ...
static bool EvalStencils(SRC_BUFFER *srcBuffer, OpenSubdiv::Osd::BufferDescriptor const &srcDesc, DST_BUFFER *dstBuffer, OpenSubdiv::Osd::BufferDescriptor const &dstDesc, STENCIL_TABLE const *stencilTable, GLComputeEvaluator const *instance, void *deviceContext=NULL)
Generic static stencil function. This function has the same signature as other device kernels so t...
static void Synchronize(void *deviceContext)
Wait until the dispatched kernel finishes.
~GLComputeEvaluator()
Destructor. Note that the GL context must be made current.
GLComputeEvaluator()
Constructor.
GLStencilTableSSBO(OpenSubdiv::Far::StencilTable const *stencilTable)
SyclQueue void void * src
char datatoc_glsl_compute_kernel_glsl[]
ccl_global float * buffer
ccl_device_inline float3 ceil(const float3 &a)
SymEdge< T > * prev(const SymEdge< T > *se)
static GLuint compileKernel(BufferDescriptor const &srcDesc, BufferDescriptor const &dstDesc, BufferDescriptor const &duDesc, BufferDescriptor const &dvDesc, BufferDescriptor const &duuDesc, BufferDescriptor const &duvDesc, BufferDescriptor const &dvvDesc, const char *kernelDefine, int workGroupSize)
GLuint createSSBO(std::vector< T > const &src)