#ifdef __KERNEL_METAL__
/* ... Metal-specific compilation context setup elided ... */
#elif defined(__KERNEL_ONEAPI__)
/* ... oneAPI-specific compilation context setup elided ... */
#endif

/* (A matching #ifdef __KERNEL_METAL__ / #elif defined(__KERNEL_ONEAPI__) block that
 * closes the compilation context is elided further down.) */
/* (Two #if defined(__KERNEL_METAL_APPLE__) && defined(__METALRT__) guards sit inside
 * the elided intersection kernels; the second applies a Metal-only workaround:) */
#if defined(__KERNEL_METAL_APPLE__) && defined(__METALRT__)
  kg += __dummy_constant;
#endif
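/* Note (added; rationale inferred, not stated in the surviving lines): adding a
 * constant that is expected to be zero at run time to the KernelGlobals pointer must
 * not change the result; it is presumably there to sidestep a Metal compiler issue. */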
/* The path-state compaction kernels (integrator_queued_paths_array and its
 * active/terminated/sorted/compact siblings for regular and shadow paths) are elided
 * here; each builds a predicate with ccl_gpu_kernel_lambda() and finishes with a
 * gpu_parallel_active_index_array() or gpu_parallel_sorted_index_array() call taking
 * ccl_gpu_kernel_lambda_pass as its final argument. */
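/* Sketch (reconstructed, not verbatim from this file): the rough shape of one such
 * kernel. The exact parameter qualifiers and the lambda-pass member name are
 * assumptions; ccl_gpu_kernel_lambda() is taken to declare a predicate whose captured
 * state travels in ccl_gpu_kernel_lambda_pass. */
ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
    ccl_gpu_kernel_signature(integrator_queued_paths_array,
                             const int num_states,
                             ccl_global int *indices,
                             ccl_global int *num_indices,
                             const int kernel_index)
{
  /* Predicate: is this path state queued for the requested kernel? */
  ccl_gpu_kernel_lambda(INTEGRATOR_STATE(state, path, queued_kernel) == kernel_index,
                        int kernel_index);
  ccl_gpu_kernel_lambda_pass.kernel_index = kernel_index;

  /* Compact the indices of all states matching the predicate into `indices`. */
  gpu_parallel_active_index_array(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE,
                                  num_states,
                                  indices,
                                  num_indices,
                                  ccl_gpu_kernel_lambda_pass);
}
ccl_gpu_kernel_postfix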
/* --------------------------------------------------------------------
 * Film.
 */

ccl_device_inline void kernel_gpu_film_convert_half_write(ccl_global uchar4 *rgba,
                                                          const int rgba_offset,
                                                          const int rgba_stride,
                                                          const int x,
                                                          const int y,
                                                          const half4 half_pixel)
{
#ifdef __KERNEL_HIP__
  /* HIP writes the four half components individually (workaround for a HIP
   * half-float display issue). */
  ccl_global half *out = ((ccl_global half *)rgba) + (rgba_offset + y * rgba_stride + x) * 4;
  out[0] = half_pixel.x;
  out[1] = half_pixel.y;
  out[2] = half_pixel.z;
  out[3] = half_pixel.w;
#else
  /* (Reconstructed: the non-HIP path stores the half4 in one go.) */
  ccl_global half4 *out = ((ccl_global half4 *)rgba) + rgba_offset + y * rgba_stride + x;
  *out = half_pixel;
#endif
}
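/* Note (added for clarity): both branches address the same memory. Pixel (x, y) lands
 * at half4 element rgba_offset + y * rgba_stride + x; the HIP branch merely writes the
 * four half components of that element one by one. E.g. with rgba_offset = 0,
 * rgba_stride = 1920 and (x, y) = (3, 2), the half4 index is 2 * 1920 + 3 = 3843 and
 * the raw half index is 4 * 3843 = 15372. */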
#ifdef __KERNEL_METAL__

#  define FILM_GET_PASS_PIXEL_F32(variant, input_channel_count) \
    float local_pixel[4]; \
    film_get_pass_pixel_##variant(&kfilm_convert, buffer, local_pixel); \
    if (input_channel_count >= 1) { \
      pixel[0] = local_pixel[0]; \
    } \
    if (input_channel_count >= 2) { \
      pixel[1] = local_pixel[1]; \
    } \
    if (input_channel_count >= 3) { \
      pixel[2] = local_pixel[2]; \
    } \
    if (input_channel_count >= 4) { \
      pixel[3] = local_pixel[3]; \
    }

#else

#  define FILM_GET_PASS_PIXEL_F32(variant, input_channel_count) \
    film_get_pass_pixel_##variant(&kfilm_convert, buffer, pixel);

#endif
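/* Note (added for clarity; the rationale is inferred, not documented in the surviving
 * lines): on Metal the pass read is staged through the thread-local local_pixel[4] and
 * copied out channel by channel, where input_channel_count is a compile-time constant
 * per variant so the per-channel `if`s fold away. The other backends let
 * film_get_pass_pixel_##variant() write straight into the output pixel. */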
#define KERNEL_FILM_CONVERT_VARIANT(variant, input_channel_count) \
  ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) \
      ccl_gpu_kernel_signature(film_convert_##variant, \
                               const KernelFilmConvert kfilm_convert, \
                               ccl_global float *pixels, \
                               ccl_global float *render_buffer, \
                               const int num_pixels, \
                               const int width, \
                               const int offset, \
                               const int stride, \
                               const int rgba_offset, \
                               const int rgba_stride) \
  { \
    const int render_pixel_index = ccl_gpu_global_id_x(); \
    if (render_pixel_index >= num_pixels) { \
      return; \
    } \
\
    const int x = render_pixel_index % width; \
    const int y = render_pixel_index / width; \
\
    const uint64_t buffer_pixel_index = x + y * stride; \
    ccl_global const float *buffer = render_buffer + offset + \
                                     buffer_pixel_index * kfilm_convert.pass_stride; \
\
    ccl_global float *pixel = pixels + \
                              (render_pixel_index + rgba_offset) * kfilm_convert.pixel_stride; \
\
    FILM_GET_PASS_PIXEL_F32(variant, input_channel_count); \
  } \
  ccl_gpu_kernel_postfix \
\
  ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) \
      ccl_gpu_kernel_signature(film_convert_##variant##_half_rgba, \
                               const KernelFilmConvert kfilm_convert, \
                               ccl_global uchar4 *rgba, \
                               ccl_global float *render_buffer, \
                               const int num_pixels, \
                               const int width, \
                               const int offset, \
                               const int stride, \
                               const int rgba_offset, \
                               const int rgba_stride) \
  { \
    const int render_pixel_index = ccl_gpu_global_id_x(); \
    if (render_pixel_index >= num_pixels) { \
      return; \
    } \
\
    const int x = render_pixel_index % width; \
    const int y = render_pixel_index / width; \
\
    const uint64_t buffer_pixel_index = x + y * stride; \
    ccl_global const float *buffer = render_buffer + offset + \
                                     buffer_pixel_index * kfilm_convert.pass_stride; \
\
    float pixel[4]; \
    film_get_pass_pixel_##variant(&kfilm_convert, buffer, pixel); \
\
    if (input_channel_count == 1) { \
      pixel[1] = pixel[2] = pixel[0]; \
    } \
    if (input_channel_count <= 3) { \
      pixel[3] = 1.0f; \
    } \
\
    film_apply_pass_pixel_overlays_rgba(&kfilm_convert, buffer, pixel); \
\
    const half4 half_pixel = float4_to_half4_display( \
        make_float4(pixel[0], pixel[1], pixel[2], pixel[3])); \
    kernel_gpu_film_convert_half_write(rgba, rgba_offset, rgba_stride, x, y, half_pixel); \
  } \
  ccl_gpu_kernel_postfix
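/* Per-pass instantiations are elided here; from the pattern above they take the form
 * KERNEL_FILM_CONVERT_VARIANT(pass_variant, channel_count), for example: */
KERNEL_FILM_CONVERT_VARIANT(depth, 1)
KERNEL_FILM_CONVERT_VARIANT(combined, 4)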
#undef KERNEL_FILM_CONVERT_VARIANT
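#if 0
/* Illustration (added; not part of the kernel source, and the helper name is invented
 * for this sketch): the index math shared by both film_convert kernels above.
 * render_pixel_index counts pixels row-major inside a width-wide viewport, `stride` is
 * the row pitch of the render buffer in pixels, and `offset` is an extra float offset.
 * E.g. width = 1920, stride = 1920, pass_stride = 36, render_pixel_index = 1923 gives
 * (x, y) = (3, 1) and a pass start of offset + (3 + 1 * 1920) * 36. */
static ccl_global const float *film_pass_pixel_begin(ccl_global const float *render_buffer,
                                                     const int render_pixel_index,
                                                     const int width,
                                                     const int offset,
                                                     const int stride,
                                                     const int pass_stride)
{
  const int x = render_pixel_index % width; /* Column inside the viewport. */
  const int y = render_pixel_index / width; /* Row inside the viewport. */
  const uint64_t buffer_pixel_index = x + y * stride;
  return render_buffer + offset + buffer_pixel_index * pass_stride;
}
#endif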