Blender  V3.3
parallel_sorted_index.h
Go to the documentation of this file.
1 /* SPDX-License-Identifier: Apache-2.0
2  * Copyright 2021-2022 Blender Foundation */
3 
4 #pragma once
5 
7 
/* Given an array of states, build an array of indices for which the states
 * are active and sorted by a given key. The prefix sum of the number of active
 * states per key must have already been computed.
 *
 * TODO: there may be ways to optimize this to avoid this many atomic ops? */
13 
14 #include "util/atomic.h"
15 
/* Default block size for the sorted-index kernel: 1024 when compiling with
 * HIP (`__HIP__` defined), 512 otherwise. */
#ifdef __HIP__
#  define GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE 1024
#else
#  define GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE 512
#endif

/* Sentinel key (all bits set, i.e. -1 as an int) marking a state index that
 * has no valid key and must not be written to the sorted index array. */
#define GPU_PARALLEL_SORTED_INDEX_INACTIVE_KEY (~0)
22 
23 template<typename GetKeyOp>
25  const uint num_states,
26  const int num_states_limit,
27  ccl_global int *indices,
31  GetKeyOp get_key_op)
32 {
33  const int key = (state_index < num_states) ? get_key_op(state_index) :
35 
37  const uint index = atomic_fetch_and_add_uint32(&key_prefix_sum[key], 1);
38  if (index < num_states_limit) {
39  /* Assign state index. */
40  indices[index] = state_index;
41  }
42  else {
43  /* Can't process this state now, increase the counter again so that
44  * it will be handled in another iteration. */
46  }
47  }
48 }
49 
unsigned int uint
Definition: BLI_sys_types.h:67
ATOMIC_INLINE uint32_t atomic_fetch_and_add_uint32(uint32_t *p, uint32_t x)
#define ccl_global
Definition: cuda/compat.h:43
#define CCL_NAMESPACE_END
Definition: cuda/compat.h:9
ccl_gpu_kernel_postfix int int ccl_global int ccl_global int ccl_global int * key_counter
int num_states
const uint state_index
ccl_gpu_kernel_postfix int ccl_global int ccl_global int * num_indices
ccl_gpu_kernel_postfix int int ccl_global int ccl_global int ccl_global int ccl_global int * key_prefix_sum
ccl_gpu_kernel_postfix int int num_states_limit
ccl_gpu_kernel_postfix int ccl_global int * indices
#define __device__
Definition: metal/compat.h:248
#define GPU_PARALLEL_SORTED_INDEX_INACTIVE_KEY
__device__ void gpu_parallel_sorted_index_array(const uint state_index, const uint num_states, const int num_states_limit, ccl_global int *indices, ccl_global int *num_indices, ccl_global int *key_counter, ccl_global int *key_prefix_sum, GetKeyOp get_key_op)