Blender  V3.3
Classes | Macros
array_store.c File Reference

Array storage to minimize duplication. More...

#include <stdlib.h>
#include <string.h>
#include "MEM_guardedalloc.h"
#include "BLI_listbase.h"
#include "BLI_mempool.h"
#include "BLI_strict_flags.h"
#include "BLI_array_store.h"
#include "BLI_ghash.h"

Go to the source code of this file.

Classes

struct  BArrayInfo
 
struct  BArrayMemory
 
struct  BArrayStore
 
struct  BArrayState
 
struct  BChunkList
 
struct  BChunk
 
struct  BChunkRef
 
struct  BTableRef
 

Macros

#define data_len_original   invalid_usage
 
#define GHASH_PTR_ADD_USER(gh, pt)
 
Defines

Some of the logic for merging is quite involved, support disabling some parts of this.

#define USE_FASTPATH_CHUNKS_FIRST
 
#define USE_FASTPATH_CHUNKS_LAST
 
#define USE_ALIGN_CHUNKS_TEST
 
#define USE_HASH_TABLE_ACCUMULATE
 
#define BCHUNK_HASH_TABLE_ACCUMULATE_STEPS   4
 
#define USE_HASH_TABLE_KEY_CACHE
 
#define HASH_TABLE_KEY_UNSET   ((uint64_t)-1)
 
#define HASH_TABLE_KEY_FALLBACK   ((uint64_t)-2)
 
#define BCHUNK_HASH_TABLE_MUL   3
 
#define USE_MERGE_CHUNKS
 
#define BCHUNK_SIZE_MIN_DIV   8
 
#define BCHUNK_SIZE_MAX_MUL   2
 

Typedefs

Internal Structs
typedef uint64_t hash_key
 
typedef struct BArrayInfo BArrayInfo
 
typedef struct BArrayMemory BArrayMemory
 
typedef struct BChunkList BChunkList
 
typedef struct BChunk BChunk
 
typedef struct BChunkRef BChunkRef
 
typedef struct BTableRef BTableRef
 

Functions

Debugging API (for testing).
static size_t bchunk_list_size (const BChunkList *chunk_list)
 
bool BLI_array_store_is_valid (BArrayStore *bs)
 
Internal BChunk API
static BChunkbchunk_new (BArrayMemory *bs_mem, const uchar *data, const size_t data_len)
 
static BChunkbchunk_new_copydata (BArrayMemory *bs_mem, const uchar *data, const size_t data_len)
 
static void bchunk_decref (BArrayMemory *bs_mem, BChunk *chunk)
 
static bool bchunk_data_compare (const BChunk *chunk, const uchar *data_base, const size_t data_base_len, const size_t offset)
 
Main Data De-Duplication Function
static BChunkListbchunk_list_from_data_merge (const BArrayInfo *info, BArrayMemory *bs_mem, const uchar *data, const size_t data_len_original, const BChunkList *chunk_list_reference)
 
Main Array Storage API
BArrayStoreBLI_array_store_create (uint stride, uint chunk_count)
 
static void array_store_free_data (BArrayStore *bs)
 
void BLI_array_store_destroy (BArrayStore *bs)
 
void BLI_array_store_clear (BArrayStore *bs)
 
BArrayStore Statistics
size_t BLI_array_store_calc_size_expanded_get (const BArrayStore *bs)
 
size_t BLI_array_store_calc_size_compacted_get (const BArrayStore *bs)
 
BArrayState Access
BArrayStateBLI_array_store_state_add (BArrayStore *bs, const void *data, const size_t data_len, const BArrayState *state_reference)
 
void BLI_array_store_state_remove (BArrayStore *bs, BArrayState *state)
 
size_t BLI_array_store_state_size_get (BArrayState *state)
 
void BLI_array_store_state_data_get (BArrayState *state, void *data)
 
voidBLI_array_store_state_data_get_alloc (BArrayState *state, size_t *r_data_len)
 

Internal BChunkList API

#define ASSERT_CHUNKLIST_SIZE(chunk_list, n)   (EXPR_NOP(chunk_list), EXPR_NOP(n))
 
#define ASSERT_CHUNKLIST_DATA(chunk_list, data)   (EXPR_NOP(chunk_list), EXPR_NOP(data))
 
static BChunkListbchunk_list_new (BArrayMemory *bs_mem, size_t total_size)
 
static void bchunk_list_decref (BArrayMemory *bs_mem, BChunkList *chunk_list)
 
static void bchunk_list_ensure_min_size_last (const BArrayInfo *info, BArrayMemory *bs_mem, BChunkList *chunk_list)
 
static void bchunk_list_calc_trim_len (const BArrayInfo *info, const size_t data_len, size_t *r_data_trim_len, size_t *r_data_last_chunk_len)
 
static void bchunk_list_append_only (BArrayMemory *bs_mem, BChunkList *chunk_list, BChunk *chunk)
 
static void bchunk_list_append_data (const BArrayInfo *info, BArrayMemory *bs_mem, BChunkList *chunk_list, const uchar *data, const size_t data_len)
 
static void bchunk_list_append_data_n (const BArrayInfo *info, BArrayMemory *bs_mem, BChunkList *chunk_list, const uchar *data, size_t data_len)
 
static void bchunk_list_append (const BArrayInfo *info, BArrayMemory *bs_mem, BChunkList *chunk_list, BChunk *chunk)
 
static void bchunk_list_fill_from_array (const BArrayInfo *info, BArrayMemory *bs_mem, BChunkList *chunk_list, const uchar *data, const size_t data_len)
 

Internal Hashing/De-Duplication API

Only used by bchunk_list_from_data_merge

#define HASH_INIT   (5381)
 
BLI_INLINE uint hash_data_single (const uchar p)
 
static uint hash_data (const uchar *key, size_t n)
 
static void hash_array_from_data (const BArrayInfo *info, const uchar *data_slice, const size_t data_slice_len, hash_key *hash_array)
 
static void hash_array_from_cref (const BArrayInfo *info, const BChunkRef *cref, const size_t data_len, hash_key *hash_array)
 
static void hash_accum (hash_key *hash_array, const size_t hash_array_len, size_t iter_steps)
 
static void hash_accum_single (hash_key *hash_array, const size_t hash_array_len, size_t iter_steps)
 
static hash_key key_from_chunk_ref (const BArrayInfo *info, const BChunkRef *cref, hash_key *hash_store, const size_t hash_store_len)
 
static const BChunkReftable_lookup (const BArrayInfo *info, BTableRef **table, const size_t table_len, const size_t i_table_start, const uchar *data, const size_t data_len, const size_t offset, const hash_key *table_hash_array)
 

Detailed Description

Array storage to minimize duplication.

This is done by splitting arrays into chunks and using copy-on-write (COW), to de-duplicate chunks, from the users perspective this is an implementation detail.

Overview

Data Structure

This diagram is an overview of the structure of a single array-store.

Note
The only 2 structures here which are referenced externally are the.
<+> BArrayStore: root data-structure,
 |  can store many 'states', which share memory.
 |
 |  This can store many arrays, however they must share the same 'stride'.
 |  Arrays of different types will need to use a new BArrayStore.
 |
 +- <+> states (Collection of BArrayState's):
 |   |  Each represents an array added by the user of this API.
 |   |  and references a chunk_list (each state is a chunk_list user).
 |   |  Note that the list order has no significance.
 |   |
 |   +- <+> chunk_list (BChunkList):
 |       |  The chunks that make up this state.
 |       |  Each state is a chunk_list user,
 |       |  avoids duplicating lists when there is no change between states.
 |       |
 |       +- chunk_refs (List of BChunkRef): Each chunk_ref links to a BChunk.
 |          Each reference is a chunk user,
 |          avoids duplicating smaller chunks of memory found in multiple states.
 |
 +- info (BArrayInfo):
 |  Sizes and offsets for this array-store.
 |  Also caches some variables for reuse.
 |
 +- <+> memory (BArrayMemory):
     |  Memory pools for storing BArrayStore data.
     |
     +- chunk_list (Pool of BChunkList):
     |  All chunk_lists, (reference counted, used by BArrayState).
     |
     +- chunk_ref (Pool of BChunkRef):
     |  All chunk_refs (link between BChunkList & BChunk).
     |
     +- chunks (Pool of BChunk):
        All chunks, (reference counted, used by BChunkList).
        These have their headers hashed for reuse so we can quickly check for duplicates.

De<h2>-Duplication

When creating a new state, a previous state can be given as a reference, matching chunks from this state are re-used in the new state.

First matches at either end of the array are detected. For identical arrays this is all that's needed.

De-duplication is performed on any remaining chunks, by hashing the first few bytes of the chunk (see: BCHUNK_HASH_TABLE_ACCUMULATE_STEPS).

Note
This is cached for reuse since the referenced data never changes.

An array is created to store hash values at every 'stride', then stepped over to search for matching chunks.

Once a match is found, there is a high chance next chunks match too, so this is checked to avoid performing so many hash-lookups. Otherwise new chunks are created.

Definition in file array_store.c.

Macro Definition Documentation

◆ ASSERT_CHUNKLIST_DATA

#define ASSERT_CHUNKLIST_DATA (   chunk_list,
  data 
)    (EXPR_NOP(chunk_list), EXPR_NOP(data))

Definition at line 408 of file array_store.c.

◆ ASSERT_CHUNKLIST_SIZE

#define ASSERT_CHUNKLIST_SIZE (   chunk_list,
 
)    (EXPR_NOP(chunk_list), EXPR_NOP(n))

Definition at line 390 of file array_store.c.

◆ BCHUNK_HASH_TABLE_ACCUMULATE_STEPS

#define BCHUNK_HASH_TABLE_ACCUMULATE_STEPS   4

Definition at line 143 of file array_store.c.

◆ BCHUNK_HASH_TABLE_MUL

#define BCHUNK_HASH_TABLE_MUL   3

Definition at line 160 of file array_store.c.

◆ BCHUNK_SIZE_MAX_MUL

#define BCHUNK_SIZE_MAX_MUL   2

Definition at line 184 of file array_store.c.

◆ BCHUNK_SIZE_MIN_DIV

#define BCHUNK_SIZE_MIN_DIV   8

Definition at line 177 of file array_store.c.

◆ data_len_original

#define data_len_original   invalid_usage

◆ GHASH_PTR_ADD_USER

#define GHASH_PTR_ADD_USER (   gh,
  pt 
)
Value:
{ \
void **val; \
if (BLI_ghash_ensure_p((gh), (pt), &val)) { \
*((int *)val) += 1; \
} \
else { \
*((int *)val) = 1; \
} \
} \
((void)0)
bool BLI_ghash_ensure_p(GHash *gh, void *key, void ***r_val) ATTR_WARN_UNUSED_RESULT
Definition: BLI_ghash.c:755
SyclQueue void void size_t num_bytes void

◆ HASH_INIT

#define HASH_INIT   (5381)

Definition at line 735 of file array_store.c.

◆ HASH_TABLE_KEY_FALLBACK

#define HASH_TABLE_KEY_FALLBACK   ((uint64_t)-2)

Definition at line 155 of file array_store.c.

◆ HASH_TABLE_KEY_UNSET

#define HASH_TABLE_KEY_UNSET   ((uint64_t)-1)

Definition at line 154 of file array_store.c.

◆ USE_ALIGN_CHUNKS_TEST

#define USE_ALIGN_CHUNKS_TEST

Definition at line 131 of file array_store.c.

◆ USE_FASTPATH_CHUNKS_FIRST

#define USE_FASTPATH_CHUNKS_FIRST

Definition at line 113 of file array_store.c.

◆ USE_FASTPATH_CHUNKS_LAST

#define USE_FASTPATH_CHUNKS_LAST

Definition at line 123 of file array_store.c.

◆ USE_HASH_TABLE_ACCUMULATE

#define USE_HASH_TABLE_ACCUMULATE

Definition at line 136 of file array_store.c.

◆ USE_HASH_TABLE_KEY_CACHE

#define USE_HASH_TABLE_KEY_CACHE

Definition at line 152 of file array_store.c.

◆ USE_MERGE_CHUNKS

#define USE_MERGE_CHUNKS

Definition at line 172 of file array_store.c.

Typedef Documentation

◆ BArrayInfo

typedef struct BArrayInfo BArrayInfo

◆ BArrayMemory

typedef struct BArrayMemory BArrayMemory

◆ BChunk

typedef struct BChunk BChunk

◆ BChunkList

typedef struct BChunkList BChunkList

◆ BChunkRef

typedef struct BChunkRef BChunkRef

Links to store BChunk data in BChunkList.chunk_refs.

◆ BTableRef

typedef struct BTableRef BTableRef

Single linked list used when putting chunks into a temporary table, used for lookups.

Point to the BChunkRef, not the BChunk, to allow talking down the chunks in-order until a mismatch is found, this avoids having to do so many table lookups.

◆ hash_key

typedef uint64_t hash_key

Definition at line 200 of file array_store.c.

Function Documentation

◆ array_store_free_data()

static void array_store_free_data ( BArrayStore bs)
static

◆ bchunk_data_compare()

static bool bchunk_data_compare ( const BChunk chunk,
const uchar data_base,
const size_t  data_base_len,
const size_t  offset 
)
static

Definition at line 339 of file array_store.c.

References BChunk::data, BChunk::data_len, and offset.

Referenced by bchunk_list_from_data_merge(), and table_lookup().

◆ bchunk_decref()

static void bchunk_decref ( BArrayMemory bs_mem,
BChunk chunk 
)
static

◆ bchunk_list_append()

static void bchunk_list_append ( const BArrayInfo info,
BArrayMemory bs_mem,
BChunkList chunk_list,
BChunk chunk 
)
static

◆ bchunk_list_append_data()

static void bchunk_list_append_data ( const BArrayInfo info,
BArrayMemory bs_mem,
BChunkList chunk_list,
const uchar data,
const size_t  data_len 
)
static

◆ bchunk_list_append_data_n()

static void bchunk_list_append_data_n ( const BArrayInfo info,
BArrayMemory bs_mem,
BChunkList chunk_list,
const uchar data,
size_t  data_len 
)
static

Similar to bchunk_list_append_data, but handle multiple chunks. Use for adding arrays of arbitrary sized memory at once.

Note
This function takes care not to perform redundant chunk-merging checks, so we can write successive fixed size chunks quickly.

Definition at line 618 of file array_store.c.

References bchunk_list_append_data(), bchunk_list_append_only(), bchunk_list_calc_trim_len(), bchunk_new_copydata(), BLI_assert, BArrayInfo::chunk_byte_size, BArrayInfo::chunk_byte_size_min, BChunkList::chunk_refs, data, and ListBase::last.

Referenced by bchunk_list_from_data_merge().

◆ bchunk_list_append_only()

static void bchunk_list_append_only ( BArrayMemory bs_mem,
BChunkList chunk_list,
BChunk chunk 
)
static

◆ bchunk_list_calc_trim_len()

static void bchunk_list_calc_trim_len ( const BArrayInfo info,
const size_t  data_len,
size_t *  r_data_trim_len,
size_t *  r_data_last_chunk_len 
)
static

Split length into 2 values

Parameters
r_data_trim_lenLength which is aligned to the BArrayInfo.chunk_byte_size
r_data_last_chunk_lenThe remaining bytes.
Note
This function ensures the size of r_data_last_chunk_len is larger than BArrayInfo.chunk_byte_size_min.

Definition at line 506 of file array_store.c.

References BLI_assert, and BArrayInfo::chunk_byte_size.

Referenced by bchunk_list_append_data_n(), and bchunk_list_fill_from_array().

◆ bchunk_list_decref()

static void bchunk_list_decref ( BArrayMemory bs_mem,
BChunkList chunk_list 
)
static

◆ bchunk_list_ensure_min_size_last()

static void bchunk_list_ensure_min_size_last ( const BArrayInfo info,
BArrayMemory bs_mem,
BChunkList chunk_list 
)
static

◆ bchunk_list_fill_from_array()

static void bchunk_list_fill_from_array ( const BArrayInfo info,
BArrayMemory bs_mem,
BChunkList chunk_list,
const uchar data,
const size_t  data_len 
)
static

◆ bchunk_list_from_data_merge()

static BChunkList* bchunk_list_from_data_merge ( const BArrayInfo info,
BArrayMemory bs_mem,
const uchar data,
const size_t  data_len_original,
const BChunkList chunk_list_reference 
)
static

◆ bchunk_list_new()

static BChunkList* bchunk_list_new ( BArrayMemory bs_mem,
size_t  total_size 
)
static

◆ bchunk_list_size()

static size_t bchunk_list_size ( const BChunkList chunk_list)
static

◆ bchunk_new()

static BChunk* bchunk_new ( BArrayMemory bs_mem,
const uchar data,
const size_t  data_len 
)
static

◆ bchunk_new_copydata()

static BChunk* bchunk_new_copydata ( BArrayMemory bs_mem,
const uchar data,
const size_t  data_len 
)
static

◆ BLI_array_store_calc_size_compacted_get()

size_t BLI_array_store_calc_size_compacted_get ( const BArrayStore bs)

◆ BLI_array_store_calc_size_expanded_get()

size_t BLI_array_store_calc_size_expanded_get ( const BArrayStore bs)

Find the memory used by all states (expanded & real).

Returns
the total amount of memory that would be used by getting the arrays for all states.

Definition at line 1469 of file array_store.c.

References LISTBASE_FOREACH, state, and BArrayStore::states.

Referenced by BLI_array_store_at_size_calc_memory_usage(), and TEST().

◆ BLI_array_store_clear()

void BLI_array_store_clear ( BArrayStore bs)

◆ BLI_array_store_create()

BArrayStore* BLI_array_store_create ( unsigned int  stride,
unsigned int  chunk_count 
)

Create a new array store, which can store any number of arrays as long as their stride matches.

Parameters
stridesizeof() each element,
Note
while a stride of 1 will always work, its less efficient since duplicate chunks of memory will be searched at positions unaligned with the array data.
Parameters
chunk_countNumber of elements to split each chunk into.
  • A small value increases the ability to de-duplicate chunks, but adds overhead by increasing the number of chunks to look up when searching for duplicates, as well as some overhead constructing the original array again, with more calls to memcpy.
  • Larger values reduce the book keeping overhead, but increase the chance a small, isolated change will cause a larger amount of data to be duplicated.
Returns
A new array store, to be freed with BLI_array_store_destroy.

Definition at line 1388 of file array_store.c.

References BArrayInfo::accum_read_ahead_bytes, BArrayInfo::accum_read_ahead_len, BArrayInfo::accum_steps, BCHUNK_HASH_TABLE_ACCUMULATE_STEPS, BCHUNK_SIZE_MAX_MUL, BCHUNK_SIZE_MIN_DIV, BLI_MEMPOOL_ALLOW_ITER, BLI_mempool_create(), BLI_MEMPOOL_NOP, BArrayMemory::chunk, BArrayInfo::chunk_byte_size, BArrayInfo::chunk_byte_size_max, BArrayInfo::chunk_byte_size_min, BArrayMemory::chunk_list, BArrayMemory::chunk_ref, BArrayInfo::chunk_stride, BArrayStore::info, MAX2, MEM_callocN, BArrayStore::memory, and stride.

Referenced by BLI_array_store_at_size_ensure(), random_chunk_mutate_helper(), TEST(), testbuffer_run_tests_simple(), and text_undosys_step_encode_to_state().

◆ BLI_array_store_destroy()

void BLI_array_store_destroy ( BArrayStore bs)

◆ BLI_array_store_is_valid()

bool BLI_array_store_is_valid ( BArrayStore bs)

◆ BLI_array_store_state_add()

BArrayState* BLI_array_store_state_add ( BArrayStore bs,
const void data,
size_t  data_len,
const BArrayState state_reference 
)
Parameters
dataData used to create
state_referenceThe state to use as a reference when adding the new state, typically this is the previous state, however it can be any previously created state from this bs.
Returns
The new state, which is used by the caller as a handle to get back the contents of data. This may be removed using BLI_array_store_state_remove, otherwise it will be removed with BLI_array_store_destroy.

Definition at line 1497 of file array_store.c.

References bchunk_list_fill_from_array(), bchunk_list_from_data_merge(), bchunk_list_new(), BLI_addtail(), BLI_array_store_state_data_get_alloc(), BLI_assert, BLI_findindex(), BArrayState::chunk_list, BArrayInfo::chunk_stride, data, BArrayStore::info, MEM_callocN, MEM_freeN, BArrayStore::memory, state, BArrayStore::states, and BChunkList::users.

Referenced by TEST(), testbuffer_list_store_populate(), text_state_encode(), um_arraystore_cd_compact(), and um_arraystore_compact_ex().

◆ BLI_array_store_state_data_get()

void BLI_array_store_state_data_get ( BArrayState state,
void data 
)

Fill in existing allocated memory with the contents of state.

Definition at line 1562 of file array_store.c.

References BLI_assert, BTableRef::cref, BChunk::data, data, BChunk::data_len, BChunkRef::link, LISTBASE_FOREACH, state, and BChunk::users.

Referenced by BLI_array_store_state_data_get_alloc().

◆ BLI_array_store_state_data_get_alloc()

void* BLI_array_store_state_data_get_alloc ( BArrayState state,
size_t *  r_data_len 
)

◆ BLI_array_store_state_remove()

void BLI_array_store_state_remove ( BArrayStore bs,
BArrayState state 
)

Remove a state and free any unused BChunk data.

The states can be freed in any order.

Definition at line 1545 of file array_store.c.

References bchunk_list_decref(), BLI_assert, BLI_findindex(), BLI_remlink(), MEM_freeN, BArrayStore::memory, state, and BArrayStore::states.

Referenced by TEST(), testbuffer_list_store_clear(), text_undosys_step_free(), um_arraystore_cd_free(), and um_arraystore_free().

◆ BLI_array_store_state_size_get()

size_t BLI_array_store_state_size_get ( BArrayState state)
Returns
the expanded size of the array, use this to know how much memory to allocate BLI_array_store_state_data_get's argument.

Definition at line 1557 of file array_store.c.

References state.

Referenced by TEST().

◆ hash_accum()

static void hash_accum ( hash_key hash_array,
const size_t  hash_array_len,
size_t  iter_steps 
)
static

Definition at line 805 of file array_store.c.

References UNLIKELY.

Referenced by bchunk_list_from_data_merge().

◆ hash_accum_single()

static void hash_accum_single ( hash_key hash_array,
const size_t  hash_array_len,
size_t  iter_steps 
)
static

When we only need a single value, can use a small optimization. we can avoid accumulating the tail of the array a little, each iteration.

Definition at line 826 of file array_store.c.

References BLI_assert, and UNLIKELY.

Referenced by key_from_chunk_ref().

◆ hash_array_from_cref()

static void hash_array_from_cref ( const BArrayInfo info,
const BChunkRef cref,
const size_t  data_len,
hash_key hash_array 
)
static

◆ hash_array_from_data()

static void hash_array_from_data ( const BArrayInfo info,
const uchar data_slice,
const size_t  data_slice_len,
hash_key hash_array 
)
static

◆ hash_data()

static uint hash_data ( const uchar key,
size_t  n 
)
static

Definition at line 743 of file array_store.c.

References HASH_INIT.

Referenced by hash_array_from_data().

◆ hash_data_single()

BLI_INLINE uint hash_data_single ( const uchar  p)

Definition at line 737 of file array_store.c.

References HASH_INIT.

Referenced by hash_array_from_data().

◆ key_from_chunk_ref()

static hash_key key_from_chunk_ref ( const BArrayInfo info,
const BChunkRef cref,
hash_key hash_store,
const size_t  hash_store_len 
)
static

◆ table_lookup()

static const BChunkRef* table_lookup ( const BArrayInfo info,
BTableRef **  table,
const size_t  table_len,
const size_t  i_table_start,
const uchar data,
const size_t  data_len,
const size_t  offset,
const hash_key table_hash_array 
)
static