Blender  V3.3
BLI_ghash_performance_test.cc
Go to the documentation of this file.
1 /* SPDX-License-Identifier: Apache-2.0 */
2 
4 #include "testing/testing.h"
5 
6 #define GHASH_INTERNAL_API
7 
8 #include "MEM_guardedalloc.h"
9 
10 #include "BLI_ghash.h"
11 #include "BLI_rand.h"
12 #include "BLI_string.h"
13 #include "BLI_utildefines.h"
14 #include "PIL_time_utildefines.h"
15 
16 /* Using http://corpora.uni-leipzig.de/downloads/eng_wikipedia_2010_1M-text.tar.gz
17  * (1 million of words, about 122MB of text) from
18  * http://corpora.informatik.uni-leipzig.de/download.html */
19 #if 0
20 # define TEXT_CORPUS_PATH \
21  "/path/to/TĂ©lĂ©chargements/eng_wikipedia_2010_1M-text/eng_wikipedia_2010_1M-sentences.txt"
22 #endif
23 
24 /* Resizing the hash has a huge cost over global filling operation! */
25 //#define GHASH_RESERVE
26 
27 /* Run the longest tests! */
28 //#define GHASH_RUN_BIG
29 
30 /* Size of 'small case' ghash (number of entries). */
31 #define TESTCASE_SIZE_SMALL 17
32 
33 #define PRINTF_GHASH_STATS(_gh) \
34  { \
35  double q, lf, var, pempty, poverloaded; \
36  int bigb; \
37  q = BLI_ghash_calc_quality_ex((_gh), &lf, &var, &pempty, &poverloaded, &bigb); \
38  printf( \
39  "GHash stats (%u entries):\n\t" \
40  "Quality (the lower the better): %f\n\tVariance (the lower the better): %f\n\tLoad: " \
41  "%f\n\t" \
42  "Empty buckets: %.2f%%\n\tOverloaded buckets: %.2f%% (biggest bucket: %d)\n", \
43  BLI_ghash_len(_gh), \
44  q, \
45  var, \
46  lf, \
47  pempty * 100.0, \
48  poverloaded * 100.0, \
49  bigb); \
50  } \
51  void(0)
52 
53 /* Str: whole text, lines and words from a 'corpus' text. */
54 
55 static void str_ghash_tests(GHash *ghash, const char *id)
56 {
57  printf("\n========== STARTING %s ==========\n", id);
58 
59 #ifdef TEXT_CORPUS_PATH
60  size_t data_size = 0;
61  char *data;
62  {
63  struct stat st;
64  if (stat(TEXT_CORPUS_PATH, &st) == 0)
65  data_size = st.st_size;
66  }
67  if (data_size != 0) {
68  FILE *f = fopen(TEXT_CORPUS_PATH, "r");
69 
70  data = (char *)MEM_mallocN(data_size + 1, __func__);
71  if (fread(data, sizeof(*data), data_size, f) != data_size) {
72  printf("ERROR in reading file %s!", TEXT_CORPUS_PATH);
73  MEM_freeN(data);
75  }
76  data[data_size] = '\0';
77  fclose(f);
78  }
79  else {
81  }
82 #else
83  char *data = BLI_strdup(words10k);
84 #endif
85  char *data_p = BLI_strdup(data);
86  char *data_w = BLI_strdup(data);
87  char *data_bis = BLI_strdup(data);
88 
89  {
90  char *p, *w, *c_p, *c_w;
91 
92  TIMEIT_START(string_insert);
93 
94 #ifdef GHASH_RESERVE
95  BLI_ghash_reserve(ghash, strlen(data) / 32); /* rough estimation... */
96 #endif
97 
99 
100  for (p = c_p = data_p, w = c_w = data_w; *c_w; c_w++, c_p++) {
101  if (*c_p == '.') {
102  *c_p = *c_w = '\0';
103  if (!BLI_ghash_haskey(ghash, p)) {
104  BLI_ghash_insert(ghash, p, POINTER_FROM_INT(p[0]));
105  }
106  if (!BLI_ghash_haskey(ghash, w)) {
107  BLI_ghash_insert(ghash, w, POINTER_FROM_INT(w[0]));
108  }
109  p = c_p + 1;
110  w = c_w + 1;
111  }
112  else if (*c_w == ' ') {
113  *c_w = '\0';
114  if (!BLI_ghash_haskey(ghash, w)) {
115  BLI_ghash_insert(ghash, w, POINTER_FROM_INT(w[0]));
116  }
117  w = c_w + 1;
118  }
119  }
120 
121  TIMEIT_END(string_insert);
122  }
123 
124  PRINTF_GHASH_STATS(ghash);
125 
126  {
127  char *p, *w, *c;
128  void *v;
129 
130  TIMEIT_START(string_lookup);
131 
132  v = BLI_ghash_lookup(ghash, data_bis);
133  EXPECT_EQ(POINTER_AS_INT(v), data_bis[0]);
134 
135  for (p = w = c = data_bis; *c; c++) {
136  if (*c == '.') {
137  *c = '\0';
138  v = BLI_ghash_lookup(ghash, w);
139  EXPECT_EQ(POINTER_AS_INT(v), w[0]);
140  v = BLI_ghash_lookup(ghash, p);
141  EXPECT_EQ(POINTER_AS_INT(v), p[0]);
142  p = w = c + 1;
143  }
144  else if (*c == ' ') {
145  *c = '\0';
146  v = BLI_ghash_lookup(ghash, w);
147  EXPECT_EQ(POINTER_AS_INT(v), w[0]);
148  w = c + 1;
149  }
150  }
151 
152  TIMEIT_END(string_lookup);
153  }
154 
155  BLI_ghash_free(ghash, nullptr, nullptr);
156  MEM_freeN(data);
157  MEM_freeN(data_p);
158  MEM_freeN(data_w);
159  MEM_freeN(data_bis);
160 
161  printf("========== ENDED %s ==========\n\n", id);
162 }
163 
164 TEST(ghash, TextGHash)
165 {
167 
168  str_ghash_tests(ghash, "StrGHash - GHash");
169 }
170 
171 TEST(ghash, TextMurmur2a)
172 {
174 
175  str_ghash_tests(ghash, "StrGHash - Murmur");
176 }
177 
178 /* Int: uniform 100M first integers. */
179 
180 static void int_ghash_tests(GHash *ghash, const char *id, const unsigned int count)
181 {
182  printf("\n========== STARTING %s ==========\n", id);
183 
184  {
185  unsigned int i = count;
186 
187  TIMEIT_START(int_insert);
188 
189 #ifdef GHASH_RESERVE
190  BLI_ghash_reserve(ghash, count);
191 #endif
192 
193  while (i--) {
195  }
196 
197  TIMEIT_END(int_insert);
198  }
199 
200  PRINTF_GHASH_STATS(ghash);
201 
202  {
203  unsigned int i = count;
204 
205  TIMEIT_START(int_lookup);
206 
207  while (i--) {
208  void *v = BLI_ghash_lookup(ghash, POINTER_FROM_UINT(i));
210  }
211 
212  TIMEIT_END(int_lookup);
213  }
214 
215  {
216  void *k, *v;
217 
218  TIMEIT_START(int_pop);
219 
220  GHashIterState pop_state = {0};
221 
222  while (BLI_ghash_pop(ghash, &pop_state, &k, &v)) {
223  EXPECT_EQ(k, v);
224  }
225 
226  TIMEIT_END(int_pop);
227  }
228  EXPECT_EQ(BLI_ghash_len(ghash), 0);
229 
230  BLI_ghash_free(ghash, nullptr, nullptr);
231 
232  printf("========== ENDED %s ==========\n\n", id);
233 }
234 
235 TEST(ghash, IntGHash12000)
236 {
238 
239  int_ghash_tests(ghash, "IntGHash - GHash - 12000", 12000);
240 }
241 
242 #ifdef GHASH_RUN_BIG
243 TEST(ghash, IntGHash100000000)
244 {
246 
247  int_ghash_tests(ghash, "IntGHash - GHash - 100000000", 100000000);
248 }
249 #endif
250 
251 TEST(ghash, IntMurmur2a12000)
252 {
254 
255  int_ghash_tests(ghash, "IntGHash - Murmur - 12000", 12000);
256 }
257 
258 #ifdef GHASH_RUN_BIG
259 TEST(ghash, IntMurmur2a100000000)
260 {
262 
263  int_ghash_tests(ghash, "IntGHash - Murmur - 100000000", 100000000);
264 }
265 #endif
266 
267 /* Int: random 50M integers. */
268 
269 static void randint_ghash_tests(GHash *ghash, const char *id, const unsigned int count)
270 {
271  printf("\n========== STARTING %s ==========\n", id);
272 
273  unsigned int *data = (unsigned int *)MEM_mallocN(sizeof(*data) * (size_t)count, __func__);
274  unsigned int *dt;
275  unsigned int i;
276 
277  {
278  RNG *rng = BLI_rng_new(1);
279  for (i = count, dt = data; i--; dt++) {
280  *dt = BLI_rng_get_uint(rng);
281  }
282  BLI_rng_free(rng);
283  }
284 
285  {
286  TIMEIT_START(int_insert);
287 
288 #ifdef GHASH_RESERVE
289  BLI_ghash_reserve(ghash, count);
290 #endif
291 
292  for (i = count, dt = data; i--; dt++) {
294  }
295 
296  TIMEIT_END(int_insert);
297  }
298 
299  PRINTF_GHASH_STATS(ghash);
300 
301  {
302  TIMEIT_START(int_lookup);
303 
304  for (i = count, dt = data; i--; dt++) {
305  void *v = BLI_ghash_lookup(ghash, POINTER_FROM_UINT(*dt));
306  EXPECT_EQ(POINTER_AS_UINT(v), *dt);
307  }
308 
309  TIMEIT_END(int_lookup);
310  }
311 
312  BLI_ghash_free(ghash, nullptr, nullptr);
313  MEM_freeN(data);
314 
315  printf("========== ENDED %s ==========\n\n", id);
316 }
317 
318 TEST(ghash, IntRandGHash12000)
319 {
321 
322  randint_ghash_tests(ghash, "RandIntGHash - GHash - 12000", 12000);
323 }
324 
325 #ifdef GHASH_RUN_BIG
326 TEST(ghash, IntRandGHash50000000)
327 {
329 
330  randint_ghash_tests(ghash, "RandIntGHash - GHash - 50000000", 50000000);
331 }
332 #endif
333 
334 TEST(ghash, IntRandMurmur2a12000)
335 {
337 
338  randint_ghash_tests(ghash, "RandIntGHash - Murmur - 12000", 12000);
339 }
340 
341 #ifdef GHASH_RUN_BIG
342 TEST(ghash, IntRandMurmur2a50000000)
343 {
345 
346  randint_ghash_tests(ghash, "RandIntGHash - Murmur - 50000000", 50000000);
347 }
348 #endif
349 
350 static unsigned int ghashutil_tests_nohash_p(const void *p)
351 {
352  return POINTER_AS_UINT(p);
353 }
354 
355 static bool ghashutil_tests_cmp_p(const void *a, const void *b)
356 {
357  return a != b;
358 }
359 
360 TEST(ghash, Int4NoHash12000)
361 {
363 
364  randint_ghash_tests(ghash, "RandIntGHash - No Hash - 12000", 12000);
365 }
366 
367 #ifdef GHASH_RUN_BIG
368 TEST(ghash, Int4NoHash50000000)
369 {
371 
372  randint_ghash_tests(ghash, "RandIntGHash - No Hash - 50000000", 50000000);
373 }
374 #endif
375 
376 /* Int_v4: 20M of randomly-generated integer vectors. */
377 
378 static void int4_ghash_tests(GHash *ghash, const char *id, const unsigned int count)
379 {
380  printf("\n========== STARTING %s ==========\n", id);
381 
382  void *data_v = MEM_mallocN(sizeof(unsigned int[4]) * (size_t)count, __func__);
383  unsigned int(*data)[4] = (unsigned int(*)[4])data_v;
384  unsigned int(*dt)[4];
385  unsigned int i, j;
386 
387  {
388  RNG *rng = BLI_rng_new(1);
389  for (i = count, dt = data; i--; dt++) {
390  for (j = 4; j--;) {
391  (*dt)[j] = BLI_rng_get_uint(rng);
392  }
393  }
394  BLI_rng_free(rng);
395  }
396 
397  {
398  TIMEIT_START(int_v4_insert);
399 
400 #ifdef GHASH_RESERVE
401  BLI_ghash_reserve(ghash, count);
402 #endif
403 
404  for (i = count, dt = data; i--; dt++) {
405  BLI_ghash_insert(ghash, *dt, POINTER_FROM_UINT(i));
406  }
407 
408  TIMEIT_END(int_v4_insert);
409  }
410 
411  PRINTF_GHASH_STATS(ghash);
412 
413  {
414  TIMEIT_START(int_v4_lookup);
415 
416  for (i = count, dt = data; i--; dt++) {
417  void *v = BLI_ghash_lookup(ghash, (void *)(*dt));
419  }
420 
421  TIMEIT_END(int_v4_lookup);
422  }
423 
424  BLI_ghash_free(ghash, nullptr, nullptr);
425  MEM_freeN(data);
426 
427  printf("========== ENDED %s ==========\n\n", id);
428 }
429 
430 TEST(ghash, Int4GHash2000)
431 {
432  GHash *ghash = BLI_ghash_new(
434 
435  int4_ghash_tests(ghash, "Int4GHash - GHash - 2000", 2000);
436 }
437 
438 #ifdef GHASH_RUN_BIG
439 TEST(ghash, Int4GHash20000000)
440 {
441  GHash *ghash = BLI_ghash_new(
443 
444  int4_ghash_tests(ghash, "Int4GHash - GHash - 20000000", 20000000);
445 }
446 #endif
447 
448 TEST(ghash, Int4Murmur2a2000)
449 {
450  GHash *ghash = BLI_ghash_new(
452 
453  int4_ghash_tests(ghash, "Int4GHash - Murmur - 2000", 2000);
454 }
455 
456 #ifdef GHASH_RUN_BIG
457 TEST(ghash, Int4Murmur2a20000000)
458 {
459  GHash *ghash = BLI_ghash_new(
461 
462  int4_ghash_tests(ghash, "Int4GHash - Murmur - 20000000", 20000000);
463 }
464 #endif
465 
466 /* GHash inthash_v2 tests */
467 TEST(ghash, Int2NoHash12000)
468 {
470 
471  randint_ghash_tests(ghash, "RandIntGHash - No Hash - 12000", 12000);
472 }
473 
474 #ifdef GHASH_RUN_BIG
475 TEST(ghash, Int2NoHash50000000)
476 {
478 
479  randint_ghash_tests(ghash, "RandIntGHash - No Hash - 50000000", 50000000);
480 }
481 #endif
482 
483 /* MultiSmall: create and manipulate a lot of very small ghash's
484  * (90% < 10 items, 9% < 100 items, 1% < 1000 items). */
485 
486 static void multi_small_ghash_tests_one(GHash *ghash, RNG *rng, const unsigned int count)
487 {
488  unsigned int *data = (unsigned int *)MEM_mallocN(sizeof(*data) * (size_t)count, __func__);
489  unsigned int *dt;
490  unsigned int i;
491 
492  for (i = count, dt = data; i--; dt++) {
493  *dt = BLI_rng_get_uint(rng);
494  }
495 
496 #ifdef GHASH_RESERVE
497  BLI_ghash_reserve(ghash, count);
498 #endif
499 
500  for (i = count, dt = data; i--; dt++) {
502  }
503 
504  for (i = count, dt = data; i--; dt++) {
505  void *v = BLI_ghash_lookup(ghash, POINTER_FROM_UINT(*dt));
506  EXPECT_EQ(POINTER_AS_UINT(v), *dt);
507  }
508 
509  BLI_ghash_clear(ghash, nullptr, nullptr);
510  MEM_freeN(data);
511 }
512 
513 static void multi_small_ghash_tests(GHash *ghash, const char *id, const unsigned int count)
514 {
515  printf("\n========== STARTING %s ==========\n", id);
516 
517  RNG *rng = BLI_rng_new(1);
518 
519  TIMEIT_START(multi_small_ghash);
520 
521  unsigned int i = count;
522  while (i--) {
523  const int count = 1 + (BLI_rng_get_int(rng) % TESTCASE_SIZE_SMALL) *
524  (!(i % 100) ? 100 : (!(i % 10) ? 10 : 1));
525  multi_small_ghash_tests_one(ghash, rng, count);
526  }
527 
528  TIMEIT_END(multi_small_ghash);
529 
530  TIMEIT_START(multi_small2_ghash);
531 
532  unsigned int i = count;
533  while (i--) {
534  const int count = 1 + (BLI_rng_get_int(rng) % TESTCASE_SIZE_SMALL) / 2 *
535  (!(i % 100) ? 100 : (!(i % 10) ? 10 : 1));
536  multi_small_ghash_tests_one(ghash, rng, count);
537  }
538 
539  TIMEIT_END(multi_small2_ghash);
540 
541  BLI_ghash_free(ghash, nullptr, nullptr);
542  BLI_rng_free(rng);
543 
544  printf("========== ENDED %s ==========\n\n", id);
545 }
546 
547 TEST(ghash, MultiRandIntGHash2000)
548 {
550 
551  multi_small_ghash_tests(ghash, "MultiSmall RandIntGHash - GHash - 2000", 2000);
552 }
553 
554 TEST(ghash, MultiRandIntGHash200000)
555 {
557 
558  multi_small_ghash_tests(ghash, "MultiSmall RandIntGHash - GHash - 200000", 200000);
559 }
560 
561 TEST(ghash, MultiRandIntMurmur2a2000)
562 {
564 
565  multi_small_ghash_tests(ghash, "MultiSmall RandIntGHash - Murmur2a - 2000", 2000);
566 }
567 
568 TEST(ghash, MultiRandIntMurmur2a200000)
569 {
571 
572  multi_small_ghash_tests(ghash, "MultiSmall RandIntGHash - Murmur2a - 200000", 200000);
573 }
EXPECT_EQ(BLI_expr_pylike_eval(expr, nullptr, 0, &result), EXPR_PYLIKE_INVALID)
bool BLI_ghashutil_strcmp(const void *a, const void *b)
unsigned int BLI_ghashutil_inthash_p_murmur(const void *ptr)
#define BLI_ghashutil_uinthash_v4_p_murmur
Definition: BLI_ghash.h:599
bool BLI_ghash_haskey(const GHash *gh, const void *key) ATTR_WARN_UNUSED_RESULT
Definition: BLI_ghash.c:822
bool BLI_ghash_pop(GHash *gh, GHashIterState *state, void **r_key, void **r_val) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL()
Definition: BLI_ghash.c:827
void BLI_ghash_clear(GHash *gh, GHashKeyFreeFP keyfreefp, GHashValFreeFP valfreefp)
Definition: BLI_ghash.c:858
unsigned int BLI_ghashutil_strhash_p(const void *ptr)
GHash * BLI_ghash_new(GHashHashFP hashfp, GHashCmpFP cmpfp, const char *info) ATTR_MALLOC ATTR_WARN_UNUSED_RESULT
Definition: BLI_ghash.c:689
#define BLI_ghashutil_uinthash_v4_p
Definition: BLI_ghash.h:593
void BLI_ghash_reserve(GHash *gh, unsigned int nentries_reserve)
Definition: BLI_ghash.c:699
bool BLI_ghashutil_intcmp(const void *a, const void *b)
void * BLI_ghash_lookup(const GHash *gh, const void *key) ATTR_WARN_UNUSED_RESULT
Definition: BLI_ghash.c:734
unsigned int BLI_ghash_len(const GHash *gh) ATTR_WARN_UNUSED_RESULT
Definition: BLI_ghash.c:705
unsigned int BLI_ghashutil_inthash_p(const void *ptr)
unsigned int BLI_ghashutil_strhash_p_murmur(const void *ptr)
void BLI_ghash_insert(GHash *gh, void *key, void *val)
Definition: BLI_ghash.c:710
void BLI_ghash_free(GHash *gh, GHashKeyFreeFP keyfreefp, GHashValFreeFP valfreefp)
Definition: BLI_ghash.c:863
bool BLI_ghashutil_uinthash_v4_cmp(const void *a, const void *b)
static void randint_ghash_tests(GHash *ghash, const char *id, const unsigned int count)
TEST(ghash, TextGHash)
static void int_ghash_tests(GHash *ghash, const char *id, const unsigned int count)
static bool ghashutil_tests_cmp_p(const void *a, const void *b)
static void int4_ghash_tests(GHash *ghash, const char *id, const unsigned int count)
static unsigned int ghashutil_tests_nohash_p(const void *p)
#define PRINTF_GHASH_STATS(_gh)
#define TESTCASE_SIZE_SMALL
static void str_ghash_tests(GHash *ghash, const char *id)
static void multi_small_ghash_tests_one(GHash *ghash, RNG *rng, const unsigned int count)
static void multi_small_ghash_tests(GHash *ghash, const char *id, const unsigned int count)
Random number functions.
void int BLI_rng_get_int(struct RNG *rng) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL(1)
Definition: rand.cc:78
void BLI_rng_free(struct RNG *rng) ATTR_NONNULL(1)
Definition: rand.cc:58
unsigned int BLI_rng_get_uint(struct RNG *rng) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL(1)
Definition: rand.cc:83
struct RNG * BLI_rng_new(unsigned int seed)
Definition: rand.cc:39
const char words10k[]
char * BLI_strdup(const char *str) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL() ATTR_MALLOC
Definition: string.c:42
#define POINTER_FROM_INT(i)
#define POINTER_AS_UINT(i)
#define POINTER_AS_INT(i)
#define POINTER_FROM_UINT(i)
Read Guarded memory(de)allocation.
Utility defines for timing/benchmarks.
#define TIMEIT_START(var)
#define TIMEIT_END(var)
ATTR_WARN_UNUSED_RESULT const BMVert * v
SIMD_FORCE_INLINE const btScalar & w() const
Return the w value.
Definition: btQuadWord.h:119
int count
void(* MEM_freeN)(void *vmemh)
Definition: mallocn.c:27
void *(* MEM_mallocN)(size_t len, const char *str)
Definition: mallocn.c:33
static unsigned c
Definition: RandGen.cpp:83
static unsigned a[3]
Definition: RandGen.cpp:78
static const pxr::TfToken st("st", pxr::TfToken::Immortal)
static const pxr::TfToken b("b", pxr::TfToken::Immortal)
Definition: rand.cc:33