Ruby  3.1.4p223 (2023-03-30 revision HEAD)
st.c
1 /* This is a public domain general purpose hash table package
2  originally written by Peter Moore @ UCB.
3 
4  The hash table data structures were redesigned and the package was
5  rewritten by Vladimir Makarov <vmakarov@redhat.com>. */
6 
7 /* The original package implemented classic bucket-based hash tables
8  with entries doubly linked for an access by their insertion order.
9  To decrease pointer chasing and as a consequence to improve a data
10  locality the current implementation is based on storing entries in
11  an array and using hash tables with open addressing. The current
12  entries are more compact in comparison with the original ones and
13  this also improves the data locality.
14 
15  The hash table has two arrays called *bins* and *entries*.
16 
17  bins:
18  -------
19  | | entries array:
20  |-------| --------------------------------
21  | index | | | entry: | | |
22  |-------| | | | | |
23  | ... | | ... | hash | ... | ... |
24  |-------| | | key | | |
25  | empty | | | record | | |
26  |-------| --------------------------------
27  | ... | ^ ^
28  |-------| |_ entries start |_ entries bound
29  |deleted|
30  -------
31 
32  o The entry array contains table entries in the same order as they
33  were inserted.
34 
35  When the first entry is deleted, a variable containing index of
36  the current first entry (*entries start*) is changed. In all
37  other cases of the deletion, we just mark the entry as deleted by
38  using a reserved hash value.
39 
40  Such organization of the entry storage makes operations of the
41  table shift and the entries traversal very fast.
42 
43  o The bins provide access to the entries by their keys. The
44  key hash is mapped to a bin containing *index* of the
45  corresponding entry in the entry array.
46 
47  The bin array size is always power of two, it makes mapping very
48  fast by using the corresponding lower bits of the hash.
49  Generally it is not a good idea to ignore some part of the hash.
50  But alternative approach is worse. For example, we could use a
51  modulo operation for mapping and a prime number for the size of
52  the bin array. Unfortunately, the modulo operation for big
53  64-bit numbers are extremely slow (it takes more than 100 cycles
54  on modern Intel CPUs).
55 
56  Still other bits of the hash value are used when the mapping
57  results in a collision. In this case we use a secondary hash
58  value which is a result of a function of the collision bin
59  index and the original hash value. The function choice
60  guarantees that we can traverse all bins and finally find the
61  corresponding bin as after several iterations the function
62  becomes a full cycle linear congruential generator because it
63  satisfies requirements of the Hull-Dobell theorem.
64 
65  When an entry is removed from the table besides marking the
66  hash in the corresponding entry described above, we also mark
67  the bin by a special value in order to find entries which had
68  a collision with the removed entries.
69 
70  There are two reserved values for the bins. One denotes an
71  empty bin, another one denotes a bin for a deleted entry.
72 
73  o The length of the bin array is at least two times more than the
74  entry array length. This keeps the table load factor healthy.
75  The trigger of rebuilding the table is always a case when we can
76  not insert an entry anymore at the entries bound. We could
77  change the entries bound too in case of deletion but than we need
78  a special code to count bins with corresponding deleted entries
79  and reset the bin values when there are too many bins
80  corresponding deleted entries
81 
82  Table rebuilding is done by creation of a new entry array and
83  bins of an appropriate size. We also try to reuse the arrays
84  in some cases by compacting the array and removing deleted
85  entries.
86 
87  o To save memory very small tables have no allocated arrays
88  bins. We use a linear search for an access by a key.
89 
90  o To save more memory we use 8-, 16-, 32- and 64- bit indexes in
91  bins depending on the current hash table size.
92 
93  o The implementation takes into account that the table can be
94  rebuilt during hashing or comparison functions. It can happen if
95  the functions are implemented in Ruby and a thread switch occurs
96  during their execution.
97 
98  This implementation speeds up the Ruby hash table benchmarks in
99  average by more 40% on Intel Haswell CPU.
100 
101 */
102 
103 #ifdef NOT_RUBY
104 #include "regint.h"
105 #include "st.h"
106 #else
107 #include "internal.h"
108 #include "internal/bits.h"
109 #include "internal/hash.h"
110 #include "internal/sanitizers.h"
111 #endif
112 
113 #include <stdio.h>
114 #ifdef HAVE_STDLIB_H
115 #include <stdlib.h>
116 #endif
117 #include <string.h>
118 #include <assert.h>
119 
120 #ifdef __GNUC__
121 #define PREFETCH(addr, write_p) __builtin_prefetch(addr, write_p)
122 #define EXPECT(expr, val) __builtin_expect(expr, val)
123 #define ATTRIBUTE_UNUSED __attribute__((unused))
124 #else
125 #define PREFETCH(addr, write_p)
126 #define EXPECT(expr, val) (expr)
127 #define ATTRIBUTE_UNUSED
128 #endif
129 
130 /* The type of hashes. */
131 typedef st_index_t st_hash_t;
132 
134  st_hash_t hash;
135  st_data_t key;
136  st_data_t record;
137 };
138 
139 #define type_numhash st_hashtype_num
140 static const struct st_hash_type st_hashtype_num = {
141  st_numcmp,
142  st_numhash,
143 };
144 
145 static int st_strcmp(st_data_t, st_data_t);
146 static st_index_t strhash(st_data_t);
147 static const struct st_hash_type type_strhash = {
148  st_strcmp,
149  strhash,
150 };
151 
152 static int st_locale_insensitive_strcasecmp_i(st_data_t lhs, st_data_t rhs);
153 static st_index_t strcasehash(st_data_t);
154 static const struct st_hash_type type_strcasehash = {
155  st_locale_insensitive_strcasecmp_i,
156  strcasehash,
157 };
158 
159 /* Value used to catch uninitialized entries/bins during debugging.
160  There is a possibility for a false alarm, but its probability is
161  extremely small. */
162 #define ST_INIT_VAL 0xafafafafafafafaf
163 #define ST_INIT_VAL_BYTE 0xafa
164 
165 #ifdef RUBY
166 #undef malloc
167 #undef realloc
168 #undef calloc
169 #undef free
170 #define malloc ruby_xmalloc
171 #define calloc ruby_xcalloc
172 #define realloc ruby_xrealloc
173 #define free ruby_xfree
174 #endif
175 
176 #define EQUAL(tab,x,y) ((x) == (y) || (*(tab)->type->compare)((x),(y)) == 0)
177 #define PTR_EQUAL(tab, ptr, hash_val, key_) \
178  ((ptr)->hash == (hash_val) && EQUAL((tab), (key_), (ptr)->key))
179 
180 /* As PRT_EQUAL only its result is returned in RES. REBUILT_P is set
181  up to TRUE if the table is rebuilt during the comparison. */
182 #define DO_PTR_EQUAL_CHECK(tab, ptr, hash_val, key, res, rebuilt_p) \
183  do { \
184  unsigned int _old_rebuilds_num = (tab)->rebuilds_num; \
185  res = PTR_EQUAL(tab, ptr, hash_val, key); \
186  rebuilt_p = _old_rebuilds_num != (tab)->rebuilds_num; \
187  } while (FALSE)
188 
189 /* Features of a table. */
190 struct st_features {
191  /* Power of 2 used for number of allocated entries. */
192  unsigned char entry_power;
193  /* Power of 2 used for number of allocated bins. Depending on the
194  table size, the number of bins is 2-4 times more than the
195  number of entries. */
196  unsigned char bin_power;
197  /* Enumeration of sizes of bins (8-bit, 16-bit etc). */
198  unsigned char size_ind;
199  /* Bins are packed in words of type st_index_t. The following is
200  a size of bins counted by words. */
201  st_index_t bins_words;
202 };
203 
204 /* Features of all possible size tables. */
205 #if SIZEOF_ST_INDEX_T == 8
206 #define MAX_POWER2 62
207 static const struct st_features features[] = {
208  {0, 1, 0, 0x0},
209  {1, 2, 0, 0x1},
210  {2, 3, 0, 0x1},
211  {3, 4, 0, 0x2},
212  {4, 5, 0, 0x4},
213  {5, 6, 0, 0x8},
214  {6, 7, 0, 0x10},
215  {7, 8, 0, 0x20},
216  {8, 9, 1, 0x80},
217  {9, 10, 1, 0x100},
218  {10, 11, 1, 0x200},
219  {11, 12, 1, 0x400},
220  {12, 13, 1, 0x800},
221  {13, 14, 1, 0x1000},
222  {14, 15, 1, 0x2000},
223  {15, 16, 1, 0x4000},
224  {16, 17, 2, 0x10000},
225  {17, 18, 2, 0x20000},
226  {18, 19, 2, 0x40000},
227  {19, 20, 2, 0x80000},
228  {20, 21, 2, 0x100000},
229  {21, 22, 2, 0x200000},
230  {22, 23, 2, 0x400000},
231  {23, 24, 2, 0x800000},
232  {24, 25, 2, 0x1000000},
233  {25, 26, 2, 0x2000000},
234  {26, 27, 2, 0x4000000},
235  {27, 28, 2, 0x8000000},
236  {28, 29, 2, 0x10000000},
237  {29, 30, 2, 0x20000000},
238  {30, 31, 2, 0x40000000},
239  {31, 32, 2, 0x80000000},
240  {32, 33, 3, 0x200000000},
241  {33, 34, 3, 0x400000000},
242  {34, 35, 3, 0x800000000},
243  {35, 36, 3, 0x1000000000},
244  {36, 37, 3, 0x2000000000},
245  {37, 38, 3, 0x4000000000},
246  {38, 39, 3, 0x8000000000},
247  {39, 40, 3, 0x10000000000},
248  {40, 41, 3, 0x20000000000},
249  {41, 42, 3, 0x40000000000},
250  {42, 43, 3, 0x80000000000},
251  {43, 44, 3, 0x100000000000},
252  {44, 45, 3, 0x200000000000},
253  {45, 46, 3, 0x400000000000},
254  {46, 47, 3, 0x800000000000},
255  {47, 48, 3, 0x1000000000000},
256  {48, 49, 3, 0x2000000000000},
257  {49, 50, 3, 0x4000000000000},
258  {50, 51, 3, 0x8000000000000},
259  {51, 52, 3, 0x10000000000000},
260  {52, 53, 3, 0x20000000000000},
261  {53, 54, 3, 0x40000000000000},
262  {54, 55, 3, 0x80000000000000},
263  {55, 56, 3, 0x100000000000000},
264  {56, 57, 3, 0x200000000000000},
265  {57, 58, 3, 0x400000000000000},
266  {58, 59, 3, 0x800000000000000},
267  {59, 60, 3, 0x1000000000000000},
268  {60, 61, 3, 0x2000000000000000},
269  {61, 62, 3, 0x4000000000000000},
270  {62, 63, 3, 0x8000000000000000},
271 };
272 
273 #else
274 #define MAX_POWER2 30
275 
276 static const struct st_features features[] = {
277  {0, 1, 0, 0x1},
278  {1, 2, 0, 0x1},
279  {2, 3, 0, 0x2},
280  {3, 4, 0, 0x4},
281  {4, 5, 0, 0x8},
282  {5, 6, 0, 0x10},
283  {6, 7, 0, 0x20},
284  {7, 8, 0, 0x40},
285  {8, 9, 1, 0x100},
286  {9, 10, 1, 0x200},
287  {10, 11, 1, 0x400},
288  {11, 12, 1, 0x800},
289  {12, 13, 1, 0x1000},
290  {13, 14, 1, 0x2000},
291  {14, 15, 1, 0x4000},
292  {15, 16, 1, 0x8000},
293  {16, 17, 2, 0x20000},
294  {17, 18, 2, 0x40000},
295  {18, 19, 2, 0x80000},
296  {19, 20, 2, 0x100000},
297  {20, 21, 2, 0x200000},
298  {21, 22, 2, 0x400000},
299  {22, 23, 2, 0x800000},
300  {23, 24, 2, 0x1000000},
301  {24, 25, 2, 0x2000000},
302  {25, 26, 2, 0x4000000},
303  {26, 27, 2, 0x8000000},
304  {27, 28, 2, 0x10000000},
305  {28, 29, 2, 0x20000000},
306  {29, 30, 2, 0x40000000},
307  {30, 31, 2, 0x80000000},
308 };
309 
310 #endif
311 
312 /* The reserved hash value and its substitution. */
313 #define RESERVED_HASH_VAL (~(st_hash_t) 0)
314 #define RESERVED_HASH_SUBSTITUTION_VAL ((st_hash_t) 0)
315 
316 /* Return hash value of KEY for table TAB. */
317 static inline st_hash_t
318 do_hash(st_data_t key, st_table *tab)
319 {
320  st_hash_t hash = (st_hash_t)(tab->type->hash)(key);
321 
322  /* RESERVED_HASH_VAL is used for a deleted entry. Map it into
323  another value. Such mapping should be extremely rare. */
324  return hash == RESERVED_HASH_VAL ? RESERVED_HASH_SUBSTITUTION_VAL : hash;
325 }
326 
327 /* Power of 2 defining the minimal number of allocated entries. */
328 #define MINIMAL_POWER2 2
329 
330 #if MINIMAL_POWER2 < 2
331 #error "MINIMAL_POWER2 should be >= 2"
332 #endif
333 
334 /* If the power2 of the allocated `entries` is less than the following
335  value, don't allocate bins and use a linear search. */
336 #define MAX_POWER2_FOR_TABLES_WITHOUT_BINS 4
337 
338 /* Return smallest n >= MINIMAL_POWER2 such 2^n > SIZE. */
339 static int
340 get_power2(st_index_t size)
341 {
342  unsigned int n = ST_INDEX_BITS - nlz_intptr(size);
343  if (n <= MAX_POWER2)
344  return n < MINIMAL_POWER2 ? MINIMAL_POWER2 : n;
345 #ifndef NOT_RUBY
346  /* Ran out of the table entries */
347  rb_raise(rb_eRuntimeError, "st_table too big");
348 #endif
349  /* should raise exception */
350  return -1;
351 }
352 
353 /* Return value of N-th bin in array BINS of table with bins size
354  index S. */
355 static inline st_index_t
356 get_bin(st_index_t *bins, int s, st_index_t n)
357 {
358  return (s == 0 ? ((unsigned char *) bins)[n]
359  : s == 1 ? ((unsigned short *) bins)[n]
360  : s == 2 ? ((unsigned int *) bins)[n]
361  : ((st_index_t *) bins)[n]);
362 }
363 
364 /* Set up N-th bin in array BINS of table with bins size index S to
365  value V. */
366 static inline void
367 set_bin(st_index_t *bins, int s, st_index_t n, st_index_t v)
368 {
369  if (s == 0) ((unsigned char *) bins)[n] = (unsigned char) v;
370  else if (s == 1) ((unsigned short *) bins)[n] = (unsigned short) v;
371  else if (s == 2) ((unsigned int *) bins)[n] = (unsigned int) v;
372  else ((st_index_t *) bins)[n] = v;
373 }
374 
375 /* These macros define reserved values for empty table bin and table
376  bin which contains a deleted entry. We will never use such values
377  for an entry index in bins. */
378 #define EMPTY_BIN 0
379 #define DELETED_BIN 1
380 /* Base of a real entry index in the bins. */
381 #define ENTRY_BASE 2
382 
383 /* Mark I-th bin of table TAB as empty, in other words not
384  corresponding to any entry. */
385 #define MARK_BIN_EMPTY(tab, i) (set_bin((tab)->bins, get_size_ind(tab), i, EMPTY_BIN))
386 
387 /* Values used for not found entry and bin with given
388  characteristics. */
389 #define UNDEFINED_ENTRY_IND (~(st_index_t) 0)
390 #define UNDEFINED_BIN_IND (~(st_index_t) 0)
391 
392 /* Entry and bin values returned when we found a table rebuild during
393  the search. */
394 #define REBUILT_TABLE_ENTRY_IND (~(st_index_t) 1)
395 #define REBUILT_TABLE_BIN_IND (~(st_index_t) 1)
396 
397 /* Mark I-th bin of table TAB as corresponding to a deleted table
398  entry. Update number of entries in the table and number of bins
399  corresponding to deleted entries. */
400 #define MARK_BIN_DELETED(tab, i) \
401  do { \
402  set_bin((tab)->bins, get_size_ind(tab), i, DELETED_BIN); \
403  } while (0)
404 
405 /* Macros to check that value B is used empty bins and bins
406  corresponding deleted entries. */
407 #define EMPTY_BIN_P(b) ((b) == EMPTY_BIN)
408 #define DELETED_BIN_P(b) ((b) == DELETED_BIN)
409 #define EMPTY_OR_DELETED_BIN_P(b) ((b) <= DELETED_BIN)
410 
411 /* Macros to check empty bins and bins corresponding to deleted
412  entries. Bins are given by their index I in table TAB. */
413 #define IND_EMPTY_BIN_P(tab, i) (EMPTY_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
414 #define IND_DELETED_BIN_P(tab, i) (DELETED_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
415 #define IND_EMPTY_OR_DELETED_BIN_P(tab, i) (EMPTY_OR_DELETED_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
416 
417 /* Macros for marking and checking deleted entries given by their
418  pointer E_PTR. */
419 #define MARK_ENTRY_DELETED(e_ptr) ((e_ptr)->hash = RESERVED_HASH_VAL)
420 #define DELETED_ENTRY_P(e_ptr) ((e_ptr)->hash == RESERVED_HASH_VAL)
421 
422 /* Return bin size index of table TAB. */
423 static inline unsigned int
424 get_size_ind(const st_table *tab)
425 {
426  return tab->size_ind;
427 }
428 
429 /* Return the number of allocated bins of table TAB. */
430 static inline st_index_t
431 get_bins_num(const st_table *tab)
432 {
433  return ((st_index_t) 1)<<tab->bin_power;
434 }
435 
436 /* Return mask for a bin index in table TAB. */
437 static inline st_index_t
438 bins_mask(const st_table *tab)
439 {
440  return get_bins_num(tab) - 1;
441 }
442 
443 /* Return the index of table TAB bin corresponding to
444  HASH_VALUE. */
445 static inline st_index_t
446 hash_bin(st_hash_t hash_value, st_table *tab)
447 {
448  return hash_value & bins_mask(tab);
449 }
450 
451 /* Return the number of allocated entries of table TAB. */
452 static inline st_index_t
453 get_allocated_entries(const st_table *tab)
454 {
455  return ((st_index_t) 1)<<tab->entry_power;
456 }
457 
458 /* Return size of the allocated bins of table TAB. */
459 static inline st_index_t
460 bins_size(const st_table *tab)
461 {
462  return features[tab->entry_power].bins_words * sizeof (st_index_t);
463 }
464 
465 /* Mark all bins of table TAB as empty. */
466 static void
467 initialize_bins(st_table *tab)
468 {
469  memset(tab->bins, 0, bins_size(tab));
470 }
471 
472 /* Make table TAB empty. */
473 static void
474 make_tab_empty(st_table *tab)
475 {
476  tab->num_entries = 0;
477  tab->entries_start = tab->entries_bound = 0;
478  if (tab->bins != NULL)
479  initialize_bins(tab);
480 }
481 
482 #ifdef HASH_LOG
483 #ifdef HAVE_UNISTD_H
484 #include <unistd.h>
485 #endif
486 static struct {
487  int all, total, num, str, strcase;
488 } collision;
489 
490 /* Flag switching off output of package statistics at the end of
491  program. */
492 static int init_st = 0;
493 
494 /* Output overall number of table searches and collisions into a
495  temporary file. */
496 static void
497 stat_col(void)
498 {
499  char fname[10+sizeof(long)*3];
500  FILE *f;
501  if (!collision.total) return;
502  f = fopen((snprintf(fname, sizeof(fname), "/tmp/col%ld", (long)getpid()), fname), "w");
503  if (f == NULL)
504  return;
505  fprintf(f, "collision: %d / %d (%6.2f)\n", collision.all, collision.total,
506  ((double)collision.all / (collision.total)) * 100);
507  fprintf(f, "num: %d, str: %d, strcase: %d\n", collision.num, collision.str, collision.strcase);
508  fclose(f);
509 }
510 #endif
511 
512 /* Create and return table with TYPE which can hold at least SIZE
513  entries. The real number of entries which the table can hold is
514  the nearest power of two for SIZE. */
515 st_table *
516 st_init_table_with_size(const struct st_hash_type *type, st_index_t size)
517 {
518  st_table *tab;
519  int n;
520 
521 #ifdef HASH_LOG
522 #if HASH_LOG+0 < 0
523  {
524  const char *e = getenv("ST_HASH_LOG");
525  if (!e || !*e) init_st = 1;
526  }
527 #endif
528  if (init_st == 0) {
529  init_st = 1;
530  atexit(stat_col);
531  }
532 #endif
533 
534  n = get_power2(size);
535 #ifndef RUBY
536  if (n < 0)
537  return NULL;
538 #endif
539  tab = (st_table *) malloc(sizeof (st_table));
540 #ifndef RUBY
541  if (tab == NULL)
542  return NULL;
543 #endif
544  tab->type = type;
545  tab->entry_power = n;
546  tab->bin_power = features[n].bin_power;
547  tab->size_ind = features[n].size_ind;
548  if (n <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
549  tab->bins = NULL;
550  else {
551  tab->bins = (st_index_t *) malloc(bins_size(tab));
552 #ifndef RUBY
553  if (tab->bins == NULL) {
554  free(tab);
555  return NULL;
556  }
557 #endif
558  }
559  tab->entries = (st_table_entry *) malloc(get_allocated_entries(tab)
560  * sizeof(st_table_entry));
561 #ifndef RUBY
562  if (tab->entries == NULL) {
563  st_free_table(tab);
564  return NULL;
565  }
566 #endif
567  make_tab_empty(tab);
568  tab->rebuilds_num = 0;
569  return tab;
570 }
571 
572 /* Create and return table with TYPE which can hold a minimal number
573  of entries (see comments for get_power2). */
574 st_table *
575 st_init_table(const struct st_hash_type *type)
576 {
577  return st_init_table_with_size(type, 0);
578 }
579 
580 /* Create and return table which can hold a minimal number of
581  numbers. */
582 st_table *
583 st_init_numtable(void)
584 {
585  return st_init_table(&type_numhash);
586 }
587 
588 /* Create and return table which can hold SIZE numbers. */
589 st_table *
590 st_init_numtable_with_size(st_index_t size)
591 {
592  return st_init_table_with_size(&type_numhash, size);
593 }
594 
595 /* Create and return table which can hold a minimal number of
596  strings. */
597 st_table *
598 st_init_strtable(void)
599 {
600  return st_init_table(&type_strhash);
601 }
602 
603 /* Create and return table which can hold SIZE strings. */
604 st_table *
605 st_init_strtable_with_size(st_index_t size)
606 {
607  return st_init_table_with_size(&type_strhash, size);
608 }
609 
610 /* Create and return table which can hold a minimal number of strings
611  whose character case is ignored. */
612 st_table *
613 st_init_strcasetable(void)
614 {
615  return st_init_table(&type_strcasehash);
616 }
617 
618 /* Create and return table which can hold SIZE strings whose character
619  case is ignored. */
620 st_table *
621 st_init_strcasetable_with_size(st_index_t size)
622 {
623  return st_init_table_with_size(&type_strcasehash, size);
624 }
625 
626 /* Make table TAB empty. */
627 void
628 st_clear(st_table *tab)
629 {
630  make_tab_empty(tab);
631  tab->rebuilds_num++;
632 }
633 
634 /* Free table TAB space. */
635 void
636 st_free_table(st_table *tab)
637 {
638  if (tab->bins != NULL)
639  free(tab->bins);
640  free(tab->entries);
641  free(tab);
642 }
643 
644 /* Return byte size of memory allocated for table TAB. */
645 size_t
646 st_memsize(const st_table *tab)
647 {
648  return(sizeof(st_table)
649  + (tab->bins == NULL ? 0 : bins_size(tab))
650  + get_allocated_entries(tab) * sizeof(st_table_entry));
651 }
652 
653 static st_index_t
654 find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key);
655 
656 static st_index_t
657 find_table_bin_ind(st_table *tab, st_hash_t hash_value, st_data_t key);
658 
659 static st_index_t
660 find_table_bin_ind_direct(st_table *table, st_hash_t hash_value, st_data_t key);
661 
662 static st_index_t
663 find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
664  st_data_t key, st_index_t *bin_ind);
665 
666 #ifdef HASH_LOG
667 static void
668 count_collision(const struct st_hash_type *type)
669 {
670  collision.all++;
671  if (type == &type_numhash) {
672  collision.num++;
673  }
674  else if (type == &type_strhash) {
675  collision.strcase++;
676  }
677  else if (type == &type_strcasehash) {
678  collision.str++;
679  }
680 }
681 
682 #define COLLISION (collision_check ? count_collision(tab->type) : (void)0)
683 #define FOUND_BIN (collision_check ? collision.total++ : (void)0)
684 #define collision_check 0
685 #else
686 #define COLLISION
687 #define FOUND_BIN
688 #endif
689 
690 /* If the number of entries in the table is at least REBUILD_THRESHOLD
691  times less than the entry array length, decrease the table
692  size. */
693 #define REBUILD_THRESHOLD 4
694 
695 #if REBUILD_THRESHOLD < 2
696 #error "REBUILD_THRESHOLD should be >= 2"
697 #endif
698 
699 /* Rebuild table TAB. Rebuilding removes all deleted bins and entries
700  and can change size of the table entries and bins arrays.
701  Rebuilding is implemented by creation of a new table or by
702  compaction of the existing one. */
703 static void
704 rebuild_table(st_table *tab)
705 {
706  st_index_t i, ni;
707  unsigned int size_ind;
708  st_table *new_tab;
709  st_table_entry *new_entries;
710  st_table_entry *curr_entry_ptr;
711  st_index_t *bins;
712  st_index_t bin_ind;
713 
714  if ((2 * tab->num_entries <= get_allocated_entries(tab)
715  && REBUILD_THRESHOLD * tab->num_entries > get_allocated_entries(tab))
716  || tab->num_entries < (1 << MINIMAL_POWER2)) {
717  /* Compaction: */
718  tab->num_entries = 0;
719  if (tab->bins != NULL)
720  initialize_bins(tab);
721  new_tab = tab;
722  new_entries = tab->entries;
723  }
724  else {
725  /* This allocation could trigger GC and compaction. If tab is the
726  * gen_iv_tbl, then tab could have changed in size due to objects being
727  * freed and/or moved. Do not store attributes of tab before this line. */
728  new_tab = st_init_table_with_size(tab->type,
729  2 * tab->num_entries - 1);
730  new_entries = new_tab->entries;
731  }
732 
733  ni = 0;
734  bins = new_tab->bins;
735  size_ind = get_size_ind(new_tab);
736  st_index_t bound = tab->entries_bound;
737  st_table_entry *entries = tab->entries;
738 
739  for (i = tab->entries_start; i < bound; i++) {
740  curr_entry_ptr = &entries[i];
741  PREFETCH(entries + i + 1, 0);
742  if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
743  continue;
744  if (&new_entries[ni] != curr_entry_ptr)
745  new_entries[ni] = *curr_entry_ptr;
746  if (EXPECT(bins != NULL, 1)) {
747  bin_ind = find_table_bin_ind_direct(new_tab, curr_entry_ptr->hash,
748  curr_entry_ptr->key);
749  set_bin(bins, size_ind, bin_ind, ni + ENTRY_BASE);
750  }
751  new_tab->num_entries++;
752  ni++;
753  }
754  if (new_tab != tab) {
755  tab->entry_power = new_tab->entry_power;
756  tab->bin_power = new_tab->bin_power;
757  tab->size_ind = new_tab->size_ind;
758  if (tab->bins != NULL)
759  free(tab->bins);
760  tab->bins = new_tab->bins;
761  free(tab->entries);
762  tab->entries = new_tab->entries;
763  free(new_tab);
764  }
765  tab->entries_start = 0;
766  tab->entries_bound = tab->num_entries;
767  tab->rebuilds_num++;
768 }
769 
770 /* Return the next secondary hash index for table TAB using previous
771  index IND and PERTERB. Finally modulo of the function becomes a
772  full *cycle linear congruential generator*, in other words it
773  guarantees traversing all table bins in extreme case.
774 
775  According the Hull-Dobell theorem a generator
776  "Xnext = (a*Xprev + c) mod m" is a full cycle generator if and only if
777  o m and c are relatively prime
778  o a-1 is divisible by all prime factors of m
779  o a-1 is divisible by 4 if m is divisible by 4.
780 
781  For our case a is 5, c is 1, and m is a power of two. */
782 static inline st_index_t
783 secondary_hash(st_index_t ind, st_table *tab, st_index_t *perterb)
784 {
785  *perterb >>= 11;
786  ind = (ind << 2) + ind + *perterb + 1;
787  return hash_bin(ind, tab);
788 }
789 
790 /* Find an entry with HASH_VALUE and KEY in TABLE using a linear
791  search. Return the index of the found entry in array `entries`.
792  If it is not found, return UNDEFINED_ENTRY_IND. If the table was
793  rebuilt during the search, return REBUILT_TABLE_ENTRY_IND. */
794 static inline st_index_t
795 find_entry(st_table *tab, st_hash_t hash_value, st_data_t key)
796 {
797  int eq_p, rebuilt_p;
798  st_index_t i, bound;
799  st_table_entry *entries;
800 
801  bound = tab->entries_bound;
802  entries = tab->entries;
803  for (i = tab->entries_start; i < bound; i++) {
804  DO_PTR_EQUAL_CHECK(tab, &entries[i], hash_value, key, eq_p, rebuilt_p);
805  if (EXPECT(rebuilt_p, 0))
806  return REBUILT_TABLE_ENTRY_IND;
807  if (eq_p)
808  return i;
809  }
810  return UNDEFINED_ENTRY_IND;
811 }
812 
813 /* Use the quadratic probing. The method has a better data locality
814  but more collisions than the current approach. In average it
815  results in a bit slower search. */
816 /*#define QUADRATIC_PROBE*/
817 
818 /* Return index of entry with HASH_VALUE and KEY in table TAB. If
819  there is no such entry, return UNDEFINED_ENTRY_IND. If the table
820  was rebuilt during the search, return REBUILT_TABLE_ENTRY_IND. */
821 static st_index_t
822 find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key)
823 {
824  int eq_p, rebuilt_p;
825  st_index_t ind;
826 #ifdef QUADRATIC_PROBE
827  st_index_t d;
828 #else
829  st_index_t peterb;
830 #endif
831  st_index_t bin;
832  st_table_entry *entries = tab->entries;
833 
834  ind = hash_bin(hash_value, tab);
835 #ifdef QUADRATIC_PROBE
836  d = 1;
837 #else
838  peterb = hash_value;
839 #endif
840  FOUND_BIN;
841  for (;;) {
842  bin = get_bin(tab->bins, get_size_ind(tab), ind);
843  if (! EMPTY_OR_DELETED_BIN_P(bin)) {
844  DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
845  if (EXPECT(rebuilt_p, 0))
846  return REBUILT_TABLE_ENTRY_IND;
847  if (eq_p)
848  break;
849  }
850  else if (EMPTY_BIN_P(bin))
851  return UNDEFINED_ENTRY_IND;
852 #ifdef QUADRATIC_PROBE
853  ind = hash_bin(ind + d, tab);
854  d++;
855 #else
856  ind = secondary_hash(ind, tab, &peterb);
857 #endif
858  COLLISION;
859  }
860  return bin;
861 }
862 
863 /* Find and return index of table TAB bin corresponding to an entry
864  with HASH_VALUE and KEY. If there is no such bin, return
865  UNDEFINED_BIN_IND. If the table was rebuilt during the search,
866  return REBUILT_TABLE_BIN_IND. */
867 static st_index_t
868 find_table_bin_ind(st_table *tab, st_hash_t hash_value, st_data_t key)
869 {
870  int eq_p, rebuilt_p;
871  st_index_t ind;
872 #ifdef QUADRATIC_PROBE
873  st_index_t d;
874 #else
875  st_index_t peterb;
876 #endif
877  st_index_t bin;
878  st_table_entry *entries = tab->entries;
879 
880  ind = hash_bin(hash_value, tab);
881 #ifdef QUADRATIC_PROBE
882  d = 1;
883 #else
884  peterb = hash_value;
885 #endif
886  FOUND_BIN;
887  for (;;) {
888  bin = get_bin(tab->bins, get_size_ind(tab), ind);
889  if (! EMPTY_OR_DELETED_BIN_P(bin)) {
890  DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
891  if (EXPECT(rebuilt_p, 0))
892  return REBUILT_TABLE_BIN_IND;
893  if (eq_p)
894  break;
895  }
896  else if (EMPTY_BIN_P(bin))
897  return UNDEFINED_BIN_IND;
898 #ifdef QUADRATIC_PROBE
899  ind = hash_bin(ind + d, tab);
900  d++;
901 #else
902  ind = secondary_hash(ind, tab, &peterb);
903 #endif
904  COLLISION;
905  }
906  return ind;
907 }
908 
909 /* Find and return index of table TAB bin corresponding to an entry
910  with HASH_VALUE and KEY. The entry should be in the table
911  already. */
912 static st_index_t
913 find_table_bin_ind_direct(st_table *tab, st_hash_t hash_value, st_data_t key)
914 {
915  st_index_t ind;
916 #ifdef QUADRATIC_PROBE
917  st_index_t d;
918 #else
919  st_index_t peterb;
920 #endif
921  st_index_t bin;
922 
923  ind = hash_bin(hash_value, tab);
924 #ifdef QUADRATIC_PROBE
925  d = 1;
926 #else
927  peterb = hash_value;
928 #endif
929  FOUND_BIN;
930  for (;;) {
931  bin = get_bin(tab->bins, get_size_ind(tab), ind);
932  if (EMPTY_OR_DELETED_BIN_P(bin))
933  return ind;
934 #ifdef QUADRATIC_PROBE
935  ind = hash_bin(ind + d, tab);
936  d++;
937 #else
938  ind = secondary_hash(ind, tab, &peterb);
939 #endif
940  COLLISION;
941  }
942 }
943 
944 /* Return index of table TAB bin for HASH_VALUE and KEY through
945  BIN_IND and the pointed value as the function result. Reserve the
946  bin for inclusion of the corresponding entry into the table if it
947  is not there yet. We always find such bin as bins array length is
948  bigger entries array. Although we can reuse a deleted bin, the
949  result bin value is always empty if the table has no entry with
950  KEY. Return the entries array index of the found entry or
951  UNDEFINED_ENTRY_IND if it is not found. If the table was rebuilt
952  during the search, return REBUILT_TABLE_ENTRY_IND. */
953 static st_index_t
954 find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
955  st_data_t key, st_index_t *bin_ind)
956 {
957  int eq_p, rebuilt_p;
958  st_index_t ind;
959  st_hash_t curr_hash_value = *hash_value;
960 #ifdef QUADRATIC_PROBE
961  st_index_t d;
962 #else
963  st_index_t peterb;
964 #endif
965  st_index_t entry_index;
966  st_index_t first_deleted_bin_ind;
967  st_table_entry *entries;
968 
969  ind = hash_bin(curr_hash_value, tab);
970 #ifdef QUADRATIC_PROBE
971  d = 1;
972 #else
973  peterb = curr_hash_value;
974 #endif
975  FOUND_BIN;
976  first_deleted_bin_ind = UNDEFINED_BIN_IND;
977  entries = tab->entries;
978  for (;;) {
979  entry_index = get_bin(tab->bins, get_size_ind(tab), ind);
980  if (EMPTY_BIN_P(entry_index)) {
981  tab->num_entries++;
982  entry_index = UNDEFINED_ENTRY_IND;
983  if (first_deleted_bin_ind != UNDEFINED_BIN_IND) {
984  /* We can reuse bin of a deleted entry. */
985  ind = first_deleted_bin_ind;
986  MARK_BIN_EMPTY(tab, ind);
987  }
988  break;
989  }
990  else if (! DELETED_BIN_P(entry_index)) {
991  DO_PTR_EQUAL_CHECK(tab, &entries[entry_index - ENTRY_BASE], curr_hash_value, key, eq_p, rebuilt_p);
992  if (EXPECT(rebuilt_p, 0))
993  return REBUILT_TABLE_ENTRY_IND;
994  if (eq_p)
995  break;
996  }
997  else if (first_deleted_bin_ind == UNDEFINED_BIN_IND)
998  first_deleted_bin_ind = ind;
999 #ifdef QUADRATIC_PROBE
1000  ind = hash_bin(ind + d, tab);
1001  d++;
1002 #else
1003  ind = secondary_hash(ind, tab, &peterb);
1004 #endif
1005  COLLISION;
1006  }
1007  *bin_ind = ind;
1008  return entry_index;
1009 }
1010 
1011 /* Find an entry with KEY in table TAB. Return non-zero if we found
1012  it. Set up *RECORD to the found entry record. */
1013 int
1014 st_lookup(st_table *tab, st_data_t key, st_data_t *value)
1015 {
1016  st_index_t bin;
1017  st_hash_t hash = do_hash(key, tab);
1018 
1019  retry:
1020  if (tab->bins == NULL) {
1021  bin = find_entry(tab, hash, key);
1022  if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1023  goto retry;
1024  if (bin == UNDEFINED_ENTRY_IND)
1025  return 0;
1026  }
1027  else {
1028  bin = find_table_entry_ind(tab, hash, key);
1029  if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1030  goto retry;
1031  if (bin == UNDEFINED_ENTRY_IND)
1032  return 0;
1033  bin -= ENTRY_BASE;
1034  }
1035  if (value != 0)
1036  *value = tab->entries[bin].record;
1037  return 1;
1038 }
1039 
1040 /* Find an entry with KEY in table TAB. Return non-zero if we found
1041  it. Set up *RESULT to the found table entry key. */
1042 int
1043 st_get_key(st_table *tab, st_data_t key, st_data_t *result)
1044 {
1045  st_index_t bin;
1046  st_hash_t hash = do_hash(key, tab);
1047 
1048  retry:
1049  if (tab->bins == NULL) {
1050  bin = find_entry(tab, hash, key);
1051  if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1052  goto retry;
1053  if (bin == UNDEFINED_ENTRY_IND)
1054  return 0;
1055  }
1056  else {
1057  bin = find_table_entry_ind(tab, hash, key);
1058  if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1059  goto retry;
1060  if (bin == UNDEFINED_ENTRY_IND)
1061  return 0;
1062  bin -= ENTRY_BASE;
1063  }
1064  if (result != 0)
1065  *result = tab->entries[bin].key;
1066  return 1;
1067 }
1068 
1069 /* Check the table and rebuild it if it is necessary. */
1070 static inline void
1071 rebuild_table_if_necessary (st_table *tab)
1072 {
1073  st_index_t bound = tab->entries_bound;
1074 
1075  if (bound == get_allocated_entries(tab))
1076  rebuild_table(tab);
1077 }
1078 
1079 /* Insert (KEY, VALUE) into table TAB and return zero. If there is
1080  already entry with KEY in the table, return nonzero and update
1081  the value of the found entry. */
1082 int
1083 st_insert(st_table *tab, st_data_t key, st_data_t value)
1084 {
1085  st_table_entry *entry;
1086  st_index_t bin;
1087  st_index_t ind;
1088  st_hash_t hash_value;
1089  st_index_t bin_ind;
1090  int new_p;
1091 
1092  hash_value = do_hash(key, tab);
1093  retry:
1094  rebuild_table_if_necessary(tab);
1095  if (tab->bins == NULL) {
1096  bin = find_entry(tab, hash_value, key);
1097  if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1098  goto retry;
1099  new_p = bin == UNDEFINED_ENTRY_IND;
1100  if (new_p)
1101  tab->num_entries++;
1102  bin_ind = UNDEFINED_BIN_IND;
1103  }
1104  else {
1105  bin = find_table_bin_ptr_and_reserve(tab, &hash_value,
1106  key, &bin_ind);
1107  if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1108  goto retry;
1109  new_p = bin == UNDEFINED_ENTRY_IND;
1110  bin -= ENTRY_BASE;
1111  }
1112  if (new_p) {
1113  ind = tab->entries_bound++;
1114  entry = &tab->entries[ind];
1115  entry->hash = hash_value;
1116  entry->key = key;
1117  entry->record = value;
1118  if (bin_ind != UNDEFINED_BIN_IND)
1119  set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1120  return 0;
1121  }
1122  tab->entries[bin].record = value;
1123  return 1;
1124 }
1125 
1126 /* Insert (KEY, VALUE, HASH) into table TAB. The table should not have
1127  entry with KEY before the insertion. */
1128 static inline void
1129 st_add_direct_with_hash(st_table *tab,
1130  st_data_t key, st_data_t value, st_hash_t hash)
1131 {
1132  st_table_entry *entry;
1133  st_index_t ind;
1134  st_index_t bin_ind;
1135 
1136  rebuild_table_if_necessary(tab);
1137  ind = tab->entries_bound++;
1138  entry = &tab->entries[ind];
1139  entry->hash = hash;
1140  entry->key = key;
1141  entry->record = value;
1142  tab->num_entries++;
1143  if (tab->bins != NULL) {
1144  bin_ind = find_table_bin_ind_direct(tab, hash, key);
1145  set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1146  }
1147 }
1148 
1149 /* Insert (KEY, VALUE) into table TAB. The table should not have
1150  entry with KEY before the insertion. */
1151 void
1152 st_add_direct(st_table *tab, st_data_t key, st_data_t value)
1153 {
1154  st_hash_t hash_value;
1155 
1156  hash_value = do_hash(key, tab);
1157  st_add_direct_with_hash(tab, key, value, hash_value);
1158 }
1159 
1160 /* Insert (FUNC(KEY), VALUE) into table TAB and return zero. If
1161  there is already entry with KEY in the table, return nonzero and
1162  update the value of the found entry. */
1163 int
1164 st_insert2(st_table *tab, st_data_t key, st_data_t value,
1165  st_data_t (*func)(st_data_t))
1166 {
1167  st_table_entry *entry;
1168  st_index_t bin;
1169  st_index_t ind;
1170  st_hash_t hash_value;
1171  st_index_t bin_ind;
1172  int new_p;
1173 
1174  hash_value = do_hash(key, tab);
1175  retry:
1176  rebuild_table_if_necessary (tab);
1177  if (tab->bins == NULL) {
1178  bin = find_entry(tab, hash_value, key);
1179  if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1180  goto retry;
1181  new_p = bin == UNDEFINED_ENTRY_IND;
1182  if (new_p)
1183  tab->num_entries++;
1184  bin_ind = UNDEFINED_BIN_IND;
1185  }
1186  else {
1187  bin = find_table_bin_ptr_and_reserve(tab, &hash_value,
1188  key, &bin_ind);
1189  if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1190  goto retry;
1191  new_p = bin == UNDEFINED_ENTRY_IND;
1192  bin -= ENTRY_BASE;
1193  }
1194  if (new_p) {
1195  key = (*func)(key);
1196  ind = tab->entries_bound++;
1197  entry = &tab->entries[ind];
1198  entry->hash = hash_value;
1199  entry->key = key;
1200  entry->record = value;
1201  if (bin_ind != UNDEFINED_BIN_IND)
1202  set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1203  return 0;
1204  }
1205  tab->entries[bin].record = value;
1206  return 1;
1207 }
1208 
1209 /* Create and return a copy of table OLD_TAB. */
1210 st_table *
1211 st_copy(st_table *old_tab)
1212 {
1213  st_table *new_tab;
1214 
1215  new_tab = (st_table *) malloc(sizeof(st_table));
1216 #ifndef RUBY
1217  if (new_tab == NULL)
1218  return NULL;
1219 #endif
1220  *new_tab = *old_tab;
1221  if (old_tab->bins == NULL)
1222  new_tab->bins = NULL;
1223  else {
1224  new_tab->bins = (st_index_t *) malloc(bins_size(old_tab));
1225 #ifndef RUBY
1226  if (new_tab->bins == NULL) {
1227  free(new_tab);
1228  return NULL;
1229  }
1230 #endif
1231  }
1232  new_tab->entries = (st_table_entry *) malloc(get_allocated_entries(old_tab)
1233  * sizeof(st_table_entry));
1234 #ifndef RUBY
1235  if (new_tab->entries == NULL) {
1236  st_free_table(new_tab);
1237  return NULL;
1238  }
1239 #endif
1240  MEMCPY(new_tab->entries, old_tab->entries, st_table_entry,
1241  get_allocated_entries(old_tab));
1242  if (old_tab->bins != NULL)
1243  MEMCPY(new_tab->bins, old_tab->bins, char, bins_size(old_tab));
1244  return new_tab;
1245 }
1246 
1247 /* Update the entries start of table TAB after removing an entry
1248  with index N in the array entries. */
1249 static inline void
1250 update_range_for_deleted(st_table *tab, st_index_t n)
1251 {
1252  /* Do not update entries_bound here. Otherwise, we can fill all
1253  bins by deleted entry value before rebuilding the table. */
1254  if (tab->entries_start == n) {
1255  st_index_t start = n + 1;
1256  st_index_t bound = tab->entries_bound;
1257  st_table_entry *entries = tab->entries;
1258  while (start < bound && DELETED_ENTRY_P(&entries[start])) start++;
1259  tab->entries_start = start;
1260  }
1261 }
1262 
1263 /* Delete entry with KEY from table TAB, set up *VALUE (unless
1264  VALUE is zero) from deleted table entry, and return non-zero. If
1265  there is no entry with KEY in the table, clear *VALUE (unless VALUE
1266  is zero), and return zero. */
1267 static int
1268 st_general_delete(st_table *tab, st_data_t *key, st_data_t *value)
1269 {
1270  st_table_entry *entry;
1271  st_index_t bin;
1272  st_index_t bin_ind;
1273  st_hash_t hash;
1274 
1275  hash = do_hash(*key, tab);
1276  retry:
1277  if (tab->bins == NULL) {
1278  bin = find_entry(tab, hash, *key);
1279  if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1280  goto retry;
1281  if (bin == UNDEFINED_ENTRY_IND) {
1282  if (value != 0) *value = 0;
1283  return 0;
1284  }
1285  }
1286  else {
1287  bin_ind = find_table_bin_ind(tab, hash, *key);
1288  if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
1289  goto retry;
1290  if (bin_ind == UNDEFINED_BIN_IND) {
1291  if (value != 0) *value = 0;
1292  return 0;
1293  }
1294  bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
1295  MARK_BIN_DELETED(tab, bin_ind);
1296  }
1297  entry = &tab->entries[bin];
1298  *key = entry->key;
1299  if (value != 0) *value = entry->record;
1300  MARK_ENTRY_DELETED(entry);
1301  tab->num_entries--;
1302  update_range_for_deleted(tab, bin);
1303  return 1;
1304 }
1305 
1306 int
1307 st_delete(st_table *tab, st_data_t *key, st_data_t *value)
1308 {
1309  return st_general_delete(tab, key, value);
1310 }
1311 
1312 /* The function and other functions with suffix '_safe' or '_check'
1313  are originated from the previous implementation of the hash tables.
1314  It was necessary for correct deleting entries during traversing
1315  tables. The current implementation permits deletion during
1316  traversing without a specific way to do this. */
1317 int
1318 st_delete_safe(st_table *tab, st_data_t *key, st_data_t *value,
1319  st_data_t never ATTRIBUTE_UNUSED)
1320 {
1321  return st_general_delete(tab, key, value);
1322 }
1323 
1324 /* If table TAB is empty, clear *VALUE (unless VALUE is zero), and
1325  return zero. Otherwise, remove the first entry in the table.
1326  Return its key through KEY and its record through VALUE (unless
1327  VALUE is zero). */
1328 int
1329 st_shift(st_table *tab, st_data_t *key, st_data_t *value)
1330 {
1331  st_index_t i, bound;
1332  st_index_t bin;
1333  st_table_entry *entries, *curr_entry_ptr;
1334  st_index_t bin_ind;
1335 
1336  entries = tab->entries;
1337  bound = tab->entries_bound;
1338  for (i = tab->entries_start; i < bound; i++) {
1339  curr_entry_ptr = &entries[i];
1340  if (! DELETED_ENTRY_P(curr_entry_ptr)) {
1341  st_hash_t entry_hash = curr_entry_ptr->hash;
1342  st_data_t entry_key = curr_entry_ptr->key;
1343 
1344  if (value != 0) *value = curr_entry_ptr->record;
1345  *key = entry_key;
1346  retry:
1347  if (tab->bins == NULL) {
1348  bin = find_entry(tab, entry_hash, entry_key);
1349  if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0)) {
1350  entries = tab->entries;
1351  goto retry;
1352  }
1353  curr_entry_ptr = &entries[bin];
1354  }
1355  else {
1356  bin_ind = find_table_bin_ind(tab, entry_hash, entry_key);
1357  if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0)) {
1358  entries = tab->entries;
1359  goto retry;
1360  }
1361  curr_entry_ptr = &entries[get_bin(tab->bins, get_size_ind(tab), bin_ind)
1362  - ENTRY_BASE];
1363  MARK_BIN_DELETED(tab, bin_ind);
1364  }
1365  MARK_ENTRY_DELETED(curr_entry_ptr);
1366  tab->num_entries--;
1367  update_range_for_deleted(tab, i);
1368  return 1;
1369  }
1370  }
1371  if (value != 0) *value = 0;
1372  return 0;
1373 }
1374 
1375 /* See comments for function st_delete_safe. */
1376 void
1377 st_cleanup_safe(st_table *tab ATTRIBUTE_UNUSED,
1378  st_data_t never ATTRIBUTE_UNUSED)
1379 {
1380 }
1381 
1382 /* Find entry with KEY in table TAB, call FUNC with pointers to copies
1383  of the key and the value of the found entry, and non-zero as the
1384  3rd argument. If the entry is not found, call FUNC with a pointer
1385  to KEY, a pointer to zero, and a zero argument. If the call
1386  returns ST_CONTINUE, the table will have an entry with key and
1387  value returned by FUNC through the 1st and 2nd parameters. If the
1388  call of FUNC returns ST_DELETE, the table will not have entry with
1389  KEY. The function returns flag of that the entry with KEY was in
1390  the table before the call. */
1391 int
1392 st_update(st_table *tab, st_data_t key,
1393  st_update_callback_func *func, st_data_t arg)
1394 {
1395  st_table_entry *entry = NULL; /* to avoid uninitialized value warning */
1396  st_index_t bin = 0; /* Ditto */
1397  st_table_entry *entries;
1398  st_index_t bin_ind;
1399  st_data_t value = 0, old_key;
1400  int retval, existing;
1401  st_hash_t hash = do_hash(key, tab);
1402 
1403  retry:
1404  entries = tab->entries;
1405  if (tab->bins == NULL) {
1406  bin = find_entry(tab, hash, key);
1407  if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1408  goto retry;
1409  existing = bin != UNDEFINED_ENTRY_IND;
1410  entry = &entries[bin];
1411  bin_ind = UNDEFINED_BIN_IND;
1412  }
1413  else {
1414  bin_ind = find_table_bin_ind(tab, hash, key);
1415  if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
1416  goto retry;
1417  existing = bin_ind != UNDEFINED_BIN_IND;
1418  if (existing) {
1419  bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
1420  entry = &entries[bin];
1421  }
1422  }
1423  if (existing) {
1424  key = entry->key;
1425  value = entry->record;
1426  }
1427  old_key = key;
1428  retval = (*func)(&key, &value, arg, existing);
1429  switch (retval) {
1430  case ST_CONTINUE:
1431  if (! existing) {
1432  st_add_direct_with_hash(tab, key, value, hash);
1433  break;
1434  }
1435  if (old_key != key) {
1436  entry->key = key;
1437  }
1438  entry->record = value;
1439  break;
1440  case ST_DELETE:
1441  if (existing) {
1442  if (bin_ind != UNDEFINED_BIN_IND)
1443  MARK_BIN_DELETED(tab, bin_ind);
1444  MARK_ENTRY_DELETED(entry);
1445  tab->num_entries--;
1446  update_range_for_deleted(tab, bin);
1447  }
1448  break;
1449  }
1450  return existing;
1451 }
1452 
1453 /* Traverse all entries in table TAB calling FUNC with current entry
1454  key and value and zero. If the call returns ST_STOP, stop
1455  traversing. If the call returns ST_DELETE, delete the current
1456  entry from the table. In case of ST_CHECK or ST_CONTINUE, continue
1457  traversing. The function returns zero unless an error is found.
1458  CHECK_P is flag of st_foreach_check call. The behavior is a bit
1459  different for ST_CHECK and when the current element is removed
1460  during traversing. */
1461 static inline int
1462 st_general_foreach(st_table *tab, st_foreach_check_callback_func *func, st_update_callback_func *replace, st_data_t arg,
1463  int check_p)
1464 {
1465  st_index_t bin;
1466  st_index_t bin_ind;
1467  st_table_entry *entries, *curr_entry_ptr;
1468  enum st_retval retval;
1469  st_index_t i, rebuilds_num;
1470  st_hash_t hash;
1471  st_data_t key;
1472  int error_p, packed_p = tab->bins == NULL;
1473 
1474  entries = tab->entries;
1475  /* The bound can change inside the loop even without rebuilding
1476  the table, e.g. by an entry insertion. */
1477  for (i = tab->entries_start; i < tab->entries_bound; i++) {
1478  curr_entry_ptr = &entries[i];
1479  if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
1480  continue;
1481  key = curr_entry_ptr->key;
1482  rebuilds_num = tab->rebuilds_num;
1483  hash = curr_entry_ptr->hash;
1484  retval = (*func)(key, curr_entry_ptr->record, arg, 0);
1485 
1486  if (retval == ST_REPLACE && replace) {
1487  st_data_t value;
1488  value = curr_entry_ptr->record;
1489  retval = (*replace)(&key, &value, arg, TRUE);
1490  curr_entry_ptr->key = key;
1491  curr_entry_ptr->record = value;
1492  }
1493 
1494  if (rebuilds_num != tab->rebuilds_num) {
1495  retry:
1496  entries = tab->entries;
1497  packed_p = tab->bins == NULL;
1498  if (packed_p) {
1499  i = find_entry(tab, hash, key);
1500  if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
1501  goto retry;
1502  error_p = i == UNDEFINED_ENTRY_IND;
1503  }
1504  else {
1505  i = find_table_entry_ind(tab, hash, key);
1506  if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
1507  goto retry;
1508  error_p = i == UNDEFINED_ENTRY_IND;
1509  i -= ENTRY_BASE;
1510  }
1511  if (error_p && check_p) {
1512  /* call func with error notice */
1513  retval = (*func)(0, 0, arg, 1);
1514  return 1;
1515  }
1516  curr_entry_ptr = &entries[i];
1517  }
1518  switch (retval) {
1519  case ST_REPLACE:
1520  break;
1521  case ST_CONTINUE:
1522  break;
1523  case ST_CHECK:
1524  if (check_p)
1525  break;
1526  case ST_STOP:
1527  return 0;
1528  case ST_DELETE: {
1529  st_data_t key = curr_entry_ptr->key;
1530 
1531  again:
1532  if (packed_p) {
1533  bin = find_entry(tab, hash, key);
1534  if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1535  goto again;
1536  if (bin == UNDEFINED_ENTRY_IND)
1537  break;
1538  }
1539  else {
1540  bin_ind = find_table_bin_ind(tab, hash, key);
1541  if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
1542  goto again;
1543  if (bin_ind == UNDEFINED_BIN_IND)
1544  break;
1545  bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
1546  MARK_BIN_DELETED(tab, bin_ind);
1547  }
1548  curr_entry_ptr = &entries[bin];
1549  MARK_ENTRY_DELETED(curr_entry_ptr);
1550  tab->num_entries--;
1551  update_range_for_deleted(tab, bin);
1552  break;
1553  }
1554  }
1555  }
1556  return 0;
1557 }
1558 
1559 int
1560 st_foreach_with_replace(st_table *tab, st_foreach_check_callback_func *func, st_update_callback_func *replace, st_data_t arg)
1561 {
1562  return st_general_foreach(tab, func, replace, arg, TRUE);
1563 }
1564 
1565 struct functor {
1566  st_foreach_callback_func *func;
1567  st_data_t arg;
1568 };
1569 
1570 static int
1571 apply_functor(st_data_t k, st_data_t v, st_data_t d, int _)
1572 {
1573  const struct functor *f = (void *)d;
1574  return f->func(k, v, f->arg);
1575 }
1576 
1577 int
1578 st_foreach(st_table *tab, st_foreach_callback_func *func, st_data_t arg)
1579 {
1580  const struct functor f = { func, arg };
1581  return st_general_foreach(tab, apply_functor, 0, (st_data_t)&f, FALSE);
1582 }
1583 
1584 /* See comments for function st_delete_safe. */
1585 int
1586 st_foreach_check(st_table *tab, st_foreach_check_callback_func *func, st_data_t arg,
1587  st_data_t never ATTRIBUTE_UNUSED)
1588 {
1589  return st_general_foreach(tab, func, 0, arg, TRUE);
1590 }
1591 
1592 /* Set up array KEYS by at most SIZE keys of head table TAB entries.
1593  Return the number of keys set up in array KEYS. */
1594 static inline st_index_t
1595 st_general_keys(st_table *tab, st_data_t *keys, st_index_t size)
1596 {
1597  st_index_t i, bound;
1598  st_data_t key, *keys_start, *keys_end;
1599  st_table_entry *curr_entry_ptr, *entries = tab->entries;
1600 
1601  bound = tab->entries_bound;
1602  keys_start = keys;
1603  keys_end = keys + size;
1604  for (i = tab->entries_start; i < bound; i++) {
1605  if (keys == keys_end)
1606  break;
1607  curr_entry_ptr = &entries[i];
1608  key = curr_entry_ptr->key;
1609  if (! DELETED_ENTRY_P(curr_entry_ptr))
1610  *keys++ = key;
1611  }
1612 
1613  return keys - keys_start;
1614 }
1615 
1616 st_index_t
1617 st_keys(st_table *tab, st_data_t *keys, st_index_t size)
1618 {
1619  return st_general_keys(tab, keys, size);
1620 }
1621 
1622 /* See comments for function st_delete_safe. */
1623 st_index_t
1624 st_keys_check(st_table *tab, st_data_t *keys, st_index_t size,
1625  st_data_t never ATTRIBUTE_UNUSED)
1626 {
1627  return st_general_keys(tab, keys, size);
1628 }
1629 
1630 /* Set up array VALUES by at most SIZE values of head table TAB
1631  entries. Return the number of values set up in array VALUES. */
1632 static inline st_index_t
1633 st_general_values(st_table *tab, st_data_t *values, st_index_t size)
1634 {
1635  st_index_t i, bound;
1636  st_data_t *values_start, *values_end;
1637  st_table_entry *curr_entry_ptr, *entries = tab->entries;
1638 
1639  values_start = values;
1640  values_end = values + size;
1641  bound = tab->entries_bound;
1642  for (i = tab->entries_start; i < bound; i++) {
1643  if (values == values_end)
1644  break;
1645  curr_entry_ptr = &entries[i];
1646  if (! DELETED_ENTRY_P(curr_entry_ptr))
1647  *values++ = curr_entry_ptr->record;
1648  }
1649 
1650  return values - values_start;
1651 }
1652 
1653 st_index_t
1654 st_values(st_table *tab, st_data_t *values, st_index_t size)
1655 {
1656  return st_general_values(tab, values, size);
1657 }
1658 
1659 /* See comments for function st_delete_safe. */
1660 st_index_t
1661 st_values_check(st_table *tab, st_data_t *values, st_index_t size,
1662  st_data_t never ATTRIBUTE_UNUSED)
1663 {
1664  return st_general_values(tab, values, size);
1665 }
1666 
1667 #define FNV1_32A_INIT 0x811c9dc5
1668 
1669 /*
1670  * 32 bit magic FNV-1a prime
1671  */
1672 #define FNV_32_PRIME 0x01000193
1673 
1674 #ifndef UNALIGNED_WORD_ACCESS
1675 # if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
1676  defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || \
1677  defined(__powerpc64__) || defined(__aarch64__) || \
1678  defined(__mc68020__)
1679 # define UNALIGNED_WORD_ACCESS 1
1680 # endif
1681 #endif
1682 #ifndef UNALIGNED_WORD_ACCESS
1683 # define UNALIGNED_WORD_ACCESS 0
1684 #endif
1685 
1686 /* This hash function is quite simplified MurmurHash3
1687  * Simplification is legal, cause most of magic still happens in finalizator.
1688  * And finalizator is almost the same as in MurmurHash3 */
1689 #define BIG_CONSTANT(x,y) ((st_index_t)(x)<<32|(st_index_t)(y))
1690 #define ROTL(x,n) ((x)<<(n)|(x)>>(SIZEOF_ST_INDEX_T*CHAR_BIT-(n)))
1691 
1692 #if ST_INDEX_BITS <= 32
1693 #define C1 (st_index_t)0xcc9e2d51
1694 #define C2 (st_index_t)0x1b873593
1695 #else
1696 #define C1 BIG_CONSTANT(0x87c37b91,0x114253d5);
1697 #define C2 BIG_CONSTANT(0x4cf5ad43,0x2745937f);
1698 #endif
1699 NO_SANITIZE("unsigned-integer-overflow", static inline st_index_t murmur_step(st_index_t h, st_index_t k));
1700 NO_SANITIZE("unsigned-integer-overflow", static inline st_index_t murmur_finish(st_index_t h));
1701 NO_SANITIZE("unsigned-integer-overflow", extern st_index_t st_hash(const void *ptr, size_t len, st_index_t h));
1702 
1703 static inline st_index_t
1704 murmur_step(st_index_t h, st_index_t k)
1705 {
1706 #if ST_INDEX_BITS <= 32
1707 #define r1 (17)
1708 #define r2 (11)
1709 #else
1710 #define r1 (33)
1711 #define r2 (24)
1712 #endif
1713  k *= C1;
1714  h ^= ROTL(k, r1);
1715  h *= C2;
1716  h = ROTL(h, r2);
1717  return h;
1718 }
1719 #undef r1
1720 #undef r2
1721 
1722 static inline st_index_t
1723 murmur_finish(st_index_t h)
1724 {
1725 #if ST_INDEX_BITS <= 32
1726 #define r1 (16)
1727 #define r2 (13)
1728 #define r3 (16)
1729  const st_index_t c1 = 0x85ebca6b;
1730  const st_index_t c2 = 0xc2b2ae35;
1731 #else
1732 /* values are taken from Mix13 on http://zimbry.blogspot.ru/2011/09/better-bit-mixing-improving-on.html */
1733 #define r1 (30)
1734 #define r2 (27)
1735 #define r3 (31)
1736  const st_index_t c1 = BIG_CONSTANT(0xbf58476d,0x1ce4e5b9);
1737  const st_index_t c2 = BIG_CONSTANT(0x94d049bb,0x133111eb);
1738 #endif
1739 #if ST_INDEX_BITS > 64
1740  h ^= h >> 64;
1741  h *= c2;
1742  h ^= h >> 65;
1743 #endif
1744  h ^= h >> r1;
1745  h *= c1;
1746  h ^= h >> r2;
1747  h *= c2;
1748  h ^= h >> r3;
1749  return h;
1750 }
1751 #undef r1
1752 #undef r2
1753 #undef r3
1754 
1755 st_index_t
1756 st_hash(const void *ptr, size_t len, st_index_t h)
1757 {
1758  const char *data = ptr;
1759  st_index_t t = 0;
1760  size_t l = len;
1761 
1762 #define data_at(n) (st_index_t)((unsigned char)data[(n)])
1763 #define UNALIGNED_ADD_4 UNALIGNED_ADD(2); UNALIGNED_ADD(1); UNALIGNED_ADD(0)
1764 #if SIZEOF_ST_INDEX_T > 4
1765 #define UNALIGNED_ADD_8 UNALIGNED_ADD(6); UNALIGNED_ADD(5); UNALIGNED_ADD(4); UNALIGNED_ADD(3); UNALIGNED_ADD_4
1766 #if SIZEOF_ST_INDEX_T > 8
1767 #define UNALIGNED_ADD_16 UNALIGNED_ADD(14); UNALIGNED_ADD(13); UNALIGNED_ADD(12); UNALIGNED_ADD(11); \
1768  UNALIGNED_ADD(10); UNALIGNED_ADD(9); UNALIGNED_ADD(8); UNALIGNED_ADD(7); UNALIGNED_ADD_8
1769 #define UNALIGNED_ADD_ALL UNALIGNED_ADD_16
1770 #endif
1771 #define UNALIGNED_ADD_ALL UNALIGNED_ADD_8
1772 #else
1773 #define UNALIGNED_ADD_ALL UNALIGNED_ADD_4
1774 #endif
1775 #undef SKIP_TAIL
1776  if (len >= sizeof(st_index_t)) {
1777 #if !UNALIGNED_WORD_ACCESS
1778  int align = (int)((st_data_t)data % sizeof(st_index_t));
1779  if (align) {
1780  st_index_t d = 0;
1781  int sl, sr, pack;
1782 
1783  switch (align) {
1784 #ifdef WORDS_BIGENDIAN
1785 # define UNALIGNED_ADD(n) case SIZEOF_ST_INDEX_T - (n) - 1: \
1786  t |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 2)
1787 #else
1788 # define UNALIGNED_ADD(n) case SIZEOF_ST_INDEX_T - (n) - 1: \
1789  t |= data_at(n) << CHAR_BIT*(n)
1790 #endif
1791  UNALIGNED_ADD_ALL;
1792 #undef UNALIGNED_ADD
1793  }
1794 
1795 #ifdef WORDS_BIGENDIAN
1796  t >>= (CHAR_BIT * align) - CHAR_BIT;
1797 #else
1798  t <<= (CHAR_BIT * align);
1799 #endif
1800 
1801  data += sizeof(st_index_t)-align;
1802  len -= sizeof(st_index_t)-align;
1803 
1804  sl = CHAR_BIT * (SIZEOF_ST_INDEX_T-align);
1805  sr = CHAR_BIT * align;
1806 
1807  while (len >= sizeof(st_index_t)) {
1808  d = *(st_index_t *)data;
1809 #ifdef WORDS_BIGENDIAN
1810  t = (t << sr) | (d >> sl);
1811 #else
1812  t = (t >> sr) | (d << sl);
1813 #endif
1814  h = murmur_step(h, t);
1815  t = d;
1816  data += sizeof(st_index_t);
1817  len -= sizeof(st_index_t);
1818  }
1819 
1820  pack = len < (size_t)align ? (int)len : align;
1821  d = 0;
1822  switch (pack) {
1823 #ifdef WORDS_BIGENDIAN
1824 # define UNALIGNED_ADD(n) case (n) + 1: \
1825  d |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 1)
1826 #else
1827 # define UNALIGNED_ADD(n) case (n) + 1: \
1828  d |= data_at(n) << CHAR_BIT*(n)
1829 #endif
1830  UNALIGNED_ADD_ALL;
1831 #undef UNALIGNED_ADD
1832  }
1833 #ifdef WORDS_BIGENDIAN
1834  t = (t << sr) | (d >> sl);
1835 #else
1836  t = (t >> sr) | (d << sl);
1837 #endif
1838 
1839  if (len < (size_t)align) goto skip_tail;
1840 # define SKIP_TAIL 1
1841  h = murmur_step(h, t);
1842  data += pack;
1843  len -= pack;
1844  }
1845  else
1846 #endif
1847 #ifdef HAVE_BUILTIN___BUILTIN_ASSUME_ALIGNED
1848 #define aligned_data __builtin_assume_aligned(data, sizeof(st_index_t))
1849 #else
1850 #define aligned_data data
1851 #endif
1852  {
1853  do {
1854  h = murmur_step(h, *(st_index_t *)aligned_data);
1855  data += sizeof(st_index_t);
1856  len -= sizeof(st_index_t);
1857  } while (len >= sizeof(st_index_t));
1858  }
1859  }
1860 
1861  t = 0;
1862  switch (len) {
1863 #if UNALIGNED_WORD_ACCESS && SIZEOF_ST_INDEX_T <= 8 && CHAR_BIT == 8
1864  /* in this case byteorder doesn't really matter */
1865 #if SIZEOF_ST_INDEX_T > 4
1866  case 7: t |= data_at(6) << 48;
1867  case 6: t |= data_at(5) << 40;
1868  case 5: t |= data_at(4) << 32;
1869  case 4:
1870  t |= (st_index_t)*(uint32_t*)aligned_data;
1871  goto skip_tail;
1872 # define SKIP_TAIL 1
1873 #endif
1874  case 3: t |= data_at(2) << 16;
1875  case 2: t |= data_at(1) << 8;
1876  case 1: t |= data_at(0);
1877 #else
1878 #ifdef WORDS_BIGENDIAN
1879 # define UNALIGNED_ADD(n) case (n) + 1: \
1880  t |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 1)
1881 #else
1882 # define UNALIGNED_ADD(n) case (n) + 1: \
1883  t |= data_at(n) << CHAR_BIT*(n)
1884 #endif
1885  UNALIGNED_ADD_ALL;
1886 #undef UNALIGNED_ADD
1887 #endif
1888 #ifdef SKIP_TAIL
1889  skip_tail:
1890 #endif
1891  h ^= t; h -= ROTL(t, 7);
1892  h *= C2;
1893  }
1894  h ^= l;
1895 #undef aligned_data
1896 
1897  return murmur_finish(h);
1898 }
1899 
1900 st_index_t
1901 st_hash_uint32(st_index_t h, uint32_t i)
1902 {
1903  return murmur_step(h, i);
1904 }
1905 
1906 NO_SANITIZE("unsigned-integer-overflow", extern st_index_t st_hash_uint(st_index_t h, st_index_t i));
1907 st_index_t
1908 st_hash_uint(st_index_t h, st_index_t i)
1909 {
1910  i += h;
1911 /* no matter if it is BigEndian or LittleEndian,
1912  * we hash just integers */
1913 #if SIZEOF_ST_INDEX_T*CHAR_BIT > 8*8
1914  h = murmur_step(h, i >> 8*8);
1915 #endif
1916  h = murmur_step(h, i);
1917  return h;
1918 }
1919 
1920 st_index_t
1921 st_hash_end(st_index_t h)
1922 {
1923  h = murmur_finish(h);
1924  return h;
1925 }
1926 
1927 #undef st_hash_start
1928 st_index_t
1929 rb_st_hash_start(st_index_t h)
1930 {
1931  return h;
1932 }
1933 
1934 static st_index_t
1935 strhash(st_data_t arg)
1936 {
1937  register const char *string = (const char *)arg;
1938  return st_hash(string, strlen(string), FNV1_32A_INIT);
1939 }
1940 
1941 int
1942 st_locale_insensitive_strcasecmp(const char *s1, const char *s2)
1943 {
1944  char c1, c2;
1945 
1946  while (1) {
1947  c1 = *s1++;
1948  c2 = *s2++;
1949  if (c1 == '\0' || c2 == '\0') {
1950  if (c1 != '\0') return 1;
1951  if (c2 != '\0') return -1;
1952  return 0;
1953  }
1954  if (('A' <= c1) && (c1 <= 'Z')) c1 += 'a' - 'A';
1955  if (('A' <= c2) && (c2 <= 'Z')) c2 += 'a' - 'A';
1956  if (c1 != c2) {
1957  if (c1 > c2)
1958  return 1;
1959  else
1960  return -1;
1961  }
1962  }
1963 }
1964 
1965 int
1966 st_locale_insensitive_strncasecmp(const char *s1, const char *s2, size_t n)
1967 {
1968  char c1, c2;
1969  size_t i;
1970 
1971  for (i = 0; i < n; i++) {
1972  c1 = *s1++;
1973  c2 = *s2++;
1974  if (c1 == '\0' || c2 == '\0') {
1975  if (c1 != '\0') return 1;
1976  if (c2 != '\0') return -1;
1977  return 0;
1978  }
1979  if (('A' <= c1) && (c1 <= 'Z')) c1 += 'a' - 'A';
1980  if (('A' <= c2) && (c2 <= 'Z')) c2 += 'a' - 'A';
1981  if (c1 != c2) {
1982  if (c1 > c2)
1983  return 1;
1984  else
1985  return -1;
1986  }
1987  }
1988  return 0;
1989 }
1990 
1991 static int
1992 st_strcmp(st_data_t lhs, st_data_t rhs)
1993 {
1994  const char *s1 = (char *)lhs;
1995  const char *s2 = (char *)rhs;
1996  return strcmp(s1, s2);
1997 }
1998 
1999 static int
2000 st_locale_insensitive_strcasecmp_i(st_data_t lhs, st_data_t rhs)
2001 {
2002  const char *s1 = (char *)lhs;
2003  const char *s2 = (char *)rhs;
2004  return st_locale_insensitive_strcasecmp(s1, s2);
2005 }
2006 
2007 NO_SANITIZE("unsigned-integer-overflow", PUREFUNC(static st_index_t strcasehash(st_data_t)));
2008 static st_index_t
2009 strcasehash(st_data_t arg)
2010 {
2011  register const char *string = (const char *)arg;
2012  register st_index_t hval = FNV1_32A_INIT;
2013 
2014  /*
2015  * FNV-1a hash each octet in the buffer
2016  */
2017  while (*string) {
2018  unsigned int c = (unsigned char)*string++;
2019  if ((unsigned int)(c - 'A') <= ('Z' - 'A')) c += 'a' - 'A';
2020  hval ^= c;
2021 
2022  /* multiply by the 32 bit FNV magic prime mod 2^32 */
2023  hval *= FNV_32_PRIME;
2024  }
2025  return hval;
2026 }
2027 
2028 int
2029 st_numcmp(st_data_t x, st_data_t y)
2030 {
2031  return x != y;
2032 }
2033 
2034 st_index_t
2035 st_numhash(st_data_t n)
2036 {
2037  enum {s1 = 11, s2 = 3};
2038  return (st_index_t)((n>>s1|(n<<s2)) ^ (n>>s2));
2039 }
2040 
2041 /* Expand TAB to be suitable for holding SIZ entries in total.
2042  Pre-existing entries remain not deleted inside of TAB, but its bins
2043  are cleared to expect future reconstruction. See rehash below. */
2044 static void
2045 st_expand_table(st_table *tab, st_index_t siz)
2046 {
2047  st_table *tmp;
2048  st_index_t n;
2049 
2050  if (siz <= get_allocated_entries(tab))
2051  return; /* enough room already */
2052 
2053  tmp = st_init_table_with_size(tab->type, siz);
2054  n = get_allocated_entries(tab);
2055  MEMCPY(tmp->entries, tab->entries, st_table_entry, n);
2056  free(tab->entries);
2057  if (tab->bins != NULL)
2058  free(tab->bins);
2059  if (tmp->bins != NULL)
2060  free(tmp->bins);
2061  tab->entry_power = tmp->entry_power;
2062  tab->bin_power = tmp->bin_power;
2063  tab->size_ind = tmp->size_ind;
2064  tab->entries = tmp->entries;
2065  tab->bins = NULL;
2066  tab->rebuilds_num++;
2067  free(tmp);
2068 }
2069 
2070 /* Rehash using linear search. Return TRUE if we found that the table
2071  was rebuilt. */
2072 static int
2073 st_rehash_linear(st_table *tab)
2074 {
2075  int eq_p, rebuilt_p;
2076  st_index_t i, j;
2077  st_table_entry *p, *q;
2078  if (tab->bins) {
2079  free(tab->bins);
2080  tab->bins = NULL;
2081  }
2082  for (i = tab->entries_start; i < tab->entries_bound; i++) {
2083  p = &tab->entries[i];
2084  if (DELETED_ENTRY_P(p))
2085  continue;
2086  for (j = i + 1; j < tab->entries_bound; j++) {
2087  q = &tab->entries[j];
2088  if (DELETED_ENTRY_P(q))
2089  continue;
2090  DO_PTR_EQUAL_CHECK(tab, p, q->hash, q->key, eq_p, rebuilt_p);
2091  if (EXPECT(rebuilt_p, 0))
2092  return TRUE;
2093  if (eq_p) {
2094  *p = *q;
2095  MARK_ENTRY_DELETED(q);
2096  tab->num_entries--;
2097  update_range_for_deleted(tab, j);
2098  }
2099  }
2100  }
2101  return FALSE;
2102 }
2103 
2104 /* Rehash using index. Return TRUE if we found that the table was
2105  rebuilt. */
2106 static int
2107 st_rehash_indexed(st_table *tab)
2108 {
2109  int eq_p, rebuilt_p;
2110  st_index_t i;
2111  st_index_t const n = bins_size(tab);
2112  unsigned int const size_ind = get_size_ind(tab);
2113  st_index_t *bins = realloc(tab->bins, n);
2114  tab->bins = bins;
2115  initialize_bins(tab);
2116  for (i = tab->entries_start; i < tab->entries_bound; i++) {
2117  st_table_entry *p = &tab->entries[i];
2118  st_index_t ind;
2119 #ifdef QUADRATIC_PROBE
2120  st_index_t d = 1;
2121 #else
2122  st_index_t peterb = p->hash;
2123 #endif
2124 
2125  if (DELETED_ENTRY_P(p))
2126  continue;
2127 
2128  ind = hash_bin(p->hash, tab);
2129  for (;;) {
2130  st_index_t bin = get_bin(bins, size_ind, ind);
2131  if (EMPTY_OR_DELETED_BIN_P(bin)) {
2132  /* ok, new room */
2133  set_bin(bins, size_ind, ind, i + ENTRY_BASE);
2134  break;
2135  }
2136  else {
2137  st_table_entry *q = &tab->entries[bin - ENTRY_BASE];
2138  DO_PTR_EQUAL_CHECK(tab, q, p->hash, p->key, eq_p, rebuilt_p);
2139  if (EXPECT(rebuilt_p, 0))
2140  return TRUE;
2141  if (eq_p) {
2142  /* duplicated key; delete it */
2143  q->record = p->record;
2144  MARK_ENTRY_DELETED(p);
2145  tab->num_entries--;
2146  update_range_for_deleted(tab, bin);
2147  break;
2148  }
2149  else {
2150  /* hash collision; skip it */
2151 #ifdef QUADRATIC_PROBE
2152  ind = hash_bin(ind + d, tab);
2153  d++;
2154 #else
2155  ind = secondary_hash(ind, tab, &peterb);
2156 #endif
2157  }
2158  }
2159  }
2160  }
2161  return FALSE;
2162 }
2163 
2164 /* Reconstruct TAB's bins according to TAB's entries. This function
2165  permits conflicting keys inside of entries. No errors are reported
2166  then. All but one of them are discarded silently. */
2167 static void
2168 st_rehash(st_table *tab)
2169 {
2170  int rebuilt_p;
2171 
2172  do {
2173  if (tab->bin_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
2174  rebuilt_p = st_rehash_linear(tab);
2175  else
2176  rebuilt_p = st_rehash_indexed(tab);
2177  } while (rebuilt_p);
2178 }
2179 
2180 #ifdef RUBY
2181 static st_data_t
2182 st_stringify(VALUE key)
2183 {
2184  return (rb_obj_class(key) == rb_cString && !RB_OBJ_FROZEN(key)) ?
2185  rb_hash_key_str(key) : key;
2186 }
2187 
2188 static void
2189 st_insert_single(st_table *tab, VALUE hash, VALUE key, VALUE val)
2190 {
2191  st_data_t k = st_stringify(key);
2192  st_table_entry e;
2193  e.hash = do_hash(k, tab);
2194  e.key = k;
2195  e.record = val;
2196 
2197  tab->entries[tab->entries_bound++] = e;
2198  tab->num_entries++;
2199  RB_OBJ_WRITTEN(hash, Qundef, k);
2200  RB_OBJ_WRITTEN(hash, Qundef, val);
2201 }
2202 
2203 static void
2204 st_insert_linear(st_table *tab, long argc, const VALUE *argv, VALUE hash)
2205 {
2206  long i;
2207 
2208  for (i = 0; i < argc; /* */) {
2209  st_data_t k = st_stringify(argv[i++]);
2210  st_data_t v = argv[i++];
2211  st_insert(tab, k, v);
2212  RB_OBJ_WRITTEN(hash, Qundef, k);
2213  RB_OBJ_WRITTEN(hash, Qundef, v);
2214  }
2215 }
2216 
2217 static void
2218 st_insert_generic(st_table *tab, long argc, const VALUE *argv, VALUE hash)
2219 {
2220  long i;
2221 
2222  /* push elems */
2223  for (i = 0; i < argc; /* */) {
2224  VALUE key = argv[i++];
2225  VALUE val = argv[i++];
2226  st_insert_single(tab, hash, key, val);
2227  }
2228 
2229  /* reindex */
2230  st_rehash(tab);
2231 }
2232 
2233 /* Mimics ruby's { foo => bar } syntax. This function is subpart
2234  of rb_hash_bulk_insert. */
2235 void
2236 rb_hash_bulk_insert_into_st_table(long argc, const VALUE *argv, VALUE hash)
2237 {
2238  st_index_t n, size = argc / 2;
2239  st_table *tab = RHASH_ST_TABLE(hash);
2240 
2241  tab = RHASH_TBL_RAW(hash);
2242  n = tab->entries_bound + size;
2243  st_expand_table(tab, n);
2244  if (UNLIKELY(tab->num_entries))
2245  st_insert_generic(tab, argc, argv, hash);
2246  else if (argc <= 2)
2247  st_insert_single(tab, hash, argv[0], argv[1]);
2248  else if (tab->bin_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
2249  st_insert_linear(tab, argc, argv, hash);
2250  else
2251  st_insert_generic(tab, argc, argv, hash);
2252 }
2253 
2254 // to iterate iv_index_tbl
2255 st_data_t
2256 rb_st_nth_key(st_table *tab, st_index_t index)
2257 {
2258  if (LIKELY(tab->entries_start == 0 &&
2259  tab->num_entries == tab->entries_bound &&
2260  index < tab->num_entries)) {
2261  return tab->entries[index].key;
2262  }
2263  else {
2264  rb_bug("unreachable");
2265  }
2266 }
2267 
2268 #endif
int st_locale_insensitive_strcasecmp(const char *s1, const char *s2)
Our own locale-insensitive version of strcasecmp(3).
Definition: st.c:1942
int st_locale_insensitive_strncasecmp(const char *s1, const char *s2, size_t n)
Our own locale-insensitive version of strcnasecmp(3).
Definition: st.c:1966
static bool RB_OBJ_FROZEN(VALUE obj)
Checks if an object is frozen.
Definition: fl_type.h:927
#define Qundef
Old name of RUBY_Qundef.
void rb_raise(VALUE exc, const char *fmt,...)
Exception entry point.
Definition: error.c:3025
void rb_bug(const char *fmt,...)
Interpreter panic switch.
Definition: error.c:802
VALUE rb_eRuntimeError
RuntimeError exception.
Definition: error.c:1097
VALUE rb_obj_class(VALUE obj)
Queries the class of an object.
Definition: object.c:188
VALUE rb_cString
String class.
Definition: string.c:80
#define RB_OBJ_WRITTEN(old, oldv, young)
Identical to RB_OBJ_WRITE(), except it doesn't write any values, but only a WB declaration.
Definition: rgengc.h:232
#define MEMCPY(p1, p2, type, n)
Handy macro to call memcpy.
Definition: memory.h:366
VALUE type(ANYARGS)
ANYARGS-ed function type.
Definition: cxxanyargs.hpp:56
int st_foreach(st_table *q, int_type *w, st_data_t e)
Iteration over the given table.
Definition: cxxanyargs.hpp:432
int st_foreach_check(st_table *q, int_type *w, st_data_t e, st_data_t)
Iteration over the given table.
Definition: cxxanyargs.hpp:450
#define _(args)
This was a transition path from K&R to ANSI.
Definition: stdarg.h:35
Definition: hash.c:912
Definition: st.c:133
Definition: st.h:79
uintptr_t VALUE
Type that represents a Ruby object.
Definition: value.h:40