107 #include "internal.h"
108 #include "internal/bits.h"
109 #include "internal/hash.h"
110 #include "internal/sanitizers.h"
121 #define PREFETCH(addr, write_p) __builtin_prefetch(addr, write_p)
122 #define EXPECT(expr, val) __builtin_expect(expr, val)
123 #define ATTRIBUTE_UNUSED __attribute__((unused))
125 #define PREFETCH(addr, write_p)
126 #define EXPECT(expr, val) (expr)
127 #define ATTRIBUTE_UNUSED
131 typedef st_index_t st_hash_t;
139 #define type_numhash st_hashtype_num
145 static int st_strcmp(st_data_t, st_data_t);
146 static st_index_t strhash(st_data_t);
152 static int st_locale_insensitive_strcasecmp_i(st_data_t lhs, st_data_t rhs);
153 static st_index_t strcasehash(st_data_t);
155 st_locale_insensitive_strcasecmp_i,
162 #define ST_INIT_VAL 0xafafafafafafafaf
163 #define ST_INIT_VAL_BYTE 0xafa
170 #define malloc ruby_xmalloc
171 #define calloc ruby_xcalloc
172 #define realloc ruby_xrealloc
173 #define free ruby_xfree
176 #define EQUAL(tab,x,y) ((x) == (y) || (*(tab)->type->compare)((x),(y)) == 0)
177 #define PTR_EQUAL(tab, ptr, hash_val, key_) \
178 ((ptr)->hash == (hash_val) && EQUAL((tab), (key_), (ptr)->key))
182 #define DO_PTR_EQUAL_CHECK(tab, ptr, hash_val, key, res, rebuilt_p) \
184 unsigned int _old_rebuilds_num = (tab)->rebuilds_num; \
185 res = PTR_EQUAL(tab, ptr, hash_val, key); \
186 rebuilt_p = _old_rebuilds_num != (tab)->rebuilds_num; \
192 unsigned char entry_power;
196 unsigned char bin_power;
198 unsigned char size_ind;
201 st_index_t bins_words;
205 #if SIZEOF_ST_INDEX_T == 8
206 #define MAX_POWER2 62
224 {16, 17, 2, 0x10000},
225 {17, 18, 2, 0x20000},
226 {18, 19, 2, 0x40000},
227 {19, 20, 2, 0x80000},
228 {20, 21, 2, 0x100000},
229 {21, 22, 2, 0x200000},
230 {22, 23, 2, 0x400000},
231 {23, 24, 2, 0x800000},
232 {24, 25, 2, 0x1000000},
233 {25, 26, 2, 0x2000000},
234 {26, 27, 2, 0x4000000},
235 {27, 28, 2, 0x8000000},
236 {28, 29, 2, 0x10000000},
237 {29, 30, 2, 0x20000000},
238 {30, 31, 2, 0x40000000},
239 {31, 32, 2, 0x80000000},
240 {32, 33, 3, 0x200000000},
241 {33, 34, 3, 0x400000000},
242 {34, 35, 3, 0x800000000},
243 {35, 36, 3, 0x1000000000},
244 {36, 37, 3, 0x2000000000},
245 {37, 38, 3, 0x4000000000},
246 {38, 39, 3, 0x8000000000},
247 {39, 40, 3, 0x10000000000},
248 {40, 41, 3, 0x20000000000},
249 {41, 42, 3, 0x40000000000},
250 {42, 43, 3, 0x80000000000},
251 {43, 44, 3, 0x100000000000},
252 {44, 45, 3, 0x200000000000},
253 {45, 46, 3, 0x400000000000},
254 {46, 47, 3, 0x800000000000},
255 {47, 48, 3, 0x1000000000000},
256 {48, 49, 3, 0x2000000000000},
257 {49, 50, 3, 0x4000000000000},
258 {50, 51, 3, 0x8000000000000},
259 {51, 52, 3, 0x10000000000000},
260 {52, 53, 3, 0x20000000000000},
261 {53, 54, 3, 0x40000000000000},
262 {54, 55, 3, 0x80000000000000},
263 {55, 56, 3, 0x100000000000000},
264 {56, 57, 3, 0x200000000000000},
265 {57, 58, 3, 0x400000000000000},
266 {58, 59, 3, 0x800000000000000},
267 {59, 60, 3, 0x1000000000000000},
268 {60, 61, 3, 0x2000000000000000},
269 {61, 62, 3, 0x4000000000000000},
270 {62, 63, 3, 0x8000000000000000},
274 #define MAX_POWER2 30
293 {16, 17, 2, 0x20000},
294 {17, 18, 2, 0x40000},
295 {18, 19, 2, 0x80000},
296 {19, 20, 2, 0x100000},
297 {20, 21, 2, 0x200000},
298 {21, 22, 2, 0x400000},
299 {22, 23, 2, 0x800000},
300 {23, 24, 2, 0x1000000},
301 {24, 25, 2, 0x2000000},
302 {25, 26, 2, 0x4000000},
303 {26, 27, 2, 0x8000000},
304 {27, 28, 2, 0x10000000},
305 {28, 29, 2, 0x20000000},
306 {29, 30, 2, 0x40000000},
307 {30, 31, 2, 0x80000000},
313 #define RESERVED_HASH_VAL (~(st_hash_t) 0)
314 #define RESERVED_HASH_SUBSTITUTION_VAL ((st_hash_t) 0)
317 static inline st_hash_t
318 do_hash(st_data_t key,
st_table *tab)
320 st_hash_t hash = (st_hash_t)(tab->type->hash)(key);
324 return hash == RESERVED_HASH_VAL ? RESERVED_HASH_SUBSTITUTION_VAL : hash;
328 #define MINIMAL_POWER2 2
330 #if MINIMAL_POWER2 < 2
331 #error "MINIMAL_POWER2 should be >= 2"
336 #define MAX_POWER2_FOR_TABLES_WITHOUT_BINS 4
340 get_power2(st_index_t size)
342 unsigned int n = ST_INDEX_BITS - nlz_intptr(size);
344 return n < MINIMAL_POWER2 ? MINIMAL_POWER2 : n;
355 static inline st_index_t
356 get_bin(st_index_t *bins,
int s, st_index_t n)
358 return (s == 0 ? ((
unsigned char *) bins)[n]
359 : s == 1 ? ((
unsigned short *) bins)[n]
360 : s == 2 ? ((
unsigned int *) bins)[n]
361 : ((st_index_t *) bins)[n]);
367 set_bin(st_index_t *bins,
int s, st_index_t n, st_index_t v)
369 if (s == 0) ((
unsigned char *) bins)[n] = (
unsigned char) v;
370 else if (s == 1) ((
unsigned short *) bins)[n] = (
unsigned short) v;
371 else if (s == 2) ((
unsigned int *) bins)[n] = (
unsigned int) v;
372 else ((st_index_t *) bins)[n] = v;
379 #define DELETED_BIN 1
385 #define MARK_BIN_EMPTY(tab, i) (set_bin((tab)->bins, get_size_ind(tab), i, EMPTY_BIN))
389 #define UNDEFINED_ENTRY_IND (~(st_index_t) 0)
390 #define UNDEFINED_BIN_IND (~(st_index_t) 0)
394 #define REBUILT_TABLE_ENTRY_IND (~(st_index_t) 1)
395 #define REBUILT_TABLE_BIN_IND (~(st_index_t) 1)
400 #define MARK_BIN_DELETED(tab, i) \
402 set_bin((tab)->bins, get_size_ind(tab), i, DELETED_BIN); \
407 #define EMPTY_BIN_P(b) ((b) == EMPTY_BIN)
408 #define DELETED_BIN_P(b) ((b) == DELETED_BIN)
409 #define EMPTY_OR_DELETED_BIN_P(b) ((b) <= DELETED_BIN)
413 #define IND_EMPTY_BIN_P(tab, i) (EMPTY_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
414 #define IND_DELETED_BIN_P(tab, i) (DELETED_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
415 #define IND_EMPTY_OR_DELETED_BIN_P(tab, i) (EMPTY_OR_DELETED_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
419 #define MARK_ENTRY_DELETED(e_ptr) ((e_ptr)->hash = RESERVED_HASH_VAL)
420 #define DELETED_ENTRY_P(e_ptr) ((e_ptr)->hash == RESERVED_HASH_VAL)
423 static inline unsigned int
426 return tab->size_ind;
430 static inline st_index_t
433 return ((st_index_t) 1)<<tab->bin_power;
437 static inline st_index_t
440 return get_bins_num(tab) - 1;
445 static inline st_index_t
446 hash_bin(st_hash_t hash_value,
st_table *tab)
448 return hash_value & bins_mask(tab);
452 static inline st_index_t
453 get_allocated_entries(
const st_table *tab)
455 return ((st_index_t) 1)<<tab->entry_power;
459 static inline st_index_t
462 return features[tab->entry_power].bins_words *
sizeof (st_index_t);
469 memset(tab->bins, 0, bins_size(tab));
476 tab->num_entries = 0;
477 tab->entries_start = tab->entries_bound = 0;
478 if (tab->bins != NULL)
479 initialize_bins(tab);
487 int all, total, num, str, strcase;
492 static int init_st = 0;
499 char fname[10+
sizeof(long)*3];
501 if (!collision.total)
return;
502 f = fopen((snprintf(fname,
sizeof(fname),
"/tmp/col%ld", (
long)getpid()), fname),
"w");
505 fprintf(f,
"collision: %d / %d (%6.2f)\n", collision.all, collision.total,
506 ((
double)collision.all / (collision.total)) * 100);
507 fprintf(f,
"num: %d, str: %d, strcase: %d\n", collision.num, collision.str, collision.strcase);
524 const char *e = getenv(
"ST_HASH_LOG");
525 if (!e || !*e) init_st = 1;
534 n = get_power2(size);
545 tab->entry_power = n;
546 tab->bin_power = features[n].bin_power;
547 tab->size_ind = features[n].size_ind;
548 if (n <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
551 tab->bins = (st_index_t *) malloc(bins_size(tab));
553 if (tab->bins == NULL) {
559 tab->entries = (
st_table_entry *) malloc(get_allocated_entries(tab)
562 if (tab->entries == NULL) {
568 tab->rebuilds_num = 0;
577 return st_init_table_with_size(
type, 0);
583 st_init_numtable(
void)
585 return st_init_table(&type_numhash);
590 st_init_numtable_with_size(st_index_t size)
592 return st_init_table_with_size(&type_numhash, size);
598 st_init_strtable(
void)
600 return st_init_table(&type_strhash);
605 st_init_strtable_with_size(st_index_t size)
607 return st_init_table_with_size(&type_strhash, size);
613 st_init_strcasetable(
void)
615 return st_init_table(&type_strcasehash);
621 st_init_strcasetable_with_size(st_index_t size)
623 return st_init_table_with_size(&type_strcasehash, size);
638 if (tab->bins != NULL)
649 + (tab->bins == NULL ? 0 : bins_size(tab))
654 find_table_entry_ind(
st_table *tab, st_hash_t hash_value, st_data_t key);
657 find_table_bin_ind(
st_table *tab, st_hash_t hash_value, st_data_t key);
660 find_table_bin_ind_direct(
st_table *table, st_hash_t hash_value, st_data_t key);
663 find_table_bin_ptr_and_reserve(
st_table *tab, st_hash_t *hash_value,
664 st_data_t key, st_index_t *bin_ind);
671 if (
type == &type_numhash) {
674 else if (
type == &type_strhash) {
677 else if (
type == &type_strcasehash) {
682 #define COLLISION (collision_check ? count_collision(tab->type) : (void)0)
683 #define FOUND_BIN (collision_check ? collision.total++ : (void)0)
684 #define collision_check 0
693 #define REBUILD_THRESHOLD 4
695 #if REBUILD_THRESHOLD < 2
696 #error "REBUILD_THRESHOLD should be >= 2"
707 unsigned int size_ind;
714 if ((2 * tab->num_entries <= get_allocated_entries(tab)
715 && REBUILD_THRESHOLD * tab->num_entries > get_allocated_entries(tab))
716 || tab->num_entries < (1 << MINIMAL_POWER2)) {
718 tab->num_entries = 0;
719 if (tab->bins != NULL)
720 initialize_bins(tab);
722 new_entries = tab->entries;
728 new_tab = st_init_table_with_size(tab->type,
729 2 * tab->num_entries - 1);
730 new_entries = new_tab->entries;
734 bins = new_tab->bins;
735 size_ind = get_size_ind(new_tab);
736 st_index_t bound = tab->entries_bound;
739 for (i = tab->entries_start; i < bound; i++) {
740 curr_entry_ptr = &entries[i];
741 PREFETCH(entries + i + 1, 0);
742 if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
744 if (&new_entries[ni] != curr_entry_ptr)
745 new_entries[ni] = *curr_entry_ptr;
746 if (EXPECT(bins != NULL, 1)) {
747 bin_ind = find_table_bin_ind_direct(new_tab, curr_entry_ptr->hash,
748 curr_entry_ptr->key);
749 set_bin(bins, size_ind, bin_ind, ni + ENTRY_BASE);
751 new_tab->num_entries++;
754 if (new_tab != tab) {
755 tab->entry_power = new_tab->entry_power;
756 tab->bin_power = new_tab->bin_power;
757 tab->size_ind = new_tab->size_ind;
758 if (tab->bins != NULL)
760 tab->bins = new_tab->bins;
762 tab->entries = new_tab->entries;
765 tab->entries_start = 0;
766 tab->entries_bound = tab->num_entries;
782 static inline st_index_t
783 secondary_hash(st_index_t ind,
st_table *tab, st_index_t *perterb)
786 ind = (ind << 2) + ind + *perterb + 1;
787 return hash_bin(ind, tab);
794 static inline st_index_t
795 find_entry(
st_table *tab, st_hash_t hash_value, st_data_t key)
801 bound = tab->entries_bound;
802 entries = tab->entries;
803 for (i = tab->entries_start; i < bound; i++) {
804 DO_PTR_EQUAL_CHECK(tab, &entries[i], hash_value, key, eq_p, rebuilt_p);
805 if (EXPECT(rebuilt_p, 0))
806 return REBUILT_TABLE_ENTRY_IND;
810 return UNDEFINED_ENTRY_IND;
822 find_table_entry_ind(
st_table *tab, st_hash_t hash_value, st_data_t key)
826 #ifdef QUADRATIC_PROBE
834 ind = hash_bin(hash_value, tab);
835 #ifdef QUADRATIC_PROBE
842 bin = get_bin(tab->bins, get_size_ind(tab), ind);
843 if (! EMPTY_OR_DELETED_BIN_P(bin)) {
844 DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
845 if (EXPECT(rebuilt_p, 0))
846 return REBUILT_TABLE_ENTRY_IND;
850 else if (EMPTY_BIN_P(bin))
851 return UNDEFINED_ENTRY_IND;
852 #ifdef QUADRATIC_PROBE
853 ind = hash_bin(ind + d, tab);
856 ind = secondary_hash(ind, tab, &peterb);
868 find_table_bin_ind(
st_table *tab, st_hash_t hash_value, st_data_t key)
872 #ifdef QUADRATIC_PROBE
880 ind = hash_bin(hash_value, tab);
881 #ifdef QUADRATIC_PROBE
888 bin = get_bin(tab->bins, get_size_ind(tab), ind);
889 if (! EMPTY_OR_DELETED_BIN_P(bin)) {
890 DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
891 if (EXPECT(rebuilt_p, 0))
892 return REBUILT_TABLE_BIN_IND;
896 else if (EMPTY_BIN_P(bin))
897 return UNDEFINED_BIN_IND;
898 #ifdef QUADRATIC_PROBE
899 ind = hash_bin(ind + d, tab);
902 ind = secondary_hash(ind, tab, &peterb);
913 find_table_bin_ind_direct(
st_table *tab, st_hash_t hash_value, st_data_t key)
916 #ifdef QUADRATIC_PROBE
923 ind = hash_bin(hash_value, tab);
924 #ifdef QUADRATIC_PROBE
931 bin = get_bin(tab->bins, get_size_ind(tab), ind);
932 if (EMPTY_OR_DELETED_BIN_P(bin))
934 #ifdef QUADRATIC_PROBE
935 ind = hash_bin(ind + d, tab);
938 ind = secondary_hash(ind, tab, &peterb);
954 find_table_bin_ptr_and_reserve(
st_table *tab, st_hash_t *hash_value,
955 st_data_t key, st_index_t *bin_ind)
959 st_hash_t curr_hash_value = *hash_value;
960 #ifdef QUADRATIC_PROBE
965 st_index_t entry_index;
966 st_index_t first_deleted_bin_ind;
969 ind = hash_bin(curr_hash_value, tab);
970 #ifdef QUADRATIC_PROBE
973 peterb = curr_hash_value;
976 first_deleted_bin_ind = UNDEFINED_BIN_IND;
977 entries = tab->entries;
979 entry_index = get_bin(tab->bins, get_size_ind(tab), ind);
980 if (EMPTY_BIN_P(entry_index)) {
982 entry_index = UNDEFINED_ENTRY_IND;
983 if (first_deleted_bin_ind != UNDEFINED_BIN_IND) {
985 ind = first_deleted_bin_ind;
986 MARK_BIN_EMPTY(tab, ind);
990 else if (! DELETED_BIN_P(entry_index)) {
991 DO_PTR_EQUAL_CHECK(tab, &entries[entry_index - ENTRY_BASE], curr_hash_value, key, eq_p, rebuilt_p);
992 if (EXPECT(rebuilt_p, 0))
993 return REBUILT_TABLE_ENTRY_IND;
997 else if (first_deleted_bin_ind == UNDEFINED_BIN_IND)
998 first_deleted_bin_ind = ind;
999 #ifdef QUADRATIC_PROBE
1000 ind = hash_bin(ind + d, tab);
1003 ind = secondary_hash(ind, tab, &peterb);
1014 st_lookup(
st_table *tab, st_data_t key, st_data_t *value)
1017 st_hash_t hash = do_hash(key, tab);
1020 if (tab->bins == NULL) {
1021 bin = find_entry(tab, hash, key);
1022 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1024 if (bin == UNDEFINED_ENTRY_IND)
1028 bin = find_table_entry_ind(tab, hash, key);
1029 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1031 if (bin == UNDEFINED_ENTRY_IND)
1036 *value = tab->entries[bin].record;
1043 st_get_key(
st_table *tab, st_data_t key, st_data_t *result)
1046 st_hash_t hash = do_hash(key, tab);
1049 if (tab->bins == NULL) {
1050 bin = find_entry(tab, hash, key);
1051 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1053 if (bin == UNDEFINED_ENTRY_IND)
1057 bin = find_table_entry_ind(tab, hash, key);
1058 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1060 if (bin == UNDEFINED_ENTRY_IND)
1065 *result = tab->entries[bin].key;
1071 rebuild_table_if_necessary (
st_table *tab)
1073 st_index_t bound = tab->entries_bound;
1075 if (bound == get_allocated_entries(tab))
1083 st_insert(
st_table *tab, st_data_t key, st_data_t value)
1088 st_hash_t hash_value;
1092 hash_value = do_hash(key, tab);
1094 rebuild_table_if_necessary(tab);
1095 if (tab->bins == NULL) {
1096 bin = find_entry(tab, hash_value, key);
1097 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1099 new_p = bin == UNDEFINED_ENTRY_IND;
1102 bin_ind = UNDEFINED_BIN_IND;
1105 bin = find_table_bin_ptr_and_reserve(tab, &hash_value,
1107 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1109 new_p = bin == UNDEFINED_ENTRY_IND;
1113 ind = tab->entries_bound++;
1114 entry = &tab->entries[ind];
1115 entry->hash = hash_value;
1117 entry->record = value;
1118 if (bin_ind != UNDEFINED_BIN_IND)
1119 set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1122 tab->entries[bin].record = value;
1129 st_add_direct_with_hash(
st_table *tab,
1130 st_data_t key, st_data_t value, st_hash_t hash)
1136 rebuild_table_if_necessary(tab);
1137 ind = tab->entries_bound++;
1138 entry = &tab->entries[ind];
1141 entry->record = value;
1143 if (tab->bins != NULL) {
1144 bin_ind = find_table_bin_ind_direct(tab, hash, key);
1145 set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1152 st_add_direct(
st_table *tab, st_data_t key, st_data_t value)
1154 st_hash_t hash_value;
1156 hash_value = do_hash(key, tab);
1157 st_add_direct_with_hash(tab, key, value, hash_value);
1164 st_insert2(
st_table *tab, st_data_t key, st_data_t value,
1165 st_data_t (*func)(st_data_t))
1170 st_hash_t hash_value;
1174 hash_value = do_hash(key, tab);
1176 rebuild_table_if_necessary (tab);
1177 if (tab->bins == NULL) {
1178 bin = find_entry(tab, hash_value, key);
1179 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1181 new_p = bin == UNDEFINED_ENTRY_IND;
1184 bin_ind = UNDEFINED_BIN_IND;
1187 bin = find_table_bin_ptr_and_reserve(tab, &hash_value,
1189 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1191 new_p = bin == UNDEFINED_ENTRY_IND;
1196 ind = tab->entries_bound++;
1197 entry = &tab->entries[ind];
1198 entry->hash = hash_value;
1200 entry->record = value;
1201 if (bin_ind != UNDEFINED_BIN_IND)
1202 set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1205 tab->entries[bin].record = value;
1217 if (new_tab == NULL)
1220 *new_tab = *old_tab;
1221 if (old_tab->bins == NULL)
1222 new_tab->bins = NULL;
1224 new_tab->bins = (st_index_t *) malloc(bins_size(old_tab));
1226 if (new_tab->bins == NULL) {
1232 new_tab->entries = (
st_table_entry *) malloc(get_allocated_entries(old_tab)
1235 if (new_tab->entries == NULL) {
1236 st_free_table(new_tab);
1241 get_allocated_entries(old_tab));
1242 if (old_tab->bins != NULL)
1243 MEMCPY(new_tab->bins, old_tab->bins,
char, bins_size(old_tab));
1250 update_range_for_deleted(
st_table *tab, st_index_t n)
1254 if (tab->entries_start == n) {
1255 st_index_t start = n + 1;
1256 st_index_t bound = tab->entries_bound;
1258 while (start < bound && DELETED_ENTRY_P(&entries[start])) start++;
1259 tab->entries_start = start;
1268 st_general_delete(
st_table *tab, st_data_t *key, st_data_t *value)
1275 hash = do_hash(*key, tab);
1277 if (tab->bins == NULL) {
1278 bin = find_entry(tab, hash, *key);
1279 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1281 if (bin == UNDEFINED_ENTRY_IND) {
1282 if (value != 0) *value = 0;
1287 bin_ind = find_table_bin_ind(tab, hash, *key);
1288 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
1290 if (bin_ind == UNDEFINED_BIN_IND) {
1291 if (value != 0) *value = 0;
1294 bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
1295 MARK_BIN_DELETED(tab, bin_ind);
1297 entry = &tab->entries[bin];
1299 if (value != 0) *value = entry->record;
1300 MARK_ENTRY_DELETED(entry);
1302 update_range_for_deleted(tab, bin);
1307 st_delete(
st_table *tab, st_data_t *key, st_data_t *value)
1309 return st_general_delete(tab, key, value);
1318 st_delete_safe(
st_table *tab, st_data_t *key, st_data_t *value,
1319 st_data_t never ATTRIBUTE_UNUSED)
1321 return st_general_delete(tab, key, value);
1329 st_shift(
st_table *tab, st_data_t *key, st_data_t *value)
1331 st_index_t i, bound;
1336 entries = tab->entries;
1337 bound = tab->entries_bound;
1338 for (i = tab->entries_start; i < bound; i++) {
1339 curr_entry_ptr = &entries[i];
1340 if (! DELETED_ENTRY_P(curr_entry_ptr)) {
1341 st_hash_t entry_hash = curr_entry_ptr->hash;
1342 st_data_t entry_key = curr_entry_ptr->key;
1344 if (value != 0) *value = curr_entry_ptr->record;
1347 if (tab->bins == NULL) {
1348 bin = find_entry(tab, entry_hash, entry_key);
1349 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0)) {
1350 entries = tab->entries;
1353 curr_entry_ptr = &entries[bin];
1356 bin_ind = find_table_bin_ind(tab, entry_hash, entry_key);
1357 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0)) {
1358 entries = tab->entries;
1361 curr_entry_ptr = &entries[get_bin(tab->bins, get_size_ind(tab), bin_ind)
1363 MARK_BIN_DELETED(tab, bin_ind);
1365 MARK_ENTRY_DELETED(curr_entry_ptr);
1367 update_range_for_deleted(tab, i);
1371 if (value != 0) *value = 0;
1377 st_cleanup_safe(
st_table *tab ATTRIBUTE_UNUSED,
1378 st_data_t never ATTRIBUTE_UNUSED)
1392 st_update(
st_table *tab, st_data_t key,
1393 st_update_callback_func *func, st_data_t arg)
1399 st_data_t value = 0, old_key;
1400 int retval, existing;
1401 st_hash_t hash = do_hash(key, tab);
1404 entries = tab->entries;
1405 if (tab->bins == NULL) {
1406 bin = find_entry(tab, hash, key);
1407 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1409 existing = bin != UNDEFINED_ENTRY_IND;
1410 entry = &entries[bin];
1411 bin_ind = UNDEFINED_BIN_IND;
1414 bin_ind = find_table_bin_ind(tab, hash, key);
1415 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
1417 existing = bin_ind != UNDEFINED_BIN_IND;
1419 bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
1420 entry = &entries[bin];
1425 value = entry->record;
1428 retval = (*func)(&key, &value, arg, existing);
1432 st_add_direct_with_hash(tab, key, value, hash);
1435 if (old_key != key) {
1438 entry->record = value;
1442 if (bin_ind != UNDEFINED_BIN_IND)
1443 MARK_BIN_DELETED(tab, bin_ind);
1444 MARK_ENTRY_DELETED(entry);
1446 update_range_for_deleted(tab, bin);
1462 st_general_foreach(
st_table *tab, st_foreach_check_callback_func *func, st_update_callback_func *replace, st_data_t arg,
1468 enum st_retval retval;
1469 st_index_t i, rebuilds_num;
1472 int error_p, packed_p = tab->bins == NULL;
1474 entries = tab->entries;
1477 for (i = tab->entries_start; i < tab->entries_bound; i++) {
1478 curr_entry_ptr = &entries[i];
1479 if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
1481 key = curr_entry_ptr->key;
1482 rebuilds_num = tab->rebuilds_num;
1483 hash = curr_entry_ptr->hash;
1484 retval = (*func)(key, curr_entry_ptr->record, arg, 0);
1486 if (retval == ST_REPLACE && replace) {
1488 value = curr_entry_ptr->record;
1489 retval = (*replace)(&key, &value, arg, TRUE);
1490 curr_entry_ptr->key = key;
1491 curr_entry_ptr->record = value;
1494 if (rebuilds_num != tab->rebuilds_num) {
1496 entries = tab->entries;
1497 packed_p = tab->bins == NULL;
1499 i = find_entry(tab, hash, key);
1500 if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
1502 error_p = i == UNDEFINED_ENTRY_IND;
1505 i = find_table_entry_ind(tab, hash, key);
1506 if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
1508 error_p = i == UNDEFINED_ENTRY_IND;
1511 if (error_p && check_p) {
1513 retval = (*func)(0, 0, arg, 1);
1516 curr_entry_ptr = &entries[i];
1529 st_data_t key = curr_entry_ptr->key;
1533 bin = find_entry(tab, hash, key);
1534 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1536 if (bin == UNDEFINED_ENTRY_IND)
1540 bin_ind = find_table_bin_ind(tab, hash, key);
1541 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
1543 if (bin_ind == UNDEFINED_BIN_IND)
1545 bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
1546 MARK_BIN_DELETED(tab, bin_ind);
1548 curr_entry_ptr = &entries[bin];
1549 MARK_ENTRY_DELETED(curr_entry_ptr);
1551 update_range_for_deleted(tab, bin);
1560 st_foreach_with_replace(
st_table *tab, st_foreach_check_callback_func *func, st_update_callback_func *replace, st_data_t arg)
1562 return st_general_foreach(tab, func, replace, arg, TRUE);
1566 st_foreach_callback_func *func;
1571 apply_functor(st_data_t k, st_data_t v, st_data_t d,
int _)
1573 const struct functor *f = (
void *)d;
1574 return f->func(k, v, f->arg);
1580 const struct functor f = { func, arg };
1581 return st_general_foreach(tab, apply_functor, 0, (st_data_t)&f, FALSE);
1587 st_data_t never ATTRIBUTE_UNUSED)
1589 return st_general_foreach(tab, func, 0, arg, TRUE);
1594 static inline st_index_t
1595 st_general_keys(
st_table *tab, st_data_t *keys, st_index_t size)
1597 st_index_t i, bound;
1598 st_data_t key, *keys_start, *keys_end;
1601 bound = tab->entries_bound;
1603 keys_end = keys + size;
1604 for (i = tab->entries_start; i < bound; i++) {
1605 if (keys == keys_end)
1607 curr_entry_ptr = &entries[i];
1608 key = curr_entry_ptr->key;
1609 if (! DELETED_ENTRY_P(curr_entry_ptr))
1613 return keys - keys_start;
1617 st_keys(
st_table *tab, st_data_t *keys, st_index_t size)
1619 return st_general_keys(tab, keys, size);
1624 st_keys_check(
st_table *tab, st_data_t *keys, st_index_t size,
1625 st_data_t never ATTRIBUTE_UNUSED)
1627 return st_general_keys(tab, keys, size);
1632 static inline st_index_t
1633 st_general_values(
st_table *tab, st_data_t *values, st_index_t size)
1635 st_index_t i, bound;
1636 st_data_t *values_start, *values_end;
1639 values_start = values;
1640 values_end = values + size;
1641 bound = tab->entries_bound;
1642 for (i = tab->entries_start; i < bound; i++) {
1643 if (values == values_end)
1645 curr_entry_ptr = &entries[i];
1646 if (! DELETED_ENTRY_P(curr_entry_ptr))
1647 *values++ = curr_entry_ptr->record;
1650 return values - values_start;
1654 st_values(
st_table *tab, st_data_t *values, st_index_t size)
1656 return st_general_values(tab, values, size);
1661 st_values_check(
st_table *tab, st_data_t *values, st_index_t size,
1662 st_data_t never ATTRIBUTE_UNUSED)
1664 return st_general_values(tab, values, size);
1667 #define FNV1_32A_INIT 0x811c9dc5
1672 #define FNV_32_PRIME 0x01000193
1674 #ifndef UNALIGNED_WORD_ACCESS
1675 # if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
1676 defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || \
1677 defined(__powerpc64__) || defined(__aarch64__) || \
1678 defined(__mc68020__)
1679 # define UNALIGNED_WORD_ACCESS 1
1682 #ifndef UNALIGNED_WORD_ACCESS
1683 # define UNALIGNED_WORD_ACCESS 0
1689 #define BIG_CONSTANT(x,y) ((st_index_t)(x)<<32|(st_index_t)(y))
1690 #define ROTL(x,n) ((x)<<(n)|(x)>>(SIZEOF_ST_INDEX_T*CHAR_BIT-(n)))
1692 #if ST_INDEX_BITS <= 32
1693 #define C1 (st_index_t)0xcc9e2d51
1694 #define C2 (st_index_t)0x1b873593
1696 #define C1 BIG_CONSTANT(0x87c37b91,0x114253d5);
1697 #define C2 BIG_CONSTANT(0x4cf5ad43,0x2745937f);
1699 NO_SANITIZE(
"unsigned-integer-overflow",
static inline st_index_t murmur_step(st_index_t h, st_index_t k));
1700 NO_SANITIZE(
"unsigned-integer-overflow",
static inline st_index_t murmur_finish(st_index_t h));
1701 NO_SANITIZE(
"unsigned-integer-overflow",
extern st_index_t st_hash(
const void *ptr,
size_t len, st_index_t h));
1703 static inline st_index_t
1704 murmur_step(st_index_t h, st_index_t k)
1706 #if ST_INDEX_BITS <= 32
1722 static inline st_index_t
1723 murmur_finish(st_index_t h)
1725 #if ST_INDEX_BITS <= 32
1729 const st_index_t c1 = 0x85ebca6b;
1730 const st_index_t c2 = 0xc2b2ae35;
1736 const st_index_t c1 = BIG_CONSTANT(0xbf58476d,0x1ce4e5b9);
1737 const st_index_t c2 = BIG_CONSTANT(0x94d049bb,0x133111eb);
1739 #if ST_INDEX_BITS > 64
1756 st_hash(
const void *ptr,
size_t len, st_index_t h)
1758 const char *data = ptr;
1762 #define data_at(n) (st_index_t)((unsigned char)data[(n)])
1763 #define UNALIGNED_ADD_4 UNALIGNED_ADD(2); UNALIGNED_ADD(1); UNALIGNED_ADD(0)
1764 #if SIZEOF_ST_INDEX_T > 4
1765 #define UNALIGNED_ADD_8 UNALIGNED_ADD(6); UNALIGNED_ADD(5); UNALIGNED_ADD(4); UNALIGNED_ADD(3); UNALIGNED_ADD_4
1766 #if SIZEOF_ST_INDEX_T > 8
1767 #define UNALIGNED_ADD_16 UNALIGNED_ADD(14); UNALIGNED_ADD(13); UNALIGNED_ADD(12); UNALIGNED_ADD(11); \
1768 UNALIGNED_ADD(10); UNALIGNED_ADD(9); UNALIGNED_ADD(8); UNALIGNED_ADD(7); UNALIGNED_ADD_8
1769 #define UNALIGNED_ADD_ALL UNALIGNED_ADD_16
1771 #define UNALIGNED_ADD_ALL UNALIGNED_ADD_8
1773 #define UNALIGNED_ADD_ALL UNALIGNED_ADD_4
1776 if (len >=
sizeof(st_index_t)) {
1777 #if !UNALIGNED_WORD_ACCESS
1778 int align = (int)((st_data_t)data %
sizeof(st_index_t));
1784 #ifdef WORDS_BIGENDIAN
1785 # define UNALIGNED_ADD(n) case SIZEOF_ST_INDEX_T - (n) - 1: \
1786 t |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 2)
1788 # define UNALIGNED_ADD(n) case SIZEOF_ST_INDEX_T - (n) - 1: \
1789 t |= data_at(n) << CHAR_BIT*(n)
1792 #undef UNALIGNED_ADD
1795 #ifdef WORDS_BIGENDIAN
1796 t >>= (CHAR_BIT * align) - CHAR_BIT;
1798 t <<= (CHAR_BIT * align);
1801 data +=
sizeof(st_index_t)-align;
1802 len -=
sizeof(st_index_t)-align;
1804 sl = CHAR_BIT * (SIZEOF_ST_INDEX_T-align);
1805 sr = CHAR_BIT * align;
1807 while (len >=
sizeof(st_index_t)) {
1808 d = *(st_index_t *)data;
1809 #ifdef WORDS_BIGENDIAN
1810 t = (t << sr) | (d >> sl);
1812 t = (t >> sr) | (d << sl);
1814 h = murmur_step(h, t);
1816 data +=
sizeof(st_index_t);
1817 len -=
sizeof(st_index_t);
1820 pack = len < (size_t)align ? (
int)len : align;
1823 #ifdef WORDS_BIGENDIAN
1824 # define UNALIGNED_ADD(n) case (n) + 1: \
1825 d |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 1)
1827 # define UNALIGNED_ADD(n) case (n) + 1: \
1828 d |= data_at(n) << CHAR_BIT*(n)
1831 #undef UNALIGNED_ADD
1833 #ifdef WORDS_BIGENDIAN
1834 t = (t << sr) | (d >> sl);
1836 t = (t >> sr) | (d << sl);
1839 if (len < (
size_t)align)
goto skip_tail;
1840 # define SKIP_TAIL 1
1841 h = murmur_step(h, t);
1847 #ifdef HAVE_BUILTIN___BUILTIN_ASSUME_ALIGNED
1848 #define aligned_data __builtin_assume_aligned(data, sizeof(st_index_t))
1850 #define aligned_data data
1854 h = murmur_step(h, *(st_index_t *)aligned_data);
1855 data +=
sizeof(st_index_t);
1856 len -=
sizeof(st_index_t);
1857 }
while (len >=
sizeof(st_index_t));
1863 #if UNALIGNED_WORD_ACCESS && SIZEOF_ST_INDEX_T <= 8 && CHAR_BIT == 8
1865 #if SIZEOF_ST_INDEX_T > 4
1866 case 7: t |= data_at(6) << 48;
1867 case 6: t |= data_at(5) << 40;
1868 case 5: t |= data_at(4) << 32;
1870 t |= (st_index_t)*(uint32_t*)aligned_data;
1872 # define SKIP_TAIL 1
1874 case 3: t |= data_at(2) << 16;
1875 case 2: t |= data_at(1) << 8;
1876 case 1: t |= data_at(0);
1878 #ifdef WORDS_BIGENDIAN
1879 # define UNALIGNED_ADD(n) case (n) + 1: \
1880 t |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 1)
1882 # define UNALIGNED_ADD(n) case (n) + 1: \
1883 t |= data_at(n) << CHAR_BIT*(n)
1886 #undef UNALIGNED_ADD
1891 h ^= t; h -= ROTL(t, 7);
1897 return murmur_finish(h);
1901 st_hash_uint32(st_index_t h, uint32_t i)
1903 return murmur_step(h, i);
1906 NO_SANITIZE(
"unsigned-integer-overflow",
extern st_index_t st_hash_uint(st_index_t h, st_index_t i));
1908 st_hash_uint(st_index_t h, st_index_t i)
1913 #if SIZEOF_ST_INDEX_T*CHAR_BIT > 8*8
1914 h = murmur_step(h, i >> 8*8);
1916 h = murmur_step(h, i);
1921 st_hash_end(st_index_t h)
1923 h = murmur_finish(h);
1927 #undef st_hash_start
1929 rb_st_hash_start(st_index_t h)
1935 strhash(st_data_t arg)
1937 register const char *
string = (
const char *)arg;
1938 return st_hash(
string, strlen(
string), FNV1_32A_INIT);
1949 if (c1 ==
'\0' || c2 ==
'\0') {
1950 if (c1 !=
'\0')
return 1;
1951 if (c2 !=
'\0')
return -1;
1954 if ((
'A' <= c1) && (c1 <=
'Z')) c1 +=
'a' -
'A';
1955 if ((
'A' <= c2) && (c2 <=
'Z')) c2 +=
'a' -
'A';
1971 for (i = 0; i < n; i++) {
1974 if (c1 ==
'\0' || c2 ==
'\0') {
1975 if (c1 !=
'\0')
return 1;
1976 if (c2 !=
'\0')
return -1;
1979 if ((
'A' <= c1) && (c1 <=
'Z')) c1 +=
'a' -
'A';
1980 if ((
'A' <= c2) && (c2 <=
'Z')) c2 +=
'a' -
'A';
1992 st_strcmp(st_data_t lhs, st_data_t rhs)
1994 const char *s1 = (
char *)lhs;
1995 const char *s2 = (
char *)rhs;
1996 return strcmp(s1, s2);
2000 st_locale_insensitive_strcasecmp_i(st_data_t lhs, st_data_t rhs)
2002 const char *s1 = (
char *)lhs;
2003 const char *s2 = (
char *)rhs;
2007 NO_SANITIZE(
"unsigned-integer-overflow", PUREFUNC(
static st_index_t strcasehash(st_data_t)));
2009 strcasehash(st_data_t arg)
2011 register const char *
string = (
const char *)arg;
2012 register st_index_t hval = FNV1_32A_INIT;
2018 unsigned int c = (
unsigned char)*
string++;
2019 if ((
unsigned int)(c -
'A') <= (
'Z' -
'A')) c +=
'a' -
'A';
2023 hval *= FNV_32_PRIME;
2029 st_numcmp(st_data_t x, st_data_t y)
2035 st_numhash(st_data_t n)
2037 enum {s1 = 11, s2 = 3};
2038 return (st_index_t)((n>>s1|(n<<s2)) ^ (n>>s2));
2045 st_expand_table(
st_table *tab, st_index_t siz)
2050 if (siz <= get_allocated_entries(tab))
2053 tmp = st_init_table_with_size(tab->type, siz);
2054 n = get_allocated_entries(tab);
2057 if (tab->bins != NULL)
2059 if (tmp->bins != NULL)
2061 tab->entry_power = tmp->entry_power;
2062 tab->bin_power = tmp->bin_power;
2063 tab->size_ind = tmp->size_ind;
2064 tab->entries = tmp->entries;
2066 tab->rebuilds_num++;
2075 int eq_p, rebuilt_p;
2082 for (i = tab->entries_start; i < tab->entries_bound; i++) {
2083 p = &tab->entries[i];
2084 if (DELETED_ENTRY_P(p))
2086 for (j = i + 1; j < tab->entries_bound; j++) {
2087 q = &tab->entries[j];
2088 if (DELETED_ENTRY_P(q))
2090 DO_PTR_EQUAL_CHECK(tab, p, q->hash, q->key, eq_p, rebuilt_p);
2091 if (EXPECT(rebuilt_p, 0))
2095 MARK_ENTRY_DELETED(q);
2097 update_range_for_deleted(tab, j);
2109 int eq_p, rebuilt_p;
2111 st_index_t
const n = bins_size(tab);
2112 unsigned int const size_ind = get_size_ind(tab);
2113 st_index_t *bins = realloc(tab->bins, n);
2115 initialize_bins(tab);
2116 for (i = tab->entries_start; i < tab->entries_bound; i++) {
2119 #ifdef QUADRATIC_PROBE
2122 st_index_t peterb = p->hash;
2125 if (DELETED_ENTRY_P(p))
2128 ind = hash_bin(p->hash, tab);
2130 st_index_t bin = get_bin(bins, size_ind, ind);
2131 if (EMPTY_OR_DELETED_BIN_P(bin)) {
2133 set_bin(bins, size_ind, ind, i + ENTRY_BASE);
2138 DO_PTR_EQUAL_CHECK(tab, q, p->hash, p->key, eq_p, rebuilt_p);
2139 if (EXPECT(rebuilt_p, 0))
2143 q->record = p->record;
2144 MARK_ENTRY_DELETED(p);
2146 update_range_for_deleted(tab, bin);
2151 #ifdef QUADRATIC_PROBE
2152 ind = hash_bin(ind + d, tab);
2155 ind = secondary_hash(ind, tab, &peterb);
2173 if (tab->bin_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
2174 rebuilt_p = st_rehash_linear(tab);
2176 rebuilt_p = st_rehash_indexed(tab);
2177 }
while (rebuilt_p);
2182 st_stringify(
VALUE key)
2185 rb_hash_key_str(key) : key;
2191 st_data_t k = st_stringify(key);
2193 e.hash = do_hash(k, tab);
2197 tab->entries[tab->entries_bound++] = e;
2208 for (i = 0; i < argc; ) {
2209 st_data_t k = st_stringify(argv[i++]);
2210 st_data_t v = argv[i++];
2211 st_insert(tab, k, v);
2223 for (i = 0; i < argc; ) {
2224 VALUE key = argv[i++];
2225 VALUE val = argv[i++];
2226 st_insert_single(tab, hash, key, val);
2236 rb_hash_bulk_insert_into_st_table(
long argc,
const VALUE *argv,
VALUE hash)
2238 st_index_t n, size = argc / 2;
2239 st_table *tab = RHASH_ST_TABLE(hash);
2241 tab = RHASH_TBL_RAW(hash);
2242 n = tab->entries_bound + size;
2243 st_expand_table(tab, n);
2244 if (UNLIKELY(tab->num_entries))
2245 st_insert_generic(tab, argc, argv, hash);
2247 st_insert_single(tab, hash, argv[0], argv[1]);
2248 else if (tab->bin_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
2249 st_insert_linear(tab, argc, argv, hash);
2251 st_insert_generic(tab, argc, argv, hash);
2256 rb_st_nth_key(
st_table *tab, st_index_t index)
2258 if (LIKELY(tab->entries_start == 0 &&
2259 tab->num_entries == tab->entries_bound &&
2260 index < tab->num_entries)) {
2261 return tab->entries[index].key;
int st_locale_insensitive_strcasecmp(const char *s1, const char *s2)
Our own locale-insensitive version of strcasecmp(3).
int st_locale_insensitive_strncasecmp(const char *s1, const char *s2, size_t n)
Our own locale-insensitive version of strncasecmp(3).
static bool RB_OBJ_FROZEN(VALUE obj)
Checks if an object is frozen.
#define Qundef
Old name of RUBY_Qundef.
void rb_raise(VALUE exc, const char *fmt,...)
Exception entry point.
void rb_bug(const char *fmt,...)
Interpreter panic switch.
VALUE rb_eRuntimeError
RuntimeError exception.
VALUE rb_obj_class(VALUE obj)
Queries the class of an object.
VALUE rb_cString
String class.
#define RB_OBJ_WRITTEN(old, oldv, young)
Identical to RB_OBJ_WRITE(), except that it does not write any value; it only declares the write barrier (WB).
#define MEMCPY(p1, p2, type, n)
Handy macro to call memcpy.
VALUE type(ANYARGS)
ANYARGS-ed function type.
int st_foreach(st_table *q, int_type *w, st_data_t e)
Iteration over the given table.
int st_foreach_check(st_table *q, int_type *w, st_data_t e, st_data_t)
Iteration over the given table.
#define _(args)
This was a transition path from K&R to ANSI.
uintptr_t VALUE
Type that represents a Ruby object.