Leptonica  1.82.0
Image processing and image analysis suite
hashmap.c
1 /*====================================================================*
2  - Copyright (C) 2001 Leptonica. All rights reserved.
3  -
4  - Redistribution and use in source and binary forms, with or without
5  - modification, are permitted provided that the following conditions
6  - are met:
7  - 1. Redistributions of source code must retain the above copyright
8  - notice, this list of conditions and the following disclaimer.
9  - 2. Redistributions in binary form must reproduce the above
10  - copyright notice, this list of conditions and the following
11  - disclaimer in the documentation and/or other materials
12  - provided with the distribution.
13  -
14  - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15  - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16  - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17  - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18  - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19  - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20  - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21  - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22  - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23  - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *====================================================================*/
26 
27 /*
28  * \file hashmap.c
29  * <pre>
30  *
31  * Hashmap creation, destruction
32  * L_HASHMAP *l_hmapCreate()
33  * void l_hmapDestroy()
34  *
35  * Hashmap: Accessors and modifiers
36  * L_HASHITEM *l_hmapLookup()
37  * l_int32 l_hmapRehash()
38  *
39  * (1) See also hashmap.h for a brief description of the design.
40  * (2) In a typical use, a set of objects (in an array or associated
41  * with image pixels) is represented by a hashmap. A uint64 key is
42  * produced for each object. This integer is then hashed into an
43  * index in a hashtable, using the mod function with the table size
44  * which is a prime number. Each entry in the hash table is a list
45  * of hash items. In lookup, the appropriate list is traversed,
46  * looking for the object key found earlier.
47  * (3) Hash functions that map points, strings and float64 to uint64
48  * are given in utils1.c.
49  * (4) Use of the hashmap on points, strings and float64 data are
50  * given in ptafunc2.c, sarray2.c and dnafunc1.c.
51  * (5) Useful rule of thumb for hashing collisions:
52  * For a random hashing function (say, from strings to l_uint64),
53  * the probability of a collision increases as N^2 for N much
54  * less than 2^32. The quadratic behavior switches over to
55  * approaching 1.0 around 2^32, which is the square root of 2^64.
56  * So, for example, if you have 10^7 strings, the probability
57  * of a single collision using an l_uint64 key is on the order of
58  * (10^7/4x10^9)^2 ~ 10^-5.
59  * For a million strings, collisions are very rare (~10^-7 probability).
60  * </pre>
61  */
62 
63 #ifdef HAVE_CONFIG_H
64 #include <config_auto.h>
65 #endif /* HAVE_CONFIG_H */
66 
67 #include "allheaders.h"
68 
69  /* Limit on the hashtable size */
70 static const l_uint32 MaxTabsize = 50000000;
71  /* Default values for creating the hashmap. */
72 static const l_int32 DefaultInitNItems = 2000;
73 static const l_int32 DefaultMaxOcc = 2;
74 
75 /*--------------------------------------------------------------------------*
76  * Hashmap creation and destruction *
77  *--------------------------------------------------------------------------*/
101 L_HASHMAP *
102 l_hmapCreate(l_int32 ninit,
103  l_int32 maxocc)
104 {
105 l_uint32 size, tabsize;
106 L_HASHMAP *hmap;
107 
108  PROCNAME("l_hmapCreate");
109 
110  ninit = L_MAX(ninit, DefaultInitNItems);
111  if (maxocc <= 0) maxocc = DefaultMaxOcc;
112  if (maxocc > 5) {
113  L_WARNING("maxocc = %d; non-optimal value. Set to default = %d\n",
114  procName, maxocc, DefaultMaxOcc);
115  maxocc = DefaultMaxOcc;
116  }
117  size = ninit / maxocc;
118  if (size > MaxTabsize) {
119  L_ERROR("ninit/maxocc = %d > MaxTabsize = %d\n", procName,
120  size, MaxTabsize);
121  return NULL;
122  }
123 
124  hmap = (L_HASHMAP *)LEPT_CALLOC(1, sizeof(L_HASHMAP));
125  findNextLargerPrime(size, &tabsize); /* at least 101 */
126  if ((hmap->hashtab =
127  (L_HASHITEM **)LEPT_CALLOC(tabsize, sizeof(L_HASHITEM *))) == NULL) {
128  LEPT_FREE(hmap);
129  return (L_HASHMAP *)ERROR_PTR("hashtab not made", procName, NULL);
130  }
131 
132  hmap->nitems = 0;
133  hmap->ntogo = ninit;
134  hmap->maxocc = maxocc;
135  hmap->tabsize = tabsize;
136  return hmap;
137 }
138 
139 
146 void
147 l_hmapDestroy(L_HASHMAP **phmap)
148 {
149 l_int32 i;
150 L_HASHITEM *hitem, *next;
151 L_HASHMAP *hmap;
152 
153  PROCNAME("l_hmapDestroy");
154 
155  if (phmap == NULL) {
156  L_WARNING("ptr address is NULL!\n", procName);
157  return;
158  }
159 
160  if ((hmap = *phmap) == NULL)
161  return;
162 
163  for (i = 0; i < hmap->tabsize; i++) {
164  for (hitem = hmap->hashtab[i]; hitem != NULL; hitem = next) {
165  next = hitem->next;
166  LEPT_FREE(hitem);
167  }
168  }
169  LEPT_FREE(hmap->hashtab);
170  LEPT_FREE(hmap);
171  *phmap = NULL;
172 }
173 
174 
175 /*--------------------------------------------------------------------------*
176  * Hashmap: Accessors and modifiers *
177  *--------------------------------------------------------------------------*/
202 L_HASHITEM *
203 l_hmapLookup(L_HASHMAP *hmap,
204  l_uint64 key,
205  l_uint64 val,
206  l_int32 op)
207 {
208 l_uint32 index;
209 L_HASHITEM *hlist, *hitem;
210 
211  PROCNAME("l_hmapLookup");
212 
213  if (!hmap)
214  return (L_HASHITEM *)ERROR_PTR("hmap not defined", procName, NULL);
215  if (op != L_HMAP_CHECK && op != L_HMAP_CREATE)
216  return (L_HASHITEM *)ERROR_PTR("invalid op", procName, NULL);
217 
218  /* If found, return a ptr to the hitem (not a copy) */
219  index = key % hmap->tabsize; /* into hashtab */
220  hlist = hmap->hashtab[index]; /* head of the list */
221  for (hitem = hlist; hitem != NULL; hitem = hitem->next) {
222  if (key == hitem->key) {
223  if (op == L_HMAP_CREATE) hitem->count++;
224  return hitem;
225  }
226  }
227  if (op == L_HMAP_CHECK) return NULL;
228 
229  /* Not found and %op == L_HMAP_CREATE.
230  * Make a new hitem and add to the head of the list */
231  hitem = (L_HASHITEM *)LEPT_CALLOC(1, sizeof(L_HASHITEM));
232  hitem->key = key;
233  hitem->val = val;
234  hitem->count = 1;
235  hitem->next = hlist;
236  hmap->hashtab[index] = hitem;
237  hmap->nitems++;
238  hmap->ntogo--;
239 
240  /* If hmap is full based on average occupancy, rehash */
241  if (hmap->ntogo == 0)
242  l_hmapRehash(hmap);
243 
244  return hitem;
245 }
246 
247 
260 l_ok
261 l_hmapRehash(L_HASHMAP *hmap)
262 {
263 l_int32 i, index;
264 l_uint32 tabsize;
265 L_HASHITEM *hstore, *hitem, *next;
266 
267  PROCNAME("l_hmapRehash");
268 
269  if (!hmap)
270  return ERROR_INT("hmap not defined", procName, 1);
271 
272  /* Put hash items in temporary storage in a single list,
273  * successively adding each to the list head. */
274  hstore = NULL; /* ptr to resulting list */
275  for (i = 0; i < hmap->tabsize; i++) {
276  for (hitem = hmap->hashtab[i]; hitem != NULL; hitem = next) {
277  next = hitem->next;
278  hitem->next = hstore;
279  hstore = hitem;
280  }
281  }
282 
283  /* Destroy the old hashtab and make a new one that is twice as big */
284  LEPT_FREE(hmap->hashtab);
285  findNextLargerPrime(2 * hmap->tabsize, &tabsize);
286  hmap->tabsize = tabsize;
287  hmap->hashtab = (L_HASHITEM **)LEPT_CALLOC(tabsize, sizeof(L_HASHITEM *));
288  if (hmap->hashtab == NULL) {
289  hmap->tabsize = 0;
290  return ERROR_INT("hashtab ptr array not made", procName, 1);
291  }
292  hmap->ntogo = hmap->maxocc * tabsize - hmap->nitems;
293 
294  /* Populate with the stored hash items */
295  for (hitem = hstore; hitem != NULL; hitem = next) {
296  next = hitem->next;
297  index = hitem->key % tabsize; /* into new hashtab */
298  hitem->next = hmap->hashtab[index]; /* link to head of existing list */
299  hmap->hashtab[index] = hitem; /* put at head */
300  }
301 
302  return 0;
303 }
l_uint64 val
Definition: hashmap.h:117
l_uint64 key
Definition: hashmap.h:116
l_int32 count
Definition: hashmap.h:118
struct L_Hashitem * next
Definition: hashmap.h:119
struct L_Hashitem ** hashtab
Definition: hashmap.h:106
l_int32 maxocc
Definition: hashmap.h:105
l_int32 tabsize
Definition: hashmap.h:107
l_int32 nitems
Definition: hashmap.h:102
l_int32 ntogo
Definition: hashmap.h:103
l_ok findNextLargerPrime(l_int32 start, l_uint32 *pprime)
findNextLargerPrime()
Definition: utils1.c:861