Leptonica  1.82.0
Image processing and image analysis suite
sarray2.c
Go to the documentation of this file.
1 /*====================================================================*
2  - Copyright (C) 2001 Leptonica. All rights reserved.
3  -
4  - Redistribution and use in source and binary forms, with or without
5  - modification, are permitted provided that the following conditions
6  - are met:
7  - 1. Redistributions of source code must retain the above copyright
8  - notice, this list of conditions and the following disclaimer.
9  - 2. Redistributions in binary form must reproduce the above
10  - copyright notice, this list of conditions and the following
11  - disclaimer in the documentation and/or other materials
12  - provided with the distribution.
13  -
14  - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15  - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16  - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17  - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18  - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19  - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20  - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21  - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22  - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23  - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *====================================================================*/
26 
71 #ifdef HAVE_CONFIG_H
72 #include <config_auto.h>
73 #endif /* HAVE_CONFIG_H */
74 
75 #include <string.h>
76 #include "allheaders.h"
77 
78 /*----------------------------------------------------------------------*
79  * Sort *
80  *----------------------------------------------------------------------*/
96 SARRAY *
98  SARRAY *sain,
99  l_int32 sortorder)
100 {
101 char **array;
102 char *tmp;
103 l_int32 n, i, j, gap;
104 
105  PROCNAME("sarraySort");
106 
107  if (!sain)
108  return (SARRAY *)ERROR_PTR("sain not defined", procName, NULL);
109 
110  /* Make saout if necessary; otherwise do in-place */
111  if (!saout)
112  saout = sarrayCopy(sain);
113  else if (sain != saout)
114  return (SARRAY *)ERROR_PTR("invalid: not in-place", procName, NULL);
115  array = saout->array; /* operate directly on the array */
116  n = sarrayGetCount(saout);
117 
118  /* Shell sort */
119  for (gap = n/2; gap > 0; gap = gap / 2) {
120  for (i = gap; i < n; i++) {
121  for (j = i - gap; j >= 0; j -= gap) {
122  if ((sortorder == L_SORT_INCREASING &&
123  stringCompareLexical(array[j], array[j + gap])) ||
124  (sortorder == L_SORT_DECREASING &&
125  stringCompareLexical(array[j + gap], array[j])))
126  {
127  tmp = array[j];
128  array[j] = array[j + gap];
129  array[j + gap] = tmp;
130  }
131  }
132  }
133  }
134 
135  return saout;
136 }
137 
138 
146 SARRAY *
148  NUMA *naindex)
149 {
150 char *str;
151 l_int32 i, n, index;
152 SARRAY *saout;
153 
154  PROCNAME("sarraySortByIndex");
155 
156  if (!sain)
157  return (SARRAY *)ERROR_PTR("sain not defined", procName, NULL);
158  if (!naindex)
159  return (SARRAY *)ERROR_PTR("naindex not defined", procName, NULL);
160 
161  n = sarrayGetCount(sain);
162  saout = sarrayCreate(n);
163  for (i = 0; i < n; i++) {
164  numaGetIValue(naindex, i, &index);
165  str = sarrayGetString(sain, index, L_COPY);
166  sarrayAddString(saout, str, L_INSERT);
167  }
168 
169  return saout;
170 }
171 
172 
186 l_int32
187 stringCompareLexical(const char *str1,
188  const char *str2)
189 {
190 l_int32 i, len1, len2, len;
191 
192  PROCNAME("sarrayCompareLexical");
193 
194  if (!str1)
195  return ERROR_INT("str1 not defined", procName, 1);
196  if (!str2)
197  return ERROR_INT("str2 not defined", procName, 1);
198 
199  len1 = strlen(str1);
200  len2 = strlen(str2);
201  len = L_MIN(len1, len2);
202 
203  for (i = 0; i < len; i++) {
204  if (str1[i] == str2[i])
205  continue;
206  if (str1[i] > str2[i])
207  return 1;
208  else
209  return 0;
210  }
211 
212  if (len1 > len2)
213  return 1;
214  else
215  return 0;
216 }
217 
218 
219 /*----------------------------------------------------------------------*
220  * Set operations using aset (rbtree) *
221  *----------------------------------------------------------------------*/
228 L_ASET *
230 {
231 char *str;
232 l_int32 i, n;
233 l_uint64 hash;
234 L_ASET *set;
235 RB_TYPE key;
236 
237  PROCNAME("l_asetCreateFromSarray");
238 
239  if (!sa)
240  return (L_ASET *)ERROR_PTR("sa not defined", procName, NULL);
241 
242  set = l_asetCreate(L_UINT_TYPE);
243  n = sarrayGetCount(sa);
244  for (i = 0; i < n; i++) {
245  str = sarrayGetString(sa, i, L_NOCOPY);
246  l_hashStringToUint64Fast(str, &hash);
247  key.utype = hash;
248  l_asetInsert(set, key);
249  }
250 
251  return set;
252 }
253 
254 
272 l_ok
274  SARRAY **psad)
275 {
276 char *str;
277 l_int32 i, n;
278 l_uint64 hash;
279 L_ASET *set;
280 RB_TYPE key;
281 SARRAY *sad;
282 
283  PROCNAME("sarrayRemoveDupsByAset");
284 
285  if (!psad)
286  return ERROR_INT("&sad not defined", procName, 1);
287  *psad = NULL;
288  if (!sas)
289  return ERROR_INT("sas not defined", procName, 1);
290 
291  set = l_asetCreate(L_UINT_TYPE);
292  sad = sarrayCreate(0);
293  *psad = sad;
294  n = sarrayGetCount(sas);
295  for (i = 0; i < n; i++) {
296  str = sarrayGetString(sas, i, L_NOCOPY);
297  l_hashStringToUint64Fast(str, &hash);
298  key.utype = hash;
299  if (!l_asetFind(set, key)) {
300  sarrayAddString(sad, str, L_COPY);
301  l_asetInsert(set, key);
302  }
303  }
304 
305  l_asetDestroy(&set);
306  return 0;
307 }
308 
309 
328 l_ok
330  SARRAY *sa2,
331  SARRAY **psad)
332 {
333 SARRAY *sa3;
334 
335  PROCNAME("sarrayUnionByAset");
336 
337  if (!psad)
338  return ERROR_INT("&sad not defined", procName, 1);
339  *psad = NULL;
340  if (!sa1)
341  return ERROR_INT("sa1 not defined", procName, 1);
342  if (!sa2)
343  return ERROR_INT("sa2 not defined", procName, 1);
344 
345  /* Join */
346  sa3 = sarrayCopy(sa1);
347  if (sarrayJoin(sa3, sa2) == 1) {
348  sarrayDestroy(&sa3);
349  return ERROR_INT("join failed for sa3", procName, 1);
350  }
351 
352  /* Eliminate duplicates */
353  sarrayRemoveDupsByAset(sa3, psad);
354  sarrayDestroy(&sa3);
355  return 0;
356 }
357 
358 
379 l_ok
381  SARRAY *sa2,
382  SARRAY **psad)
383 {
384 char *str;
385 l_int32 n1, n2, i, n;
386 l_uint64 hash;
387 L_ASET *set1, *set2;
388 RB_TYPE key;
389 SARRAY *sa_small, *sa_big, *sad;
390 
391  PROCNAME("sarrayIntersectionByAset");
392 
393  if (!psad)
394  return ERROR_INT("&sad not defined", procName, 1);
395  *psad = NULL;
396  if (!sa1)
397  return ERROR_INT("sa1 not defined", procName, 1);
398  if (!sa2)
399  return ERROR_INT("sa2 not defined", procName, 1);
400 
401  /* Put the elements of the biggest array into a set */
402  n1 = sarrayGetCount(sa1);
403  n2 = sarrayGetCount(sa2);
404  sa_small = (n1 < n2) ? sa1 : sa2; /* do not destroy sa_small */
405  sa_big = (n1 < n2) ? sa2 : sa1; /* do not destroy sa_big */
406  set1 = l_asetCreateFromSarray(sa_big);
407 
408  /* Build up the intersection of strings */
409  sad = sarrayCreate(0);
410  *psad = sad;
411  n = sarrayGetCount(sa_small);
412  set2 = l_asetCreate(L_UINT_TYPE);
413  for (i = 0; i < n; i++) {
414  str = sarrayGetString(sa_small, i, L_NOCOPY);
415  l_hashStringToUint64Fast(str, &hash);
416  key.utype = hash;
417  if (l_asetFind(set1, key) && !l_asetFind(set2, key)) {
418  sarrayAddString(sad, str, L_COPY);
419  l_asetInsert(set2, key);
420  }
421  }
422 
423  l_asetDestroy(&set1);
424  l_asetDestroy(&set2);
425  return 0;
426 }
427 
428 
429 /*----------------------------------------------------------------------*
430  * Hashmap operations *
431  *----------------------------------------------------------------------*/
438 L_HASHMAP *
440 {
441 l_int32 i, n;
442 l_uint64 key;
443 char *str;
444 L_HASHITEM *hitem;
445 L_HASHMAP *hmap;
446 
447  PROCNAME("l_hmapCreateFromSarray");
448 
449  if (!sa)
450  return (L_HASHMAP *)ERROR_PTR("sa not defined", procName, NULL);
451 
452  n = sarrayGetCount(sa);
453  if ((hmap = l_hmapCreate(0.51 * n, 2)) == NULL)
454  return (L_HASHMAP *)ERROR_PTR("hmap not made", procName, NULL);
455  for (i = 0; i < n; i++) {
456  str = sarrayGetString(sa, i, L_NOCOPY);
457  l_hashStringToUint64Fast(str, &key);
458  hitem = l_hmapLookup(hmap, key, i, L_HMAP_CREATE);
459  }
460  return hmap;
461 }
462 
463 
472 l_ok
474  SARRAY **psad,
475  L_HASHMAP **phmap)
476 {
477 l_int32 i, tabsize;
478 l_uint64 key;
479 char *str;
480 SARRAY *sad;
481 L_HASHITEM *hitem;
482 L_HASHMAP *hmap;
483 
484  PROCNAME("sarrayRemoveDupsByHmap");
485 
486  if (phmap) *phmap = NULL;
487  if (!psad)
488  return ERROR_INT("&sad not defined", procName, 1);
489  *psad = NULL;
490  if (!sas)
491  return ERROR_INT("sas not defined", procName, 1);
492 
493  /* Traverse the hashtable lists */
494  if ((hmap = l_hmapCreateFromSarray(sas)) == NULL)
495  return ERROR_INT("hmap not made", procName, 1);
496  sad = sarrayCreate(0);
497  *psad = sad;
498  tabsize = hmap->tabsize;
499  for (i = 0; i < tabsize; i++) {
500  hitem = hmap->hashtab[i];
501  while (hitem) {
502  str = sarrayGetString(sas, hitem->val, L_COPY);
503  sarrayAddString(sad, str, L_INSERT);
504  hitem = hitem->next;
505  }
506  }
507 
508  if (phmap)
509  *phmap = hmap;
510  else
511  l_hmapDestroy(&hmap);
512  return 0;
513 }
514 
515 
524 l_ok
526  SARRAY *sa2,
527  SARRAY **psad)
528 {
529 SARRAY *sa3;
530 
531  PROCNAME("l_hmapUnionSarray");
532 
533  if (!psad)
534  return ERROR_INT("&sad not defined", procName, 1);
535  *psad = NULL;
536  if (!sa1)
537  return ERROR_INT("sa1 not defined", procName, 1);
538  if (!sa2)
539  return ERROR_INT("sa2 not defined", procName, 1);
540 
541  sa3 = sarrayCopy(sa1);
542  if (sarrayJoin(sa3, sa2) == 1) {
543  sarrayDestroy(&sa3);
544  return ERROR_INT("sa3 join failed", procName, 1);
545  }
546  sarrayRemoveDupsByHmap(sa3, psad, NULL);
547  sarrayDestroy(&sa3);
548  return 0;
549 }
550 
551 
560 l_ok
562  SARRAY *sa2,
563  SARRAY **psad)
564 {
565 l_int32 i, n1, n2, n;
566 l_uint64 key;
567 char *str;
568 SARRAY *sa_small, *sa_big, *sa3, *sad;
569 L_HASHITEM *hitem;
570 L_HASHMAP *hmap;
571 
572  PROCNAME("sarrayIntersectionByHmap");
573 
574  if (!psad)
575  return ERROR_INT("&sad not defined", procName, 1);
576  *psad = NULL;
577  if (!sa1)
578  return ERROR_INT("sa1 not defined", procName, 1);
579  if (!sa2)
580  return ERROR_INT("sa2 not defined", procName, 1);
581 
582  /* Make a hashmap for the elements of the biggest array */
583  n1 = sarrayGetCount(sa1);
584  n2 = sarrayGetCount(sa2);
585  sa_small = (n1 < n2) ? sa1 : sa2; /* do not destroy sa_small */
586  sa_big = (n1 < n2) ? sa2 : sa1; /* do not destroy sa_big */
587  if ((hmap = l_hmapCreateFromSarray(sa_big)) == NULL)
588  return ERROR_INT("hmap not made", procName, 1);
589 
590  /* Remove duplicates from the smallest array. Alternatively,
591  * we can skip this step and avoid counting duplicates in
592  * sa_small by modifying the count fields in the sa_big hashitems;
593  * e.g., see l_hmapIntersectionDna(). */
594  sarrayRemoveDupsByHmap(sa_small, &sa3, NULL);
595 
596  /* Go through sa3, the set of strings derived from the smallest array,
597  * hashing into the big array table. Any string found belongs to both,
598  * so add it to the output array. */
599  sad = sarrayCreate(0);
600  *psad = sad;
601  n = sarrayGetCount(sa3);
602  for (i = 0; i < n; i++) {
603  str = sarrayGetString(sa3, i, L_NOCOPY);
604  l_hashStringToUint64Fast(str, &key);
605  hitem = l_hmapLookup(hmap, key, i, L_HMAP_CHECK);
606  if (hitem)
607  sarrayAddString(sad, str, L_COPY);
608  }
609  l_hmapDestroy(&hmap);
610  sarrayDestroy(&sa3);
611  return 0;
612 }
613 
614 
615 /*----------------------------------------------------------------------*
616  * Miscellaneous operations *
617  *----------------------------------------------------------------------*/
624 SARRAY *
626 {
627 char buf[32];
628 l_int32 i;
629 SARRAY *sa;
630 
631  PROCNAME("sarrayGenerateIntegers");
632 
633  if ((sa = sarrayCreate(n)) == NULL)
634  return (SARRAY *)ERROR_PTR("sa not made", procName, NULL);
635  for (i = 0; i < n; i++) {
636  snprintf(buf, sizeof(buf), "%d", i);
637  sarrayAddString(sa, buf, L_COPY);
638  }
639  return sa;
640 }
641 
642 
665 l_ok
667  const char *keystring,
668  char **pvalstring)
669 {
670 char *key, *val, *str;
671 l_int32 i, n;
672 SARRAY *sa1;
673 
674  PROCNAME("sarrayLookupCSKV");
675 
676  if (!pvalstring)
677  return ERROR_INT("&valstring not defined", procName, 1);
678  *pvalstring = NULL;
679  if (!sa)
680  return ERROR_INT("sa not defined", procName, 1);
681  if (!keystring)
682  return ERROR_INT("keystring not defined", procName, 1);
683 
684  n = sarrayGetCount(sa);
685  for (i = 0; i < n; i++) {
686  str = sarrayGetString(sa, i, L_NOCOPY);
687  sa1 = sarrayCreate(2);
688  sarraySplitString(sa1, str, ",");
689  if (sarrayGetCount(sa1) != 2) {
690  sarrayDestroy(&sa1);
691  continue;
692  }
693  key = sarrayGetString(sa1, 0, L_NOCOPY);
694  val = sarrayGetString(sa1, 1, L_NOCOPY);
695  if (!strcmp(key, keystring)) {
696  *pvalstring = stringNew(val);
697  sarrayDestroy(&sa1);
698  return 0;
699  }
700  sarrayDestroy(&sa1);
701  }
702 
703  return 0;
704 }
l_ok numaGetIValue(NUMA *na, l_int32 index, l_int32 *pival)
numaGetIValue()
Definition: numabasic.c:754
@ L_COPY
Definition: pix.h:712
@ L_NOCOPY
Definition: pix.h:710
@ L_INSERT
Definition: pix.h:711
@ L_SORT_DECREASING
Definition: pix.h:730
@ L_SORT_INCREASING
Definition: pix.h:729
SARRAY * sarrayCreate(l_int32 n)
sarrayCreate()
Definition: sarray1.c:170
char * sarrayGetString(SARRAY *sa, l_int32 index, l_int32 copyflag)
sarrayGetString()
Definition: sarray1.c:703
l_ok sarrayJoin(SARRAY *sa1, SARRAY *sa2)
sarrayJoin()
Definition: sarray1.c:969
l_int32 sarrayGetCount(SARRAY *sa)
sarrayGetCount()
Definition: sarray1.c:643
void sarrayDestroy(SARRAY **psa)
sarrayDestroy()
Definition: sarray1.c:362
l_ok sarrayAddString(SARRAY *sa, const char *string, l_int32 copyflag)
sarrayAddString()
Definition: sarray1.c:451
SARRAY * sarrayCopy(SARRAY *sa)
sarrayCopy()
Definition: sarray1.c:398
SARRAY * sarraySortByIndex(SARRAY *sain, NUMA *naindex)
sarraySortByIndex()
Definition: sarray2.c:147
SARRAY * sarrayGenerateIntegers(l_int32 n)
sarrayGenerateIntegers()
Definition: sarray2.c:625
l_ok sarrayRemoveDupsByAset(SARRAY *sas, SARRAY **psad)
sarrayRemoveDupsByAset()
Definition: sarray2.c:273
l_ok sarrayIntersectionByHmap(SARRAY *sa1, SARRAY *sa2, SARRAY **psad)
sarrayIntersectionByHmap()
Definition: sarray2.c:561
SARRAY * sarraySort(SARRAY *saout, SARRAY *sain, l_int32 sortorder)
sarraySort()
Definition: sarray2.c:97
l_ok sarrayUnionByAset(SARRAY *sa1, SARRAY *sa2, SARRAY **psad)
sarrayUnionByAset()
Definition: sarray2.c:329
l_int32 stringCompareLexical(const char *str1, const char *str2)
stringCompareLexical()
Definition: sarray2.c:187
l_ok sarrayLookupCSKV(SARRAY *sa, const char *keystring, char **pvalstring)
sarrayLookupCSKV()
Definition: sarray2.c:666
l_ok sarrayUnionByHmap(SARRAY *sa1, SARRAY *sa2, SARRAY **psad)
sarrayUnionByHmap()
Definition: sarray2.c:525
l_ok sarrayIntersectionByAset(SARRAY *sa1, SARRAY *sa2, SARRAY **psad)
sarrayIntersectionByAset()
Definition: sarray2.c:380
L_ASET * l_asetCreateFromSarray(SARRAY *sa)
l_asetCreateFromSarray()
Definition: sarray2.c:229
L_HASHMAP * l_hmapCreateFromSarray(SARRAY *sa)
l_hmapCreateFromSarray()
Definition: sarray2.c:439
l_ok sarrayRemoveDupsByHmap(SARRAY *sas, SARRAY **psad, L_HASHMAP **phmap)
sarrayRemoveDupsByHmap()
Definition: sarray2.c:473
l_uint64 val
Definition: hashmap.h:117
struct L_Hashitem * next
Definition: hashmap.h:119
struct L_Hashitem ** hashtab
Definition: hashmap.h:106
l_int32 tabsize
Definition: hashmap.h:107
Definition: array.h:71
Definition: array.h:127
char ** array
Definition: array.h:131
Definition: rbtree.h:62
l_ok l_hashStringToUint64Fast(const char *str, l_uint64 *phash)
l_hashStringToUint64Fast()
Definition: utils1.c:771
char * stringNew(const char *src)
stringNew()
Definition: utils2.c:223