Leptonica  1.82.0
Image processing and image analysis suite
partition.c
Go to the documentation of this file.
1 /*====================================================================*
2  - Copyright (C) 2001 Leptonica. All rights reserved.
3  -
4  - Redistribution and use in source and binary forms, with or without
5  - modification, are permitted provided that the following conditions
6  - are met:
7  - 1. Redistributions of source code must retain the above copyright
8  - notice, this list of conditions and the following disclaimer.
9  - 2. Redistributions in binary form must reproduce the above
10  - copyright notice, this list of conditions and the following
11  - disclaimer in the documentation and/or other materials
12  - provided with the distribution.
13  -
14  - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15  - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16  - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17  - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18  - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19  - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20  - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21  - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22  - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23  - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *====================================================================*/
26 
45 #ifdef HAVE_CONFIG_H
46 #include <config_auto.h>
47 #endif /* HAVE_CONFIG_H */
48 
49 #include "allheaders.h"
50 
53  l_float32 size; /* sorting key */
54  BOX *box; /* region of the element */
55  BOXA *boxa; /* set of intersecting boxes */
56 };
57 typedef struct PartitionElement PARTEL;
58 
59 static PARTEL * partelCreate(BOX *box);
60 static void partelDestroy(PARTEL **ppartel);
61 static l_int32 partelSetSize(PARTEL *partel, l_int32 sortflag);
62 static BOXA * boxaGenerateSubboxes(BOX *box, BOXA *boxa, l_int32 maxperim,
63  l_float32 fract);
64 static BOX * boxaSelectPivotBox(BOX *box, BOXA *boxa, l_int32 maxperim,
65  l_float32 fract);
66 static l_int32 boxCheckIfOverlapIsBig(BOX *box, BOXA *boxa,
67  l_float32 maxoverlap);
68 
69 static const l_int32 DefaultMaxPops = 20000;
70 
71 
72 #ifndef NO_CONSOLE_IO
73 #define OUTPUT_HEAP_STATS 0
74 #endif /* ~NO_CONSOLE_IO */
75 
76 /*------------------------------------------------------------------*
77  * Whitespace block extraction *
78  *------------------------------------------------------------------*/
191 BOXA *
193  BOX *box,
194  l_int32 sortflag,
195  l_int32 maxboxes,
196  l_float32 maxoverlap,
197  l_int32 maxperim,
198  l_float32 fract,
199  l_int32 maxpops)
200 {
201 l_int32 i, w, h, n, nsub, npush, npop;
202 BOX *boxsub;
203 BOXA *boxa, *boxa4, *boxasub, *boxad;
204 PARTEL *partel;
205 L_HEAP *lh;
206 
207  PROCNAME("boxaGetWhiteblocks");
208 
209  if (!boxas)
210  return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL);
211  if (sortflag != L_SORT_BY_WIDTH && sortflag != L_SORT_BY_HEIGHT &&
212  sortflag != L_SORT_BY_MIN_DIMENSION &&
213  sortflag != L_SORT_BY_MAX_DIMENSION &&
214  sortflag != L_SORT_BY_PERIMETER && sortflag != L_SORT_BY_AREA)
215  return (BOXA *)ERROR_PTR("invalid sort flag", procName, NULL);
216  if (maxboxes < 1) {
217  maxboxes = 1;
218  L_WARNING("setting maxboxes = 1\n", procName);
219  }
220  if (maxoverlap < 0.0 || maxoverlap > 1.0)
221  return (BOXA *)ERROR_PTR("invalid maxoverlap", procName, NULL);
222  if (maxpops == 0)
223  maxpops = DefaultMaxPops;
224 
225  if (!box) {
226  boxaGetExtent(boxas, &w, &h, NULL);
227  box = boxCreate(0, 0, w, h);
228  }
229 
230  /* Prime the heap */
231  lh = lheapCreate(20, L_SORT_DECREASING);
232  partel = partelCreate(box);
233  partel->boxa = boxaCopy(boxas, L_CLONE);
234  partelSetSize(partel, sortflag);
235  lheapAdd(lh, partel);
236 
237  npush = 1;
238  npop = 0;
239  boxad = boxaCreate(0);
240  while (1) {
241  if ((partel = (PARTEL *)lheapRemove(lh)) == NULL) /* we're done */
242  break;
243 
244  npop++; /* How many boxes have we retrieved from the queue? */
245  if (npop > maxpops) {
246  partelDestroy(&partel);
247  break;
248  }
249 
250  /* Extract the contents */
251  boxa = boxaCopy(partel->boxa, L_CLONE);
252  box = boxClone(partel->box);
253  partelDestroy(&partel);
254 
255  /* Can we output this one? */
256  n = boxaGetCount(boxa);
257  if (n == 0) {
258  if (boxCheckIfOverlapIsBig(box, boxad, maxoverlap) == 0)
259  boxaAddBox(boxad, box, L_INSERT);
260  else
261  boxDestroy(&box);
262  boxaDestroy(&boxa);
263  if (boxaGetCount(boxad) >= maxboxes) /* we're done */
264  break;
265  continue;
266  }
267 
268 
269  /* Generate up to 4 subboxes and put them on the heap */
270  boxa4 = boxaGenerateSubboxes(box, boxa, maxperim, fract);
271  boxDestroy(&box);
272  nsub = boxaGetCount(boxa4);
273  for (i = 0; i < nsub; i++) {
274  boxsub = boxaGetBox(boxa4, i, L_CLONE);
275  boxasub = boxaIntersectsBox(boxa, boxsub);
276  partel = partelCreate(boxsub);
277  partel->boxa = boxasub;
278  partelSetSize(partel, sortflag);
279  lheapAdd(lh, partel);
280  boxDestroy(&boxsub);
281  }
282  npush += nsub; /* How many boxes have we put on the queue? */
283 
284 /* boxaWriteStderr(boxa4); */
285 
286  boxaDestroy(&boxa4);
287  boxaDestroy(&boxa);
288  }
289 
290 #if OUTPUT_HEAP_STATS
291  lept_stderr("Heap statistics:\n");
292  lept_stderr(" Number of boxes pushed: %d\n", npush);
293  lept_stderr(" Number of boxes popped: %d\n", npop);
294  lept_stderr(" Number of boxes on heap: %d\n", lheapGetCount(lh));
295 #endif /* OUTPUT_HEAP_STATS */
296 
297  /* Clean up the heap */
298  while ((partel = (PARTEL *)lheapRemove(lh)) != NULL)
299  partelDestroy(&partel);
300  lheapDestroy(&lh, FALSE);
301 
302  return boxad;
303 }
304 
305 
306 /*------------------------------------------------------------------*
307  * Helpers *
308  *------------------------------------------------------------------*/
315 static PARTEL *
317 {
318 PARTEL *partel;
319 
320  partel = (PARTEL *)LEPT_CALLOC(1, sizeof(PARTEL));
321  partel->box = boxCopy(box);
322  return partel;
323 }
324 
325 
332 static void
334 {
335 PARTEL *partel;
336 
337  PROCNAME("partelDestroy");
338 
339  if (ppartel == NULL) {
340  L_WARNING("ptr address is null!\n", procName);
341  return;
342  }
343 
344  if ((partel = *ppartel) == NULL)
345  return;
346 
347  boxDestroy(&partel->box);
348  boxaDestroy(&partel->boxa);
349  LEPT_FREE(partel);
350  *ppartel = NULL;
351  return;
352 }
353 
354 
364 static l_int32
366  l_int32 sortflag)
367 {
368 l_int32 w, h;
369 
370  PROCNAME("partelSetSize");
371 
372  if (!partel)
373  return ERROR_INT("partel not defined", procName, 1);
374 
375  boxGetGeometry(partel->box, NULL, NULL, &w, &h);
376  if (sortflag == L_SORT_BY_WIDTH)
377  partel->size = (l_float32)w;
378  else if (sortflag == L_SORT_BY_HEIGHT)
379  partel->size = (l_float32)h;
380  else if (sortflag == L_SORT_BY_MIN_DIMENSION)
381  partel->size = (l_float32)L_MIN(w, h);
382  else if (sortflag == L_SORT_BY_MAX_DIMENSION)
383  partel->size = (l_float32)L_MAX(w, h);
384  else if (sortflag == L_SORT_BY_PERIMETER)
385  partel->size = (l_float32)(w + h);
386  else if (sortflag == L_SORT_BY_AREA)
387  partel->size = (l_float32)(w * h);
388  else
389  return ERROR_INT("invalid sortflag", procName, 1);
390  return 0;
391 }
392 
393 
407 static BOXA *
409  BOXA *boxa,
410  l_int32 maxperim,
411  l_float32 fract)
412 {
413 l_int32 x, y, w, h, xp, yp, wp, hp;
414 BOX *boxp; /* pivot box */
415 BOX *boxsub;
416 BOXA *boxa4;
417 
418  PROCNAME("boxaGenerateSubboxes");
419 
420  if (!box)
421  return (BOXA *)ERROR_PTR("box not defined", procName, NULL);
422  if (!boxa)
423  return (BOXA *)ERROR_PTR("boxa not defined", procName, NULL);
424 
425  boxa4 = boxaCreate(4);
426  boxp = boxaSelectPivotBox(box, boxa, maxperim, fract);
427  boxGetGeometry(box, &x, &y, &w, &h);
428  boxGetGeometry(boxp, &xp, &yp, &wp, &hp);
429  boxDestroy(&boxp);
430  if (xp > x) { /* left sub-box */
431  boxsub = boxCreate(x, y, xp - x, h);
432  boxaAddBox(boxa4, boxsub, L_INSERT);
433  }
434  if (yp > y) { /* top sub-box */
435  boxsub = boxCreate(x, y, w, yp - y);
436  boxaAddBox(boxa4, boxsub, L_INSERT);
437  }
438  if (xp + wp < x + w) { /* right sub-box */
439  boxsub = boxCreate(xp + wp, y, x + w - xp - wp, h);
440  boxaAddBox(boxa4, boxsub, L_INSERT);
441  }
442  if (yp + hp < y + h) { /* bottom sub-box */
443  boxsub = boxCreate(x, yp + hp, w, y + h - yp - hp);
444  boxaAddBox(boxa4, boxsub, L_INSERT);
445  }
446 
447  return boxa4;
448 }
449 
450 
485 static BOX *
487  BOXA *boxa,
488  l_int32 maxperim,
489  l_float32 fract)
490 {
491 l_int32 i, n, bw, bh, w, h;
492 l_int32 smallfound, minindex, perim, minsize;
493 l_float32 delx, dely, mindist, threshdist, dist, x, y, cx, cy;
494 BOX *boxt;
495 
496  PROCNAME("boxaSelectPivotBox");
497 
498  if (!box)
499  return (BOX *)ERROR_PTR("box not defined", procName, NULL);
500  if (!boxa)
501  return (BOX *)ERROR_PTR("boxa not defined", procName, NULL);
502  n = boxaGetCount(boxa);
503  if (n == 0)
504  return (BOX *)ERROR_PTR("no boxes in boxa", procName, NULL);
505  if (fract < 0.0 || fract > 1.0) {
506  L_WARNING("fract out of bounds; using 0.0\n", procName);
507  fract = 0.0;
508  }
509 
510  boxGetGeometry(box, NULL, NULL, &w, &h);
511  boxGetCenter(box, &x, &y);
512  threshdist = fract * (w * w + h * h);
513  mindist = 1000000000.;
514  minindex = 0;
515  smallfound = FALSE;
516  for (i = 0; i < n; i++) {
517  boxt = boxaGetBox(boxa, i, L_CLONE);
518  boxGetGeometry(boxt, NULL, NULL, &bw, &bh);
519  boxGetCenter(boxt, &cx, &cy);
520  boxDestroy(&boxt);
521  if (bw + bh > maxperim)
522  continue;
523  smallfound = TRUE;
524  delx = cx - x;
525  dely = cy - y;
526  dist = delx * delx + dely * dely;
527  if (dist <= threshdist)
528  return boxaGetBox(boxa, i, L_COPY);
529  if (dist < mindist) {
530  minindex = i;
531  mindist = dist;
532  }
533  }
534 
535  /* If there are small boxes but none are within 'fract' of the
536  * centroid, return the nearest one. */
537  if (smallfound == TRUE)
538  return boxaGetBox(boxa, minindex, L_COPY);
539 
540  /* No small boxes; return the smallest of the large boxes */
541  minsize = 1000000000;
542  minindex = 0;
543  for (i = 0; i < n; i++) {
544  boxaGetBoxGeometry(boxa, i, NULL, NULL, &bw, &bh);
545  perim = bw + bh;
546  if (perim < minsize) {
547  minsize = perim;
548  minindex = i;
549  }
550  }
551  return boxaGetBox(boxa, minindex, L_COPY);
552 }
553 
554 
565 static l_int32
567  BOXA *boxa,
568  l_float32 maxoverlap)
569 {
570 l_int32 i, n, bigoverlap;
571 l_float32 fract;
572 BOX *boxt;
573 
574  PROCNAME("boxCheckIfOverlapIsBig");
575 
576  if (!box)
577  return ERROR_INT("box not defined", procName, 1);
578  if (!boxa)
579  return ERROR_INT("boxa not defined", procName, 1);
580  if (maxoverlap < 0.0 || maxoverlap > 1.0)
581  return ERROR_INT("invalid maxoverlap", procName, 1);
582 
583  n = boxaGetCount(boxa);
584  if (n == 0 || maxoverlap == 1.0)
585  return 0;
586 
587  bigoverlap = 0;
588  for (i = 0; i < n; i++) {
589  boxt = boxaGetBox(boxa, i, L_CLONE);
590  boxOverlapFraction(boxt, box, &fract);
591  boxDestroy(&boxt);
592  if (fract > maxoverlap) {
593  bigoverlap = 1;
594  break;
595  }
596  }
597 
598  return bigoverlap;
599 }
600 
601 
620 BOXA *
622  l_float32 maxoverlap)
623 {
624 l_int32 i, j, n, remove;
625 l_float32 fract;
626 BOX *box1, *box2;
627 BOXA *boxad;
628 
629  PROCNAME("boxaPruneSortedOnOverlap");
630 
631  if (!boxas)
632  return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL);
633  if (maxoverlap < 0.0 || maxoverlap > 1.0)
634  return (BOXA *)ERROR_PTR("invalid maxoverlap", procName, NULL);
635 
636  n = boxaGetCount(boxas);
637  if (n == 0 || maxoverlap == 1.0)
638  return boxaCopy(boxas, L_COPY);
639 
640  boxad = boxaCreate(0);
641  box2 = boxaGetBox(boxas, 0, L_COPY);
642  boxaAddBox(boxad, box2, L_INSERT);
643  for (j = 1; j < n; j++) { /* prune on j */
644  box2 = boxaGetBox(boxas, j, L_COPY);
645  remove = FALSE;
646  for (i = 0; i < j; i++) { /* test on i */
647  box1 = boxaGetBox(boxas, i, L_CLONE);
648  boxOverlapFraction(box1, box2, &fract);
649  boxDestroy(&box1);
650  if (fract > maxoverlap) {
651  remove = TRUE;
652  break;
653  }
654  }
655  if (remove == TRUE)
656  boxDestroy(&box2);
657  else
658  boxaAddBox(boxad, box2, L_INSERT);
659  }
660 
661  return boxad;
662 }
BOXA * boxaCopy(BOXA *boxa, l_int32 copyflag)
boxaCopy()
Definition: boxbasic.c:537
void boxDestroy(BOX **pbox)
boxDestroy()
Definition: boxbasic.c:282
BOX * boxClone(BOX *box)
boxClone()
Definition: boxbasic.c:256
l_ok boxaGetBoxGeometry(BOXA *boxa, l_int32 index, l_int32 *px, l_int32 *py, l_int32 *pw, l_int32 *ph)
boxaGetBoxGeometry()
Definition: boxbasic.c:879
l_int32 boxaGetCount(BOXA *boxa)
boxaGetCount()
Definition: boxbasic.c:734
l_ok boxGetGeometry(BOX *box, l_int32 *px, l_int32 *py, l_int32 *pw, l_int32 *ph)
boxGetGeometry()
Definition: boxbasic.c:313
l_ok boxaAddBox(BOXA *boxa, BOX *box, l_int32 copyflag)
boxaAddBox()
Definition: boxbasic.c:620
void boxaDestroy(BOXA **pboxa)
boxaDestroy()
Definition: boxbasic.c:583
BOX * boxaGetBox(BOXA *boxa, l_int32 index, l_int32 accessflag)
boxaGetBox()
Definition: boxbasic.c:779
BOX * boxCopy(BOX *box)
boxCopy()
Definition: boxbasic.c:235
BOX * boxCreate(l_int32 x, l_int32 y, l_int32 w, l_int32 h)
boxCreate()
Definition: boxbasic.c:172
BOXA * boxaCreate(l_int32 n)
boxaCreate()
Definition: boxbasic.c:502
l_ok boxGetCenter(BOX *box, l_float32 *pcx, l_float32 *pcy)
boxGetCenter()
Definition: boxfunc1.c:1583
BOXA * boxaIntersectsBox(BOXA *boxas, BOX *box)
boxaIntersectsBox()
Definition: boxfunc1.c:331
l_ok boxOverlapFraction(BOX *box1, BOX *box2, l_float32 *pfract)
boxOverlapFraction()
Definition: boxfunc1.c:810
l_ok boxaGetExtent(BOXA *boxa, l_int32 *pw, l_int32 *ph, BOX **pbox)
boxaGetExtent()
Definition: boxfunc4.c:953
void lheapDestroy(L_HEAP **plh, l_int32 freeflag)
lheapDestroy()
Definition: heap.c:154
L_HEAP * lheapCreate(l_int32 n, l_int32 direction)
lheapCreate()
Definition: heap.c:112
l_int32 lheapGetCount(L_HEAP *lh)
lheapGetCount()
Definition: heap.c:283
l_ok lheapAdd(L_HEAP *lh, void *item)
lheapAdd()
Definition: heap.c:193
void * lheapRemove(L_HEAP *lh)
lheapRemove()
Definition: heap.c:251
static l_int32 boxCheckIfOverlapIsBig(BOX *box, BOXA *boxa, l_float32 maxoverlap)
boxCheckIfOverlapIsBig()
Definition: partition.c:566
static BOXA * boxaGenerateSubboxes(BOX *box, BOXA *boxa, l_int32 maxperim, l_float32 fract)
boxaGenerateSubboxes()
Definition: partition.c:408
static PARTEL * partelCreate(BOX *box)
partelCreate()
Definition: partition.c:316
static l_int32 partelSetSize(PARTEL *partel, l_int32 sortflag)
partelSetSize()
Definition: partition.c:365
BOXA * boxaGetWhiteblocks(BOXA *boxas, BOX *box, l_int32 sortflag, l_int32 maxboxes, l_float32 maxoverlap, l_int32 maxperim, l_float32 fract, l_int32 maxpops)
boxaGetWhiteblocks()
Definition: partition.c:192
static void partelDestroy(PARTEL **ppartel)
partelDestroy()
Definition: partition.c:333
BOXA * boxaPruneSortedOnOverlap(BOXA *boxas, l_float32 maxoverlap)
boxaPruneSortedOnOverlap()
Definition: partition.c:621
static BOX * boxaSelectPivotBox(BOX *box, BOXA *boxa, l_int32 maxperim, l_float32 fract)
boxaSelectPivotBox()
Definition: partition.c:486
@ L_SORT_BY_AREA
Definition: pix.h:744
@ L_SORT_BY_MIN_DIMENSION
Definition: pix.h:741
@ L_SORT_BY_PERIMETER
Definition: pix.h:743
@ L_SORT_BY_WIDTH
Definition: pix.h:739
@ L_SORT_BY_HEIGHT
Definition: pix.h:740
@ L_SORT_BY_MAX_DIMENSION
Definition: pix.h:742
@ L_COPY
Definition: pix.h:712
@ L_CLONE
Definition: pix.h:713
@ L_INSERT
Definition: pix.h:711
@ L_SORT_DECREASING
Definition: pix.h:730
Definition: pix.h:481
Definition: pix.h:492
Definition: heap.h:78
void lept_stderr(const char *fmt,...)
lept_stderr()
Definition: utils1.c:306