Leptonica  1.82.0
Image processing and image analysis suite
boxfunc5.c
Go to the documentation of this file.
1 /*====================================================================*
2  - Copyright (C) 2001 Leptonica. All rights reserved.
3  -
4  - Redistribution and use in source and binary forms, with or without
5  - modification, are permitted provided that the following conditions
6  - are met:
7  - 1. Redistributions of source code must retain the above copyright
8  - notice, this list of conditions and the following disclaimer.
9  - 2. Redistributions in binary form must reproduce the above
10  - copyright notice, this list of conditions and the following
11  - disclaimer in the documentation and/or other materials
12  - provided with the distribution.
13  -
14  - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15  - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16  - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17  - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18  - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19  - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20  - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21  - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22  - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23  - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *====================================================================*/
26 
50 #ifdef HAVE_CONFIG_H
51 #include <config_auto.h>
52 #endif /* HAVE_CONFIG_H */
53 
54 #include <math.h>
55 #include "allheaders.h"
56 
57 static l_int32 boxaFillAll(BOXA *boxa);
58 static void adjustSidePlotName(char *buf, size_t size, const char *preface,
59  l_int32 select);
60 
61 /*---------------------------------------------------------------------*
62  * Boxa sequence fitting *
63  *---------------------------------------------------------------------*/
105 BOXA *
107  l_int32 halfwin,
108  l_int32 subflag,
109  l_int32 maxdiff,
110  l_int32 extrapixels,
111  l_int32 debug)
112 {
113 l_int32 n;
114 BOXA *boxae, *boxao, *boxamede, *boxamedo, *boxame, *boxamo, *boxad;
115 PIX *pix1;
116 
117  PROCNAME("boxaSmoothSequenceMedian");
118 
119  if (!boxas)
120  return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL);
121  if (halfwin <= 0) {
122  L_WARNING("halfwin must be > 0; returning copy\n", procName);
123  return boxaCopy(boxas, L_COPY);
124  }
125  if (maxdiff < 0) {
126  L_WARNING("maxdiff must be >= 0; returning copy\n", procName);
127  return boxaCopy(boxas, L_COPY);
128  }
129  if (subflag != L_USE_MINSIZE && subflag != L_USE_MAXSIZE &&
130  subflag != L_SUB_ON_LOC_DIFF && subflag != L_SUB_ON_SIZE_DIFF &&
131  subflag != L_USE_CAPPED_MIN && subflag != L_USE_CAPPED_MAX) {
132  L_WARNING("invalid subflag; returning copy\n", procName);
133  return boxaCopy(boxas, L_COPY);
134  }
135  if ((n = boxaGetCount(boxas)) < 6) {
136  L_WARNING("need at least 6 boxes; returning copy\n", procName);
137  return boxaCopy(boxas, L_COPY);
138  }
139 
140  boxaSplitEvenOdd(boxas, 0, &boxae, &boxao);
141  if (debug) {
142  lept_mkdir("lept/smooth");
143  boxaWriteDebug("/tmp/lept/smooth/boxae.ba", boxae);
144  boxaWriteDebug("/tmp/lept/smooth/boxao.ba", boxao);
145  }
146 
147  boxamede = boxaWindowedMedian(boxae, halfwin, debug);
148  boxamedo = boxaWindowedMedian(boxao, halfwin, debug);
149  if (debug) {
150  boxaWriteDebug("/tmp/lept/smooth/boxamede.ba", boxamede);
151  boxaWriteDebug("/tmp/lept/smooth/boxamedo.ba", boxamedo);
152  }
153 
154  boxame = boxaModifyWithBoxa(boxae, boxamede, subflag, maxdiff, extrapixels);
155  boxamo = boxaModifyWithBoxa(boxao, boxamedo, subflag, maxdiff, extrapixels);
156  if (debug) {
157  boxaWriteDebug("/tmp/lept/smooth/boxame.ba", boxame);
158  boxaWriteDebug("/tmp/lept/smooth/boxamo.ba", boxamo);
159  }
160 
161  boxad = boxaMergeEvenOdd(boxame, boxamo, 0);
162  if (debug) {
163  boxaPlotSides(boxas, NULL, NULL, NULL, NULL, NULL, &pix1);
164  pixWrite("/tmp/lept/smooth/plotsides1.png", pix1, IFF_PNG);
165  pixDestroy(&pix1);
166  boxaPlotSides(boxad, NULL, NULL, NULL, NULL, NULL, &pix1);
167  pixWrite("/tmp/lept/smooth/plotsides2.png", pix1, IFF_PNG);
168  pixDestroy(&pix1);
169  boxaPlotSizes(boxas, NULL, NULL, NULL, &pix1);
170  pixWrite("/tmp/lept/smooth/plotsizes1.png", pix1, IFF_PNG);
171  pixDestroy(&pix1);
172  boxaPlotSizes(boxad, NULL, NULL, NULL, &pix1);
173  pixWrite("/tmp/lept/smooth/plotsizes2.png", pix1, IFF_PNG);
174  pixDestroy(&pix1);
175  }
176 
177  boxaDestroy(&boxae);
178  boxaDestroy(&boxao);
179  boxaDestroy(&boxamede);
180  boxaDestroy(&boxamedo);
181  boxaDestroy(&boxame);
182  boxaDestroy(&boxamo);
183  return boxad;
184 }
185 
186 
205 BOXA *
207  l_int32 halfwin,
208  l_int32 debug)
209 {
210 l_int32 n, i, left, top, right, bot;
211 BOX *box;
212 BOXA *boxaf, *boxad;
213 NUMA *nal, *nat, *nar, *nab, *naml, *namt, *namr, *namb;
214 PIX *pix1;
215 
216  PROCNAME("boxaWindowedMedian");
217 
218  if (!boxas)
219  return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL);
220  if ((n = boxaGetCount(boxas)) < 3) {
221  L_WARNING("less than 3 boxes; returning a copy\n", procName);
222  return boxaCopy(boxas, L_COPY);
223  }
224  if (halfwin <= 0) {
225  L_WARNING("halfwin must be > 0; returning copy\n", procName);
226  return boxaCopy(boxas, L_COPY);
227  }
228 
229  /* Fill invalid boxes in the input sequence */
230  if ((boxaf = boxaFillSequence(boxas, L_USE_ALL_BOXES, debug)) == NULL)
231  return (BOXA *)ERROR_PTR("filled boxa not made", procName, NULL);
232 
233  /* Get the windowed median output from each of the sides */
234  boxaExtractAsNuma(boxaf, &nal, &nat, &nar, &nab, NULL, NULL, 0);
235  naml = numaWindowedMedian(nal, halfwin);
236  namt = numaWindowedMedian(nat, halfwin);
237  namr = numaWindowedMedian(nar, halfwin);
238  namb = numaWindowedMedian(nab, halfwin);
239 
240  n = boxaGetCount(boxaf);
241  boxad = boxaCreate(n);
242  for (i = 0; i < n; i++) {
243  numaGetIValue(naml, i, &left);
244  numaGetIValue(namt, i, &top);
245  numaGetIValue(namr, i, &right);
246  numaGetIValue(namb, i, &bot);
247  box = boxCreate(left, top, right - left + 1, bot - top + 1);
248  boxaAddBox(boxad, box, L_INSERT);
249  }
250 
251  if (debug) {
252  lept_mkdir("lept/windowed");
253  boxaPlotSides(boxaf, NULL, NULL, NULL, NULL, NULL, &pix1);
254  pixWrite("/tmp/lept/windowed/plotsides1.png", pix1, IFF_PNG);
255  pixDestroy(&pix1);
256  boxaPlotSides(boxad, NULL, NULL, NULL, NULL, NULL, &pix1);
257  pixWrite("/tmp/lept/windowed/plotsides2.png", pix1, IFF_PNG);
258  pixDestroy(&pix1);
259  boxaPlotSizes(boxaf, NULL, NULL, NULL, &pix1);
260  pixWrite("/tmp/lept/windowed/plotsizes1.png", pix1, IFF_PNG);
261  pixDestroy(&pix1);
262  boxaPlotSizes(boxad, NULL, NULL, NULL, &pix1);
263  pixWrite("/tmp/lept/windowed/plotsizes2.png", pix1, IFF_PNG);
264  pixDestroy(&pix1);
265  }
266 
267  boxaDestroy(&boxaf);
268  numaDestroy(&nal);
269  numaDestroy(&nat);
270  numaDestroy(&nar);
271  numaDestroy(&nab);
272  numaDestroy(&naml);
273  numaDestroy(&namt);
274  numaDestroy(&namr);
275  numaDestroy(&namb);
276  return boxad;
277 }
278 
279 
350 BOXA *
352  BOXA *boxam,
353  l_int32 subflag,
354  l_int32 maxdiff,
355  l_int32 extrapixels)
356 {
357 l_int32 n, i, ls, ts, rs, bs, ws, hs, lm, tm, rm, bm, wm, hm, ld, td, rd, bd;
358 BOX *boxs, *boxm, *boxd, *boxempty;
359 BOXA *boxad;
360 
361  PROCNAME("boxaModifyWithBoxa");
362 
363  if (!boxas)
364  return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL);
365  if (!boxam) {
366  L_WARNING("boxam not defined; returning copy", procName);
367  return boxaCopy(boxas, L_COPY);
368  }
369  if (subflag != L_USE_MINSIZE && subflag != L_USE_MAXSIZE &&
370  subflag != L_SUB_ON_LOC_DIFF && subflag != L_SUB_ON_SIZE_DIFF &&
371  subflag != L_USE_CAPPED_MIN && subflag != L_USE_CAPPED_MAX) {
372  L_WARNING("invalid subflag; returning copy", procName);
373  return boxaCopy(boxas, L_COPY);
374  }
375  n = boxaGetCount(boxas);
376  if (n != boxaGetCount(boxam)) {
377  L_WARNING("boxas and boxam sizes differ; returning copy", procName);
378  return boxaCopy(boxas, L_COPY);
379  }
380 
381  boxad = boxaCreate(n);
382  boxempty = boxCreate(0, 0, 0, 0); /* placeholders */
383  for (i = 0; i < n; i++) {
384  boxs = boxaGetValidBox(boxas, i, L_CLONE);
385  boxm = boxaGetValidBox(boxam, i, L_CLONE);
386  if (!boxs || !boxm) {
387  boxaAddBox(boxad, boxempty, L_COPY);
388  } else {
389  boxGetGeometry(boxs, &ls, &ts, &ws, &hs);
390  boxGetGeometry(boxm, &lm, &tm, &wm, &hm);
391  rs = ls + ws - 1;
392  bs = ts + hs - 1;
393  rm = lm + wm - 1;
394  bm = tm + hm - 1;
395  if (subflag == L_USE_MINSIZE) {
396  ld = L_MAX(ls, lm);
397  rd = L_MIN(rs, rm);
398  td = L_MAX(ts, tm);
399  bd = L_MIN(bs, bm);
400  } else if (subflag == L_USE_MAXSIZE) {
401  ld = L_MIN(ls, lm);
402  rd = L_MAX(rs, rm);
403  td = L_MIN(ts, tm);
404  bd = L_MAX(bs, bm);
405  } else if (subflag == L_SUB_ON_LOC_DIFF) {
406  ld = (L_ABS(lm - ls) <= maxdiff) ? ls : lm - extrapixels;
407  td = (L_ABS(tm - ts) <= maxdiff) ? ts : tm - extrapixels;
408  rd = (L_ABS(rm - rs) <= maxdiff) ? rs : rm + extrapixels;
409  bd = (L_ABS(bm - bs) <= maxdiff) ? bs : bm + extrapixels;
410  } else if (subflag == L_SUB_ON_SIZE_DIFF) {
411  ld = (L_ABS(wm - ws) <= maxdiff) ? ls : lm - extrapixels;
412  td = (L_ABS(hm - hs) <= maxdiff) ? ts : tm - extrapixels;
413  rd = (L_ABS(wm - ws) <= maxdiff) ? rs : rm + extrapixels;
414  bd = (L_ABS(hm - hs) <= maxdiff) ? bs : bm + extrapixels;
415  } else if (subflag == L_USE_CAPPED_MIN) {
416  ld = L_MAX(lm, L_MIN(ls, lm + maxdiff));
417  td = L_MAX(tm, L_MIN(ts, tm + maxdiff));
418  rd = L_MIN(rm, L_MAX(rs, rm - maxdiff));
419  bd = L_MIN(bm, L_MAX(bs, bm - maxdiff));
420  } else { /* subflag == L_USE_CAPPED_MAX */
421  ld = L_MIN(lm, L_MAX(ls, lm - maxdiff));
422  td = L_MIN(tm, L_MAX(ts, tm - maxdiff));
423  rd = L_MAX(rm, L_MIN(rs, rm + maxdiff));
424  bd = L_MAX(bm, L_MIN(bs, bm + maxdiff));
425  }
426  boxd = boxCreate(ld, td, rd - ld + 1, bd - td + 1);
427  boxaAddBox(boxad, boxd, L_INSERT);
428  }
429  boxDestroy(&boxs);
430  boxDestroy(&boxm);
431  }
432  boxDestroy(&boxempty);
433 
434  return boxad;
435 }
436 
437 
466 BOXA *
468  l_int32 delw,
469  l_int32 op,
470  l_float32 factor,
471  NUMA *na)
472 {
473 l_int32 i, ne, no, nmin, xe, we, xo, wo, inde, indo, x, w;
474 BOX *boxe, *boxo;
475 BOXA *boxae, *boxao, *boxad;
476 
477  PROCNAME("boxaReconcilePairWidth");
478 
479  if (!boxas)
480  return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL);
481  if (factor <= 0.0) {
482  L_WARNING("invalid factor; setting to 1.0\n", procName);
483  factor = 1.0;
484  }
485 
486  /* Taking the boxes in pairs, if the difference in width reaches
487  * the threshold %delw, adjust the left or right side of one
488  * of the pair. */
489  boxaSplitEvenOdd(boxas, 0, &boxae, &boxao);
490  ne = boxaGetCount(boxae);
491  no = boxaGetCount(boxao);
492  nmin = L_MIN(ne, no);
493  for (i = 0; i < nmin; i++) {
494  /* Set indicator values */
495  if (na) {
496  numaGetIValue(na, 2 * i, &inde);
497  numaGetIValue(na, 2 * i + 1, &indo);
498  } else {
499  inde = indo = 1;
500  }
501  if (inde == 0 && indo == 0) continue;
502 
503  boxe = boxaGetBox(boxae, i, L_CLONE);
504  boxo = boxaGetBox(boxao, i, L_CLONE);
505  boxGetGeometry(boxe, &xe, NULL, &we, NULL);
506  boxGetGeometry(boxo, &xo, NULL, &wo, NULL);
507  if (we == 0 || wo == 0) { /* if either is invalid; skip */
508  boxDestroy(&boxe);
509  boxDestroy(&boxo);
510  continue;
511  } else if (L_ABS(we - wo) > delw) {
512  if (op == L_ADJUST_CHOOSE_MIN) {
513  if (we > wo && inde == 1) {
514  /* move left side of even to the right */
515  w = factor * wo;
516  x = xe + (we - w);
517  boxSetGeometry(boxe, x, -1, w, -1);
518  } else if (we < wo && indo == 1) {
519  /* move right side of odd to the left */
520  w = factor * we;
521  boxSetGeometry(boxo, -1, -1, w, -1);
522  }
523  } else { /* maximize width */
524  if (we < wo && inde == 1) {
525  /* move left side of even to the left */
526  w = factor * wo;
527  x = L_MAX(0, xe + (we - w));
528  w = we + (xe - x); /* covers both cases for the max */
529  boxSetGeometry(boxe, x, -1, w, -1);
530  } else if (we > wo && indo == 1) {
531  /* move right side of odd to the right */
532  w = factor * we;
533  boxSetGeometry(boxo, -1, -1, w, -1);
534  }
535  }
536  }
537  boxDestroy(&boxe);
538  boxDestroy(&boxo);
539  }
540 
541  boxad = boxaMergeEvenOdd(boxae, boxao, 0);
542  boxaDestroy(&boxae);
543  boxaDestroy(&boxao);
544  return boxad;
545 }
546 
547 
589 l_ok
591  l_int32 type,
592  l_float32 threshp,
593  l_float32 threshm,
594  l_float32 *pfvarp,
595  l_float32 *pfvarm,
596  l_int32 *psame)
597 {
598 l_int32 i, n, bw1, bh1, bw2, bh2, npairs;
599 l_float32 ave, fdiff, sumdiff, med, fvarp, fvarm;
600 NUMA *na1;
601 
602  PROCNAME("boxaSizeConsistency");
603 
604  if (pfvarp) *pfvarp = 0.0;
605  if (pfvarm) *pfvarm = 0.0;
606  if (!psame)
607  return ERROR_INT("&same not defined", procName, 1);
608  *psame = -1;
609  if (!boxas)
610  return ERROR_INT("boxas not defined", procName, 1);
611  if (boxaGetValidCount(boxas) < 6)
612  return ERROR_INT("need a least 6 valid boxes", procName, 1);
613  if (type != L_CHECK_WIDTH && type != L_CHECK_HEIGHT)
614  return ERROR_INT("invalid type", procName, 1);
615  if (threshp < 0.0 || threshp >= 0.5)
616  return ERROR_INT("invalid threshp", procName, 1);
617  if (threshm < 0.0 || threshm >= 0.5)
618  return ERROR_INT("invalid threshm", procName, 1);
619  if (threshp == 0.0) threshp = 0.02;
620  if (threshm == 0.0) threshm = 0.015;
621 
622  /* Evaluate pairwise variation */
623  n = boxaGetCount(boxas);
624  na1 = numaCreate(0);
625  for (i = 0, npairs = 0, sumdiff = 0; i < n - 1; i += 2) {
626  boxaGetBoxGeometry(boxas, i, NULL, NULL, &bw1, &bh1);
627  boxaGetBoxGeometry(boxas, i + 1, NULL, NULL, &bw2, &bh2);
628  if (bw1 == 0 || bh1 == 0 || bw2 == 0 || bh2 == 0)
629  continue;
630  npairs++;
631  if (type == L_CHECK_WIDTH) {
632  ave = (bw1 + bw2) / 2.0;
633  fdiff = L_ABS(bw1 - bw2) / ave;
634  numaAddNumber(na1, bw1);
635  numaAddNumber(na1, bw2);
636  } else { /* type == L_CHECK_HEIGHT) */
637  ave = (bh1 + bh2) / 2.0;
638  fdiff = L_ABS(bh1 - bh2) / ave;
639  numaAddNumber(na1, bh1);
640  numaAddNumber(na1, bh2);
641  }
642  sumdiff += fdiff;
643  }
644  fvarp = sumdiff / npairs;
645  if (pfvarp) *pfvarp = fvarp;
646 
647  /* Evaluate the average abs fractional deviation from the median */
648  numaGetMedian(na1, &med);
649  if (med == 0.0) {
650  L_WARNING("median value is 0\n", procName);
651  } else {
652  numaGetMeanDevFromMedian(na1, med, &fvarm);
653  fvarm /= med;
654  if (pfvarm) *pfvarm = fvarm;
655  }
656  numaDestroy(&na1);
657 
658  /* Make decision */
659  if (fvarp < threshp && fvarm < threshm)
660  *psame = 1;
661  else if (fvarp < threshp && fvarm > threshm)
662  *psame = 0;
663  else
664  *psame = -1; /* unknown */
665  return 0;
666 }
667 
668 
690 BOXA *
692  l_int32 select1,
693  l_int32 select2,
694  l_int32 thresh,
695  l_int32 extra,
696  PIXA *pixadb)
697  {
698 l_int32 ncols;
699 BOXA *boxa1e, *boxa1o, *boxa2e, *boxa2o, *boxa3e, *boxa3o, *boxad;
700 PIX *pix1;
701 
702  PROCNAME("boxaReconcileAllByMedian");
703 
704  if (!boxas)
705  return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL);
706  if (select1 != L_ADJUST_LEFT_AND_RIGHT && select1 != L_ADJUST_SKIP) {
707  L_WARNING("invalid select1; returning copy\n", procName);
708  return boxaCopy(boxas, L_COPY);
709  }
710  if (select2 != L_ADJUST_TOP_AND_BOT && select2 != L_ADJUST_SKIP) {
711  L_WARNING("invalid select2; returning copy\n", procName);
712  return boxaCopy(boxas, L_COPY);
713  }
714  if (thresh < 0) {
715  L_WARNING("thresh must be >= 0; returning copy\n", procName);
716  return boxaCopy(boxas, L_COPY);
717  }
718  if (boxaGetValidCount(boxas) < 3) {
719  L_WARNING("need at least 3 valid boxes; returning copy\n", procName);
720  return boxaCopy(boxas, L_COPY);
721  }
722 
723  /* Adjust even and odd box sides separately */
724  boxaSplitEvenOdd(boxas, 0, &boxa1e, &boxa1o);
725  ncols = 1;
726  if (select1 == L_ADJUST_LEFT_AND_RIGHT) {
727  ncols += 2;
728  boxa2e = boxaReconcileSidesByMedian(boxa1e, select1, thresh,
729  extra, pixadb);
730  } else {
731  boxa2e = boxaCopy(boxa1e, L_COPY);
732  }
733  if (select2 == L_ADJUST_TOP_AND_BOT) {
734  ncols += 2;
735  boxa3e = boxaReconcileSidesByMedian(boxa2e, select2, thresh,
736  extra, pixadb);
737  } else {
738  boxa3e = boxaCopy(boxa2e, L_COPY);
739  }
740  if (select1 == L_ADJUST_LEFT_AND_RIGHT)
741  boxa2o = boxaReconcileSidesByMedian(boxa1o, select1, thresh,
742  extra, pixadb);
743  else
744  boxa2o = boxaCopy(boxa1o, L_COPY);
745  if (select2 == L_ADJUST_TOP_AND_BOT)
746  boxa3o = boxaReconcileSidesByMedian(boxa2o, select2, thresh,
747  extra, pixadb);
748  else
749  boxa3o = boxaCopy(boxa2o, L_COPY);
750  boxad = boxaMergeEvenOdd(boxa3e, boxa3o, 0);
751 
752  /* This generates 2 sets of 3 or 5 plots in a row, depending
753  * on whether select1 and select2 are true (not skipping).
754  * The top row is for even boxes; the bottom row is for odd boxes. */
755  if (pixadb) {
756  lept_mkdir("lept/boxa");
757  pix1 = pixaDisplayTiledInColumns(pixadb, ncols, 1.0, 30, 2);
758  pixWrite("/tmp/lept/boxa/recon_sides.png", pix1, IFF_PNG);
759  pixDestroy(&pix1);
760  }
761 
762  boxaDestroy(&boxa1e);
763  boxaDestroy(&boxa1o);
764  boxaDestroy(&boxa2e);
765  boxaDestroy(&boxa2o);
766  boxaDestroy(&boxa3e);
767  boxaDestroy(&boxa3o);
768  return boxad;
769 }
770 
771 
803 BOXA *
805  l_int32 select,
806  l_int32 thresh,
807  l_int32 extra,
808  PIXA *pixadb)
809  {
810 char buf[128];
811 l_int32 i, n, diff;
812 l_int32 left, right, top, bot, medleft, medright, medtop, medbot;
813 BOX *box;
814 BOXA *boxa1, *boxad;
815 PIX *pix;
816 
817  PROCNAME("boxaReconcileSidesByMedian");
818 
819  if (!boxas)
820  return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL);
821  if (select != L_ADJUST_LEFT && select != L_ADJUST_RIGHT &&
822  select != L_ADJUST_TOP && select != L_ADJUST_BOT &&
823  select != L_ADJUST_LEFT_AND_RIGHT && select != L_ADJUST_TOP_AND_BOT) {
824  L_WARNING("invalid select; returning copy\n", procName);
825  return boxaCopy(boxas, L_COPY);
826  }
827  if (thresh < 0) {
828  L_WARNING("thresh must be >= 0; returning copy\n", procName);
829  return boxaCopy(boxas, L_COPY);
830  }
831  if (boxaGetValidCount(boxas) < 3) {
832  L_WARNING("need at least 3 valid boxes; returning copy\n", procName);
833  return boxaCopy(boxas, L_COPY);
834  }
835 
836  if (select == L_ADJUST_LEFT_AND_RIGHT) {
837  boxa1 = boxaReconcileSidesByMedian(boxas, L_ADJUST_LEFT, thresh, extra,
838  pixadb);
839  boxad = boxaReconcileSidesByMedian(boxa1, L_ADJUST_RIGHT, thresh, extra,
840  pixadb);
841  boxaDestroy(&boxa1);
842  return boxad;
843  }
844  if (select == L_ADJUST_TOP_AND_BOT) {
845  boxa1 = boxaReconcileSidesByMedian(boxas, L_ADJUST_TOP, thresh, extra,
846  pixadb);
847  boxad = boxaReconcileSidesByMedian(boxa1, L_ADJUST_BOT, thresh, extra,
848  pixadb);
849  boxaDestroy(&boxa1);
850  return boxad;
851  }
852 
853  if (pixadb) {
854  l_int32 ndb = pixaGetCount(pixadb);
855  if (ndb == 0 || ndb == 5) { /* first of even and odd box sets */
856  adjustSidePlotName(buf, sizeof(buf), "init", select);
857  boxaPlotSides(boxas, buf, NULL, NULL, NULL, NULL, &pix);
858  pixaAddPix(pixadb, pix, L_INSERT);
859  }
860  }
861 
862  n = boxaGetCount(boxas);
863  boxad = boxaCreate(n);
864  if (select == L_ADJUST_LEFT) {
865  boxaGetMedianVals(boxas, &medleft, NULL, NULL, NULL, NULL, NULL);
866  for (i = 0; i < n; i++) {
867  box = boxaGetBox(boxas, i, L_COPY);
868  boxGetSideLocations(box, &left, NULL, NULL, NULL);
869  diff = medleft - left;
870  if (L_ABS(diff) >= thresh)
871  boxAdjustSides(box, box, diff - extra, 0, 0, 0);
872  boxaAddBox(boxad, box, L_INSERT);
873  }
874  } else if (select == L_ADJUST_RIGHT) {
875  boxaGetMedianVals(boxas, NULL, NULL, &medright, NULL, NULL, NULL);
876  for (i = 0; i < n; i++) {
877  box = boxaGetBox(boxas, i, L_COPY);
878  boxGetSideLocations(box, NULL, &right, NULL, NULL);
879  diff = medright - right;
880  if (L_ABS(diff) >= thresh)
881  boxAdjustSides(box, box, 0, diff + extra, 0, 0);
882  boxaAddBox(boxad, box, L_INSERT);
883  }
884  } else if (select == L_ADJUST_TOP) {
885  boxaGetMedianVals(boxas, NULL, &medtop, NULL, NULL, NULL, NULL);
886  for (i = 0; i < n; i++) {
887  box = boxaGetBox(boxas, i, L_COPY);
888  boxGetSideLocations(box, NULL, NULL, &top, NULL);
889  diff = medtop - top;
890  if (L_ABS(diff) >= thresh)
891  boxAdjustSides(box, box, 0, 0, diff - extra, 0);
892  boxaAddBox(boxad, box, L_INSERT);
893  }
894  } else { /* select == L_ADJUST_BOT */
895  boxaGetMedianVals(boxas, NULL, NULL, NULL, &medbot, NULL, NULL);
896  for (i = 0; i < n; i++) {
897  box = boxaGetBox(boxas, i, L_COPY);
898  boxGetSideLocations(box, NULL, NULL, NULL, &bot);
899  diff = medbot - bot;
900  if (L_ABS(diff) >= thresh)
901  boxAdjustSides(box, box, 0, 0, 0, diff + extra);
902  boxaAddBox(boxad, box, L_INSERT);
903  }
904  }
905 
906  if (pixadb) {
907  adjustSidePlotName(buf, sizeof(buf), "final", select);
908  boxaPlotSides(boxad, buf, NULL, NULL, NULL, NULL, &pix);
909  pixaAddPix(pixadb, pix, L_INSERT);
910  }
911  return boxad;
912 }
913 
914 
915 static void
916 adjustSidePlotName(char *buf,
917  size_t size,
918  const char *preface,
919  l_int32 select)
920 {
921  stringCopy(buf, preface, size - 8);
922  if (select == L_ADJUST_LEFT)
923  stringCat(buf, size, "-left");
924  else if (select == L_ADJUST_RIGHT)
925  stringCat(buf, size, "-right");
926  else if (select == L_ADJUST_TOP)
927  stringCat(buf, size, "-top");
928  else if (select == L_ADJUST_BOT)
929  stringCat(buf, size, "-bot");
930 }
931 
932 
981 BOXA *
983  l_int32 type,
984  l_float32 dfract,
985  l_float32 sfract,
986  l_float32 factor,
987  NUMA **pnadelw,
988  NUMA **pnadelh,
989  l_float32 *pratiowh)
990 {
991 l_int32 i, n, ne, no, outfound, isvalid, ind, del, maxdel;
992 l_int32 medw, medh, bw, bh, left, right, top, bot;
993 l_int32 medleft, medlefte, medlefto, medright, medrighte, medrighto;
994 l_int32 medtop, medtope, medtopo, medbot, medbote, medboto;
995 l_float32 brat;
996 BOX *box;
997 BOXA *boxa1, *boxae, *boxao, *boxad;
998 NUMA *naind, *nadelw, *nadelh;
999 
1000  PROCNAME("boxaReconcileSizeByMedian");
1001 
1002  if (pnadelw) *pnadelw = NULL;
1003  if (pnadelh) *pnadelh = NULL;
1004  if (pratiowh) *pratiowh = 0.0;
1005  if (!boxas)
1006  return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL);
1007  if (type != L_CHECK_WIDTH && type != L_CHECK_HEIGHT &&
1008  type != L_CHECK_BOTH) {
1009  L_WARNING("invalid type; returning copy\n", procName);
1010  return boxaCopy(boxas, L_COPY);
1011  }
1012  if (dfract <= 0.0 || dfract >= 0.5) {
1013  L_WARNING("invalid dimensional fract; returning copy\n", procName);
1014  return boxaCopy(boxas, L_COPY);
1015  }
1016  if (sfract <= 0.0 || sfract >= 0.5) {
1017  L_WARNING("invalid side fract; returning copy\n", procName);
1018  return boxaCopy(boxas, L_COPY);
1019  }
1020  if (factor < 0.8 || factor > 1.25)
1021  L_WARNING("factor %5.3f is typ. closer to 1.0\n", procName, factor);
1022  if (boxaGetValidCount(boxas) < 6) {
1023  L_WARNING("need at least 6 valid boxes; returning copy\n", procName);
1024  return boxaCopy(boxas, L_COPY);
1025  }
1026 
1027  /* If reconciling both width and height, optionally return array of
1028  * median deviations and even/odd ratio for width measurements */
1029  if (type == L_CHECK_BOTH) {
1030  boxa1 = boxaReconcileSizeByMedian(boxas, L_CHECK_WIDTH, dfract, sfract,
1031  factor, pnadelw, NULL, pratiowh);
1032  boxad = boxaReconcileSizeByMedian(boxa1, L_CHECK_HEIGHT, dfract, sfract,
1033  factor, NULL, pnadelh, NULL);
1034  boxaDestroy(&boxa1);
1035  return boxad;
1036  }
1037 
1038  n = boxaGetCount(boxas);
1039  naind = numaCreate(n); /* outlier indicator array */
1040  boxae = boxaCreate(0); /* even inliers */
1041  boxao = boxaCreate(0); /* odd inliers */
1042  outfound = FALSE;
1043  if (type == L_CHECK_WIDTH) {
1044  boxaMedianDimensions(boxas, &medw, &medh, NULL, NULL, NULL, NULL,
1045  &nadelw, NULL);
1046  if (pratiowh) {
1047  *pratiowh = (l_float32)medw / (l_float32)medh;
1048  L_INFO("median ratio w/h = %5.3f\n", procName, *pratiowh);
1049  }
1050  if (pnadelw)
1051  *pnadelw = nadelw;
1052  else
1053  numaDestroy(&nadelw);
1054 
1055  /* Check for outliers; assemble inliers */
1056  for (i = 0; i < n; i++) {
1057  if ((box = boxaGetValidBox(boxas, i, L_COPY)) == NULL) {
1058  numaAddNumber(naind, 0);
1059  continue;
1060  }
1061  boxGetGeometry(box, NULL, NULL, &bw, NULL);
1062  brat = (l_float32)bw / (l_float32)medw;
1063  if (brat < 1.0 - dfract || brat > 1.0 + dfract) {
1064  outfound = TRUE;
1065  numaAddNumber(naind, 1);
1066  boxDestroy(&box);
1067  } else { /* add to inliers */
1068  numaAddNumber(naind, 0);
1069  if (i % 2 == 0)
1070  boxaAddBox(boxae, box, L_INSERT);
1071  else
1072  boxaAddBox(boxao, box, L_INSERT);
1073  }
1074  }
1075  if (!outfound) { /* nothing to do */
1076  numaDestroy(&naind);
1077  boxaDestroy(&boxae);
1078  boxaDestroy(&boxao);
1079  L_INFO("no width outlier boxes found\n", procName);
1080  return boxaCopy(boxas, L_COPY);
1081  }
1082 
1083  /* Get left/right parameters from inliers. Handle the case
1084  * where there are no inliers for one of the sets. For example,
1085  * when all the even boxes have a different dimension from
1086  * the odd boxes, and the median arbitrarily gets assigned
1087  * to the even boxes, there are no odd inliers; in that case,
1088  * use the even inliers sides to decide whether to adjust
1089  * the left or the right sides of individual outliers. */
1090  L_INFO("fixing width of outlier boxes\n", procName);
1091  medlefte = medrighte = medlefto = medrighto = 0;
1092  if ((ne = boxaGetValidCount(boxae)) > 0)
1093  boxaGetMedianVals(boxae, &medlefte, NULL, &medrighte, NULL,
1094  NULL, NULL);
1095  if ((no = boxaGetValidCount(boxao)) > 0)
1096  boxaGetMedianVals(boxao, &medlefto, NULL, &medrighto, NULL,
1097  NULL, NULL);
1098  if (ne == 0) { /* use odd inliers values for both */
1099  medlefte = medlefto;
1100  medrighte = medrighto;
1101  } else if (no == 0) { /* use even inliers values for both */
1102  medlefto = medlefte;
1103  medrighto = medrighte;
1104  }
1105 
1106  /* Adjust the left and/or right sides of outliers.
1107  * For each box that is a dimensional outlier, consider each side.
1108  * Any side that differs fractionally from the median value
1109  * by more than %sfract times the median width (medw) is set to
1110  * the median value for that side. Then both sides are moved
1111  * an equal distance in or out to make w = %factor * medw. */
1112  boxad = boxaCreate(n);
1113  maxdel = (l_int32)(sfract * medw + 0.5);
1114  for (i = 0; i < n; i++) {
1115  box = boxaGetBox(boxas, i, L_COPY);
1116  boxIsValid(box, &isvalid);
1117  numaGetIValue(naind, i, &ind);
1118  medleft = (i % 2 == 0) ? medlefte : medlefto;
1119  medright = (i % 2 == 0) ? medrighte : medrighto;
1120  if (ind == 1 && isvalid) { /* adjust sides */
1121  boxGetSideLocations(box, &left, &right, NULL, NULL);
1122  if (L_ABS(left - medleft) > maxdel) left = medleft;
1123  if (L_ABS(right - medright) > maxdel) right = medright;
1124  del = (l_int32)(factor * medw - (right - left)) / 2;
1125  boxSetSide(box, L_SET_LEFT, left - del, 0);
1126  boxSetSide(box, L_SET_RIGHT, right + del, 0);
1127  }
1128  boxaAddBox(boxad, box, L_INSERT);
1129  }
1130  } else { /* L_CHECK_HEIGHT */
1131  boxaMedianDimensions(boxas, &medw, &medh, NULL, NULL, NULL, NULL,
1132  NULL, &nadelh);
1133  if (pratiowh) {
1134  *pratiowh = (l_float32)medw / (l_float32)medh;
1135  L_INFO("median ratio w/h = %5.3f\n", procName, *pratiowh);
1136  }
1137  if (pnadelh)
1138  *pnadelh = nadelh;
1139  else
1140  numaDestroy(&nadelh);
1141 
1142  /* Check for outliers; assemble inliers */
1143  for (i = 0; i < n; i++) {
1144  if ((box = boxaGetValidBox(boxas, i, L_COPY)) == NULL) {
1145  numaAddNumber(naind, 0);
1146  continue;
1147  }
1148  boxGetGeometry(box, NULL, NULL, NULL, &bh);
1149  brat = (l_float32)bh / (l_float32)medh;
1150  if (brat < 1.0 - dfract || brat > 1.0 + dfract) {
1151  outfound = TRUE;
1152  numaAddNumber(naind, 1);
1153  boxDestroy(&box);
1154  } else { /* add to inliers */
1155  numaAddNumber(naind, 0);
1156  if (i % 2 == 0)
1157  boxaAddBox(boxae, box, L_INSERT);
1158  else
1159  boxaAddBox(boxao, box, L_INSERT);
1160  }
1161  }
1162  if (!outfound) { /* nothing to do */
1163  numaDestroy(&naind);
1164  boxaDestroy(&boxae);
1165  boxaDestroy(&boxao);
1166  L_INFO("no height outlier boxes found\n", procName);
1167  return boxaCopy(boxas, L_COPY);
1168  }
1169 
1170  /* Get top/bot parameters from inliers. Handle the case
1171  * where there are no inliers for one of the sets. For example,
1172  * when all the even boxes have a different dimension from
1173  * the odd boxes, and the median arbitrarily gets assigned
1174  * to the even boxes, there are no odd inliers; in that case,
1175  * use the even inlier sides to decide whether to adjust
1176  * the top or the bottom sides of individual outliers. */
1177  L_INFO("fixing height of outlier boxes\n", procName);
1178  medlefte = medtope = medbote = medtopo = medboto = 0;
1179  if ((ne = boxaGetValidCount(boxae)) > 0)
1180  boxaGetMedianVals(boxae, NULL, &medtope, NULL, &medbote,
1181  NULL, NULL);
1182  if ((no = boxaGetValidCount(boxao)) > 0)
1183  boxaGetMedianVals(boxao, NULL, &medtopo, NULL, &medboto,
1184  NULL, NULL);
1185  if (ne == 0) { /* use odd inliers values for both */
1186  medtope = medtopo;
1187  medbote = medboto;
1188  } else if (no == 0) { /* use even inliers values for both */
1189  medtopo = medtope;
1190  medboto = medbote;
1191  }
1192 
1193  /* Adjust the top and/or bottom sides of outliers.
1194  * For each box that is a dimensional outlier, consider each side.
1195  * Any side that differs fractionally from the median value
1196  * by more than %sfract times the median height (medh) is
1197  * set to the median value for that that side. Then both
1198  * sides are moved an equal distance in or out to make
1199  * h = %factor * medh). */
1200  boxad = boxaCreate(n);
1201  maxdel = (l_int32)(sfract * medh + 0.5);
1202  for (i = 0; i < n; i++) {
1203  box = boxaGetBox(boxas, i, L_COPY);
1204  boxIsValid(box, &isvalid);
1205  numaGetIValue(naind, i, &ind);
1206  medtop = (i % 2 == 0) ? medtope : medtopo;
1207  medbot = (i % 2 == 0) ? medbote : medboto;
1208  if (ind == 1 && isvalid) { /* adjust sides */
1209  boxGetSideLocations(box, NULL, NULL, &top, &bot);
1210  if (L_ABS(top - medtop) > maxdel) top = medtop;
1211  if (L_ABS(bot - medbot) > maxdel) bot = medbot;
1212  del = (l_int32)(factor * medh - (bot - top)) / 2; /* typ > 0 */
1213  boxSetSide(box, L_SET_TOP, L_MAX(0, top - del), 0);
1214  boxSetSide(box, L_SET_BOT, bot + del, 0);
1215  }
1216  boxaAddBox(boxad, box, L_INSERT);
1217  }
1218  }
1219  numaDestroy(&naind);
1220  boxaDestroy(&boxae);
1221  boxaDestroy(&boxao);
1222  return boxad;
1223 }
1224 
1225 
1250 l_ok
1252  const char *plotname,
1253  NUMA **pnal,
1254  NUMA **pnat,
1255  NUMA **pnar,
1256  NUMA **pnab,
1257  PIX **ppixd)
1258 {
1259 char buf[128], titlebuf[128];
1260 char *dataname;
1261 static l_int32 plotid = 0;
1262 l_int32 n, i, w, h, left, top, right, bot;
1263 l_int32 debugprint = FALSE; /* change to TRUE to spam stderr */
1264 l_float32 med, dev;
1265 BOXA *boxat;
1266 GPLOT *gplot;
1267 NUMA *nal, *nat, *nar, *nab;
1268 
1269  PROCNAME("boxaPlotSides");
1270 
1271  if (pnal) *pnal = NULL;
1272  if (pnat) *pnat = NULL;
1273  if (pnar) *pnar = NULL;
1274  if (pnab) *pnab = NULL;
1275  if (ppixd) *ppixd = NULL;
1276  if (!boxa)
1277  return ERROR_INT("boxa not defined", procName, 1);
1278  if ((n = boxaGetCount(boxa)) < 2)
1279  return ERROR_INT("less than 2 boxes", procName, 1);
1280  if (!ppixd)
1281  return ERROR_INT("&pixd not defined", procName, 1);
1282 
1283  boxat = boxaFillSequence(boxa, L_USE_ALL_BOXES, 0);
1284 
1285  /* Build the numas for each side */
1286  nal = numaCreate(n);
1287  nat = numaCreate(n);
1288  nar = numaCreate(n);
1289  nab = numaCreate(n);
1290 
1291  for (i = 0; i < n; i++) {
1292  boxaGetBoxGeometry(boxat, i, &left, &top, &w, &h);
1293  right = left + w - 1;
1294  bot = top + h - 1;
1295  numaAddNumber(nal, left);
1296  numaAddNumber(nat, top);
1297  numaAddNumber(nar, right);
1298  numaAddNumber(nab, bot);
1299  }
1300  boxaDestroy(&boxat);
1301 
1302  lept_mkdir("lept/plots");
1303  if (plotname) {
1304  snprintf(buf, sizeof(buf), "/tmp/lept/plots/sides.%s", plotname);
1305  snprintf(titlebuf, sizeof(titlebuf), "%s: Box sides vs. box index",
1306  plotname);
1307  } else {
1308  snprintf(buf, sizeof(buf), "/tmp/lept/plots/sides.%d", plotid++);
1309  snprintf(titlebuf, sizeof(titlebuf), "Box sides vs. box index");
1310  }
1311  gplot = gplotCreate(buf, GPLOT_PNG, titlebuf,
1312  "box index", "side location");
1313  gplotAddPlot(gplot, NULL, nal, GPLOT_LINES, "left side");
1314  gplotAddPlot(gplot, NULL, nat, GPLOT_LINES, "top side");
1315  gplotAddPlot(gplot, NULL, nar, GPLOT_LINES, "right side");
1316  gplotAddPlot(gplot, NULL, nab, GPLOT_LINES, "bottom side");
1317  *ppixd = gplotMakeOutputPix(gplot);
1318  gplotDestroy(&gplot);
1319 
1320  if (debugprint) {
1321  dataname = (plotname) ? stringNew(plotname) : stringNew("no_name");
1322  numaGetMedian(nal, &med);
1323  numaGetMeanDevFromMedian(nal, med, &dev);
1324  lept_stderr("%s left: med = %7.3f, meandev = %7.3f\n",
1325  dataname, med, dev);
1326  numaGetMedian(nat, &med);
1327  numaGetMeanDevFromMedian(nat, med, &dev);
1328  lept_stderr("%s top: med = %7.3f, meandev = %7.3f\n",
1329  dataname, med, dev);
1330  numaGetMedian(nar, &med);
1331  numaGetMeanDevFromMedian(nar, med, &dev);
1332  lept_stderr("%s right: med = %7.3f, meandev = %7.3f\n",
1333  dataname, med, dev);
1334  numaGetMedian(nab, &med);
1335  numaGetMeanDevFromMedian(nab, med, &dev);
1336  lept_stderr("%s bot: med = %7.3f, meandev = %7.3f\n",
1337  dataname, med, dev);
1338  LEPT_FREE(dataname);
1339  }
1340 
1341  if (pnal)
1342  *pnal = nal;
1343  else
1344  numaDestroy(&nal);
1345  if (pnat)
1346  *pnat = nat;
1347  else
1348  numaDestroy(&nat);
1349  if (pnar)
1350  *pnar = nar;
1351  else
1352  numaDestroy(&nar);
1353  if (pnab)
1354  *pnab = nab;
1355  else
1356  numaDestroy(&nab);
1357  return 0;
1358 }
1359 
1360 
1383 l_ok
1385  const char *plotname,
1386  NUMA **pnaw,
1387  NUMA **pnah,
1388  PIX **ppixd)
1389 {
1390 char buf[128], titlebuf[128];
1391 static l_int32 plotid = 0;
1392 l_int32 n, i, w, h;
1393 BOXA *boxat;
1394 GPLOT *gplot;
1395 NUMA *naw, *nah;
1396 
1397  PROCNAME("boxaPlotSizes");
1398 
1399  if (pnaw) *pnaw = NULL;
1400  if (pnah) *pnah = NULL;
1401  if (ppixd) *ppixd = NULL;
1402  if (!boxa)
1403  return ERROR_INT("boxa not defined", procName, 1);
1404  if ((n = boxaGetCount(boxa)) < 2)
1405  return ERROR_INT("less than 2 boxes", procName, 1);
1406  if (!ppixd)
1407  return ERROR_INT("&pixd not defined", procName, 1);
1408 
1409  boxat = boxaFillSequence(boxa, L_USE_ALL_BOXES, 0);
1410 
1411  /* Build the numas for the width and height */
1412  naw = numaCreate(n);
1413  nah = numaCreate(n);
1414  for (i = 0; i < n; i++) {
1415  boxaGetBoxGeometry(boxat, i, NULL, NULL, &w, &h);
1416  numaAddNumber(naw, w);
1417  numaAddNumber(nah, h);
1418  }
1419  boxaDestroy(&boxat);
1420 
1421  lept_mkdir("lept/plots");
1422  if (plotname) {
1423  snprintf(buf, sizeof(buf), "/tmp/lept/plots/size.%s", plotname);
1424  snprintf(titlebuf, sizeof(titlebuf), "%s: Box size vs. box index",
1425  plotname);
1426  } else {
1427  snprintf(buf, sizeof(buf), "/tmp/lept/plots/size.%d", plotid++);
1428  snprintf(titlebuf, sizeof(titlebuf), "Box size vs. box index");
1429  }
1430  gplot = gplotCreate(buf, GPLOT_PNG, titlebuf,
1431  "box index", "box dimension");
1432  gplotAddPlot(gplot, NULL, naw, GPLOT_LINES, "width");
1433  gplotAddPlot(gplot, NULL, nah, GPLOT_LINES, "height");
1434  *ppixd = gplotMakeOutputPix(gplot);
1435  gplotDestroy(&gplot);
1436 
1437  if (pnaw)
1438  *pnaw = naw;
1439  else
1440  numaDestroy(&naw);
1441  if (pnah)
1442  *pnah = nah;
1443  else
1444  numaDestroy(&nah);
1445  return 0;
1446 }
1447 
1448 
1467 BOXA *
1469  l_int32 useflag,
1470  l_int32 debug)
1471 {
1472 l_int32 n, nv;
1473 BOXA *boxae, *boxao, *boxad;
1474 
1475  PROCNAME("boxaFillSequence");
1476 
1477  if (!boxas)
1478  return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL);
1479  if (useflag != L_USE_ALL_BOXES && useflag != L_USE_SAME_PARITY_BOXES)
1480  return (BOXA *)ERROR_PTR("invalid useflag", procName, NULL);
1481 
1482  n = boxaGetCount(boxas);
1483  nv = boxaGetValidCount(boxas);
1484  if (n == nv)
1485  return boxaCopy(boxas, L_COPY); /* all valid */
1486  if (debug)
1487  L_INFO("%d valid boxes, %d invalid boxes\n", procName, nv, n - nv);
1488  if (useflag == L_USE_SAME_PARITY_BOXES && n < 3) {
1489  L_WARNING("n < 3; some invalid\n", procName);
1490  return boxaCopy(boxas, L_COPY);
1491  }
1492 
1493  if (useflag == L_USE_ALL_BOXES) {
1494  boxad = boxaCopy(boxas, L_COPY);
1495  boxaFillAll(boxad);
1496  } else {
1497  boxaSplitEvenOdd(boxas, 0, &boxae, &boxao);
1498  boxaFillAll(boxae);
1499  boxaFillAll(boxao);
1500  boxad = boxaMergeEvenOdd(boxae, boxao, 0);
1501  boxaDestroy(&boxae);
1502  boxaDestroy(&boxao);
1503  }
1504 
1505  nv = boxaGetValidCount(boxad);
1506  if (n != nv)
1507  L_WARNING("there are still %d invalid boxes\n", procName, n - nv);
1508 
1509  return boxad;
1510 }
1511 
1512 
1526 static l_int32
1528 {
1529 l_int32 n, nv, i, j, spandown, spanup;
1530 l_int32 *indic;
1531 BOX *box, *boxt;
1532 
1533  PROCNAME("boxaFillAll");
1534 
1535  if (!boxa)
1536  return ERROR_INT("boxa not defined", procName, 1);
1537  n = boxaGetCount(boxa);
1538  nv = boxaGetValidCount(boxa);
1539  if (n == nv) return 0;
1540  if (nv == 0) {
1541  L_WARNING("no valid boxes out of %d boxes\n", procName, n);
1542  return 0;
1543  }
1544 
1545  /* Make indicator array for valid boxes */
1546  if ((indic = (l_int32 *)LEPT_CALLOC(n, sizeof(l_int32))) == NULL)
1547  return ERROR_INT("indic not made", procName, 1);
1548  for (i = 0; i < n; i++) {
1549  box = boxaGetValidBox(boxa, i, L_CLONE);
1550  if (box)
1551  indic[i] = 1;
1552  boxDestroy(&box);
1553  }
1554 
1555  /* Replace invalid boxes with the nearest valid one */
1556  for (i = 0; i < n; i++) {
1557  box = boxaGetValidBox(boxa, i, L_CLONE);
1558  if (!box) {
1559  spandown = spanup = 10000000;
1560  for (j = i - 1; j >= 0; j--) {
1561  if (indic[j] == 1) {
1562  spandown = i - j;
1563  break;
1564  }
1565  }
1566  for (j = i + 1; j < n; j++) {
1567  if (indic[j] == 1) {
1568  spanup = j - i;
1569  break;
1570  }
1571  }
1572  if (spandown < spanup)
1573  boxt = boxaGetBox(boxa, i - spandown, L_COPY);
1574  else
1575  boxt = boxaGetBox(boxa, i + spanup, L_COPY);
1576  boxaReplaceBox(boxa, i, boxt);
1577  }
1578  boxDestroy(&box);
1579  }
1580 
1581  LEPT_FREE(indic);
1582  return 0;
1583 }
1584 
1585 
1610 l_ok
1612  l_int32 type,
1613  l_float32 *pdel_evenodd,
1614  l_float32 *prms_even,
1615  l_float32 *prms_odd,
1616  l_float32 *prms_all)
1617 {
1618 l_int32 n, ne, no, nmin, vale, valo, i;
1619 l_float32 sum;
1620 BOXA *boxae, *boxao;
1621 NUMA *nae, *nao, *na_all;
1622 
1623  PROCNAME("boxaSizeVariation");
1624 
1625  if (pdel_evenodd) *pdel_evenodd = 0.0;
1626  if (prms_even) *prms_even = 0.0;
1627  if (prms_odd) *prms_odd = 0.0;
1628  if (prms_all) *prms_all = 0.0;
1629  if (!boxa)
1630  return ERROR_INT("boxa not defined", procName, 1);
1631  if (type != L_SELECT_WIDTH && type != L_SELECT_HEIGHT)
1632  return ERROR_INT("invalid type", procName, 1);
1633  if (!pdel_evenodd && !prms_even && !prms_odd && !prms_all)
1634  return ERROR_INT("nothing to do", procName, 1);
1635  n = boxaGetCount(boxa);
1636  if (n < 4)
1637  return ERROR_INT("too few boxes", procName, 1);
1638 
1639  boxaSplitEvenOdd(boxa, 0, &boxae, &boxao);
1640  ne = boxaGetCount(boxae);
1641  no = boxaGetCount(boxao);
1642  nmin = L_MIN(ne, no);
1643  if (nmin == 0) {
1644  boxaDestroy(&boxae);
1645  boxaDestroy(&boxao);
1646  return ERROR_INT("either no even or no odd boxes", procName, 1);
1647  }
1648 
1649  if (type == L_SELECT_WIDTH) {
1650  boxaGetSizes(boxae, &nae, NULL);
1651  boxaGetSizes(boxao, &nao, NULL);
1652  boxaGetSizes(boxa, &na_all, NULL);
1653  } else { /* L_SELECT_HEIGHT) */
1654  boxaGetSizes(boxae, NULL, &nae);
1655  boxaGetSizes(boxao, NULL, &nao);
1656  boxaGetSizes(boxa, NULL, &na_all);
1657  }
1658 
1659  if (pdel_evenodd) {
1660  sum = 0.0;
1661  for (i = 0; i < nmin; i++) {
1662  numaGetIValue(nae, i, &vale);
1663  numaGetIValue(nao, i, &valo);
1664  sum += L_ABS(vale - valo);
1665  }
1666  *pdel_evenodd = sum / nmin;
1667  }
1668  if (prms_even)
1669  numaSimpleStats(nae, 0, -1, NULL, NULL, prms_even);
1670  if (prms_odd)
1671  numaSimpleStats(nao, 0, -1, NULL, NULL, prms_odd);
1672  if (prms_all)
1673  numaSimpleStats(na_all, 0, -1, NULL, NULL, prms_all);
1674 
1675  boxaDestroy(&boxae);
1676  boxaDestroy(&boxao);
1677  numaDestroy(&nae);
1678  numaDestroy(&nao);
1679  numaDestroy(&na_all);
1680  return 0;
1681 }
1682 
1683 
1713 l_ok
1715  l_int32 *pmedw,
1716  l_int32 *pmedh,
1717  l_int32 *pmedwe,
1718  l_int32 *pmedwo,
1719  l_int32 *pmedhe,
1720  l_int32 *pmedho,
1721  NUMA **pnadelw,
1722  NUMA **pnadelh)
1723 {
1724 l_int32 i, n, bw, bh, medw, medh, medwe, medwo, medhe, medho;
1725 BOXA *boxae, *boxao;
1726 NUMA *nadelw, *nadelh;
1727 
1728  PROCNAME("boxaMedianDimensions");
1729 
1730  if (pmedw) *pmedw = 0;
1731  if (pmedh) *pmedh = 0;
1732  if (pmedwe) *pmedwe= 0;
1733  if (pmedwo) *pmedwo= 0;
1734  if (pmedhe) *pmedhe= 0;
1735  if (pmedho) *pmedho= 0;
1736  if (pnadelw) *pnadelw = NULL;
1737  if (pnadelh) *pnadelh = NULL;
1738  if (!boxas)
1739  return ERROR_INT("boxas not defined", procName, 1);
1740  if (boxaGetValidCount(boxas) < 6)
1741  return ERROR_INT("need at least 6 valid boxes", procName, 1);
1742 
1743  /* Require at least 3 valid boxes of both types */
1744  boxaSplitEvenOdd(boxas, 0, &boxae, &boxao);
1745  if (boxaGetValidCount(boxae) < 3 || boxaGetValidCount(boxao) < 3) {
1746  boxaDestroy(&boxae);
1747  boxaDestroy(&boxao);
1748  return ERROR_INT("don't have 3+ valid boxes of each type", procName, 1);
1749  }
1750 
1751  /* Get the relevant median widths and heights */
1752  boxaGetMedianVals(boxas, NULL, NULL, NULL, NULL, &medw, &medh);
1753  boxaGetMedianVals(boxae, NULL, NULL, NULL, NULL, &medwe, &medhe);
1754  boxaGetMedianVals(boxao, NULL, NULL, NULL, NULL, &medwo, &medho);
1755  if (pmedw) *pmedw = medw;
1756  if (pmedh) *pmedh = medh;
1757  if (pmedwe) *pmedwe = medwe;
1758  if (pmedwo) *pmedwo = medwo;
1759  if (pmedhe) *pmedhe = medhe;
1760  if (pmedho) *pmedho = medho;
1761 
1762  /* Find the variation from median dimension for each box */
1763  n = boxaGetCount(boxas);
1764  nadelw = numaCreate(n);
1765  nadelh = numaCreate(n);
1766  for (i = 0; i < n; i++) {
1767  boxaGetBoxGeometry(boxas, i, NULL, NULL, &bw, &bh);
1768  if (bw == 0 || bh == 0) { /* invalid box */
1769  numaAddNumber(nadelw, 0);
1770  numaAddNumber(nadelh, 0);
1771  } else {
1772  numaAddNumber(nadelw, bw - medw);
1773  numaAddNumber(nadelh, bh - medh);
1774  }
1775  }
1776  if (pnadelw)
1777  *pnadelw = nadelw;
1778  else
1779  numaDestroy(&nadelw);
1780  if (pnadelh)
1781  *pnadelh = nadelh;
1782  else
1783  numaDestroy(&nadelh);
1784 
1785  boxaDestroy(&boxae);
1786  boxaDestroy(&boxao);
1787  return 0;
1788 }
1789 
l_int32 boxaGetValidCount(BOXA *boxa)
boxaGetValidCount()
Definition: boxbasic.c:751
BOXA * boxaCopy(BOXA *boxa, l_int32 copyflag)
boxaCopy()
Definition: boxbasic.c:537
void boxDestroy(BOX **pbox)
boxDestroy()
Definition: boxbasic.c:282
l_ok boxGetSideLocations(BOX *box, l_int32 *pl, l_int32 *pr, l_int32 *pt, l_int32 *pb)
boxGetSideLocations()
Definition: boxbasic.c:374
l_ok boxaReplaceBox(BOXA *boxa, l_int32 index, BOX *box)
boxaReplaceBox()
Definition: boxbasic.c:962
l_ok boxaGetBoxGeometry(BOXA *boxa, l_int32 index, l_int32 *px, l_int32 *py, l_int32 *pw, l_int32 *ph)
boxaGetBoxGeometry()
Definition: boxbasic.c:879
l_int32 boxaGetCount(BOXA *boxa)
boxaGetCount()
Definition: boxbasic.c:734
l_ok boxaWriteDebug(const char *filename, BOXA *boxa)
boxaWriteDebug()
Definition: boxbasic.c:2245
l_ok boxGetGeometry(BOX *box, l_int32 *px, l_int32 *py, l_int32 *pw, l_int32 *ph)
boxGetGeometry()
Definition: boxbasic.c:313
l_ok boxaAddBox(BOXA *boxa, BOX *box, l_int32 copyflag)
boxaAddBox()
Definition: boxbasic.c:620
void boxaDestroy(BOXA **pboxa)
boxaDestroy()
Definition: boxbasic.c:583
l_ok boxIsValid(BOX *box, l_int32 *pvalid)
boxIsValid()
Definition: boxbasic.c:475
l_ok boxSetGeometry(BOX *box, l_int32 x, l_int32 y, l_int32 w, l_int32 h)
boxSetGeometry()
Definition: boxbasic.c:343
BOX * boxaGetValidBox(BOXA *boxa, l_int32 index, l_int32 accessflag)
boxaGetValidBox()
Definition: boxbasic.c:818
BOX * boxaGetBox(BOXA *boxa, l_int32 index, l_int32 accessflag)
boxaGetBox()
Definition: boxbasic.c:779
BOX * boxCreate(l_int32 x, l_int32 y, l_int32 w, l_int32 h)
boxCreate()
Definition: boxbasic.c:172
BOXA * boxaCreate(l_int32 n)
boxaCreate()
Definition: boxbasic.c:502
BOXA * boxaMergeEvenOdd(BOXA *boxae, BOXA *boxao, l_int32 fillflag)
boxaMergeEvenOdd()
Definition: boxfunc1.c:2700
l_ok boxSetSide(BOX *boxs, l_int32 side, l_int32 val, l_int32 thresh)
boxSetSide()
Definition: boxfunc1.c:2093
BOX * boxAdjustSides(BOX *boxd, BOX *boxs, l_int32 delleft, l_int32 delright, l_int32 deltop, l_int32 delbot)
boxAdjustSides()
Definition: boxfunc1.c:1991
l_ok boxaSplitEvenOdd(BOXA *boxa, l_int32 fillflag, BOXA **pboxae, BOXA **pboxao)
boxaSplitEvenOdd()
Definition: boxfunc1.c:2636
l_ok boxaGetMedianVals(BOXA *boxa, l_int32 *px, l_int32 *py, l_int32 *pr, l_int32 *pb, l_int32 *pw, l_int32 *ph)
boxaGetMedianVals()
Definition: boxfunc2.c:1483
l_ok boxaExtractAsNuma(BOXA *boxa, NUMA **pnal, NUMA **pnat, NUMA **pnar, NUMA **pnab, NUMA **pnaw, NUMA **pnah, l_int32 keepinvalid)
boxaExtractAsNuma()
Definition: boxfunc2.c:1171
l_ok boxaGetSizes(BOXA *boxa, NUMA **pnaw, NUMA **pnah)
boxaGetSizes()
Definition: boxfunc4.c:1241
l_ok boxaSizeConsistency(BOXA *boxas, l_int32 type, l_float32 threshp, l_float32 threshm, l_float32 *pfvarp, l_float32 *pfvarm, l_int32 *psame)
boxaSizeConsistency()
Definition: boxfunc5.c:590
BOXA * boxaModifyWithBoxa(BOXA *boxas, BOXA *boxam, l_int32 subflag, l_int32 maxdiff, l_int32 extrapixels)
boxaModifyWithBoxa()
Definition: boxfunc5.c:351
BOXA * boxaReconcileAllByMedian(BOXA *boxas, l_int32 select1, l_int32 select2, l_int32 thresh, l_int32 extra, PIXA *pixadb)
boxaReconcileAllByMedian()
Definition: boxfunc5.c:691
l_ok boxaPlotSides(BOXA *boxa, const char *plotname, NUMA **pnal, NUMA **pnat, NUMA **pnar, NUMA **pnab, PIX **ppixd)
boxaPlotSides()
Definition: boxfunc5.c:1251
BOXA * boxaReconcileSidesByMedian(BOXA *boxas, l_int32 select, l_int32 thresh, l_int32 extra, PIXA *pixadb)
boxaReconcileSidesByMedian()
Definition: boxfunc5.c:804
l_ok boxaMedianDimensions(BOXA *boxas, l_int32 *pmedw, l_int32 *pmedh, l_int32 *pmedwe, l_int32 *pmedwo, l_int32 *pmedhe, l_int32 *pmedho, NUMA **pnadelw, NUMA **pnadelh)
boxaMedianDimensions()
Definition: boxfunc5.c:1714
BOXA * boxaSmoothSequenceMedian(BOXA *boxas, l_int32 halfwin, l_int32 subflag, l_int32 maxdiff, l_int32 extrapixels, l_int32 debug)
boxaSmoothSequenceMedian()
Definition: boxfunc5.c:106
l_ok boxaPlotSizes(BOXA *boxa, const char *plotname, NUMA **pnaw, NUMA **pnah, PIX **ppixd)
boxaPlotSizes()
Definition: boxfunc5.c:1384
l_ok boxaSizeVariation(BOXA *boxa, l_int32 type, l_float32 *pdel_evenodd, l_float32 *prms_even, l_float32 *prms_odd, l_float32 *prms_all)
boxaSizeVariation()
Definition: boxfunc5.c:1611
BOXA * boxaReconcilePairWidth(BOXA *boxas, l_int32 delw, l_int32 op, l_float32 factor, NUMA *na)
boxaReconcilePairWidth()
Definition: boxfunc5.c:467
BOXA * boxaWindowedMedian(BOXA *boxas, l_int32 halfwin, l_int32 debug)
boxaWindowedMedian()
Definition: boxfunc5.c:206
BOXA * boxaFillSequence(BOXA *boxas, l_int32 useflag, l_int32 debug)
boxaFillSequence()
Definition: boxfunc5.c:1468
static l_int32 boxaFillAll(BOXA *boxa)
boxaFillAll()
Definition: boxfunc5.c:1527
BOXA * boxaReconcileSizeByMedian(BOXA *boxas, l_int32 type, l_float32 dfract, l_float32 sfract, l_float32 factor, NUMA **pnadelw, NUMA **pnadelh, l_float32 *pratiowh)
boxaReconcileSizeByMedian()
Definition: boxfunc5.c:982
l_ok gplotAddPlot(GPLOT *gplot, NUMA *nax, NUMA *nay, l_int32 plotstyle, const char *plotlabel)
gplotAddPlot()
Definition: gplot.c:320
GPLOT * gplotCreate(const char *rootname, l_int32 outformat, const char *title, const char *xlabel, const char *ylabel)
gplotCreate()
Definition: gplot.c:187
PIX * gplotMakeOutputPix(GPLOT *gplot)
gplotMakeOutputPix()
Definition: gplot.c:431
void gplotDestroy(GPLOT **pgplot)
gplotDestroy()
Definition: gplot.c:255
l_ok numaAddNumber(NUMA *na, l_float32 val)
numaAddNumber()
Definition: numabasic.c:478
NUMA * numaCreate(l_int32 n)
numaCreate()
Definition: numabasic.c:194
void numaDestroy(NUMA **pna)
numaDestroy()
Definition: numabasic.c:366
l_ok numaGetIValue(NUMA *na, l_int32 index, l_int32 *pival)
numaGetIValue()
Definition: numabasic.c:754
l_ok numaGetMedian(NUMA *na, l_float32 *pval)
numaGetMedian()
Definition: numafunc1.c:3405
l_ok numaGetMeanDevFromMedian(NUMA *na, l_float32 med, l_float32 *pdev)
numaGetMeanDevFromMedian()
Definition: numafunc1.c:3465
NUMA * numaWindowedMedian(NUMA *nas, l_int32 halfwin)
numaWindowedMedian()
Definition: numafunc2.c:784
l_ok numaSimpleStats(NUMA *na, l_int32 first, l_int32 last, l_float32 *pmean, l_float32 *pvar, l_float32 *prvar)
numaSimpleStats()
Definition: numafunc2.c:452
void pixDestroy(PIX **ppix)
pixDestroy()
Definition: pix1.c:621
@ L_USE_SAME_PARITY_BOXES
Definition: pix.h:1108
@ L_USE_ALL_BOXES
Definition: pix.h:1107
@ L_SELECT_WIDTH
Definition: pix.h:800
@ L_SELECT_HEIGHT
Definition: pix.h:801
@ L_COPY
Definition: pix.h:712
@ L_CLONE
Definition: pix.h:713
@ L_INSERT
Definition: pix.h:711
@ L_SET_RIGHT
Definition: pix.h:1060
@ L_SET_LEFT
Definition: pix.h:1059
@ L_ADJUST_LEFT
Definition: pix.h:1051
@ L_SET_BOT
Definition: pix.h:1062
@ L_ADJUST_LEFT_AND_RIGHT
Definition: pix.h:1053
@ L_ADJUST_RIGHT
Definition: pix.h:1052
@ L_ADJUST_BOT
Definition: pix.h:1055
@ L_ADJUST_TOP
Definition: pix.h:1054
@ L_ADJUST_SKIP
Definition: pix.h:1050
@ L_SET_TOP
Definition: pix.h:1061
@ L_ADJUST_TOP_AND_BOT
Definition: pix.h:1056
@ L_ADJUST_CHOOSE_MIN
Definition: pix.h:1057
@ L_CHECK_BOTH
Definition: pix.h:814
@ L_CHECK_WIDTH
Definition: pix.h:812
@ L_CHECK_HEIGHT
Definition: pix.h:813
@ L_USE_MINSIZE
Definition: pix.h:1074
@ L_SUB_ON_LOC_DIFF
Definition: pix.h:1076
@ L_USE_CAPPED_MIN
Definition: pix.h:1078
@ L_USE_CAPPED_MAX
Definition: pix.h:1079
@ L_USE_MAXSIZE
Definition: pix.h:1075
@ L_SUB_ON_SIZE_DIFF
Definition: pix.h:1077
l_ok pixaAddPix(PIXA *pixa, PIX *pix, l_int32 copyflag)
pixaAddPix()
Definition: pixabasic.c:506
l_int32 pixaGetCount(PIXA *pixa)
pixaGetCount()
Definition: pixabasic.c:650
PIX * pixaDisplayTiledInColumns(PIXA *pixas, l_int32 nx, l_float32 scalefactor, l_int32 spacing, l_int32 border)
pixaDisplayTiledInColumns()
Definition: pixafunc2.c:930
Definition: pix.h:481
Definition: pix.h:492
Definition: gplot.h:77
Definition: array.h:71
Definition: pix.h:139
Definition: pix.h:456
void lept_stderr(const char *fmt,...)
lept_stderr()
Definition: utils1.c:306
char * stringNew(const char *src)
stringNew()
Definition: utils2.c:223
l_int32 stringCat(char *dest, size_t size, const char *src)
stringCat()
Definition: utils2.c:423
l_int32 lept_mkdir(const char *subdir)
lept_mkdir()
Definition: utils2.c:2218
l_ok stringCopy(char *dest, const char *src, l_int32 n)
stringCopy()
Definition: utils2.c:263