Leptonica  1.82.0
Image processing and image analysis suite
numafunc2.c
Go to the documentation of this file.
1 /*====================================================================*
2  - Copyright (C) 2001 Leptonica. All rights reserved.
3  -
4  - Redistribution and use in source and binary forms, with or without
5  - modification, are permitted provided that the following conditions
6  - are met:
7  - 1. Redistributions of source code must retain the above copyright
8  - notice, this list of conditions and the following disclaimer.
9  - 2. Redistributions in binary form must reproduce the above
10  - copyright notice, this list of conditions and the following
11  - disclaimer in the documentation and/or other materials
12  - provided with the distribution.
13  -
14  - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15  - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16  - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17  - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18  - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19  - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20  - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21  - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22  - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23  - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *====================================================================*/
26 
137 #ifdef HAVE_CONFIG_H
138 #include <config_auto.h>
139 #endif /* HAVE_CONFIG_H */
140 
141 #include <math.h>
142 #include "allheaders.h"
143 
144  /* bin sizes in numaMakeHistogram() */
145 static const l_int32 BinSizeArray[] = {2, 5, 10, 20, 50, 100, 200, 500, 1000,\
146  2000, 5000, 10000, 20000, 50000, 100000, 200000,\
147  500000, 1000000, 2000000, 5000000, 10000000,\
148  200000000, 50000000, 100000000};
149 static const l_int32 NBinSizes = 24;
150 
151 
152 #ifndef NO_CONSOLE_IO
153 #define DEBUG_HISTO 0
154 #define DEBUG_CROSSINGS 0
155 #define DEBUG_FREQUENCY 0
156 #endif /* ~NO_CONSOLE_IO */
157 
158 /*----------------------------------------------------------------------*
159  * Morphological operations *
160  *----------------------------------------------------------------------*/
182 NUMA *
184  l_int32 size)
185 {
186 l_int32 i, j, n, hsize, len;
187 l_float32 minval;
188 l_float32 *fa, *fas, *fad;
189 NUMA *nad;
190 
191  PROCNAME("numaErode");
192 
193  if (!nas)
194  return (NUMA *)ERROR_PTR("nas not defined", procName, NULL);
195  if (size <= 0)
196  return (NUMA *)ERROR_PTR("size must be > 0", procName, NULL);
197  if ((size & 1) == 0 ) {
198  L_WARNING("sel size must be odd; increasing by 1\n", procName);
199  size++;
200  }
201 
202  if (size == 1)
203  return numaCopy(nas);
204 
205  /* Make a source fa (fas) that has an added (size / 2) boundary
206  * on left and right, contains a copy of nas in the interior region
207  * (between 'size' and 'size + n', and has large values
208  * inserted in the boundary (because it is an erosion). */
209  n = numaGetCount(nas);
210  hsize = size / 2;
211  len = n + 2 * hsize;
212  if ((fas = (l_float32 *)LEPT_CALLOC(len, sizeof(l_float32))) == NULL)
213  return (NUMA *)ERROR_PTR("fas not made", procName, NULL);
214  for (i = 0; i < hsize; i++)
215  fas[i] = 1.0e37;
216  for (i = hsize + n; i < len; i++)
217  fas[i] = 1.0e37;
218  fa = numaGetFArray(nas, L_NOCOPY);
219  for (i = 0; i < n; i++)
220  fas[hsize + i] = fa[i];
221 
222  nad = numaMakeConstant(0, n);
223  numaCopyParameters(nad, nas);
224  fad = numaGetFArray(nad, L_NOCOPY);
225  for (i = 0; i < n; i++) {
226  minval = 1.0e37; /* start big */
227  for (j = 0; j < size; j++)
228  minval = L_MIN(minval, fas[i + j]);
229  fad[i] = minval;
230  }
231 
232  LEPT_FREE(fas);
233  return nad;
234 }
235 
236 
251 NUMA *
253  l_int32 size)
254 {
255 l_int32 i, j, n, hsize, len;
256 l_float32 maxval;
257 l_float32 *fa, *fas, *fad;
258 NUMA *nad;
259 
260  PROCNAME("numaDilate");
261 
262  if (!nas)
263  return (NUMA *)ERROR_PTR("nas not defined", procName, NULL);
264  if (size <= 0)
265  return (NUMA *)ERROR_PTR("size must be > 0", procName, NULL);
266  if ((size & 1) == 0 ) {
267  L_WARNING("sel size must be odd; increasing by 1\n", procName);
268  size++;
269  }
270 
271  if (size == 1)
272  return numaCopy(nas);
273 
274  /* Make a source fa (fas) that has an added (size / 2) boundary
275  * on left and right, contains a copy of nas in the interior region
276  * (between 'size' and 'size + n', and has small values
277  * inserted in the boundary (because it is a dilation). */
278  n = numaGetCount(nas);
279  hsize = size / 2;
280  len = n + 2 * hsize;
281  if ((fas = (l_float32 *)LEPT_CALLOC(len, sizeof(l_float32))) == NULL)
282  return (NUMA *)ERROR_PTR("fas not made", procName, NULL);
283  for (i = 0; i < hsize; i++)
284  fas[i] = -1.0e37;
285  for (i = hsize + n; i < len; i++)
286  fas[i] = -1.0e37;
287  fa = numaGetFArray(nas, L_NOCOPY);
288  for (i = 0; i < n; i++)
289  fas[hsize + i] = fa[i];
290 
291  nad = numaMakeConstant(0, n);
292  numaCopyParameters(nad, nas);
293  fad = numaGetFArray(nad, L_NOCOPY);
294  for (i = 0; i < n; i++) {
295  maxval = -1.0e37; /* start small */
296  for (j = 0; j < size; j++)
297  maxval = L_MAX(maxval, fas[i + j]);
298  fad[i] = maxval;
299  }
300 
301  LEPT_FREE(fas);
302  return nad;
303 }
304 
305 
320 NUMA *
322  l_int32 size)
323 {
324 NUMA *nat, *nad;
325 
326  PROCNAME("numaOpen");
327 
328  if (!nas)
329  return (NUMA *)ERROR_PTR("nas not defined", procName, NULL);
330  if (size <= 0)
331  return (NUMA *)ERROR_PTR("size must be > 0", procName, NULL);
332  if ((size & 1) == 0 ) {
333  L_WARNING("sel size must be odd; increasing by 1\n", procName);
334  size++;
335  }
336 
337  if (size == 1)
338  return numaCopy(nas);
339 
340  nat = numaErode(nas, size);
341  nad = numaDilate(nat, size);
342  numaDestroy(&nat);
343  return nad;
344 }
345 
346 
367 NUMA *
369  l_int32 size)
370 {
371 NUMA *nab, *nat1, *nat2, *nad;
372 
373  PROCNAME("numaClose");
374 
375  if (!nas)
376  return (NUMA *)ERROR_PTR("nas not defined", procName, NULL);
377  if (size <= 0)
378  return (NUMA *)ERROR_PTR("size must be > 0", procName, NULL);
379  if ((size & 1) == 0 ) {
380  L_WARNING("sel size must be odd; increasing by 1\n", procName);
381  size++;
382  }
383 
384  if (size == 1)
385  return numaCopy(nas);
386 
387  nab = numaAddBorder(nas, size, size, 0); /* to preserve extensivity */
388  nat1 = numaDilate(nab, size);
389  nat2 = numaErode(nat1, size);
390  nad = numaRemoveBorder(nat2, size, size);
391  numaDestroy(&nab);
392  numaDestroy(&nat1);
393  numaDestroy(&nat2);
394  return nad;
395 }
396 
397 
398 /*----------------------------------------------------------------------*
399  * Other transforms *
400  *----------------------------------------------------------------------*/
414 NUMA *
416  l_float32 shift,
417  l_float32 scale)
418 {
419 l_int32 i, n;
420 l_float32 val;
421 NUMA *nad;
422 
423  PROCNAME("numaTransform");
424 
425  if (!nas)
426  return (NUMA *)ERROR_PTR("nas not defined", procName, NULL);
427  n = numaGetCount(nas);
428  if ((nad = numaCreate(n)) == NULL)
429  return (NUMA *)ERROR_PTR("nad not made", procName, NULL);
430  numaCopyParameters(nad, nas);
431  for (i = 0; i < n; i++) {
432  numaGetFValue(nas, i, &val);
433  val = scale * (val + shift);
434  numaAddNumber(nad, val);
435  }
436  return nad;
437 }
438 
439 
451 l_ok
453  l_int32 first,
454  l_int32 last,
455  l_float32 *pmean,
456  l_float32 *pvar,
457  l_float32 *prvar)
458 {
459 l_int32 i, n, ni;
460 l_float32 sum, sumsq, val, mean, var;
461 
462  PROCNAME("numaSimpleStats");
463 
464  if (pmean) *pmean = 0.0;
465  if (pvar) *pvar = 0.0;
466  if (prvar) *prvar = 0.0;
467  if (!pmean && !pvar && !prvar)
468  return ERROR_INT("nothing requested", procName, 1);
469  if (!na)
470  return ERROR_INT("na not defined", procName, 1);
471  if ((n = numaGetCount(na)) == 0)
472  return ERROR_INT("na is empty", procName, 1);
473  first = L_MAX(0, first);
474  if (last < 0) last = n - 1;
475  if (first >= n)
476  return ERROR_INT("invalid first", procName, 1);
477  if (last >= n) {
478  L_WARNING("last = %d is beyond max index = %d; adjusting\n",
479  procName, last, n - 1);
480  last = n - 1;
481  }
482  if (first > last)
483  return ERROR_INT("first > last\n", procName, 1);
484  ni = last - first + 1;
485  sum = sumsq = 0.0;
486  for (i = first; i <= last; i++) {
487  numaGetFValue(na, i, &val);
488  sum += val;
489  sumsq += val * val;
490  }
491 
492  mean = sum / ni;
493  if (pmean)
494  *pmean = mean;
495  if (pvar || prvar) {
496  var = sumsq / ni - mean * mean;
497  if (pvar) *pvar = var;
498  if (prvar) *prvar = sqrtf(var);
499  }
500 
501  return 0;
502 }
503 
504 
536 l_ok
538  l_int32 wc,
539  NUMA **pnam,
540  NUMA **pnams,
541  NUMA **pnav,
542  NUMA **pnarv)
543 {
544 NUMA *nam, *nams;
545 
546  PROCNAME("numaWindowedStats");
547 
548  if (!nas)
549  return ERROR_INT("nas not defined", procName, 1);
550  if (2 * wc + 1 > numaGetCount(nas))
551  L_WARNING("filter wider than input array!\n", procName);
552 
553  if (!pnav && !pnarv) {
554  if (pnam) *pnam = numaWindowedMean(nas, wc);
555  if (pnams) *pnams = numaWindowedMeanSquare(nas, wc);
556  return 0;
557  }
558 
559  nam = numaWindowedMean(nas, wc);
560  nams = numaWindowedMeanSquare(nas, wc);
561  numaWindowedVariance(nam, nams, pnav, pnarv);
562  if (pnam)
563  *pnam = nam;
564  else
565  numaDestroy(&nam);
566  if (pnams)
567  *pnams = nams;
568  else
569  numaDestroy(&nams);
570  return 0;
571 }
572 
573 
587 NUMA *
589  l_int32 wc)
590 {
591 l_int32 i, n, n1, width;
592 l_float32 sum, norm;
593 l_float32 *fa1, *fad, *suma;
594 NUMA *na1, *nad;
595 
596  PROCNAME("numaWindowedMean");
597 
598  if (!nas)
599  return (NUMA *)ERROR_PTR("nas not defined", procName, NULL);
600  n = numaGetCount(nas);
601  width = 2 * wc + 1; /* filter width */
602  if (width > n)
603  L_WARNING("filter wider than input array!\n", procName);
604 
605  na1 = numaAddSpecifiedBorder(nas, wc, wc, L_MIRRORED_BORDER);
606  n1 = n + 2 * wc;
607  fa1 = numaGetFArray(na1, L_NOCOPY);
608  nad = numaMakeConstant(0, n);
609  fad = numaGetFArray(nad, L_NOCOPY);
610 
611  /* Make sum array; note the indexing */
612  if ((suma = (l_float32 *)LEPT_CALLOC(n1 + 1, sizeof(l_float32))) == NULL) {
613  numaDestroy(&na1);
614  numaDestroy(&nad);
615  return (NUMA *)ERROR_PTR("suma not made", procName, NULL);
616  }
617  sum = 0.0;
618  suma[0] = 0.0;
619  for (i = 0; i < n1; i++) {
620  sum += fa1[i];
621  suma[i + 1] = sum;
622  }
623 
624  norm = 1. / (2 * wc + 1);
625  for (i = 0; i < n; i++)
626  fad[i] = norm * (suma[width + i] - suma[i]);
627 
628  LEPT_FREE(suma);
629  numaDestroy(&na1);
630  return nad;
631 }
632 
633 
647 NUMA *
649  l_int32 wc)
650 {
651 l_int32 i, n, n1, width;
652 l_float32 sum, norm;
653 l_float32 *fa1, *fad, *suma;
654 NUMA *na1, *nad;
655 
656  PROCNAME("numaWindowedMeanSquare");
657 
658  if (!nas)
659  return (NUMA *)ERROR_PTR("nas not defined", procName, NULL);
660  n = numaGetCount(nas);
661  width = 2 * wc + 1; /* filter width */
662  if (width > n)
663  L_WARNING("filter wider than input array!\n", procName);
664 
665  na1 = numaAddSpecifiedBorder(nas, wc, wc, L_MIRRORED_BORDER);
666  n1 = n + 2 * wc;
667  fa1 = numaGetFArray(na1, L_NOCOPY);
668  nad = numaMakeConstant(0, n);
669  fad = numaGetFArray(nad, L_NOCOPY);
670 
671  /* Make sum array; note the indexing */
672  if ((suma = (l_float32 *)LEPT_CALLOC(n1 + 1, sizeof(l_float32))) == NULL) {
673  numaDestroy(&na1);
674  numaDestroy(&nad);
675  return (NUMA *)ERROR_PTR("suma not made", procName, NULL);
676  }
677  sum = 0.0;
678  suma[0] = 0.0;
679  for (i = 0; i < n1; i++) {
680  sum += fa1[i] * fa1[i];
681  suma[i + 1] = sum;
682  }
683 
684  norm = 1. / (2 * wc + 1);
685  for (i = 0; i < n; i++)
686  fad[i] = norm * (suma[width + i] - suma[i]);
687 
688  LEPT_FREE(suma);
689  numaDestroy(&na1);
690  return nad;
691 }
692 
693 
715 l_ok
717  NUMA *nams,
718  NUMA **pnav,
719  NUMA **pnarv)
720 {
721 l_int32 i, nm, nms;
722 l_float32 var;
723 l_float32 *fam, *fams, *fav, *farv;
724 NUMA *nav, *narv; /* variance and square root of variance */
725 
726  PROCNAME("numaWindowedVariance");
727 
728  if (pnav) *pnav = NULL;
729  if (pnarv) *pnarv = NULL;
730  if (!pnav && !pnarv)
731  return ERROR_INT("neither &nav nor &narv are defined", procName, 1);
732  if (!nam)
733  return ERROR_INT("nam not defined", procName, 1);
734  if (!nams)
735  return ERROR_INT("nams not defined", procName, 1);
736  nm = numaGetCount(nam);
737  nms = numaGetCount(nams);
738  if (nm != nms)
739  return ERROR_INT("sizes of nam and nams differ", procName, 1);
740 
741  if (pnav) {
742  nav = numaMakeConstant(0, nm);
743  *pnav = nav;
744  fav = numaGetFArray(nav, L_NOCOPY);
745  }
746  if (pnarv) {
747  narv = numaMakeConstant(0, nm);
748  *pnarv = narv;
749  farv = numaGetFArray(narv, L_NOCOPY);
750  }
751  fam = numaGetFArray(nam, L_NOCOPY);
752  fams = numaGetFArray(nams, L_NOCOPY);
753 
754  for (i = 0; i < nm; i++) {
755  var = fams[i] - fam[i] * fam[i];
756  if (pnav)
757  fav[i] = var;
758  if (pnarv)
759  farv[i] = sqrtf(var);
760  }
761 
762  return 0;
763 }
764 
765 
783 NUMA *
785  l_int32 halfwin)
786 {
787 l_int32 i, n;
788 l_float32 medval;
789 NUMA *na1, *na2, *nad;
790 
791  PROCNAME("numaWindowedMedian");
792 
793  if (!nas)
794  return (NUMA *)ERROR_PTR("nas not defined", procName, NULL);
795  if ((n = numaGetCount(nas)) < 3)
796  return numaCopy(nas);
797  if (halfwin <= 0) {
798  L_ERROR("filter too small; returning a copy\n", procName);
799  return numaCopy(nas);
800  }
801 
802  if (halfwin > (n - 1) / 2) {
803  halfwin = (n - 1) / 2;
804  L_INFO("reducing filter to halfwin = %d\n", procName, halfwin);
805  }
806 
807  /* Add a border to both ends */
808  na1 = numaAddSpecifiedBorder(nas, halfwin, halfwin, L_MIRRORED_BORDER);
809 
810  /* Get the median value at the center of each window, corresponding
811  * to locations in the input nas. */
812  nad = numaCreate(n);
813  for (i = 0; i < n; i++) {
814  na2 = numaClipToInterval(na1, i, i + 2 * halfwin);
815  numaGetMedian(na2, &medval);
816  numaAddNumber(nad, medval);
817  numaDestroy(&na2);
818  }
819 
820  numaDestroy(&na1);
821  return nad;
822 }
823 
824 
832 NUMA *
834 {
835 l_int32 i, n, ival;
836 NUMA *nad;
837 
838  PROCNAME("numaConvertToInt");
839 
840  if (!nas)
841  return (NUMA *)ERROR_PTR("nas not defined", procName, NULL);
842 
843  n = numaGetCount(nas);
844  if ((nad = numaCreate(n)) == NULL)
845  return (NUMA *)ERROR_PTR("nad not made", procName, NULL);
846  numaCopyParameters(nad, nas);
847  for (i = 0; i < n; i++) {
848  numaGetIValue(nas, i, &ival);
849  numaAddNumber(nad, ival);
850  }
851  return nad;
852 }
853 
854 
855 /*----------------------------------------------------------------------*
856  * Histogram generation and statistics *
857  *----------------------------------------------------------------------*/
884 NUMA *
886  l_int32 maxbins,
887  l_int32 *pbinsize,
888  l_int32 *pbinstart)
889 {
890 l_int32 i, n, ival, hval;
891 l_int32 iminval, imaxval, range, binsize, nbins, ibin;
892 l_float32 val, ratio;
893 NUMA *nai, *nahist;
894 
895  PROCNAME("numaMakeHistogram");
896 
897  if (pbinsize) *pbinsize = 0;
898  if (pbinstart) *pbinstart = 0;
899  if (!na)
900  return (NUMA *)ERROR_PTR("na not defined", procName, NULL);
901  if (maxbins < 1)
902  return (NUMA *)ERROR_PTR("maxbins < 1", procName, NULL);
903 
904  /* Determine input range */
905  numaGetMin(na, &val, NULL);
906  iminval = (l_int32)(val + 0.5);
907  numaGetMax(na, &val, NULL);
908  imaxval = (l_int32)(val + 0.5);
909  if (pbinstart == NULL) { /* clip negative vals; start from 0 */
910  iminval = 0;
911  if (imaxval < 0)
912  return (NUMA *)ERROR_PTR("all values < 0", procName, NULL);
913  }
914 
915  /* Determine binsize */
916  range = imaxval - iminval + 1;
917  if (range > maxbins - 1) {
918  ratio = (l_float64)range / (l_float64)maxbins;
919  binsize = 0;
920  for (i = 0; i < NBinSizes; i++) {
921  if (ratio < BinSizeArray[i]) {
922  binsize = BinSizeArray[i];
923  break;
924  }
925  }
926  if (binsize == 0)
927  return (NUMA *)ERROR_PTR("numbers too large", procName, NULL);
928  } else {
929  binsize = 1;
930  }
931  if (pbinsize) *pbinsize = binsize;
932  nbins = 1 + range / binsize; /* +1 seems to be sufficient */
933 
934  /* Redetermine iminval */
935  if (pbinstart && binsize > 1) {
936  if (iminval >= 0)
937  iminval = binsize * (iminval / binsize);
938  else
939  iminval = binsize * ((iminval - binsize + 1) / binsize);
940  }
941  if (pbinstart) *pbinstart = iminval;
942 
943 #if DEBUG_HISTO
944  lept_stderr(" imaxval = %d, range = %d, nbins = %d\n",
945  imaxval, range, nbins);
946 #endif /* DEBUG_HISTO */
947 
948  /* Use integerized data for input */
949  if ((nai = numaConvertToInt(na)) == NULL)
950  return (NUMA *)ERROR_PTR("nai not made", procName, NULL);
951  n = numaGetCount(nai);
952 
953  /* Make histogram, converting value in input array
954  * into a bin number for this histogram array. */
955  if ((nahist = numaCreate(nbins)) == NULL) {
956  numaDestroy(&nai);
957  return (NUMA *)ERROR_PTR("nahist not made", procName, NULL);
958  }
959  numaSetCount(nahist, nbins);
960  numaSetParameters(nahist, iminval, binsize);
961  for (i = 0; i < n; i++) {
962  numaGetIValue(nai, i, &ival);
963  ibin = (ival - iminval) / binsize;
964  if (ibin >= 0 && ibin < nbins) {
965  numaGetIValue(nahist, ibin, &hval);
966  numaSetValue(nahist, ibin, hval + 1.0);
967  }
968  }
969 
970  numaDestroy(&nai);
971  return nahist;
972 }
973 
974 
997 NUMA *
999  l_int32 maxbins)
1000 {
1001 l_int32 i, n, imin, imax, irange, ibin, ival, allints;
1002 l_float32 minval, maxval, range, binsize, fval;
1003 NUMA *nah;
1004 
1005  PROCNAME("numaMakeHistogramAuto");
1006 
1007  if (!na)
1008  return (NUMA *)ERROR_PTR("na not defined", procName, NULL);
1009  maxbins = L_MAX(1, maxbins);
1010 
1011  /* Determine input range */
1012  numaGetMin(na, &minval, NULL);
1013  numaGetMax(na, &maxval, NULL);
1014 
1015  /* Determine if values are all integers */
1016  n = numaGetCount(na);
1017  numaHasOnlyIntegers(na, &allints);
1018 
1019  /* Do simple integer binning if possible */
1020  if (allints && (maxval - minval < maxbins)) {
1021  imin = (l_int32)minval;
1022  imax = (l_int32)maxval;
1023  irange = imax - imin + 1;
1024  nah = numaCreate(irange);
1025  numaSetCount(nah, irange); /* init */
1026  numaSetParameters(nah, minval, 1.0);
1027  for (i = 0; i < n; i++) {
1028  numaGetIValue(na, i, &ival);
1029  ibin = ival - imin;
1030  numaGetIValue(nah, ibin, &ival);
1031  numaSetValue(nah, ibin, ival + 1.0);
1032  }
1033 
1034  return nah;
1035  }
1036 
1037  /* Do float binning, even if the data is integers. */
1038  range = maxval - minval;
1039  binsize = range / (l_float32)maxbins;
1040  if (range == 0.0) {
1041  nah = numaCreate(1);
1042  numaSetParameters(nah, minval, binsize);
1043  numaAddNumber(nah, n);
1044  return nah;
1045  }
1046  nah = numaCreate(maxbins);
1047  numaSetCount(nah, maxbins);
1048  numaSetParameters(nah, minval, binsize);
1049  for (i = 0; i < n; i++) {
1050  numaGetFValue(na, i, &fval);
1051  ibin = (l_int32)((fval - minval) / binsize);
1052  ibin = L_MIN(ibin, maxbins - 1); /* "edge" case; stay in bounds */
1053  numaGetIValue(nah, ibin, &ival);
1054  numaSetValue(nah, ibin, ival + 1.0);
1055  }
1056 
1057  return nah;
1058 }
1059 
1060 
1081 NUMA *
1083  l_float32 binsize,
1084  l_float32 maxsize)
1085 {
1086 l_int32 i, n, nbins, ival, ibin;
1087 l_float32 val, maxval;
1088 NUMA *nad;
1089 
1090  PROCNAME("numaMakeHistogramClipped");
1091 
1092  if (!na)
1093  return (NUMA *)ERROR_PTR("na not defined", procName, NULL);
1094  if (binsize <= 0.0)
1095  return (NUMA *)ERROR_PTR("binsize must be > 0.0", procName, NULL);
1096  if (binsize > maxsize)
1097  binsize = maxsize; /* just one bin */
1098 
1099  numaGetMax(na, &maxval, NULL);
1100  n = numaGetCount(na);
1101  maxsize = L_MIN(maxsize, maxval);
1102  nbins = (l_int32)(maxsize / binsize) + 1;
1103 
1104 /* lept_stderr("maxsize = %7.3f, nbins = %d\n", maxsize, nbins); */
1105 
1106  if ((nad = numaCreate(nbins)) == NULL)
1107  return (NUMA *)ERROR_PTR("nad not made", procName, NULL);
1108  numaSetParameters(nad, 0.0, binsize);
1109  numaSetCount(nad, nbins); /* interpret zeroes in bins as data */
1110  for (i = 0; i < n; i++) {
1111  numaGetFValue(na, i, &val);
1112  ibin = (l_int32)(val / binsize);
1113  if (ibin >= 0 && ibin < nbins) {
1114  numaGetIValue(nad, ibin, &ival);
1115  numaSetValue(nad, ibin, ival + 1.0);
1116  }
1117  }
1118 
1119  return nad;
1120 }
1121 
1122 
1130 NUMA *
1132  l_int32 newsize)
1133 {
1134 l_int32 i, j, ns, nd, index, count, val;
1135 l_float32 start, oldsize;
1136 NUMA *nad;
1137 
1138  PROCNAME("numaRebinHistogram");
1139 
1140  if (!nas)
1141  return (NUMA *)ERROR_PTR("nas not defined", procName, NULL);
1142  if (newsize <= 1)
1143  return (NUMA *)ERROR_PTR("newsize must be > 1", procName, NULL);
1144  if ((ns = numaGetCount(nas)) == 0)
1145  return (NUMA *)ERROR_PTR("no bins in nas", procName, NULL);
1146 
1147  nd = (ns + newsize - 1) / newsize;
1148  if ((nad = numaCreate(nd)) == NULL)
1149  return (NUMA *)ERROR_PTR("nad not made", procName, NULL);
1150  numaGetParameters(nad, &start, &oldsize);
1151  numaSetParameters(nad, start, oldsize * newsize);
1152 
1153  for (i = 0; i < nd; i++) { /* new bins */
1154  count = 0;
1155  index = i * newsize;
1156  for (j = 0; j < newsize; j++) {
1157  if (index < ns) {
1158  numaGetIValue(nas, index, &val);
1159  count += val;
1160  index++;
1161  }
1162  }
1163  numaAddNumber(nad, count);
1164  }
1165 
1166  return nad;
1167 }
1168 
1169 
1178 NUMA *
1180  l_float32 tsum)
1181 {
1182 l_int32 i, ns;
1183 l_float32 sum, factor, fval;
1184 NUMA *nad;
1185 
1186  PROCNAME("numaNormalizeHistogram");
1187 
1188  if (!nas)
1189  return (NUMA *)ERROR_PTR("nas not defined", procName, NULL);
1190  if (tsum <= 0.0)
1191  return (NUMA *)ERROR_PTR("tsum must be > 0.0", procName, NULL);
1192  if ((ns = numaGetCount(nas)) == 0)
1193  return (NUMA *)ERROR_PTR("no bins in nas", procName, NULL);
1194 
1195  numaGetSum(nas, &sum);
1196  factor = tsum / sum;
1197 
1198  if ((nad = numaCreate(ns)) == NULL)
1199  return (NUMA *)ERROR_PTR("nad not made", procName, NULL);
1200  numaCopyParameters(nad, nas);
1201 
1202  for (i = 0; i < ns; i++) {
1203  numaGetFValue(nas, i, &fval);
1204  fval *= factor;
1205  numaAddNumber(nad, fval);
1206  }
1207 
1208  return nad;
1209 }
1210 
1211 
1260 l_ok
1262  l_int32 maxbins,
1263  l_float32 *pmin,
1264  l_float32 *pmax,
1265  l_float32 *pmean,
1266  l_float32 *pvariance,
1267  l_float32 *pmedian,
1268  l_float32 rank,
1269  l_float32 *prval,
1270  NUMA **phisto)
1271 {
1272 l_int32 i, n;
1273 l_float32 minval, maxval, fval, mean, sum;
1274 NUMA *nah;
1275 
1276  PROCNAME("numaGetStatsUsingHistogram");
1277 
1278  if (pmin) *pmin = 0.0;
1279  if (pmax) *pmax = 0.0;
1280  if (pmean) *pmean = 0.0;
1281  if (pvariance) *pvariance = 0.0;
1282  if (pmedian) *pmedian = 0.0;
1283  if (prval) *prval = 0.0;
1284  if (phisto) *phisto = NULL;
1285  if (!na)
1286  return ERROR_INT("na not defined", procName, 1);
1287  if ((n = numaGetCount(na)) == 0)
1288  return ERROR_INT("numa is empty", procName, 1);
1289 
1290  numaGetMin(na, &minval, NULL);
1291  numaGetMax(na, &maxval, NULL);
1292  if (pmin) *pmin = minval;
1293  if (pmax) *pmax = maxval;
1294  if (pmean || pvariance) {
1295  sum = 0.0;
1296  for (i = 0; i < n; i++) {
1297  numaGetFValue(na, i, &fval);
1298  sum += fval;
1299  }
1300  mean = sum / (l_float32)n;
1301  if (pmean) *pmean = mean;
1302  }
1303  if (pvariance) {
1304  sum = 0.0;
1305  for (i = 0; i < n; i++) {
1306  numaGetFValue(na, i, &fval);
1307  sum += fval * fval;
1308  }
1309  *pvariance = sum / (l_float32)n - mean * mean;
1310  }
1311 
1312  if (!pmedian && !prval && !phisto)
1313  return 0;
1314 
1315  nah = numaMakeHistogramAuto(na, maxbins);
1316  if (pmedian)
1317  numaHistogramGetValFromRank(nah, 0.5, pmedian);
1318  if (prval)
1319  numaHistogramGetValFromRank(nah, rank, prval);
1320  if (phisto)
1321  *phisto = nah;
1322  else
1323  numaDestroy(&nah);
1324  return 0;
1325 }
1326 
1327 
1351 l_ok
1353  l_float32 startx,
1354  l_float32 deltax,
1355  l_float32 *pxmean,
1356  l_float32 *pxmedian,
1357  l_float32 *pxmode,
1358  l_float32 *pxvariance)
1359 {
1360  PROCNAME("numaGetHistogramStats");
1361 
1362  if (pxmean) *pxmean = 0.0;
1363  if (pxmedian) *pxmedian = 0.0;
1364  if (pxmode) *pxmode = 0.0;
1365  if (pxvariance) *pxvariance = 0.0;
1366  if (!nahisto)
1367  return ERROR_INT("nahisto not defined", procName, 1);
1368 
1369  return numaGetHistogramStatsOnInterval(nahisto, startx, deltax, 0, -1,
1370  pxmean, pxmedian, pxmode,
1371  pxvariance);
1372 }
1373 
1374 
1400 l_ok
1402  l_float32 startx,
1403  l_float32 deltax,
1404  l_int32 ifirst,
1405  l_int32 ilast,
1406  l_float32 *pxmean,
1407  l_float32 *pxmedian,
1408  l_float32 *pxmode,
1409  l_float32 *pxvariance)
1410 {
1411 l_int32 i, n, imax;
1412 l_float32 sum, sumval, halfsum, moment, var, x, y, ymax;
1413 
1414  PROCNAME("numaGetHistogramStatsOnInterval");
1415 
1416  if (pxmean) *pxmean = 0.0;
1417  if (pxmedian) *pxmedian = 0.0;
1418  if (pxmode) *pxmode = 0.0;
1419  if (pxvariance) *pxvariance = 0.0;
1420  if (!nahisto)
1421  return ERROR_INT("nahisto not defined", procName, 1);
1422  if (!pxmean && !pxmedian && !pxmode && !pxvariance)
1423  return ERROR_INT("nothing to compute", procName, 1);
1424 
1425  n = numaGetCount(nahisto);
1426  ifirst = L_MAX(0, ifirst);
1427  if (ilast < 0) ilast = n - 1;
1428  if (ifirst >= n)
1429  return ERROR_INT("invalid ifirst", procName, 1);
1430  if (ilast >= n) {
1431  L_WARNING("ilast = %d is beyond max index = %d; adjusting\n",
1432  procName, ilast, n - 1);
1433  ilast = n - 1;
1434  }
1435  if (ifirst > ilast)
1436  return ERROR_INT("ifirst > ilast", procName, 1);
1437  for (sum = 0.0, moment = 0.0, var = 0.0, i = ifirst; i <= ilast ; i++) {
1438  x = startx + i * deltax;
1439  numaGetFValue(nahisto, i, &y);
1440  sum += y;
1441  moment += x * y;
1442  var += x * x * y;
1443  }
1444  if (sum == 0.0) {
1445  L_INFO("sum is 0\n", procName);
1446  return 0;
1447  }
1448 
1449  if (pxmean)
1450  *pxmean = moment / sum;
1451  if (pxvariance)
1452  *pxvariance = var / sum - moment * moment / (sum * sum);
1453 
1454  if (pxmedian) {
1455  halfsum = sum / 2.0;
1456  for (sumval = 0.0, i = ifirst; i <= ilast; i++) {
1457  numaGetFValue(nahisto, i, &y);
1458  sumval += y;
1459  if (sumval >= halfsum) {
1460  *pxmedian = startx + i * deltax;
1461  break;
1462  }
1463  }
1464  }
1465 
1466  if (pxmode) {
1467  imax = -1;
1468  ymax = -1.0e10;
1469  for (i = ifirst; i <= ilast; i++) {
1470  numaGetFValue(nahisto, i, &y);
1471  if (y > ymax) {
1472  ymax = y;
1473  imax = i;
1474  }
1475  }
1476  *pxmode = startx + imax * deltax;
1477  }
1478 
1479  return 0;
1480 }
1481 
1482 
1494 l_ok
1495 numaMakeRankFromHistogram(l_float32 startx,
1496  l_float32 deltax,
1497  NUMA *nasy,
1498  l_int32 npts,
1499  NUMA **pnax,
1500  NUMA **pnay)
1501 {
1502 l_int32 i, n;
1503 l_float32 sum, fval;
1504 NUMA *nan, *nar;
1505 
1506  PROCNAME("numaMakeRankFromHistogram");
1507 
1508  if (pnax) *pnax = NULL;
1509  if (!pnay)
1510  return ERROR_INT("&nay not defined", procName, 1);
1511  *pnay = NULL;
1512  if (!nasy)
1513  return ERROR_INT("nasy not defined", procName, 1);
1514  if ((n = numaGetCount(nasy)) == 0)
1515  return ERROR_INT("no bins in nas", procName, 1);
1516 
1517  /* Normalize and generate the rank array corresponding to
1518  * the binned histogram. */
1519  nan = numaNormalizeHistogram(nasy, 1.0);
1520  nar = numaCreate(n + 1); /* rank numa corresponding to nan */
1521  sum = 0.0;
1522  numaAddNumber(nar, sum); /* first element is 0.0 */
1523  for (i = 0; i < n; i++) {
1524  numaGetFValue(nan, i, &fval);
1525  sum += fval;
1526  numaAddNumber(nar, sum);
1527  }
1528 
1529  /* Compute rank array on full range with specified
1530  * number of points and correspondence to x-values. */
1531  numaInterpolateEqxInterval(startx, deltax, nar, L_LINEAR_INTERP,
1532  startx, startx + n * deltax, npts,
1533  pnax, pnay);
1534  numaDestroy(&nan);
1535  numaDestroy(&nar);
1536  return 0;
1537 }
1538 
1539 
1562 l_ok
1564  l_float32 rval,
1565  l_float32 *prank)
1566 {
1567 l_int32 i, ibinval, n;
1568 l_float32 startval, binsize, binval, maxval, fractval, total, sum, val;
1569 
1570  PROCNAME("numaHistogramGetRankFromVal");
1571 
1572  if (!prank)
1573  return ERROR_INT("prank not defined", procName, 1);
1574  *prank = 0.0;
1575  if (!na)
1576  return ERROR_INT("na not defined", procName, 1);
1577  numaGetParameters(na, &startval, &binsize);
1578  n = numaGetCount(na);
1579  if (rval < startval)
1580  return 0;
1581  maxval = startval + n * binsize;
1582  if (rval > maxval) {
1583  *prank = 1.0;
1584  return 0;
1585  }
1586 
1587  binval = (rval - startval) / binsize;
1588  ibinval = (l_int32)binval;
1589  if (ibinval >= n) {
1590  *prank = 1.0;
1591  return 0;
1592  }
1593  fractval = binval - (l_float32)ibinval;
1594 
1595  sum = 0.0;
1596  for (i = 0; i < ibinval; i++) {
1597  numaGetFValue(na, i, &val);
1598  sum += val;
1599  }
1600  numaGetFValue(na, ibinval, &val);
1601  sum += fractval * val;
1602  numaGetSum(na, &total);
1603  *prank = sum / total;
1604 
1605 /* lept_stderr("binval = %7.3f, rank = %7.3f\n", binval, *prank); */
1606 
1607  return 0;
1608 }
1609 
1610 
1633 l_ok
1635  l_float32 rank,
1636  l_float32 *prval)
1637 {
1638 l_int32 i, n;
1639 l_float32 startval, binsize, rankcount, total, sum, fract, val;
1640 
1641  PROCNAME("numaHistogramGetValFromRank");
1642 
1643  if (!prval)
1644  return ERROR_INT("prval not defined", procName, 1);
1645  *prval = 0.0;
1646  if (!na)
1647  return ERROR_INT("na not defined", procName, 1);
1648  if (rank < 0.0) {
1649  L_WARNING("rank < 0; setting to 0.0\n", procName);
1650  rank = 0.0;
1651  }
1652  if (rank > 1.0) {
1653  L_WARNING("rank > 1.0; setting to 1.0\n", procName);
1654  rank = 1.0;
1655  }
1656 
1657  n = numaGetCount(na);
1658  numaGetParameters(na, &startval, &binsize);
1659  numaGetSum(na, &total);
1660  rankcount = rank * total; /* count that corresponds to rank */
1661  sum = 0.0;
1662  for (i = 0; i < n; i++) {
1663  numaGetFValue(na, i, &val);
1664  if (sum + val >= rankcount)
1665  break;
1666  sum += val;
1667  }
1668  if (val <= 0.0) /* can == 0 if rank == 0.0 */
1669  fract = 0.0;
1670  else /* sum + fract * val = rankcount */
1671  fract = (rankcount - sum) / val;
1672 
1673  /* The use of the fraction of a bin allows a simple calculation
1674  * for the histogram value at the given rank. */
1675  *prval = startval + binsize * ((l_float32)i + fract);
1676 
1677 /* lept_stderr("rank = %7.3f, val = %7.3f\n", rank, *prval); */
1678 
1679  return 0;
1680 }
1681 
1682 
1705 l_ok
1707  l_int32 nbins,
1708  NUMA **pnabinval)
1709 {
1710 NUMA *nabinval; /* average gray value in the bins */
1711 NUMA *naeach;
1712 l_int32 i, ntot, count, bincount, binindex, binsize;
1713 l_float32 sum, val, ave;
1714 
1715  PROCNAME("numaDiscretizeSortedInBins");
1716 
1717  if (!pnabinval)
1718  return ERROR_INT("&nabinval not defined", procName, 1);
1719  *pnabinval = NULL;
1720  if (!na)
1721  return ERROR_INT("na not defined", procName, 1);
1722  if (nbins < 2)
1723  return ERROR_INT("nbins must be > 1", procName, 1);
1724 
1725  /* Get the number of items in each bin */
1726  ntot = numaGetCount(na);
1727  if ((naeach = numaGetUniformBinSizes(ntot, nbins)) == NULL)
1728  return ERROR_INT("naeach not made", procName, 1);
1729 
1730  /* Get the average value in each bin */
1731  sum = 0.0;
1732  bincount = 0;
1733  binindex = 0;
1734  numaGetIValue(naeach, 0, &binsize);
1735  nabinval = numaCreate(nbins);
1736  for (i = 0; i < ntot; i++) {
1737  numaGetFValue(na, i, &val);
1738  bincount++;
1739  sum += val;
1740  if (bincount == binsize) { /* add bin entry */
1741  ave = sum / binsize;
1742  numaAddNumber(nabinval, ave);
1743  sum = 0.0;
1744  bincount = 0;
1745  binindex++;
1746  if (binindex == nbins) break;
1747  numaGetIValue(naeach, binindex, &binsize);
1748  }
1749  }
1750  *pnabinval = nabinval;
1751 
1752  numaDestroy(&naeach);
1753  return 0;
1754 }
1755 
1756 
1782 l_ok
1784  l_int32 nbins,
1785  NUMA **pnabinval,
1786  NUMA **pnarank)
1787 {
1788 NUMA *nabinval; /* average gray value in the bins */
1789 NUMA *narank; /* rank value as function of input value */
1790 NUMA *naeach, *nan;
1791 l_int32 i, j, k, nxvals, occup, count, bincount, binindex, binsize;
1792 l_float32 sum, ave, ntot;
1793 
1794  PROCNAME("numaDiscretizeHistoInBins");
1795 
1796  if (pnarank) *pnarank = NULL;
1797  if (!pnabinval)
1798  return ERROR_INT("&nabinval not defined", procName, 1);
1799  *pnabinval = NULL;
1800  if (!na)
1801  return ERROR_INT("na not defined", procName, 1);
1802  if (nbins < 2)
1803  return ERROR_INT("nbins must be > 1", procName, 1);
1804 
1805  nxvals = numaGetCount(na);
1806  numaGetSum(na, &ntot);
1807  occup = ntot / nxvals;
1808  if (occup < 1) L_INFO("average occupancy %d < 1\n", procName, occup);
1809 
1810  /* Get the number of items in each bin */
1811  if ((naeach = numaGetUniformBinSizes(ntot, nbins)) == NULL)
1812  return ERROR_INT("naeach not made", procName, 1);
1813 
1814  /* Get the average value in each bin */
1815  sum = 0.0;
1816  bincount = 0;
1817  binindex = 0;
1818  numaGetIValue(naeach, 0, &binsize);
1819  nabinval = numaCreate(nbins);
1820  k = 0; /* count up to ntot */
1821  for (i = 0; i < nxvals; i++) {
1822  numaGetIValue(na, i, &count);
1823  for (j = 0; j < count; j++) {
1824  k++;
1825  bincount++;
1826  sum += i;
1827  if (bincount == binsize) { /* add bin entry */
1828  ave = sum / binsize;
1829  numaAddNumber(nabinval, ave);
1830  sum = 0.0;
1831  bincount = 0;
1832  binindex++;
1833  if (binindex == nbins) break;
1834  numaGetIValue(naeach, binindex, &binsize);
1835  }
1836  }
1837  if (binindex == nbins) break;
1838  }
1839  *pnabinval = nabinval;
1840  if (binindex != nbins)
1841  L_ERROR("binindex = %d != nbins = %d\n", procName, binindex, nbins);
1842 
1843  /* Get cumulative normalized histogram (rank[gray value]).
1844  * This is the partial sum operating on the normalized histogram. */
1845  if (pnarank) {
1846  nan = numaNormalizeHistogram(na, 1.0);
1847  *pnarank = numaGetPartialSums(nan);
1848  numaDestroy(&nan);
1849  }
1850  numaDestroy(&naeach);
1851  return 0;
1852 }
1853 
1854 
1873 l_ok
1875  l_int32 nbins,
1876  NUMA **pnam)
1877 {
1878 NUMA *na1;
1879 l_int32 maxbins, type;
1880 l_float32 maxval, delx;
1881 
1882  PROCNAME("numaGetRankBinValues");
1883 
1884  if (!pnam)
1885  return ERROR_INT("&pnam not defined", procName, 1);
1886  *pnam = NULL;
1887  if (!na)
1888  return ERROR_INT("na not defined", procName, 1);
1889  if (numaGetCount(na) == 0)
1890  return ERROR_INT("na is empty", procName, 1);
1891  if (nbins < 2)
1892  return ERROR_INT("nbins must be > 1", procName, 1);
1893 
1894  /* Choose between sorted array and a histogram.
1895  * If the input array is has a small number of numbers with
1896  * a large maximum, we will sort it. At the other extreme, if
1897  * the array has many numbers with a small maximum, such as the
1898  * values of pixels in an 8 bpp grayscale image, generate a histogram.
1899  * If type comes back as L_BIN_SORT, use a histogram. */
1900  type = numaChooseSortType(na);
1901  if (type == L_SHELL_SORT) { /* sort the array */
1902  L_INFO("sort the array: input size = %d\n", procName, numaGetCount(na));
1903  na1 = numaSort(NULL, na, L_SORT_INCREASING);
1904  numaDiscretizeSortedInBins(na1, nbins, pnam);
1905  numaDestroy(&na1);
1906  return 0;
1907  }
1908 
1909  /* Make the histogram. Assuming there are no negative values
1910  * in the array, if the max value in the array does not exceed
1911  * about 100000, the bin size for generating the histogram will
1912  * be 1; maxbins refers to the number of entries in the histogram. */
1913  L_INFO("use a histogram: input size = %d\n", procName, numaGetCount(na));
1914  numaGetMax(na, &maxval, NULL);
1915  maxbins = L_MIN(100002, (l_int32)maxval + 2);
1916  na1 = numaMakeHistogram(na, maxbins, NULL, NULL);
1917 
1918  /* Warn if there is a scale change. This shouldn't happen
1919  * unless the max value is above 100000. */
1920  numaGetParameters(na1, NULL, &delx);
1921  if (delx > 1.0)
1922  L_WARNING("scale change: delx = %6.2f\n", procName, delx);
1923 
1924  /* Rank bin the results */
1925  numaDiscretizeHistoInBins(na1, nbins, pnam, NULL);
1926  numaDestroy(&na1);
1927  return 0;
1928 }
1929 
1930 
1944 NUMA *
1945 numaGetUniformBinSizes(l_int32 ntotal,
1946  l_int32 nbins)
1947 {
1948 l_int32 i, start, end;
1949 NUMA *naeach;
1950 
1951  PROCNAME("numaGetUniformBinSizes");
1952 
1953  if (ntotal <= 0)
1954  return (NUMA *)ERROR_PTR("ntotal <= 0", procName, NULL);
1955  if (nbins <= 0)
1956  return (NUMA *)ERROR_PTR("nbins <= 0", procName, NULL);
1957 
1958  if ((naeach = numaCreate(nbins)) == NULL)
1959  return (NUMA *)ERROR_PTR("naeach not made", procName, NULL);
1960  start = 0;
1961  for (i = 0; i < nbins; i++) {
1962  end = ntotal * (i + 1) / nbins;
1963  numaAddNumber(naeach, end - start);
1964  start = end;
1965  }
1966  return naeach;
1967 }
1968 
1969 
1970 /*----------------------------------------------------------------------*
1971  * Splitting a distribution *
1972  *----------------------------------------------------------------------*/
2022 l_ok
2024  l_float32 scorefract,
2025  l_int32 *psplitindex,
2026  l_float32 *pave1,
2027  l_float32 *pave2,
2028  l_float32 *pnum1,
2029  l_float32 *pnum2,
2030  NUMA **pnascore)
2031 {
2032 l_int32 i, n, bestsplit, minrange, maxrange, maxindex;
2033 l_float32 ave1, ave2, ave1prev, ave2prev;
2034 l_float32 num1, num2, num1prev, num2prev;
2035 l_float32 val, minval, sum, fract1;
2036 l_float32 norm, score, minscore, maxscore;
2037 NUMA *nascore, *naave1, *naave2, *nanum1, *nanum2;
2038 
2039  PROCNAME("numaSplitDistribution");
2040 
2041  if (psplitindex) *psplitindex = 0;
2042  if (pave1) *pave1 = 0.0;
2043  if (pave2) *pave2 = 0.0;
2044  if (pnum1) *pnum1 = 0.0;
2045  if (pnum2) *pnum2 = 0.0;
2046  if (pnascore) *pnascore = NULL;
2047  if (!na)
2048  return ERROR_INT("na not defined", procName, 1);
2049 
2050  n = numaGetCount(na);
2051  if (n <= 1)
2052  return ERROR_INT("n = 1 in histogram", procName, 1);
2053  numaGetSum(na, &sum);
2054  if (sum <= 0.0)
2055  return ERROR_INT("sum <= 0.0", procName, 1);
2056  norm = 4.0 / ((l_float32)(n - 1) * (n - 1));
2057  ave1prev = 0.0;
2058  numaGetHistogramStats(na, 0.0, 1.0, &ave2prev, NULL, NULL, NULL);
2059  num1prev = 0.0;
2060  num2prev = sum;
2061  maxindex = n / 2; /* initialize with something */
2062 
2063  /* Split the histogram with [0 ... i] in the lower part
2064  * and [i+1 ... n-1] in upper part. First, compute an otsu
2065  * score for each possible splitting. */
2066  if ((nascore = numaCreate(n)) == NULL)
2067  return ERROR_INT("nascore not made", procName, 1);
2068  naave1 = (pave1) ? numaCreate(n) : NULL;
2069  naave2 = (pave2) ? numaCreate(n) : NULL;
2070  nanum1 = (pnum1) ? numaCreate(n) : NULL;
2071  nanum2 = (pnum2) ? numaCreate(n) : NULL;
2072  maxscore = 0.0;
2073  for (i = 0; i < n; i++) {
2074  numaGetFValue(na, i, &val);
2075  num1 = num1prev + val;
2076  if (num1 == 0)
2077  ave1 = ave1prev;
2078  else
2079  ave1 = (num1prev * ave1prev + i * val) / num1;
2080  num2 = num2prev - val;
2081  if (num2 == 0)
2082  ave2 = ave2prev;
2083  else
2084  ave2 = (num2prev * ave2prev - i * val) / num2;
2085  fract1 = num1 / sum;
2086  score = norm * (fract1 * (1 - fract1)) * (ave2 - ave1) * (ave2 - ave1);
2087  numaAddNumber(nascore, score);
2088  if (pave1) numaAddNumber(naave1, ave1);
2089  if (pave2) numaAddNumber(naave2, ave2);
2090  if (pnum1) numaAddNumber(nanum1, num1);
2091  if (pnum2) numaAddNumber(nanum2, num2);
2092  if (score > maxscore) {
2093  maxscore = score;
2094  maxindex = i;
2095  }
2096  num1prev = num1;
2097  num2prev = num2;
2098  ave1prev = ave1;
2099  ave2prev = ave2;
2100  }
2101 
2102  /* Next, for all contiguous scores within a specified fraction
2103  * of the max, choose the split point as the value with the
2104  * minimum in the histogram. */
2105  minscore = (1. - scorefract) * maxscore;
2106  for (i = maxindex - 1; i >= 0; i--) {
2107  numaGetFValue(nascore, i, &val);
2108  if (val < minscore)
2109  break;
2110  }
2111  minrange = i + 1;
2112  for (i = maxindex + 1; i < n; i++) {
2113  numaGetFValue(nascore, i, &val);
2114  if (val < minscore)
2115  break;
2116  }
2117  maxrange = i - 1;
2118  numaGetFValue(na, minrange, &minval);
2119  bestsplit = minrange;
2120  for (i = minrange + 1; i <= maxrange; i++) {
2121  numaGetFValue(na, i, &val);
2122  if (val < minval) {
2123  minval = val;
2124  bestsplit = i;
2125  }
2126  }
2127 
2128  /* Add one to the bestsplit value to get the threshold value,
2129  * because when we take a threshold, as in pixThresholdToBinary(),
2130  * we always choose the set with values below the threshold. */
2131  bestsplit = L_MIN(255, bestsplit + 1);
2132 
2133  if (psplitindex) *psplitindex = bestsplit;
2134  if (pave1) numaGetFValue(naave1, bestsplit, pave1);
2135  if (pave2) numaGetFValue(naave2, bestsplit, pave2);
2136  if (pnum1) numaGetFValue(nanum1, bestsplit, pnum1);
2137  if (pnum2) numaGetFValue(nanum2, bestsplit, pnum2);
2138 
2139  if (pnascore) { /* debug mode */
2140  lept_stderr("minrange = %d, maxrange = %d\n", minrange, maxrange);
2141  lept_stderr("minval = %10.0f\n", minval);
2142  gplotSimple1(nascore, GPLOT_PNG, "/tmp/lept/nascore",
2143  "Score for split distribution");
2144  *pnascore = nascore;
2145  } else {
2146  numaDestroy(&nascore);
2147  }
2148 
2149  if (pave1) numaDestroy(&naave1);
2150  if (pave2) numaDestroy(&naave2);
2151  if (pnum1) numaDestroy(&nanum1);
2152  if (pnum2) numaDestroy(&nanum2);
2153  return 0;
2154 }
2155 
2156 
2157 /*----------------------------------------------------------------------*
2158  * Comparing histograms *
2159  *----------------------------------------------------------------------*/
2184 l_ok
2186  NUMAA *naa2,
2187  NUMA **pnad)
2188 {
2189 l_int32 i, n, nt;
2190 l_float32 dist;
2191 NUMA *na1, *na2, *nad;
2192 
2193  PROCNAME("grayHistogramsToEMD");
2194 
2195  if (!pnad)
2196  return ERROR_INT("&nad not defined", procName, 1);
2197  *pnad = NULL;
2198  if (!naa1 || !naa2)
2199  return ERROR_INT("na1 and na2 not both defined", procName, 1);
2200  n = numaaGetCount(naa1);
2201  if (n != numaaGetCount(naa2))
2202  return ERROR_INT("naa1 and naa2 numa counts differ", procName, 1);
2203  nt = numaaGetNumberCount(naa1);
2204  if (nt != numaaGetNumberCount(naa2))
2205  return ERROR_INT("naa1 and naa2 number counts differ", procName, 1);
2206  if (256 * n != nt) /* good enough check */
2207  return ERROR_INT("na sizes must be 256", procName, 1);
2208 
2209  nad = numaCreate(n);
2210  *pnad = nad;
2211  for (i = 0; i < n; i++) {
2212  na1 = numaaGetNuma(naa1, i, L_CLONE);
2213  na2 = numaaGetNuma(naa2, i, L_CLONE);
2214  numaEarthMoverDistance(na1, na2, &dist);
2215  numaAddNumber(nad, dist / 255.); /* normalize to [0.0 - 1.0] */
2216  numaDestroy(&na1);
2217  numaDestroy(&na2);
2218  }
2219  return 0;
2220 }
2221 
2222 
2250 l_ok
2252  NUMA *na2,
2253  l_float32 *pdist)
2254 {
2255 l_int32 n, norm, i;
2256 l_float32 sum1, sum2, diff, total;
2257 l_float32 *array1, *array3;
2258 NUMA *na3;
2259 
2260  PROCNAME("numaEarthMoverDistance");
2261 
2262  if (!pdist)
2263  return ERROR_INT("&dist not defined", procName, 1);
2264  *pdist = 0.0;
2265  if (!na1 || !na2)
2266  return ERROR_INT("na1 and na2 not both defined", procName, 1);
2267  n = numaGetCount(na1);
2268  if (n != numaGetCount(na2))
2269  return ERROR_INT("na1 and na2 have different size", procName, 1);
2270 
2271  /* Generate na3; normalize to na1 if necessary */
2272  numaGetSum(na1, &sum1);
2273  numaGetSum(na2, &sum2);
2274  norm = (L_ABS(sum1 - sum2) < 0.00001 * L_ABS(sum1)) ? 1 : 0;
2275  if (!norm)
2276  na3 = numaTransform(na2, 0, sum1 / sum2);
2277  else
2278  na3 = numaCopy(na2);
2279  array1 = numaGetFArray(na1, L_NOCOPY);
2280  array3 = numaGetFArray(na3, L_NOCOPY);
2281 
2282  /* Move earth in n3 from array elements, to match n1 */
2283  total = 0;
2284  for (i = 1; i < n; i++) {
2285  diff = array1[i - 1] - array3[i - 1];
2286  array3[i] -= diff;
2287  total += L_ABS(diff);
2288  }
2289  *pdist = total / sum1;
2290 
2291  numaDestroy(&na3);
2292  return 0;
2293 }
2294 
2295 
2341 l_ok
2343  l_int32 wc,
2344  NUMA **pnam,
2345  NUMA **pnams,
2346  NUMA **pnav,
2347  NUMA **pnarv)
2348 {
2349 l_int32 i, j, n, nn;
2350 l_float32 **arrays;
2351 l_float32 mean, var, rvar;
2352 NUMA *na1, *na2, *na3, *na4;
2353 
2354  PROCNAME("grayInterHistogramStats");
2355 
2356  if (pnam) *pnam = NULL;
2357  if (pnams) *pnams = NULL;
2358  if (pnav) *pnav = NULL;
2359  if (pnarv) *pnarv = NULL;
2360  if (!pnam && !pnams && !pnav && !pnarv)
2361  return ERROR_INT("nothing requested", procName, 1);
2362  if (!naa)
2363  return ERROR_INT("naa not defined", procName, 1);
2364  n = numaaGetCount(naa);
2365  for (i = 0; i < n; i++) {
2366  nn = numaaGetNumaCount(naa, i);
2367  if (nn != 256) {
2368  L_ERROR("%d numbers in numa[%d]\n", procName, nn, i);
2369  return 1;
2370  }
2371  }
2372 
2373  if (pnam) *pnam = numaCreate(256);
2374  if (pnams) *pnams = numaCreate(256);
2375  if (pnav) *pnav = numaCreate(256);
2376  if (pnarv) *pnarv = numaCreate(256);
2377 
2378  /* First, use mean smoothing, normalize each histogram,
2379  * and save all results in a 2D matrix. */
2380  arrays = (l_float32 **)LEPT_CALLOC(n, sizeof(l_float32 *));
2381  for (i = 0; i < n; i++) {
2382  na1 = numaaGetNuma(naa, i, L_CLONE);
2383  na2 = numaWindowedMean(na1, wc);
2384  na3 = numaNormalizeHistogram(na2, 10000.);
2385  arrays[i] = numaGetFArray(na3, L_COPY);
2386  numaDestroy(&na1);
2387  numaDestroy(&na2);
2388  numaDestroy(&na3);
2389  }
2390 
2391  /* Get stats between histograms */
2392  for (j = 0; j < 256; j++) {
2393  na4 = numaCreate(n);
2394  for (i = 0; i < n; i++) {
2395  numaAddNumber(na4, arrays[i][j]);
2396  }
2397  numaSimpleStats(na4, 0, -1, &mean, &var, &rvar);
2398  if (pnam) numaAddNumber(*pnam, mean);
2399  if (pnams) numaAddNumber(*pnams, mean * mean);
2400  if (pnav) numaAddNumber(*pnav, var);
2401  if (pnarv) numaAddNumber(*pnarv, rvar);
2402  numaDestroy(&na4);
2403  }
2404 
2405  for (i = 0; i < n; i++)
2406  LEPT_FREE(arrays[i]);
2407  LEPT_FREE(arrays);
2408  return 0;
2409 }
2410 
2411 
2412 /*----------------------------------------------------------------------*
2413  * Extrema finding *
2414  *----------------------------------------------------------------------*/
2431 NUMA *
2433  l_int32 nmax,
2434  l_float32 fract1,
2435  l_float32 fract2)
2436 {
2437 l_int32 i, k, n, maxloc, lloc, rloc;
2438 l_float32 fmaxval, sum, total, newtotal, val, lastval;
2439 l_float32 peakfract;
2440 NUMA *na, *napeak;
2441 
2442  PROCNAME("numaFindPeaks");
2443 
2444  if (!nas)
2445  return (NUMA *)ERROR_PTR("nas not defined", procName, NULL);
2446  n = numaGetCount(nas);
2447  numaGetSum(nas, &total);
2448 
2449  /* We munge this copy */
2450  if ((na = numaCopy(nas)) == NULL)
2451  return (NUMA *)ERROR_PTR("na not made", procName, NULL);
2452  if ((napeak = numaCreate(4 * nmax)) == NULL) {
2453  numaDestroy(&na);
2454  return (NUMA *)ERROR_PTR("napeak not made", procName, NULL);
2455  }
2456 
2457  for (k = 0; k < nmax; k++) {
2458  numaGetSum(na, &newtotal);
2459  if (newtotal == 0.0) /* sanity check */
2460  break;
2461  numaGetMax(na, &fmaxval, &maxloc);
2462  sum = fmaxval;
2463  lastval = fmaxval;
2464  lloc = 0;
2465  for (i = maxloc - 1; i >= 0; --i) {
2466  numaGetFValue(na, i, &val);
2467  if (val == 0.0) {
2468  lloc = i + 1;
2469  break;
2470  }
2471  if (val > fract1 * fmaxval) {
2472  sum += val;
2473  lastval = val;
2474  continue;
2475  }
2476  if (lastval - val > fract2 * lastval) {
2477  sum += val;
2478  lastval = val;
2479  continue;
2480  }
2481  lloc = i;
2482  break;
2483  }
2484  lastval = fmaxval;
2485  rloc = n - 1;
2486  for (i = maxloc + 1; i < n; ++i) {
2487  numaGetFValue(na, i, &val);
2488  if (val == 0.0) {
2489  rloc = i - 1;
2490  break;
2491  }
2492  if (val > fract1 * fmaxval) {
2493  sum += val;
2494  lastval = val;
2495  continue;
2496  }
2497  if (lastval - val > fract2 * lastval) {
2498  sum += val;
2499  lastval = val;
2500  continue;
2501  }
2502  rloc = i;
2503  break;
2504  }
2505  peakfract = sum / total;
2506  numaAddNumber(napeak, lloc);
2507  numaAddNumber(napeak, maxloc);
2508  numaAddNumber(napeak, rloc);
2509  numaAddNumber(napeak, peakfract);
2510 
2511  for (i = lloc; i <= rloc; i++)
2512  numaSetValue(na, i, 0.0);
2513  }
2514 
2515  numaDestroy(&na);
2516  return napeak;
2517 }
2518 
2519 
2549 NUMA *
2551  l_float32 delta,
2552  NUMA **pnav)
2553 {
2554 l_int32 i, n, found, loc, direction;
2555 l_float32 startval, val, maxval, minval;
2556 NUMA *nav, *nad;
2557 
2558  PROCNAME("numaFindExtrema");
2559 
2560  if (pnav) *pnav = NULL;
2561  if (!nas)
2562  return (NUMA *)ERROR_PTR("nas not defined", procName, NULL);
2563  if (delta < 0.0)
2564  return (NUMA *)ERROR_PTR("delta < 0", procName, NULL);
2565 
2566  n = numaGetCount(nas);
2567  nad = numaCreate(0);
2568  nav = NULL;
2569  if (pnav) {
2570  nav = numaCreate(0);
2571  *pnav = nav;
2572  }
2573 
2574  /* We don't know if we'll find a peak or valley first,
2575  * but use the first element of nas as the reference point.
2576  * Break when we deviate by 'delta' from the first point. */
2577  numaGetFValue(nas, 0, &startval);
2578  found = FALSE;
2579  for (i = 1; i < n; i++) {
2580  numaGetFValue(nas, i, &val);
2581  if (L_ABS(val - startval) >= delta) {
2582  found = TRUE;
2583  break;
2584  }
2585  }
2586 
2587  if (!found)
2588  return nad; /* it's empty */
2589 
2590  /* Are we looking for a peak or a valley? */
2591  if (val > startval) { /* peak */
2592  direction = 1;
2593  maxval = val;
2594  } else {
2595  direction = -1;
2596  minval = val;
2597  }
2598  loc = i;
2599 
2600  /* Sweep through the rest of the array, recording alternating
2601  * peak/valley extrema. */
2602  for (i = i + 1; i < n; i++) {
2603  numaGetFValue(nas, i, &val);
2604  if (direction == 1 && val > maxval ) { /* new local max */
2605  maxval = val;
2606  loc = i;
2607  } else if (direction == -1 && val < minval ) { /* new local min */
2608  minval = val;
2609  loc = i;
2610  } else if (direction == 1 && (maxval - val >= delta)) {
2611  numaAddNumber(nad, loc); /* save the current max location */
2612  if (nav) numaAddNumber(nav, maxval);
2613  direction = -1; /* reverse: start looking for a min */
2614  minval = val;
2615  loc = i; /* current min location */
2616  } else if (direction == -1 && (val - minval >= delta)) {
2617  numaAddNumber(nad, loc); /* save the current min location */
2618  if (nav) numaAddNumber(nav, minval);
2619  direction = 1; /* reverse: start looking for a max */
2620  maxval = val;
2621  loc = i; /* current max location */
2622  }
2623  }
2624 
2625  /* Save the final extremum */
2626 /* numaAddNumber(nad, loc); */
2627  return nad;
2628 }
2629 
2630 
2656 l_ok
2658  l_int32 skip,
2659  l_int32 *pthresh,
2660  l_float32 *pfract)
2661 {
2662 l_int32 i, n, start, index, minloc;
2663 l_float32 val, pval, jval, minval, sum, partsum;
2664 l_float32 *fa;
2665 
2666  PROCNAME("numaFindLocForThreshold");
2667 
2668  if (pfract) *pfract = 0.0;
2669  if (!pthresh)
2670  return ERROR_INT("&thresh not defined", procName, 1);
2671  *pthresh = 0;
2672  if (!na)
2673  return ERROR_INT("na not defined", procName, 1);
2674  if (skip <= 0) skip = 20;
2675 
2676  /* Look for the top of the first peak */
2677  n = numaGetCount(na);
2678  fa = numaGetFArray(na, L_NOCOPY);
2679  pval = fa[0];
2680  for (i = 1; i < n; i++) {
2681  val = fa[i];
2682  index = L_MIN(i + skip, n - 1);
2683  jval = fa[index];
2684  if (val < pval && jval < pval) /* near the top if not there */
2685  break;
2686  pval = val;
2687  }
2688 
2689  /* Look for the low point in the valley */
2690  start = i;
2691  pval = fa[start];
2692  for (i = start + 1; i < n; i++) {
2693  val = fa[i];
2694  if (val <= pval) { /* going down */
2695  pval = val;
2696  } else { /* going up */
2697  index = L_MIN(i + skip, n - 1);
2698  jval = fa[index]; /* junp ahead 20 */
2699  if (val > jval) { /* still going down; jump ahead */
2700  pval = jval;
2701  i = index;
2702  } else { /* really going up; passed the min */
2703  break;
2704  }
2705  }
2706  }
2707 
2708  /* Find the location of the minimum in the interval */
2709  minloc = index; /* likely passed the min; look backward */
2710  minval = fa[index];
2711  for (i = index - 1; i > index - skip; i--) {
2712  if (fa[i] < minval) {
2713  minval = fa[i];
2714  minloc = i;
2715  }
2716  }
2717  *pthresh = minloc;
2718 
2719  /* Find the fraction under the first peak */
2720  if (pfract) {
2721  numaGetSumOnInterval(na, 0, minloc, &partsum);
2722  numaGetSum(na, &sum);
2723  if (sum > 0.0)
2724  *pfract = partsum / sum;
2725  }
2726  return 0;
2727 }
2728 
2729 
2751 l_ok
2753  l_float32 minreversal,
2754  l_int32 *pnr,
2755  l_float32 *prd)
2756 {
2757 l_int32 i, n, nr, ival, binvals;
2758 l_int32 *ia;
2759 l_float32 fval, delx, len;
2760 NUMA *nat;
2761 
2762  PROCNAME("numaCountReversals");
2763 
2764  if (pnr) *pnr = 0;
2765  if (prd) *prd = 0.0;
2766  if (!pnr && !prd)
2767  return ERROR_INT("neither &nr nor &rd are defined", procName, 1);
2768  if (!nas)
2769  return ERROR_INT("nas not defined", procName, 1);
2770  if ((n = numaGetCount(nas)) == 0) {
2771  L_INFO("nas is empty\n", procName);
2772  return 0;
2773  }
2774  if (minreversal < 0.0)
2775  return ERROR_INT("minreversal < 0", procName, 1);
2776 
2777  /* Decide if the only values are 0 and 1 */
2778  binvals = TRUE;
2779  for (i = 0; i < n; i++) {
2780  numaGetFValue(nas, i, &fval);
2781  if (fval != 0.0 && fval != 1.0) {
2782  binvals = FALSE;
2783  break;
2784  }
2785  }
2786 
2787  nr = 0;
2788  if (binvals) {
2789  if (minreversal > 1.0) {
2790  L_WARNING("binary values but minreversal > 1\n", procName);
2791  } else {
2792  ia = numaGetIArray(nas);
2793  ival = ia[0];
2794  for (i = 1; i < n; i++) {
2795  if (ia[i] != ival) {
2796  nr++;
2797  ival = ia[i];
2798  }
2799  }
2800  LEPT_FREE(ia);
2801  }
2802  } else {
2803  nat = numaFindExtrema(nas, minreversal, NULL);
2804  nr = numaGetCount(nat);
2805  numaDestroy(&nat);
2806  }
2807  if (pnr) *pnr = nr;
2808  if (prd) {
2809  numaGetParameters(nas, NULL, &delx);
2810  len = delx * n;
2811  *prd = (l_float32)nr / len;
2812  }
2813 
2814  return 0;
2815 }
2816 
2817 
2818 /*----------------------------------------------------------------------*
2819  * Threshold crossings and frequency analysis *
2820  *----------------------------------------------------------------------*/
2847 l_ok
2849  NUMA *nay,
2850  l_float32 estthresh,
2851  l_float32 *pbestthresh)
2852 {
2853 l_int32 i, inrun, istart, iend, maxstart, maxend, runlen, maxrunlen;
2854 l_int32 val, maxval, nmax, count;
2855 l_float32 thresh, fmaxval, fmodeval;
2856 NUMA *nat, *nac;
2857 
2858  PROCNAME("numaSelectCrossingThreshold");
2859 
2860  if (!pbestthresh)
2861  return ERROR_INT("&bestthresh not defined", procName, 1);
2862  *pbestthresh = 0.0;
2863  if (!nay)
2864  return ERROR_INT("nay not defined", procName, 1);
2865  if (numaGetCount(nay) < 2) {
2866  L_WARNING("nay count < 2; no threshold crossing\n", procName);
2867  return 1;
2868  }
2869 
2870  /* Compute the number of crossings for different thresholds */
2871  nat = numaCreate(41);
2872  for (i = 0; i < 41; i++) {
2873  thresh = estthresh - 80.0 + 4.0 * i;
2874  nac = numaCrossingsByThreshold(nax, nay, thresh);
2875  numaAddNumber(nat, numaGetCount(nac));
2876  numaDestroy(&nac);
2877  }
2878 
2879  /* Find the center of the plateau of max crossings, which
2880  * extends from thresh[istart] to thresh[iend]. */
2881  numaGetMax(nat, &fmaxval, NULL);
2882  maxval = (l_int32)fmaxval;
2883  nmax = 0;
2884  for (i = 0; i < 41; i++) {
2885  numaGetIValue(nat, i, &val);
2886  if (val == maxval)
2887  nmax++;
2888  }
2889  if (nmax < 3) { /* likely accidental max; try the mode */
2890  numaGetMode(nat, &fmodeval, &count);
2891  if (count > nmax && fmodeval > 0.5 * fmaxval)
2892  maxval = (l_int32)fmodeval; /* use the mode */
2893  }
2894 
2895  inrun = FALSE;
2896  iend = 40;
2897  maxrunlen = 0, maxstart = 0, maxend = 0;
2898  for (i = 0; i < 41; i++) {
2899  numaGetIValue(nat, i, &val);
2900  if (val == maxval) {
2901  if (!inrun) {
2902  istart = i;
2903  inrun = TRUE;
2904  }
2905  continue;
2906  }
2907  if (inrun && (val != maxval)) {
2908  iend = i - 1;
2909  runlen = iend - istart + 1;
2910  inrun = FALSE;
2911  if (runlen > maxrunlen) {
2912  maxstart = istart;
2913  maxend = iend;
2914  maxrunlen = runlen;
2915  }
2916  }
2917  }
2918  if (inrun) {
2919  runlen = i - istart;
2920  if (runlen > maxrunlen) {
2921  maxstart = istart;
2922  maxend = i - 1;
2923  maxrunlen = runlen;
2924  }
2925  }
2926 
2927  *pbestthresh = estthresh - 80.0 + 2.0 * (l_float32)(maxstart + maxend);
2928 
2929 #if DEBUG_CROSSINGS
2930  lept_stderr("\nCrossings attain a maximum at %d thresholds, between:\n"
2931  " thresh[%d] = %5.1f and thresh[%d] = %5.1f\n",
2932  nmax, maxstart, estthresh - 80.0 + 4.0 * maxstart,
2933  maxend, estthresh - 80.0 + 4.0 * maxend);
2934  lept_stderr("The best choice: %5.1f\n", *pbestthresh);
2935  lept_stderr("Number of crossings at the 41 thresholds:");
2936  numaWriteStderr(nat);
2937 #endif /* DEBUG_CROSSINGS */
2938 
2939  numaDestroy(&nat);
2940  return 0;
2941 }
2942 
2943 
2958 NUMA *
2960  NUMA *nay,
2961  l_float32 thresh)
2962 {
2963 l_int32 i, n;
2964 l_float32 startx, delx;
2965 l_float32 xval1, xval2, yval1, yval2, delta1, delta2, crossval, fract;
2966 NUMA *nad;
2967 
2968  PROCNAME("numaCrossingsByThreshold");
2969 
2970  if (!nay)
2971  return (NUMA *)ERROR_PTR("nay not defined", procName, NULL);
2972  n = numaGetCount(nay);
2973 
2974  if (nax && (numaGetCount(nax) != n))
2975  return (NUMA *)ERROR_PTR("nax and nay sizes differ", procName, NULL);
2976 
2977  nad = numaCreate(0);
2978  if (n < 2) return nad;
2979  numaGetFValue(nay, 0, &yval1);
2980  numaGetParameters(nay, &startx, &delx);
2981  if (nax)
2982  numaGetFValue(nax, 0, &xval1);
2983  else
2984  xval1 = startx;
2985  for (i = 1; i < n; i++) {
2986  numaGetFValue(nay, i, &yval2);
2987  if (nax)
2988  numaGetFValue(nax, i, &xval2);
2989  else
2990  xval2 = startx + i * delx;
2991  delta1 = yval1 - thresh;
2992  delta2 = yval2 - thresh;
2993  if (delta1 == 0.0) {
2994  numaAddNumber(nad, xval1);
2995  } else if (delta2 == 0.0) {
2996  numaAddNumber(nad, xval2);
2997  } else if (delta1 * delta2 < 0.0) { /* crossing */
2998  fract = L_ABS(delta1) / L_ABS(yval1 - yval2);
2999  crossval = xval1 + fract * (xval2 - xval1);
3000  numaAddNumber(nad, crossval);
3001  }
3002  xval1 = xval2;
3003  yval1 = yval2;
3004  }
3005 
3006  return nad;
3007 }
3008 
3009 
3024 NUMA *
3026  NUMA *nay,
3027  l_float32 delta)
3028 {
3029 l_int32 i, j, n, np, previndex, curindex;
3030 l_float32 startx, delx;
3031 l_float32 xval1, xval2, yval1, yval2, delta1, delta2;
3032 l_float32 prevval, curval, thresh, crossval, fract;
3033 NUMA *nap, *nad;
3034 
3035  PROCNAME("numaCrossingsByPeaks");
3036 
3037  if (!nay)
3038  return (NUMA *)ERROR_PTR("nay not defined", procName, NULL);
3039 
3040  n = numaGetCount(nay);
3041  if (nax && (numaGetCount(nax) != n))
3042  return (NUMA *)ERROR_PTR("nax and nay sizes differ", procName, NULL);
3043 
3044  /* Find the extrema. Also add last point in nay to get
3045  * the last transition (from the last peak to the end).
3046  * The number of crossings is 1 more than the number of extrema. */
3047  nap = numaFindExtrema(nay, delta, NULL);
3048  numaAddNumber(nap, n - 1);
3049  np = numaGetCount(nap);
3050  L_INFO("Number of crossings: %d\n", procName, np);
3051 
3052  /* Do all computation in index units of nax or the delx of nay */
3053  nad = numaCreate(np); /* output crossing locations, in nax units */
3054  previndex = 0; /* prime the search with 1st point */
3055  numaGetFValue(nay, 0, &prevval); /* prime the search with 1st point */
3056  numaGetParameters(nay, &startx, &delx);
3057  for (i = 0; i < np; i++) {
3058  numaGetIValue(nap, i, &curindex);
3059  numaGetFValue(nay, curindex, &curval);
3060  thresh = (prevval + curval) / 2.0;
3061  if (nax)
3062  numaGetFValue(nax, previndex, &xval1);
3063  else
3064  xval1 = startx + previndex * delx;
3065  numaGetFValue(nay, previndex, &yval1);
3066  for (j = previndex + 1; j <= curindex; j++) {
3067  if (nax)
3068  numaGetFValue(nax, j, &xval2);
3069  else
3070  xval2 = startx + j * delx;
3071  numaGetFValue(nay, j, &yval2);
3072  delta1 = yval1 - thresh;
3073  delta2 = yval2 - thresh;
3074  if (delta1 == 0.0) {
3075  numaAddNumber(nad, xval1);
3076  break;
3077  } else if (delta2 == 0.0) {
3078  numaAddNumber(nad, xval2);
3079  break;
3080  } else if (delta1 * delta2 < 0.0) { /* crossing */
3081  fract = L_ABS(delta1) / L_ABS(yval1 - yval2);
3082  crossval = xval1 + fract * (xval2 - xval1);
3083  numaAddNumber(nad, crossval);
3084  break;
3085  }
3086  xval1 = xval2;
3087  yval1 = yval2;
3088  }
3089  previndex = curindex;
3090  prevval = curval;
3091  }
3092 
3093  numaDestroy(&nap);
3094  return nad;
3095 }
3096 
3097 
3135 l_ok
3137  l_float32 relweight,
3138  l_int32 nwidth,
3139  l_int32 nshift,
3140  l_float32 minwidth,
3141  l_float32 maxwidth,
3142  l_float32 *pbestwidth,
3143  l_float32 *pbestshift,
3144  l_float32 *pbestscore)
3145 {
3146 l_int32 i, j;
3147 l_float32 delwidth, delshift, width, shift, score;
3148 l_float32 bestwidth, bestshift, bestscore;
3149 
3150  PROCNAME("numaEvalBestHaarParameters");
3151 
3152  if (pbestscore) *pbestscore = 0.0;
3153  if (pbestwidth) *pbestwidth = 0.0;
3154  if (pbestshift) *pbestshift = 0.0;
3155  if (!pbestwidth || !pbestshift)
3156  return ERROR_INT("&bestwidth and &bestshift not defined", procName, 1);
3157  if (!nas)
3158  return ERROR_INT("nas not defined", procName, 1);
3159 
3160  bestscore = bestwidth = bestshift = 0.0;
3161  delwidth = (maxwidth - minwidth) / (nwidth - 1.0);
3162  for (i = 0; i < nwidth; i++) {
3163  width = minwidth + delwidth * i;
3164  delshift = width / (l_float32)(nshift);
3165  for (j = 0; j < nshift; j++) {
3166  shift = j * delshift;
3167  numaEvalHaarSum(nas, width, shift, relweight, &score);
3168  if (score > bestscore) {
3169  bestscore = score;
3170  bestwidth = width;
3171  bestshift = shift;
3172 #if DEBUG_FREQUENCY
3173  lept_stderr("width = %7.3f, shift = %7.3f, score = %7.3f\n",
3174  width, shift, score);
3175 #endif /* DEBUG_FREQUENCY */
3176  }
3177  }
3178  }
3179 
3180  *pbestwidth = bestwidth;
3181  *pbestshift = bestshift;
3182  if (pbestscore)
3183  *pbestscore = bestscore;
3184  return 0;
3185 }
3186 
3187 
3220 l_ok
3222  l_float32 width,
3223  l_float32 shift,
3224  l_float32 relweight,
3225  l_float32 *pscore)
3226 {
3227 l_int32 i, n, nsamp, index;
3228 l_float32 score, weight, val;
3229 
3230  PROCNAME("numaEvalHaarSum");
3231 
3232  if (!pscore)
3233  return ERROR_INT("&score not defined", procName, 1);
3234  *pscore = 0.0;
3235  if (!nas)
3236  return ERROR_INT("nas not defined", procName, 1);
3237  if ((n = numaGetCount(nas)) < 2 * width)
3238  return ERROR_INT("nas size too small", procName, 1);
3239 
3240  score = 0.0;
3241  nsamp = (l_int32)((n - shift) / width);
3242  for (i = 0; i < nsamp; i++) {
3243  index = (l_int32)(shift + i * width);
3244  weight = (i % 2) ? 1.0 : -1.0 * relweight;
3245  numaGetFValue(nas, index, &val);
3246  score += weight * val;
3247  }
3248 
3249  *pscore = 2.0 * width * score / (l_float32)n;
3250  return 0;
3251 }
3252 
3253 
3254 /*----------------------------------------------------------------------*
3255  * Generating numbers in a range under constraints *
3256  *----------------------------------------------------------------------*/
3277 NUMA *
3279  l_int32 last,
3280  l_int32 nmax,
3281  l_int32 use_pairs)
3282 {
3283 l_int32 i, nsets, val;
3284 l_float32 delta;
3285 NUMA *na;
3286 
3287  PROCNAME("genConstrainedNumaInRange");
3288 
3289  first = L_MAX(0, first);
3290  if (last < first)
3291  return (NUMA *)ERROR_PTR("last < first!", procName, NULL);
3292  if (nmax < 1)
3293  return (NUMA *)ERROR_PTR("nmax < 1!", procName, NULL);
3294 
3295  nsets = L_MIN(nmax, last - first + 1);
3296  if (use_pairs == 1)
3297  nsets = nsets / 2;
3298  if (nsets == 0)
3299  return (NUMA *)ERROR_PTR("nsets == 0", procName, NULL);
3300 
3301  /* Select delta so that selection covers the full range if possible */
3302  if (nsets == 1) {
3303  delta = 0.0;
3304  } else {
3305  if (use_pairs == 0)
3306  delta = (l_float32)(last - first) / (nsets - 1);
3307  else
3308  delta = (l_float32)(last - first - 1) / (nsets - 1);
3309  }
3310 
3311  na = numaCreate(nsets);
3312  for (i = 0; i < nsets; i++) {
3313  val = (l_int32)(first + i * delta + 0.5);
3314  numaAddNumber(na, val);
3315  if (use_pairs == 1)
3316  numaAddNumber(na, val + 1);
3317  }
3318 
3319  return na;
3320 }
@ L_LINEAR_INTERP
Definition: array.h:151
@ L_MIRRORED_BORDER
Definition: array.h:159
l_ok gplotSimple1(NUMA *na, l_int32 outformat, const char *outroot, const char *title)
gplotSimple1()
Definition: gplot.c:665
l_ok numaAddNumber(NUMA *na, l_float32 val)
numaAddNumber()
Definition: numabasic.c:478
l_int32 numaaGetNumberCount(NUMAA *naa)
numaaGetNumberCount()
Definition: numabasic.c:1670
l_ok numaGetFValue(NUMA *na, l_int32 index, l_float32 *pval)
numaGetFValue()
Definition: numabasic.c:719
l_ok numaWriteStderr(NUMA *na)
numaWriteStderr()
Definition: numabasic.c:1313
NUMA * numaCreate(l_int32 n)
numaCreate()
Definition: numabasic.c:194
l_int32 numaaGetCount(NUMAA *naa)
numaaGetCount()
Definition: numabasic.c:1631
NUMA * numaaGetNuma(NUMAA *naa, l_int32 index, l_int32 accessflag)
numaaGetNuma()
Definition: numabasic.c:1740
l_int32 numaaGetNumaCount(NUMAA *naa, l_int32 index)
numaaGetNumaCount()
Definition: numabasic.c:1649
l_ok numaSetCount(NUMA *na, l_int32 newcount)
numaSetCount()
Definition: numabasic.c:685
void numaDestroy(NUMA **pna)
numaDestroy()
Definition: numabasic.c:366
l_ok numaSetValue(NUMA *na, l_int32 index, l_float32 val)
numaSetValue()
Definition: numabasic.c:786
l_int32 numaGetCount(NUMA *na)
numaGetCount()
Definition: numabasic.c:658
l_ok numaGetIValue(NUMA *na, l_int32 index, l_int32 *pival)
numaGetIValue()
Definition: numabasic.c:754
NUMA * numaCopy(NUMA *na)
numaCopy()
Definition: numabasic.c:399
l_int32 * numaGetIArray(NUMA *na)
numaGetIArray()
Definition: numabasic.c:847
l_ok numaGetParameters(NUMA *na, l_float32 *pstartx, l_float32 *pdelx)
numaGetParameters()
Definition: numabasic.c:963
l_ok numaCopyParameters(NUMA *nad, NUMA *nas)
numaCopyParameters()
Definition: numabasic.c:1016
l_ok numaSetParameters(NUMA *na, l_float32 startx, l_float32 delx)
numaSetParameters()
Definition: numabasic.c:993
l_float32 * numaGetFArray(NUMA *na, l_int32 copyflag)
numaGetFArray()
Definition: numabasic.c:892
NUMA * numaSort(NUMA *naout, NUMA *nain, l_int32 sortorder)
numaSort()
Definition: numafunc1.c:2650
l_ok numaGetMode(NUMA *na, l_float32 *pval, l_int32 *pcount)
numaGetMode()
Definition: numafunc1.c:3560
NUMA * numaAddBorder(NUMA *nas, l_int32 left, l_int32 right, l_float32 val)
numaAddBorder()
Definition: numafunc1.c:902
l_ok numaHasOnlyIntegers(NUMA *na, l_int32 *pallints)
numaHasOnlyIntegers()
Definition: numafunc1.c:656
l_ok numaGetMedian(NUMA *na, l_float32 *pval)
numaGetMedian()
Definition: numafunc1.c:3405
l_ok numaGetSumOnInterval(NUMA *na, l_int32 first, l_int32 last, l_float32 *psum)
numaGetSumOnInterval()
Definition: numafunc1.c:613
NUMA * numaRemoveBorder(NUMA *nas, l_int32 left, l_int32 right)
numaRemoveBorder()
Definition: numafunc1.c:996
l_ok numaInterpolateEqxInterval(l_float32 startx, l_float32 deltax, NUMA *nasy, l_int32 type, l_float32 x0, l_float32 x1, l_int32 npts, NUMA **pnax, NUMA **pnay)
numaInterpolateEqxInterval()
Definition: numafunc1.c:1912
l_ok numaGetMin(NUMA *na, l_float32 *pminval, l_int32 *piminloc)
numaGetMin()
Definition: numafunc1.c:453
NUMA * numaMakeConstant(l_float32 val, l_int32 size)
numaMakeConstant()
Definition: numafunc1.c:851
NUMA * numaClipToInterval(NUMA *nas, l_int32 first, l_int32 last)
numaClipToInterval()
Definition: numafunc1.c:1182
l_ok numaGetMax(NUMA *na, l_float32 *pmaxval, l_int32 *pimaxloc)
numaGetMax()
Definition: numafunc1.c:496
NUMA * numaGetPartialSums(NUMA *na)
numaGetPartialSums()
Definition: numafunc1.c:579
l_int32 numaChooseSortType(NUMA *nas)
numaChooseSortType()
Definition: numafunc1.c:2602
NUMA * numaAddSpecifiedBorder(NUMA *nas, l_int32 left, l_int32 right, l_int32 type)
numaAddSpecifiedBorder()
Definition: numafunc1.c:945
l_ok numaGetSum(NUMA *na, l_float32 *psum)
numaGetSum()
Definition: numafunc1.c:538
NUMA * numaWindowedMeanSquare(NUMA *nas, l_int32 wc)
numaWindowedMeanSquare()
Definition: numafunc2.c:648
l_ok numaGetRankBinValues(NUMA *na, l_int32 nbins, NUMA **pnam)
numaGetRankBinValues()
Definition: numafunc2.c:1874
l_ok numaCountReversals(NUMA *nas, l_float32 minreversal, l_int32 *pnr, l_float32 *prd)
numaCountReversals()
Definition: numafunc2.c:2752
NUMA * numaMakeHistogram(NUMA *na, l_int32 maxbins, l_int32 *pbinsize, l_int32 *pbinstart)
numaMakeHistogram()
Definition: numafunc2.c:885
NUMA * numaFindExtrema(NUMA *nas, l_float32 delta, NUMA **pnav)
numaFindExtrema()
Definition: numafunc2.c:2550
NUMA * numaDilate(NUMA *nas, l_int32 size)
numaDilate()
Definition: numafunc2.c:252
NUMA * genConstrainedNumaInRange(l_int32 first, l_int32 last, l_int32 nmax, l_int32 use_pairs)
genConstrainedNumaInRange()
Definition: numafunc2.c:3278
l_ok numaEvalHaarSum(NUMA *nas, l_float32 width, l_float32 shift, l_float32 relweight, l_float32 *pscore)
numaEvalHaarSum()
Definition: numafunc2.c:3221
NUMA * numaConvertToInt(NUMA *nas)
numaConvertToInt()
Definition: numafunc2.c:833
l_ok numaEarthMoverDistance(NUMA *na1, NUMA *na2, l_float32 *pdist)
numaEarthMoverDistance()
Definition: numafunc2.c:2251
l_ok numaSelectCrossingThreshold(NUMA *nax, NUMA *nay, l_float32 estthresh, l_float32 *pbestthresh)
numaSelectCrossingThreshold()
Definition: numafunc2.c:2848
l_ok numaWindowedStats(NUMA *nas, l_int32 wc, NUMA **pnam, NUMA **pnams, NUMA **pnav, NUMA **pnarv)
numaWindowedStats()
Definition: numafunc2.c:537
NUMA * numaFindPeaks(NUMA *nas, l_int32 nmax, l_float32 fract1, l_float32 fract2)
numaFindPeaks()
Definition: numafunc2.c:2432
NUMA * numaWindowedMean(NUMA *nas, l_int32 wc)
numaWindowedMean()
Definition: numafunc2.c:588
l_ok numaWindowedVariance(NUMA *nam, NUMA *nams, NUMA **pnav, NUMA **pnarv)
numaWindowedVariance()
Definition: numafunc2.c:716
l_ok numaEvalBestHaarParameters(NUMA *nas, l_float32 relweight, l_int32 nwidth, l_int32 nshift, l_float32 minwidth, l_float32 maxwidth, l_float32 *pbestwidth, l_float32 *pbestshift, l_float32 *pbestscore)
numaEvalBestHaarParameters()
Definition: numafunc2.c:3136
l_ok numaDiscretizeHistoInBins(NUMA *na, l_int32 nbins, NUMA **pnabinval, NUMA **pnarank)
numaDiscretizeHistoInBins()
Definition: numafunc2.c:1783
l_ok numaGetHistogramStatsOnInterval(NUMA *nahisto, l_float32 startx, l_float32 deltax, l_int32 ifirst, l_int32 ilast, l_float32 *pxmean, l_float32 *pxmedian, l_float32 *pxmode, l_float32 *pxvariance)
numaGetHistogramStatsOnInterval()
Definition: numafunc2.c:1401
NUMA * numaMakeHistogramClipped(NUMA *na, l_float32 binsize, l_float32 maxsize)
numaMakeHistogramClipped()
Definition: numafunc2.c:1082
NUMA * numaNormalizeHistogram(NUMA *nas, l_float32 tsum)
numaNormalizeHistogram()
Definition: numafunc2.c:1179
NUMA * numaErode(NUMA *nas, l_int32 size)
numaErode()
Definition: numafunc2.c:183
l_ok numaGetHistogramStats(NUMA *nahisto, l_float32 startx, l_float32 deltax, l_float32 *pxmean, l_float32 *pxmedian, l_float32 *pxmode, l_float32 *pxvariance)
numaGetHistogramStats()
Definition: numafunc2.c:1352
l_ok grayHistogramsToEMD(NUMAA *naa1, NUMAA *naa2, NUMA **pnad)
grayHistogramsToEMD()
Definition: numafunc2.c:2185
NUMA * numaTransform(NUMA *nas, l_float32 shift, l_float32 scale)
numaTransform()
Definition: numafunc2.c:415
NUMA * numaWindowedMedian(NUMA *nas, l_int32 halfwin)
numaWindowedMedian()
Definition: numafunc2.c:784
l_ok numaMakeRankFromHistogram(l_float32 startx, l_float32 deltax, NUMA *nasy, l_int32 npts, NUMA **pnax, NUMA **pnay)
numaMakeRankFromHistogram()
Definition: numafunc2.c:1495
l_ok numaSplitDistribution(NUMA *na, l_float32 scorefract, l_int32 *psplitindex, l_float32 *pave1, l_float32 *pave2, l_float32 *pnum1, l_float32 *pnum2, NUMA **pnascore)
numaSplitDistribution()
Definition: numafunc2.c:2023
l_ok numaHistogramGetRankFromVal(NUMA *na, l_float32 rval, l_float32 *prank)
numaHistogramGetRankFromVal()
Definition: numafunc2.c:1563
l_ok numaHistogramGetValFromRank(NUMA *na, l_float32 rank, l_float32 *prval)
numaHistogramGetValFromRank()
Definition: numafunc2.c:1634
l_ok numaFindLocForThreshold(NUMA *na, l_int32 skip, l_int32 *pthresh, l_float32 *pfract)
numaFindLocForThreshold()
Definition: numafunc2.c:2657
l_ok grayInterHistogramStats(NUMAA *naa, l_int32 wc, NUMA **pnam, NUMA **pnams, NUMA **pnav, NUMA **pnarv)
grayInterHistogramStats()
Definition: numafunc2.c:2342
l_ok numaSimpleStats(NUMA *na, l_int32 first, l_int32 last, l_float32 *pmean, l_float32 *pvar, l_float32 *prvar)
numaSimpleStats()
Definition: numafunc2.c:452
NUMA * numaGetUniformBinSizes(l_int32 ntotal, l_int32 nbins)
numaGetUniformBinSizes()
Definition: numafunc2.c:1945
NUMA * numaCrossingsByThreshold(NUMA *nax, NUMA *nay, l_float32 thresh)
numaCrossingsByThreshold()
Definition: numafunc2.c:2959
NUMA * numaRebinHistogram(NUMA *nas, l_int32 newsize)
numaRebinHistogram()
Definition: numafunc2.c:1131
NUMA * numaMakeHistogramAuto(NUMA *na, l_int32 maxbins)
numaMakeHistogramAuto()
Definition: numafunc2.c:998
NUMA * numaClose(NUMA *nas, l_int32 size)
numaClose()
Definition: numafunc2.c:368
l_ok numaDiscretizeSortedInBins(NUMA *na, l_int32 nbins, NUMA **pnabinval)
numaDiscretizeSortedInBins()
Definition: numafunc2.c:1706
NUMA * numaOpen(NUMA *nas, l_int32 size)
numaOpen()
Definition: numafunc2.c:321
l_ok numaGetStatsUsingHistogram(NUMA *na, l_int32 maxbins, l_float32 *pmin, l_float32 *pmax, l_float32 *pmean, l_float32 *pvariance, l_float32 *pmedian, l_float32 rank, l_float32 *prval, NUMA **phisto)
numaGetStatsUsingHistogram()
Definition: numafunc2.c:1261
NUMA * numaCrossingsByPeaks(NUMA *nax, NUMA *nay, l_float32 delta)
numaCrossingsByPeaks()
Definition: numafunc2.c:3025
@ L_COPY
Definition: pix.h:712
@ L_CLONE
Definition: pix.h:713
@ L_NOCOPY
Definition: pix.h:710
@ L_SHELL_SORT
Definition: pix.h:723
@ L_SORT_INCREASING
Definition: pix.h:729
Definition: array.h:71
Definition: array.h:83
void lept_stderr(const char *fmt,...)
lept_stderr()
Definition: utils1.c:306