![]() |
Leptonica
1.82.0
Image processing and image analysis suite
|
#include <math.h>
#include "allheaders.h"
Go to the source code of this file.
Macros | |
#define | DEBUG_HISTO 0 |
#define | DEBUG_CROSSINGS 0 |
#define | DEBUG_FREQUENCY 0 |
Functions | |
NUMA * | numaErode (NUMA *nas, l_int32 size) |
NUMA * | numaDilate (NUMA *nas, l_int32 size) |
NUMA * | numaOpen (NUMA *nas, l_int32 size) |
NUMA * | numaClose (NUMA *nas, l_int32 size) |
NUMA * | numaTransform (NUMA *nas, l_float32 shift, l_float32 scale) |
l_ok | numaSimpleStats (NUMA *na, l_int32 first, l_int32 last, l_float32 *pmean, l_float32 *pvar, l_float32 *prvar) |
l_ok | numaWindowedStats (NUMA *nas, l_int32 wc, NUMA **pnam, NUMA **pnams, NUMA **pnav, NUMA **pnarv) |
NUMA * | numaWindowedMean (NUMA *nas, l_int32 wc) |
NUMA * | numaWindowedMeanSquare (NUMA *nas, l_int32 wc) |
l_ok | numaWindowedVariance (NUMA *nam, NUMA *nams, NUMA **pnav, NUMA **pnarv) |
NUMA * | numaWindowedMedian (NUMA *nas, l_int32 halfwin) |
NUMA * | numaConvertToInt (NUMA *nas) |
NUMA * | numaMakeHistogram (NUMA *na, l_int32 maxbins, l_int32 *pbinsize, l_int32 *pbinstart) |
NUMA * | numaMakeHistogramAuto (NUMA *na, l_int32 maxbins) |
NUMA * | numaMakeHistogramClipped (NUMA *na, l_float32 binsize, l_float32 maxsize) |
NUMA * | numaRebinHistogram (NUMA *nas, l_int32 newsize) |
NUMA * | numaNormalizeHistogram (NUMA *nas, l_float32 tsum) |
l_ok | numaGetStatsUsingHistogram (NUMA *na, l_int32 maxbins, l_float32 *pmin, l_float32 *pmax, l_float32 *pmean, l_float32 *pvariance, l_float32 *pmedian, l_float32 rank, l_float32 *prval, NUMA **phisto) |
l_ok | numaGetHistogramStats (NUMA *nahisto, l_float32 startx, l_float32 deltax, l_float32 *pxmean, l_float32 *pxmedian, l_float32 *pxmode, l_float32 *pxvariance) |
l_ok | numaGetHistogramStatsOnInterval (NUMA *nahisto, l_float32 startx, l_float32 deltax, l_int32 ifirst, l_int32 ilast, l_float32 *pxmean, l_float32 *pxmedian, l_float32 *pxmode, l_float32 *pxvariance) |
l_ok | numaMakeRankFromHistogram (l_float32 startx, l_float32 deltax, NUMA *nasy, l_int32 npts, NUMA **pnax, NUMA **pnay) |
l_ok | numaHistogramGetRankFromVal (NUMA *na, l_float32 rval, l_float32 *prank) |
l_ok | numaHistogramGetValFromRank (NUMA *na, l_float32 rank, l_float32 *prval) |
l_ok | numaDiscretizeSortedInBins (NUMA *na, l_int32 nbins, NUMA **pnabinval) |
l_ok | numaDiscretizeHistoInBins (NUMA *na, l_int32 nbins, NUMA **pnabinval, NUMA **pnarank) |
l_ok | numaGetRankBinValues (NUMA *na, l_int32 nbins, NUMA **pnam) |
NUMA * | numaGetUniformBinSizes (l_int32 ntotal, l_int32 nbins) |
l_ok | numaSplitDistribution (NUMA *na, l_float32 scorefract, l_int32 *psplitindex, l_float32 *pave1, l_float32 *pave2, l_float32 *pnum1, l_float32 *pnum2, NUMA **pnascore) |
l_ok | grayHistogramsToEMD (NUMAA *naa1, NUMAA *naa2, NUMA **pnad) |
l_ok | numaEarthMoverDistance (NUMA *na1, NUMA *na2, l_float32 *pdist) |
l_ok | grayInterHistogramStats (NUMAA *naa, l_int32 wc, NUMA **pnam, NUMA **pnams, NUMA **pnav, NUMA **pnarv) |
NUMA * | numaFindPeaks (NUMA *nas, l_int32 nmax, l_float32 fract1, l_float32 fract2) |
NUMA * | numaFindExtrema (NUMA *nas, l_float32 delta, NUMA **pnav) |
l_ok | numaFindLocForThreshold (NUMA *na, l_int32 skip, l_int32 *pthresh, l_float32 *pfract) |
l_ok | numaCountReversals (NUMA *nas, l_float32 minreversal, l_int32 *pnr, l_float32 *prd) |
l_ok | numaSelectCrossingThreshold (NUMA *nax, NUMA *nay, l_float32 estthresh, l_float32 *pbestthresh) |
NUMA * | numaCrossingsByThreshold (NUMA *nax, NUMA *nay, l_float32 thresh) |
NUMA * | numaCrossingsByPeaks (NUMA *nax, NUMA *nay, l_float32 delta) |
l_ok | numaEvalBestHaarParameters (NUMA *nas, l_float32 relweight, l_int32 nwidth, l_int32 nshift, l_float32 minwidth, l_float32 maxwidth, l_float32 *pbestwidth, l_float32 *pbestshift, l_float32 *pbestscore) |
l_ok | numaEvalHaarSum (NUMA *nas, l_float32 width, l_float32 shift, l_float32 relweight, l_float32 *pscore) |
NUMA * | genConstrainedNumaInRange (l_int32 first, l_int32 last, l_int32 nmax, l_int32 use_pairs) |
Variables | |
static const l_int32 | BinSizeArray [] |
static const l_int32 | NBinSizes = 24 |
-------------------------------------- This file has these Numa utilities:
Definition in file numafunc2.c.
NUMA* genConstrainedNumaInRange | ( | l_int32 | first, |
l_int32 | last, | ||
l_int32 | nmax, | ||
l_int32 | use_pairs | ||
) |
[in] | first | first number to choose; >= 0 |
[in] | last | biggest possible number to reach; >= first |
[in] | nmax | maximum number of numbers to select; > 0 |
[in] | use_pairs | 1 = select pairs of adjacent numbers; 0 = select individual numbers |
Notes: (1) Selection is made uniformly in the range. This can be used to select pages distributed as uniformly as possible through a book, where you are constrained to: ~ choose between [first, ... biggest], ~ choose no more than nmax numbers, and you have the option of requiring pairs of adjacent numbers.
Definition at line 3278 of file numafunc2.c.
References numaAddNumber(), and numaCreate().
Referenced by pixaConstrainedSelect().
[in] | naa1,naa2 | two numaa, each with one or more 256-element histograms |
[out] | pnad | nad of EM distances for each histogram |
Notes: (1) The two numaas must be the same size and have corresponding 256-element histograms. Pairs do not need to be normalized to the same sum. (2) This is typically used on two sets of histograms from corresponding tiles of two images. The similarity of two images can be found with the scoring function used in pixCompareGrayByHisto(): score S = 1.0 - k * D, where k is a constant, say in the range 5-10 D = EMD for each tile; for multiple tiles, take the Min(S) over the set of tiles to be the final score.
Definition at line 2185 of file numafunc2.c.
References L_CLONE, numaAddNumber(), numaaGetCount(), numaaGetNuma(), numaaGetNumberCount(), numaCreate(), numaDestroy(), and numaEarthMoverDistance().
l_ok grayInterHistogramStats | ( | NUMAA * | naa, |
l_int32 | wc, | ||
NUMA ** | pnam, | ||
NUMA ** | pnams, | ||
NUMA ** | pnav, | ||
NUMA ** | pnarv | ||
) |
[in] | naa | numaa with two or more 256-element histograms |
[in] | wc | half-width of the smoothing window |
[out] | pnam | [optional] mean values |
[out] | pnams | [optional] mean square values |
[out] | pnav | [optional] variances |
[out] | pnarv | [optional] rms deviations from the mean |
Notes: (1) The naa has two or more 256-element numa histograms, which are to be compared value-wise at each of the 256 gray levels. The results are stats (mean, mean square, variance, root variance) aggregated across the set of histograms, and each is output as a 256 entry numa. Think of these histograms as a matrix, where each histogram is one row of the array. The stats are then aggregated column-wise, between the histograms. (2) These stats are: ~ average value: <v> (nam) ~ average squared value: <v*v> (nams) ~ variance: <(v - <v>)*(v - <v>)> = <v*v> - <v>*<v> (nav) ~ square-root of variance: (narv) where the brackets < .. > indicate that the average value is to be taken over each column of the array. (3) The input histograms are optionally smoothed before these statistical operations. (4) The input histograms are normalized to a sum of 10000. By doing this, the resulting numbers are independent of the number of samples used in building the individual histograms. (5) A typical application is on a set of histograms from tiles of an image, to distinguish between text/tables and photo regions. If the tiles are much larger than the text line spacing, text/table regions typically have smaller variance across tiles than photo regions. For this application, it may be useful to ignore values near white, which are large for text and would magnify the variance due to variations in illumination. However, because the variance of a drawing or a light photo can be similar to that of grayscale text, this function is only a discriminator between darker photos/drawings and light photos/text/line-graphics.
Definition at line 2342 of file numafunc2.c.
References L_CLONE, L_COPY, numaAddNumber(), numaaGetCount(), numaaGetNuma(), numaaGetNumaCount(), numaCreate(), numaDestroy(), numaGetFArray(), numaNormalizeHistogram(), numaSimpleStats(), and numaWindowedMean().
[in] | nas | |
[in] | size | of sel; greater than 0, odd. The origin is implicitly in the center. |
Notes: (1) The structuring element (sel) is linear, all "hits" (2) If size == 1, this returns a copy (3) We add a border before doing this operation, for the same reason that we add a border to a pix before doing a safe closing. Without the border, a small component near the border gets clipped at the border on dilation, and can be entirely removed by the following erosion, violating the basic extensivity property of closing.
Definition at line 368 of file numafunc2.c.
References numaAddBorder(), numaCopy(), numaDestroy(), numaDilate(), numaErode(), and numaRemoveBorder().
[in] | nas | source numa |
Definition at line 833 of file numafunc2.c.
References numaAddNumber(), numaCopyParameters(), numaCreate(), numaGetCount(), and numaGetIValue().
l_ok numaCountReversals | ( | NUMA * | nas, |
l_float32 | minreversal, | ||
l_int32 * | pnr, | ||
l_float32 * | prd | ||
) |
[in] | nas | input values |
[in] | minreversal | relative amount to resolve peaks and valleys |
[out] | pnr | [optional] number of reversals |
[out] | prd | [optional] reversal density: reversals/length |
Notes: (1) The input numa can be generated from pixExtractAlongLine(). If so, the x parameters can be used to find the reversal frequency along a line. (2) If the input numa was generated from a 1 bpp pix, the values will be 0 and 1. Use minreversal == 1 to get the number of pixel flips. If the only values are 0 and 1, but minreversal > 1, set the reversal count to 0 and issue a warning.
Definition at line 2752 of file numafunc2.c.
References numaDestroy(), numaFindExtrema(), numaGetCount(), numaGetFValue(), numaGetIArray(), and numaGetParameters().
[in] | nax | [optional] numa of abscissa values |
[in] | nay | numa of ordinate values, corresponding to nax |
[in] | delta | parameter used to identify when a new peak can be found |
Notes: (1) If nax == NULL, we use startx and delx from nay to compute the crossing values in nad.
Definition at line 3025 of file numafunc2.c.
References numaAddNumber(), numaCreate(), numaDestroy(), numaFindExtrema(), numaGetCount(), numaGetFValue(), numaGetIValue(), and numaGetParameters().
[in] | nax | [optional] numa of abscissa values; can be NULL |
[in] | nay | numa of ordinate values, corresponding to nax |
[in] | thresh | threshold value for nay |
Notes: (1) If nax == NULL, we use startx and delx from nay to compute the crossing values in nad.
Definition at line 2959 of file numafunc2.c.
References numaAddNumber(), numaCreate(), numaGetCount(), numaGetFValue(), and numaGetParameters().
Referenced by numaSelectCrossingThreshold().
[in] | nas | |
[in] | size | of sel; greater than 0, odd. The origin is implicitly in the center. |
Notes: (1) The structuring element (sel) is linear, all "hits" (2) If size == 1, this returns a copy
Definition at line 252 of file numafunc2.c.
References L_NOCOPY, numaCopy(), numaCopyParameters(), numaGetCount(), numaGetFArray(), and numaMakeConstant().
Referenced by numaClose(), and numaOpen().
[in] | na | histogram |
[in] | nbins | number of equal population bins (> 1) |
[out] | pnabinval | average "gray" values in each bin |
[out] | pnarank | [optional] rank value of input histogram; this is a cumulative norm histogram. |
Notes: (1) With nbins == 100, nabinval is the average gray value in each of the 100 equally populated bins. It is the function gray[100 * rank]. Thus it is the inverse of rank[gray] which is optionally returned in narank. (2) The "gray value" is the index into the input histogram. (3) The two output arrays give the following mappings, where the input is an un-normalized histogram of array values: bin number --> average array value in bin (nabinval) array values --> cumulative normalized histogram (narank)
Definition at line 1783 of file numafunc2.c.
References numaAddNumber(), numaCreate(), numaDestroy(), numaGetCount(), numaGetIValue(), numaGetPartialSums(), numaGetSum(), numaGetUniformBinSizes(), and numaNormalizeHistogram().
Referenced by numaGetRankBinValues().
[in] | na | sorted |
[in] | nbins | number of equal population bins (> 1) |
[out] | pnabinval | average "gray" values in each bin |
Notes: (1) The input na is sorted in increasing value. (2) The output array has the following mapping: bin number --> average array value in bin (nabinval) (3) With nbins == 100, nabinval is the average gray value in each of the 100 equally populated bins. It is the function gray[100 * rank]. Thus it is the inverse of rank[gray] (4) Contrast with numaDiscretizeHistoInBins(), where the input na is a histogram.
Definition at line 1706 of file numafunc2.c.
References numaAddNumber(), numaCreate(), numaDestroy(), numaGetCount(), numaGetFValue(), numaGetIValue(), and numaGetUniformBinSizes().
Referenced by numaGetRankBinValues().
[in] | na1,na2 | two numas of the same size, typically histograms |
[out] | pdist | earthmover distance |
Notes: (1) The two numas must have the same size. They do not need to be normalized to the same sum before applying the function. (2) For a 1D discrete function, the implementation of the EMD is trivial. Just keep filling or emptying buckets in one numa to match the amount in the other, moving sequentially along both arrays. (3) We divide the sum of the absolute value of everything moved (by 1 unit at a time) by the sum of the numa (amount of "earth") to get the average distance that the "earth" was moved. This is the value returned here. (4) The caller can do a further normalization, by the number of buckets (minus 1), to get the EM distance as a fraction of the maximum possible distance, which is n-1. This fraction is 1.0 for the situation where all the 'earth' in the first array is at one end, and all in the second array is at the other end.
Definition at line 2251 of file numafunc2.c.
References L_NOCOPY, numaCopy(), numaDestroy(), numaGetCount(), numaGetFArray(), numaGetSum(), and numaTransform().
Referenced by compareTilesByHisto(), and grayHistogramsToEMD().
[in] | nas | |
[in] | size | of sel; greater than 0, odd. The origin is implicitly in the center. |
Notes: (1) The structuring element (sel) is linear, all "hits" (2) If size == 1, this returns a copy (3) General comment. The morphological operations are equivalent to those that would be performed on a 1-dimensional fpix. However, because we have not implemented morphological operations on fpix, we do this here. Because it is only 1 dimensional, there is no reason to use the more complicated van Herk/Gil-Werman algorithm, and we do it by brute force.
Definition at line 183 of file numafunc2.c.
References L_NOCOPY, numaCopy(), numaCopyParameters(), numaGetCount(), numaGetFArray(), and numaMakeConstant().
Referenced by numaClose(), and numaOpen().
l_ok numaEvalBestHaarParameters | ( | NUMA * | nas, |
l_float32 | relweight, | ||
l_int32 | nwidth, | ||
l_int32 | nshift, | ||
l_float32 | minwidth, | ||
l_float32 | maxwidth, | ||
l_float32 * | pbestwidth, | ||
l_float32 * | pbestshift, | ||
l_float32 * | pbestscore | ||
) |
[in] | nas | numa of non-negative signal values |
[in] | relweight | relative weight of (-1 comb) / (+1 comb) contributions to the 'convolution'. In effect, the convolution kernel is a comb consisting of alternating +1 and -weight. |
[in] | nwidth | number of widths to consider |
[in] | nshift | number of shifts to consider for each width |
[in] | minwidth | smallest width to consider |
[in] | maxwidth | largest width to consider |
[out] | pbestwidth | width giving largest score |
[out] | pbestshift | shift giving largest score |
[out] | pbestscore | [optional] convolution with "Haar"-like comb |
Notes: (1) This does a linear sweep of widths, evaluating at nshift shifts for each width, computing the score from a convolution with a long comb, and finding the (width, shift) pair that gives the maximum score. The best width is the "half-wavelength" of the signal. (2) The convolving function is a comb of alternating values +1 and -1 * relweight, separated by the width and phased by the shift. This is similar to a Haar transform, except there the convolution is performed with a square wave. (3) The function is useful for finding the line spacing and strength of line signal from pixel sum projections. (4) The score is normalized to the size of nas divided by the number of half-widths. For image applications, the input is typically an array of pixel projections, so one should normalize by dividing the score by the image width in the pixel projection direction.
Definition at line 3136 of file numafunc2.c.
References lept_stderr(), and numaEvalHaarSum().
l_ok numaEvalHaarSum | ( | NUMA * | nas, |
l_float32 | width, | ||
l_float32 | shift, | ||
l_float32 | relweight, | ||
l_float32 * | pscore | ||
) |
[in] | nas | numa of non-negative signal values |
[in] | width | distance between +1 and -1 in convolution comb |
[in] | shift | phase of the comb: location of first +1 |
[in] | relweight | relative weight of (-1 comb) / (+1 comb) contributions to the 'convolution'. In effect, the convolution kernel is a comb consisting of alternating +1 and -weight. |
[out] | pscore | convolution with "Haar"-like comb |
Notes: (1) This does a convolution with a comb of alternating values +1 and -relweight, separated by the width and phased by the shift. This is similar to a Haar transform, except that for Haar, (1) the convolution kernel is symmetric about 0, so the relweight is 1.0, and (2) the convolution is performed with a square wave. (2) The score is normalized to the size of nas divided by twice the "width". For image applications, the input is typically an array of pixel projections, so one should normalize by dividing the score by the image width in the pixel projection direction. (3) To get a Haar-like result, use relweight = 1.0. For detecting signals where you expect every other sample to be close to zero, as with barcodes or filtered text lines, you can use relweight > 1.0.
Definition at line 3221 of file numafunc2.c.
References numaGetCount(), and numaGetFValue().
Referenced by numaEvalBestHaarParameters().
[in] | nas | input values |
[in] | delta | relative amount to resolve peaks and valleys |
[out] | pnav | [optional] values of extrema |
Notes: (1) This returns a sequence of extrema (peaks and valleys). (2) The algorithm is analogous to that for determining mountain peaks. Suppose we have a local peak, with bumps on the side. Under what conditions can we consider those 'bumps' to be actual peaks? The answer: if the bump is separated from the peak by a saddle that is at least 500 feet below the bump. (3) Operationally, suppose we are trying to identify a peak. We have a previous valley, and also the largest value that we have seen since that valley. We can identify this as a peak if we find a value that is delta BELOW it. When we find such a value, label the peak, use the current value to label the starting point for the search for a valley, and do the same operation in reverse. Namely, keep track of the lowest point seen, and look for a value that is delta ABOVE it. Once found, the lowest point is labeled the valley, and continue, looking for the next peak.
Definition at line 2550 of file numafunc2.c.
References numaAddNumber(), numaCreate(), numaGetCount(), and numaGetFValue().
Referenced by numaCountReversals(), and numaCrossingsByPeaks().
l_ok numaFindLocForThreshold | ( | NUMA * | na, |
l_int32 | skip, | ||
l_int32 * | pthresh, | ||
l_float32 * | pfract | ||
) |
[in] | na | input histogram |
[in] | skip | distance to skip to check for false min; 0 for default |
[out] | pthresh | threshold value |
[out] | pfract | [optional] fraction below or at threshold |
Notes: (1) This finds a good place to set a threshold for a histogram of values that has two peaks. The peaks can differ greatly in area underneath them. The number of buckets in the histogram is expected to be 256 (e.g., from an 8 bpp gray image). (2) The input histogram should have been smoothed with a window to avoid false peak and valley detection due to noise. For example, see pixThresholdByHisto(). (3) A skip value can be input to determine the look-ahead distance to ignore a false peak on the descent from the first peak. Input 0 to use the default value (it assumes a histo size of 256). (4) Optionally, the fractional area under the first peak can be returned.
Definition at line 2657 of file numafunc2.c.
References L_NOCOPY, numaGetCount(), numaGetFArray(), numaGetSum(), and numaGetSumOnInterval().
[in] | nas | source numa |
[in] | nmax | max number of peaks to be found |
[in] | fract1 | min fraction of peak value |
[in] | fract2 | min slope |
Notes: (1) The returned na consists of sets of four numbers representing the peak, in the following order: left edge; peak center; right edge; normalized peak area
Definition at line 2432 of file numafunc2.c.
References numaAddNumber(), numaCopy(), numaCreate(), numaDestroy(), numaGetCount(), numaGetFValue(), numaGetMax(), numaGetSum(), and numaSetValue().
l_ok numaGetHistogramStats | ( | NUMA * | nahisto, |
l_float32 | startx, | ||
l_float32 | deltax, | ||
l_float32 * | pxmean, | ||
l_float32 * | pxmedian, | ||
l_float32 * | pxmode, | ||
l_float32 * | pxvariance | ||
) |
[in] | nahisto | histogram: y(x(i)), i = 0 ... nbins - 1 |
[in] | startx | x value of first bin: x(0) |
[in] | deltax | x increment between bins; the bin size; x(1) - x(0) |
[out] | pxmean | [optional] mean value of histogram |
[out] | pxmedian | [optional] median value of histogram |
[out] | pxmode | [optional] mode value of histogram: xmode = x(imode), where y(xmode) >= y(x(i)) for all i != imode |
[out] | pxvariance | [optional] variance of x |
Notes: (1) If the histogram represents the relation y(x), the computed values that are returned are the x values. These are NOT the bucket indices i; they are related to the bucket indices by x(i) = startx + i * deltax
Definition at line 1352 of file numafunc2.c.
References numaGetHistogramStatsOnInterval().
Referenced by numaSplitDistribution().
l_ok numaGetHistogramStatsOnInterval | ( | NUMA * | nahisto, |
l_float32 | startx, | ||
l_float32 | deltax, | ||
l_int32 | ifirst, | ||
l_int32 | ilast, | ||
l_float32 * | pxmean, | ||
l_float32 * | pxmedian, | ||
l_float32 * | pxmode, | ||
l_float32 * | pxvariance | ||
) |
numaGetHistogramStatsOnInterval()
[in] | nahisto | histogram: y(x(i)), i = 0 ... nbins - 1 |
[in] | startx | x value of first bin: x(0) |
[in] | deltax | x increment between bins; the bin size; x(1) - x(0) |
[in] | ifirst | first bin to use for collecting stats |
[in] | ilast | last bin for collecting stats; -1 to go to the end |
[out] | pxmean | [optional] mean value of histogram |
[out] | pxmedian | [optional] median value of histogram |
[out] | pxmode | [optional] mode value of histogram: xmode = x(imode), where y(xmode) >= y(x(i)) for all i != imode |
[out] | pxvariance | [optional] variance of x |
Notes: (1) If the histogram represents the relation y(x), the computed values that are returned are the x values. These are NOT the bucket indices i; they are related to the bucket indices by x(i) = startx + i * deltax
Definition at line 1401 of file numafunc2.c.
References numaGetCount(), and numaGetFValue().
Referenced by numaGetHistogramStats().
[in] | na | an array of values |
[in] | nbins | number of bins at which the rank is divided |
[out] | pnam | mean intensity in a bin vs rank bin value, with nbins of discretized rank values |
Notes: (1) Simple interface for getting a binned rank representation of an input array of values. This returns: rank bin number --> average array value in each rank bin (nam) (2) Bins either a sorted array or a histogram, depending on the values in the array and the size of the array.
Definition at line 1874 of file numafunc2.c.
References L_SHELL_SORT, L_SORT_INCREASING, numaChooseSortType(), numaDestroy(), numaDiscretizeHistoInBins(), numaDiscretizeSortedInBins(), numaGetCount(), numaGetMax(), numaGetParameters(), numaMakeHistogram(), and numaSort().
l_ok numaGetStatsUsingHistogram | ( | NUMA * | na, |
l_int32 | maxbins, | ||
l_float32 * | pmin, | ||
l_float32 * | pmax, | ||
l_float32 * | pmean, | ||
l_float32 * | pvariance, | ||
l_float32 * | pmedian, | ||
l_float32 | rank, | ||
l_float32 * | prval, | ||
NUMA ** | phisto | ||
) |
[in] | na | an arbitrary set of numbers; not ordered and not a histogram |
[in] | maxbins | the maximum number of bins to be allowed in the histogram; use an integer larger than the largest number in na for consecutive integer bins |
[out] | pmin | [optional] min value of set |
[out] | pmax | [optional] max value of set |
[out] | pmean | [optional] mean value of set |
[out] | pvariance | [optional] variance |
[out] | pmedian | [optional] median value of set |
[in] | rank | in [0.0 ... 1.0]; median has a rank 0.5; ignored if &rval == NULL |
[out] | prval | [optional] value in na corresponding to rank |
[out] | phisto | [optional] Numa histogram; use NULL to prevent |
Notes: (1) This is a simple interface for gathering statistics from a numa, where a histogram is used 'under the covers' to avoid sorting if a rank value is requested. In that case, by using a histogram we are trading speed for accuracy, because the values in na are quantized to the center of a set of bins. (2) If the median, other rank value, or histogram are not requested, the calculation is all performed on the input Numa. (3) The variance is the average of the square of the difference from the mean. The median is the value in na with rank 0.5. (4) There are two situations where this gives rank results with accuracy comparable to computing statistics directly on the input data, without binning into a histogram: (a) the data is integers and the range of data is less than maxbins, and (b) the data is floats and the range is small compared to maxbins, so that the binsize is much less than 1. (5) If a histogram is used and the numbers in the Numa extend over a large range, you can limit the required storage by specifying the maximum number of bins in the histogram. Use maxbins == 0 to force the bin size to be 1. (6) This optionally returns the median and one arbitrary rank value. If you need several rank values, return the histogram and use numaHistogramGetValFromRank(nah, rank, &rval) multiple times.
Definition at line 1261 of file numafunc2.c.
References numaDestroy(), numaGetCount(), numaGetFValue(), numaGetMax(), numaGetMin(), numaHistogramGetValFromRank(), and numaMakeHistogramAuto().
NUMA* numaGetUniformBinSizes | ( | l_int32 | ntotal, |
l_int32 | nbins | ||
) |
[in] | ntotal | number of values to be split up |
[in] | nbins | number of bins |
Notes: (1) The numbers in the bins can differ by 1. The sum of bin numbers in @naeach is @ntotal.
Definition at line 1945 of file numafunc2.c.
References numaAddNumber(), and numaCreate().
Referenced by fileSplitLinesUniform(), numaDiscretizeHistoInBins(), numaDiscretizeSortedInBins(), and sarrayConcatUniformly().
l_ok numaHistogramGetRankFromVal | ( | NUMA * | na, |
l_float32 | rval, | ||
l_float32 * | prank | ||
) |
[in] | na | histogram |
[in] | rval | value of input sample for which we want the rank |
[out] | prank | fraction of total samples below rval |
Notes: (1) If we think of the histogram as a function y(x), normalized to 1, for a given input value of x, this computes the rank of x, which is the integral of y(x) from the start value of x to the input value. (2) This function only makes sense when applied to a Numa that is a histogram. The values in the histogram can be ints and floats, and are computed as floats. The rank is returned as a float between 0.0 and 1.0. (3) The numa parameters startx and binsize are used to compute x from the Numa index i.
Definition at line 1563 of file numafunc2.c.
References numaGetCount(), numaGetFValue(), numaGetParameters(), and numaGetSum().
l_ok numaHistogramGetValFromRank | ( | NUMA * | na, |
l_float32 | rank, | ||
l_float32 * | prval | ||
) |
[in] | na | histogram |
[in] | rank | fraction of total samples |
[out] | prval | approx. to the bin value |
Notes: (1) If we think of the histogram as a function y(x), this returns the value x such that the integral of y(x) from the start value to x gives the fraction 'rank' of the integral of y(x) over all bins. (2) This function only makes sense when applied to a Numa that is a histogram. The values in the histogram can be ints and floats, and are computed as floats. The val is returned as a float, even though the buckets are of integer width. (3) The numa parameters startx and binsize are used to compute x from the Numa index i.
Definition at line 1634 of file numafunc2.c.
References numaGetCount(), numaGetFValue(), numaGetParameters(), and numaGetSum().
Referenced by numaGetCrossingDistances(), and numaGetStatsUsingHistogram().
[in] | na | |
[in] | maxbins | max number of histogram bins |
[out] | pbinsize | [optional] size of histogram bins |
[out] | pbinstart | [optional] start val of minimum bin; input NULL to force start at 0 |
Notes: (1) This simple interface is designed for integer data. The bins are of integer width and start on integer boundaries, so the results on float data will not have high precision. (2) Specify the max number of input bins. Then binsize, the size of bins necessary to accommodate the input data, is optionally returned. It is one of the sequence: {1, 2, 5, 10, 20, 50, ...}. (3) If &binstart is given, all values are accommodated, and the min value of the starting bin is returned. Otherwise, all negative values are discarded and the histogram bins start at 0.
Definition at line 885 of file numafunc2.c.
References numaGetMax(), and numaGetMin().
Referenced by numaGetCrossingDistances(), and numaGetRankBinValues().
[in] | na | numa of floats; these may be integers |
[in] | maxbins | max number of histogram bins; >= 1 |
Notes: (1) This simple interface is designed for accurate binning of both integer and float data. (2) If the array data is integers, and the range of integers is smaller than maxbins, they are binned as they fall, with binsize = 1. (3) If the range of data, (maxval - minval), is larger than maxbins, or if the data is floats, they are binned into exactly maxbins bins. (4) Unlike numaMakeHistogram(), these bins in general have non-integer location and width, even for integer data.
Definition at line 998 of file numafunc2.c.
References numaAddNumber(), numaCreate(), numaGetCount(), numaGetFValue(), numaGetIValue(), numaGetMax(), numaGetMin(), numaHasOnlyIntegers(), numaSetCount(), numaSetParameters(), and numaSetValue().
Referenced by numaGetStatsUsingHistogram().
[in] | na | |
[in] | binsize | typically 1.0 |
[in] | maxsize | of histogram ordinate |
Notes: (1) This simple function generates a histogram of values from na, discarding all values < 0.0 or greater than min(maxsize, maxval), where maxval is the maximum value in na. The histogram data is put in bins of size delx = binsize, starting at x = 0.0. We use as many bins as are needed to hold the data.
Definition at line 1082 of file numafunc2.c.
References numaCreate(), numaGetCount(), numaGetFValue(), numaGetIValue(), numaGetMax(), numaSetCount(), numaSetParameters(), and numaSetValue().
Referenced by numaQuantizeCrossingsByWidth().
l_ok numaMakeRankFromHistogram | ( | l_float32 | startx, |
l_float32 | deltax, | ||
NUMA * | nasy, | ||
l_int32 | npts, | ||
NUMA ** | pnax, | ||
NUMA ** | pnay | ||
) |
[in] | startx | xval corresponding to first element in nay |
[in] | deltax | x increment between array elements in nay |
[in] | nasy | input histogram, assumed equally spaced |
[in] | npts | number of points to evaluate rank function |
[out] | pnax | [optional] array of x values in range |
[out] | pnay | rank array of specified npts |
Definition at line 1495 of file numafunc2.c.
References L_LINEAR_INTERP, numaAddNumber(), numaCreate(), numaDestroy(), numaGetCount(), numaGetFValue(), numaInterpolateEqxInterval(), and numaNormalizeHistogram().
[in] | nas | input histogram |
[in] | tsum | target sum of all numbers in dest histogram; e.g., use tsum= 1.0 if this represents a probability distribution |
Definition at line 1179 of file numafunc2.c.
References numaAddNumber(), numaCopyParameters(), numaCreate(), numaGetCount(), numaGetFValue(), and numaGetSum().
Referenced by grayInterHistogramStats(), numaDiscretizeHistoInBins(), numaMakeRankFromHistogram(), pixCompareRankDifference(), and pixGetDifferenceStats().
[in] | nas | |
[in] | size | of sel; greater than 0, odd. The origin is implicitly in the center. |
Notes: (1) The structuring element (sel) is linear, all "hits". (2) If size == 1, this returns a copy.
Definition at line 321 of file numafunc2.c.
References numaCopy(), numaDestroy(), numaDilate(), and numaErode().
[in] | nas | input histogram |
[in] | newsize | number of old bins contained in each new bin |
Definition at line 1131 of file numafunc2.c.
References numaAddNumber(), numaCreate(), numaGetCount(), numaGetIValue(), numaGetParameters(), and numaSetParameters().
l_ok numaSelectCrossingThreshold | ( | NUMA * | nax, |
NUMA * | nay, | ||
l_float32 | estthresh, | ||
l_float32 * | pbestthresh | ||
) |
[in] | nax | [optional] numa of abscissa values; can be NULL |
[in] | nay | signal |
[in] | estthresh | estimated pixel threshold for crossing: e.g., for images, white <--> black; typ. ~120 |
[out] | pbestthresh | robust estimate of threshold to use |
Notes: (1) When a valid threshold is used, the number of crossings is a maximum, because none are missed. If no threshold intersects all the crossings, the crossings must be determined with numaCrossingsByPeaks(). (2) estthresh is an input estimate of the threshold that should be used. We compute the crossings with 41 thresholds (20 below and 20 above). There is a range in which the number of crossings is a maximum. Return a threshold in the center of this stable plateau of crossings. This can then be used with numaCrossingsByThreshold() to get a good estimate of crossing locations. (3) If the count of nay is less than 2, a warning is issued.
Definition at line 2848 of file numafunc2.c.
References lept_stderr(), numaAddNumber(), numaCreate(), numaCrossingsByThreshold(), numaDestroy(), numaGetCount(), numaGetIValue(), numaGetMax(), numaGetMode(), and numaWriteStderr().
l_ok numaSimpleStats | ( | NUMA * | na, |
l_int32 | first, | ||
l_int32 | last, | ||
l_float32 * | pmean, | ||
l_float32 * | pvar, | ||
l_float32 * | prvar | ||
) |
[in] | na | input numa |
[in] | first | first element to use |
[in] | last | last element to use; -1 to go to the end |
[out] | pmean | [optional] mean value |
[out] | pvar | [optional] variance |
[out] | prvar | [optional] rms deviation from the mean |
Definition at line 452 of file numafunc2.c.
References numaGetCount(), and numaGetFValue().
Referenced by boxaSizeVariation(), and grayInterHistogramStats().
l_ok numaSplitDistribution | ( | NUMA * | na, |
l_float32 | scorefract, | ||
l_int32 * | psplitindex, | ||
l_float32 * | pave1, | ||
l_float32 * | pave2, | ||
l_float32 * | pnum1, | ||
l_float32 * | pnum2, | ||
NUMA ** | pnascore | ||
) |
[in] | na | histogram |
[in] | scorefract | fraction of the max score, used to determine range over which the histogram min is searched |
[out] | psplitindex | [optional] index for splitting |
[out] | pave1 | [optional] average of lower distribution |
[out] | pave2 | [optional] average of upper distribution |
[out] | pnum1 | [optional] population of lower distribution |
[out] | pnum2 | [optional] population of upper distribution |
[out] | pnascore | [optional] for debugging; otherwise use NULL |
Notes: (1) This function is intended to be used on a distribution of values that represent two sets, such as a histogram of pixel values for an image with a fg and bg, and the goal is to determine the averages of the two sets and the best splitting point. (2) The Otsu method finds a split point that divides the distribution into two parts by maximizing a score function that is the product of two terms: (a) the square of the difference of centroids, (ave1 - ave2)^2 (b) fract1 * (1 - fract1) where fract1 is the fraction in the lower distribution. (3) This works well for images where the fg and bg are each relatively homogeneous and well-separated in color. However, if the actual fg and bg sets are very different in size, and the bg is highly varied, as can occur in some scanned document images, this will bias the split point into the larger "bump" (i.e., toward the point where the (b) term reaches its maximum of 0.25 at fract1 = 0.5). To avoid this, we define a range of values near the maximum of the score function, and choose the value within this range such that the histogram itself has a minimum value. The range is determined by scorefract: we include all abscissa values to the left and right of the value that maximizes the score, such that the score stays above (1 - scorefract) * maxscore. The intuition behind this modification is to try to find a split point that both has a high variance score and is at or near a minimum in the histogram, so that the histogram slope is small at the split point. (4) We normalize the score so that if the two distributions were of equal size and at opposite ends of the numa, the score would be 1.0.
Definition at line 2023 of file numafunc2.c.
References gplotSimple1(), lept_stderr(), numaAddNumber(), numaCreate(), numaGetCount(), numaGetFValue(), numaGetHistogramStats(), and numaGetSum().
Referenced by pixSplitDistributionFgBg().
[in] | nas | |
[in] | shift | add this to each number |
[in] | scale | multiply each number by this |
Notes: (1) Each number is shifted before scaling.
Definition at line 415 of file numafunc2.c.
References numaAddNumber(), numaCopyParameters(), numaCreate(), numaGetCount(), and numaGetFValue().
Referenced by l_compressGrayHistograms(), numaEarthMoverDistance(), and pixCompareTilesByHisto().
[in] | nas | |
[in] | wc | half width of the convolution window |
Notes: (1) This is a convolution. The window has width = 2 * wc + 1. (2) We add a mirrored border of size wc to each end of the array.
Definition at line 588 of file numafunc2.c.
References L_MIRRORED_BORDER, L_NOCOPY, numaAddSpecifiedBorder(), numaDestroy(), numaGetCount(), numaGetFArray(), and numaMakeConstant().
Referenced by grayInterHistogramStats(), numaWindowedStats(), and pixCompareTilesByHisto().
[in] | nas | |
[in] | wc | half width of the window |
Notes: (1) The window has width = 2 * wc + 1. (2) We add a mirrored border of size wc to each end of the array.
Definition at line 648 of file numafunc2.c.
References L_MIRRORED_BORDER, L_NOCOPY, numaAddSpecifiedBorder(), numaDestroy(), numaGetCount(), numaGetFArray(), and numaMakeConstant().
Referenced by numaWindowedStats().
[in] | nas | |
[in] | halfwin | half width of window over which the median is found |
Notes: (1) The requested window has width = 2 * halfwin + 1. (2) If the input nas has fewer than 3 elements, return a copy. (3) If the filter is too small (halfwin <= 0), return a copy. (4) If the filter is too large, it is reduced in size. (5) We add a mirrored border of size halfwin to each end of the array to simplify the calculation by avoiding end-effects.
Definition at line 784 of file numafunc2.c.
References L_MIRRORED_BORDER, numaAddNumber(), numaAddSpecifiedBorder(), numaClipToInterval(), numaCopy(), numaCreate(), numaDestroy(), numaGetCount(), and numaGetMedian().
Referenced by boxaWindowedMedian().
l_ok numaWindowedStats | ( | NUMA * | nas, |
l_int32 | wc, | ||
NUMA ** | pnam, | ||
NUMA ** | pnams, | ||
NUMA ** | pnav, | ||
NUMA ** | pnarv | ||
) |
[in] | nas | input numa |
[in] | wc | half width of the window |
[out] | pnam | [optional] mean value in window |
[out] | pnams | [optional] mean square value in window |
[out] | pnav | [optional] variance in window |
[out] | pnarv | [optional] rms deviation from the mean |
Notes: (1) This is a high-level convenience function for calculating any or all of these derived arrays. (2) These statistical measures over the values in the rectangular window are: ~ average value: [x] (nam) ~ average squared value: [x*x] (nams) ~ variance: [(x - [x])*(x - [x])] = [x*x] - [x]*[x] (nav) ~ square-root of variance: (narv) where the brackets [ .. ] indicate that the average value is to be taken over the window. (3) Note that the variance is just the mean square difference from the mean value; and the square root of the variance is the root mean square difference from the mean, sometimes also called the 'standard deviation'. (4) Internally, use mirrored borders to handle values near the end of each array.
Definition at line 537 of file numafunc2.c.
References numaDestroy(), numaGetCount(), numaWindowedMean(), numaWindowedMeanSquare(), and numaWindowedVariance().
[in] | nam | windowed mean values |
[in] | nams | windowed mean square values |
[out] | pnav | [optional] numa of variance – the ms deviation from the mean |
[out] | pnarv | [optional] numa of rms deviation from the mean |
Notes: (1) The numas of windowed mean and mean square are precomputed, using numaWindowedMean() and numaWindowedMeanSquare(). (2) Either or both of the variance and square-root of variance are returned, where the variance is the average over the window of the mean square difference of the pixel value from the mean: [(x - [x])*(x - [x])] = [x*x] - [x]*[x]
Definition at line 716 of file numafunc2.c.
References L_NOCOPY, numaGetCount(), numaGetFArray(), and numaMakeConstant().
Referenced by numaWindowedStats().
|
static |
Definition at line 145 of file numafunc2.c.