![]() |
Leptonica
1.82.0
Image processing and image analysis suite
|
#include <string.h>
#include "allheaders.h"
Go to the source code of this file.
Macros | |
#define | SPLIT_WITH_DID 1 |
Functions | |
static l_int32 | pixCorrelationBestShift (PIX *pix1, PIX *pix2, NUMA *nasum1, NUMA *namoment1, l_int32 area2, l_int32 ycent2, l_int32 maxyshift, l_int32 *tab8, l_int32 *pdelx, l_int32 *pdely, l_float32 *pscore, l_int32 debugflag) |
static L_RCH * | rchCreate (l_int32 index, l_float32 score, char *text, l_int32 sample, l_int32 xloc, l_int32 yloc, l_int32 width) |
static L_RCHA * | rchaCreate () |
static l_int32 | transferRchToRcha (L_RCH *rch, L_RCHA *rcha) |
static PIX * | recogPreSplittingFilter (L_RECOG *recog, PIX *pixs, l_int32 minh, l_float32 minaf, l_int32 debug) |
static l_int32 | recogSplittingFilter (L_RECOG *recog, PIX *pixs, l_int32 minh, l_float32 minaf, l_int32 *premove, l_int32 debug) |
static void | l_showIndicatorSplitValues (NUMA *na1, NUMA *na2, NUMA *na3, NUMA *na4, NUMA *na5, NUMA *na6) |
l_ok | recogIdentifyMultiple (L_RECOG *recog, PIX *pixs, l_int32 minh, l_int32 skipsplit, BOXA **pboxa, PIXA **ppixa, PIX **ppixdb, l_int32 debugsplit) |
l_ok | recogSplitIntoCharacters (L_RECOG *recog, PIX *pixs, l_int32 minh, l_int32 skipsplit, BOXA **pboxa, PIXA **ppixa, l_int32 debug) |
l_ok | recogCorrelationBestRow (L_RECOG *recog, PIX *pixs, BOXA **pboxa, NUMA **pnascore, NUMA **pnaindex, SARRAY **psachar, l_int32 debug) |
l_ok | recogCorrelationBestChar (L_RECOG *recog, PIX *pixs, BOX **pbox, l_float32 *pscore, l_int32 *pindex, char **pcharstr, PIX **ppixdb) |
l_ok | recogIdentifyPixa (L_RECOG *recog, PIXA *pixa, PIX **ppixdb) |
l_ok | recogIdentifyPix (L_RECOG *recog, PIX *pixs, PIX **ppixdb) |
l_ok | recogSkipIdentify (L_RECOG *recog) |
void | rchaDestroy (L_RCHA **prcha) |
void | rchDestroy (L_RCH **prch) |
l_ok | rchaExtract (L_RCHA *rcha, NUMA **pnaindex, NUMA **pnascore, SARRAY **psatext, NUMA **pnasample, NUMA **pnaxloc, NUMA **pnayloc, NUMA **pnawidth) |
l_ok | rchExtract (L_RCH *rch, l_int32 *pindex, l_float32 *pscore, char **ptext, l_int32 *psample, l_int32 *pxloc, l_int32 *pyloc, l_int32 *pwidth) |
PIX * | recogProcessToIdentify (L_RECOG *recog, PIX *pixs, l_int32 pad) |
SARRAY * | recogExtractNumbers (L_RECOG *recog, BOXA *boxas, l_float32 scorethresh, l_int32 spacethresh, BOXAA **pbaa, NUMAA **pnaa) |
PIXA * | showExtractNumbers (PIX *pixs, SARRAY *sa, BOXAA *baa, NUMAA *naa, PIX **ppixdb) |
Top-level identification l_int32 recogIdentifyMultiple() Segmentation and noise removal l_int32 recogSplitIntoCharacters() Greedy character splitting l_int32 recogCorrelationBestRow() l_int32 recogCorrelationBestChar() static l_int32 pixCorrelationBestShift() Low-level identification of single characters l_int32 recogIdentifyPixa() l_int32 recogIdentifyPix() l_int32 recogSkipIdentify() Operations for handling identification results static L_RCHA *rchaCreate() void rchaDestroy() static L_RCH *rchCreate() void rchDestroy() l_int32 rchaExtract() l_int32 rchExtract() static l_int32 transferRchToRcha() Preprocessing and filtering PIX *recogProcessToIdentify() static PIX *recogPreSplittingFilter() static l_int32 recogSplittingFilter() Postprocessing SARRAY *recogExtractNumbers() PIXA *showExtractNumbers() Static debug helper static void l_showIndicatorSplitValues() See recogbasic.c for examples of training a recognizer, which is required before it can be used for identification. The character splitter repeatedly does a greedy correlation with each averaged unscaled template, at all pixel locations along the text to be identified. The vertical alignment is between the template centroid and the (moving) windowed centroid, including a delta of 1 pixel above and below. The best match then removes part of the input image, leaving 1 or 2 pieces, which, after filtering, are put in a queue. The process ends when the queue is empty. The filtering is based on the size and aspect ratio of the remaining pieces; the intent is to remove anything that is unlikely to be text, such as small pieces and line graphics. After splitting, the selected segments are identified using the input parameters that were initially specified for the recognizer. Unlike the splitter, which uses the averaged templates from the unscaled input, the recognizer can use either all training examples or averaged templates, and these can be either scaled or unscaled. 
These choices are specified when the recognizer is constructed.
Definition in file recogident.c.
|
static |
[in] | na1,na2,na3,na4,na5,na6 | 6 indicator array |
Notes: (1) The values indicate that specific criteria have been met for component removal by the pre-splitting filter. The 'result' line shows which components have been removed.
Definition at line 1852 of file recogident.c.
References Numa::array, lept_stderr(), and numaGetCount().
|
static |
[in] | pix1 | 1 bpp, the unknown image; typically larger |
[in] | pix2 | 1 bpp, the matching template image |
[in] | nasum1 | vertical column pixel sums for pix1 |
[in] | namoment1 | vertical column first moment of pixels for pix1 |
[in] | area2 | number of on pixels in pix2 |
[in] | ycent2 | y component of centroid of pix2 |
[in] | maxyshift | max y shift of pix2 around the location where the centroids of pix2 and a windowed part of pix1 are vertically aligned |
[in] | tab8 | [optional] sum tab for ON pixels in byte; can be NULL |
[out] | pdelx | [optional] best x shift of pix2 relative to pix1 |
[out] | pdely | [optional] best y shift of pix2 relative to pix1 |
[out] | pscore | [optional] maximum score found; can be NULL |
[in] | debugflag | <= 0 to skip; positive to generate output; the integer is used to label the debug image. |
Notes: (1) This maximizes the correlation score between two 1 bpp images, one of which is typically wider. In a typical example, pix1 is a bitmap of 2 or more touching characters and pix2 is a single character template. This finds the location of pix2 that gives the largest correlation. (2) The windowed area of fg pixels and windowed first moment in the y direction are computed from the input sum and moment column arrays, nasum1 and namoment1. (3) This is a brute force operation. We compute the correlation at every x shift for which pix2 fits entirely within pix1, and where the centroid of pix2 is aligned, within +-maxyshift, with the centroid of a window of pix1 of the same width. The correlation is taken over the full height of pix1. This can be made more efficient.
Definition at line 729 of file recogident.c.
|
static |
Return: 0 if OK, 1 on error
Notes: (1) Be sure to destroy any existing rcha before assigning this.
Definition at line 1150 of file recogident.c.
References L_Rcha::naindex, L_Rcha::nasample, L_Rcha::nascore, L_Rcha::nawidth, L_Rcha::naxloc, L_Rcha::nayloc, numaCreate(), sarrayCreate(), and L_Rcha::satext.
Referenced by recogIdentifyPixa().
void rchaDestroy | ( | L_RCHA ** | prcha | ) |
[in,out] | prcha | to be nulled |
Definition at line 1172 of file recogident.c.
References L_Rcha::naindex, L_Rcha::nasample, L_Rcha::nascore, L_Rcha::nawidth, L_Rcha::naxloc, L_Rcha::nayloc, numaDestroy(), sarrayDestroy(), and L_Rcha::satext.
Referenced by recogDestroy(), and recogIdentifyPixa().
l_ok rchaExtract | ( | L_RCHA * | rcha, |
NUMA ** | pnaindex, | ||
NUMA ** | pnascore, | ||
SARRAY ** | psatext, | ||
NUMA ** | pnasample, | ||
NUMA ** | pnaxloc, | ||
NUMA ** | pnayloc, | ||
NUMA ** | pnawidth | ||
) |
[in] | rcha | |
[out] | pnaindex | [optional] indices of best templates |
[out] | pnascore | [optional] correl scores of best templates |
[out] | psatext | [optional] character strings of best templates |
[out] | pnasample | [optional] indices of best samples |
[out] | pnaxloc | [optional] x-locations of templates |
[out] | pnayloc | [optional] y-locations of templates |
[out] | pnawidth | [optional] widths of best templates |
Notes: (1) This returns clones of the number and string arrays. They must be destroyed by the caller.
Definition at line 1283 of file recogident.c.
References L_Rcha::naindex, L_Rcha::nasample, L_Rcha::nascore, L_Rcha::nawidth, L_Rcha::naxloc, L_Rcha::nayloc, numaClone(), sarrayClone(), and L_Rcha::satext.
Referenced by recogExtractNumbers().
|
static |
[in] | index | index of best template |
[in] | score | correlation score of best template |
[in] | text | character string of best template |
[in] | sample | index of best sample; -1 if averages are used |
[in] | xloc | x-location of template: delx + shiftx |
[in] | yloc | y-location of template: dely + shifty |
[in] | width | width of best template |
Notes: (1) Be sure to destroy any existing rch before assigning this. (2) This stores the text string, not a copy of it, so the caller must not destroy the string.
Definition at line 1217 of file recogident.c.
References L_Rch::index, L_Rch::sample, L_Rch::score, L_Rch::text, L_Rch::width, L_Rch::xloc, and L_Rch::yloc.
Referenced by recogSkipIdentify().
void rchDestroy | ( | L_RCH ** | prch | ) |
[in,out] | prch | to be nulled |
Definition at line 1245 of file recogident.c.
References L_Rch::text.
Referenced by recogDestroy(), and recogSkipIdentify().
l_ok rchExtract | ( | L_RCH * | rch, |
l_int32 * | pindex, | ||
l_float32 * | pscore, | ||
char ** | ptext, | ||
l_int32 * | psample, | ||
l_int32 * | pxloc, | ||
l_int32 * | pyloc, | ||
l_int32 * | pwidth | ||
) |
[in] | rch | |
[out] | pindex | [optional] index of best template |
[out] | pscore | [optional] correlation score of best template |
[out] | ptext | [optional] character string of best template |
[out] | psample | [optional] index of best sample |
[out] | pxloc | [optional] x-location of template |
[out] | pyloc | [optional] y-location of template |
[out] | pwidth | [optional] width of best template |
Definition at line 1329 of file recogident.c.
References L_Rch::index, L_Rch::sample, L_Rch::score, stringNew(), L_Rch::text, L_Rch::width, L_Rch::xloc, and L_Rch::yloc.
Referenced by recogDebugAverages(), recogIdentifyPixa(), recogRescoreDidResult(), recogShowMatchesInRange(), and recogTrainFromBoot().
l_ok recogCorrelationBestChar | ( | L_RECOG * | recog, |
PIX * | pixs, | ||
BOX ** | pbox, | ||
l_float32 * | pscore, | ||
l_int32 * | pindex, | ||
char ** | pcharstr, | ||
PIX ** | ppixdb | ||
) |
[in] | recog | with LUT's pre-computed |
[in] | pixs | can be of multiple touching characters, 1 bpp |
[out] | pbox | bounding box of best fit character |
[out] | pscore | correlation score |
[out] | pindex | [optional] index of class |
[out] | pcharstr | [optional] character string of class |
[out] | ppixdb | [optional] debug pix showing input and best fit |
Notes: (1) Basic matching character splitter. Finds the best match among all templates to some region of the image. This can result in splitting the image into two parts. This is "image decoding" without dynamic programming, because we don't use a setwidth and compute the best matching score for the entire image. (2) Matching is to the average templates, without character scaling.
Definition at line 587 of file recogident.c.
l_ok recogCorrelationBestRow | ( | L_RECOG * | recog, |
PIX * | pixs, | ||
BOXA ** | pboxa, | ||
NUMA ** | pnascore, | ||
NUMA ** | pnaindex, | ||
SARRAY ** | psachar, | ||
l_int32 | debug | ||
) |
[in] | recog | with LUT's pre-computed |
[in] | pixs | typically of multiple touching characters, 1 bpp |
[out] | pboxa | bounding boxes of best fit characters |
[out] | pnascore | [optional] correlation scores |
[out] | pnaindex | [optional] indices of classes |
[out] | psachar | [optional] array of character strings |
[in] | debug | 1 for results written to pixadb_split |
Notes: (1) Supervises character matching for (in general) a c.c with multiple touching characters. Finds the best match greedily. Rejects small parts that are left over after splitting. (2) Matching is to the average, and without character scaling.
Definition at line 405 of file recogident.c.
SARRAY* recogExtractNumbers | ( | L_RECOG * | recog, |
BOXA * | boxas, | ||
l_float32 | scorethresh, | ||
l_int32 | spacethresh, | ||
BOXAA ** | pbaa, | ||
NUMAA ** | pnaa | ||
) |
[in] | recog | |
[in] | boxas | location of components |
[in] | scorethresh | min score for which we accept a component |
[in] | spacethresh | max horizontal distance allowed between digits; use -1 for default |
[out] | pbaa | [optional] bounding boxes of identified numbers |
[out] | pnaa | [optional] scores of identified digits |
Notes: (1) This extracts digit data after recogIdentifyMultiple() or lower-level identification has taken place. (2) Each string in the returned sa contains a sequence of ascii digits in a number. (3) The horizontal distance between boxes (limited by spacethresh) is the negative of the horizontal overlap. (4) Components with a score less than scorethresh, which may be hyphens or other small characters, will signal the end of the current sequence of digits in the number. A typical value for scorethresh is 0.60. (5) We allow two digits to be combined if these conditions apply: (a) the first is to the left of the second (b) the second has a horizontal separation less than spacethresh (c) the vertical overlap >= 0 (vertical separation < 0) (d) both have a score that exceeds scorethresh (6) Each numa in the optionally returned naa contains the digit scores of a number. Each boxa in the optionally returned baa contains the bounding boxes of the digits in the number.
Definition at line 1630 of file recogident.c.
References boxaaAddBoxa(), boxaaCreate(), boxaAddBox(), boxaaDestroy(), boxaCreate(), boxaGetBox(), boxDestroy(), boxGetGeometry(), boxOverlapDistance(), L_CLONE, L_COPY, L_INSERT, L_NOCOPY, L_Recog::maxheight_u, numaaAddNuma(), numaaCreate(), numaAddNumber(), numaaDestroy(), numaCreate(), numaDestroy(), numaGetCount(), numaGetFValue(), L_Recog::rcha, rchaExtract(), sarrayAddString(), sarrayCreate(), sarrayDestroy(), sarrayGetCount(), sarrayGetString(), and sarrayToString().
l_ok recogIdentifyMultiple | ( | L_RECOG * | recog, |
PIX * | pixs, | ||
l_int32 | minh, | ||
l_int32 | skipsplit, | ||
BOXA ** | pboxa, | ||
PIXA ** | ppixa, | ||
PIX ** | ppixdb, | ||
l_int32 | debugsplit | ||
) |
[in] | recog | with training finished |
[in] | pixs | containing typically a small number of characters |
[in] | minh | remove shorter components; use 0 for default |
[in] | skipsplit | 1 to skip the splitting step |
[out] | pboxa | [optional] locations of identified components |
[out] | ppixa | [optional] images of identified components |
[out] | ppixdb | [optional] debug pix: inputs and best fits |
[in] | debugsplit | 1 returns pix split debugging images |
Notes: (1) This filters the input pixa and calls recogIdentifyPixa() (2) Splitting is relatively slow, because it tries to match all character templates to all locations. This step can be skipped. (3) An attempt is made to order the (optionally) returned images and boxes in 2-dimensional sorted order. These can then be used to aggregate identified characters into numbers or words. One typically wants the pixa, which contains a boxa of the extracted subimages.
Definition at line 162 of file recogident.c.
References L_Recog::train_done.
[in] | recog | with LUT's pre-computed |
[in] | pixs | of a single character, 1 bpp |
[out] | ppixdb | [optional] debug pix showing input and best fit |
Notes: (1) Basic recognition function for a single character. (2) If templ_use == L_USE_ALL_TEMPLATES, which is the default situation, matching is attempted to every bitmap in the recog, and the identity of the best match is returned. (3) For finding outliers, templ_use == L_USE_AVERAGE_TEMPLATES, and matching is only attempted to the averaged bitmaps. For this case, the index of the bestsample is meaningless (0 is returned if requested). (4) The score is related to the confidence (probability of correct identification), in that a higher score is correlated with a higher probability. However, the actual relation between the correlation (score) and the probability is not known; we call this a "score" because "confidence" can be misinterpreted as an actual probability.
Definition at line 975 of file recogident.c.
Referenced by recogDebugAverages(), recogIdentifyPixa(), recogRescoreDidResult(), recogShowMatchesInRange(), and recogTrainFromBoot().
[in] | recog | |
[in] | pixa | of 1 bpp images to match |
[out] | ppixdb | [optional] pix showing inputs and best fits |
Notes: (1) This should be called by recogIdentifyMultiple(), which binarizes and splits characters before sending pixa here. (2) This calls recogIdentifyPix(), which does the same operation on each pix in pixa, and optionally returns the arrays of results (scores, class index and character string) for the best correlation match.
Definition at line 882 of file recogident.c.
References L_CLONE, pixaCreate(), pixaGetCount(), pixaGetPix(), pixDestroy(), pixSetText(), L_Recog::rch, L_Recog::rcha, rchaCreate(), rchaDestroy(), rchExtract(), recogIdentifyPix(), recogShowMatch(), and recogSkipIdentify().
|
static |
[in] | recog | |
[in] | pixs | 1 bpp, many connected components |
[in] | minh | minimum height of components to be retained |
[in] | minaf | minimum area fraction (|fg|/(w*h)) to be retained |
[in] | debug | 1 to output indicator arrays |
Definition at line 1461 of file recogident.c.
References L_SELECT_IF_GT, L_SELECT_IF_LT, L_Recog::max_splith, L_Recog::max_wh_ratio, L_Recog::min_splitw, numaCopy(), numaLogicalOp(), numaMakeThresholdIndicator(), pixaFindAreaFraction(), pixaFindDimensions(), pixaFindWidthHeightRatio(), pixConnComp(), L_Recog::scaleh, and L_Recog::scalew.
[in] | recog | with LUT's pre-computed |
[in] | pixs | typ. single character, possibly d > 1 and uncropped |
[in] | pad | extra pixels added to left and right sides |
Notes: (1) This is a lightweight operation to ensure that the input image is 1 bpp, properly cropped, and padded on each side. If bpp > 1, the image is thresholded.
Definition at line 1417 of file recogident.c.
l_ok recogSkipIdentify | ( | L_RECOG * | recog | ) |
[in] | recog |
Notes: (1) This just writes a "dummy" result with 0 score and empty string id into the rch.
Definition at line 1124 of file recogident.c.
References L_Recog::rch, rchCreate(), rchDestroy(), and stringNew().
Referenced by recogIdentifyPixa().
l_ok recogSplitIntoCharacters | ( | L_RECOG * | recog, |
PIX * | pixs, | ||
l_int32 | minh, | ||
l_int32 | skipsplit, | ||
BOXA ** | pboxa, | ||
PIXA ** | ppixa, | ||
l_int32 | debug | ||
) |
[in] | recog | |
[in] | pixs | 1 bpp, contains only mostly deskewed text |
[in] | minh | remove shorter components; use 0 for default |
[in] | skipsplit | 1 to skip the splitting step |
[out] | pboxa | character bounding boxes |
[out] | ppixa | character images |
[in] | debug | 1 for results written to pixadb_split |
Notes: (1) This can be given an image that has an arbitrary number of text characters. It optionally splits connected components based on document image decoding in recogDecode(). The returned pixa includes the boxes from which the (possibly split) components are extracted. (2) After noise filtering, the resulting components are put in row-major (2D) order, and the smaller of overlapping components are removed if they satisfy conditions of relative size and fractional overlap. (3) Note that the splitting function uses unscaled templates and does not bother returning the class results and scores. These are more accurately found later using the scaled templates.
Definition at line 250 of file recogident.c.
References lept_mkdir(), and L_Recog::train_done.
|
static |
[in] | recog | |
[in] | pixs | 1 bpp, single connected component |
[in] | minh | minimum height of component; 0 for default |
[in] | minaf | minimum area fraction (|fg|/(w*h)) to be retained |
[out] | premove | 0 to save, 1 to remove |
[in] | debug | 1 to output indicator arrays |
Definition at line 1538 of file recogident.c.
[in] | pixs | input 1 bpp image |
[in] | sa | recognized text strings |
[in] | baa | boxa array for location of characters in each string |
[in] | naa | numa array for scores of characters in each string |
[out] | ppixdb | [optional] input pixs with identified chars outlined |
Notes: (1) This is a debugging routine on digit identification; e.g.: recogIdentifyMultiple(recog, pixs, 0, 1, &boxa, NULL, NULL, 0); sa = recogExtractNumbers(recog, boxa, 0.8, -1, &baa, &naa); pixa = showExtractNumbers(pixs, sa, baa, naa, NULL);
Definition at line 1765 of file recogident.c.
References bmfCreate(), bmfDestroy(), boxaaGetBoxa(), boxaDestroy(), boxAdjustSides(), boxaGetExtent(), boxDestroy(), L_ADD_BELOW, L_CLONE, L_INSERT, L_NOCOPY, L_SET_WHITE, numaaGetNuma(), numaDestroy(), numaGetCount(), numaGetFValue(), pixaAddPix(), pixaCreate(), pixAddBlackOrWhiteBorder(), pixAddTextlines(), pixClipRectangle(), pixConvertTo8(), pixDestroy(), pixRenderBoxArb(), sarrayGetCount(), sarrayGetString(), and stringJoinIP().
[in] | rch | source of data |
[in] | rcha | append to arrays in this destination |
Notes: (1) This is used to transfer the results of a single character identification to an rcha array for the array of characters.
Definition at line 1375 of file recogident.c.
References L_Rch::index, L_COPY, L_Rcha::naindex, L_Rcha::nasample, L_Rcha::nascore, L_Rcha::nawidth, L_Rcha::naxloc, L_Rcha::nayloc, numaAddNumber(), L_Rch::sample, sarrayAddString(), L_Rcha::satext, L_Rch::score, L_Rch::text, L_Rch::width, L_Rch::xloc, and L_Rch::yloc.