Leptonica  1.82.0
Image processing and image analysis suite
pageseg.c File Reference
#include "allheaders.h"
#include "math.h"

Go to the source code of this file.

Functions

l_ok pixGetRegionsBinary (PIX *pixs, PIX **ppixhm, PIX **ppixtm, PIX **ppixtb, PIXA *pixadb)
 
PIX * pixGenHalftoneMask (PIX *pixs, PIX **ppixtext, l_int32 *phtfound, l_int32 debug)
 
PIX * pixGenerateHalftoneMask (PIX *pixs, PIX **ppixtext, l_int32 *phtfound, PIXA *pixadb)
 
PIX * pixGenTextlineMask (PIX *pixs, PIX **ppixvws, l_int32 *ptlfound, PIXA *pixadb)
 
PIX * pixGenTextblockMask (PIX *pixs, PIX *pixvws, PIXA *pixadb)
 
BOX * pixFindPageForeground (PIX *pixs, l_int32 threshold, l_int32 mindist, l_int32 erasedist, l_int32 showmorph, PIXAC *pixac)
 
l_ok pixSplitIntoCharacters (PIX *pixs, l_int32 minw, l_int32 minh, BOXA **pboxa, PIXA **ppixa, PIX **ppixdebug)
 
BOXA * pixSplitComponentWithProfile (PIX *pixs, l_int32 delta, l_int32 mindel, PIX **ppixdebug)
 
PIXA * pixExtractTextlines (PIX *pixs, l_int32 maxw, l_int32 maxh, l_int32 minw, l_int32 minh, l_int32 adjw, l_int32 adjh, PIXA *pixadb)
 
PIXA * pixExtractRawTextlines (PIX *pixs, l_int32 maxw, l_int32 maxh, l_int32 adjw, l_int32 adjh, PIXA *pixadb)
 
l_ok pixCountTextColumns (PIX *pixs, l_float32 deltafract, l_float32 peakfract, l_float32 clipfract, l_int32 *pncols, PIXA *pixadb)
 
l_ok pixDecideIfText (PIX *pixs, BOX *box, l_int32 *pistext, PIXA *pixadb)
 
l_ok pixFindThreshFgExtent (PIX *pixs, l_int32 thresh, l_int32 *ptop, l_int32 *pbot)
 
l_ok pixDecideIfTable (PIX *pixs, BOX *box, l_int32 orient, l_int32 *pscore, PIXA *pixadb)
 
PIX * pixPrepare1bpp (PIX *pixs, BOX *box, l_float32 cropfract, l_int32 outres)
 
l_ok pixEstimateBackground (PIX *pixs, l_int32 darkthresh, l_float32 edgecrop, l_int32 *pbg)
 
l_ok pixFindLargeRectangles (PIX *pixs, l_int32 polarity, l_int32 nrect, BOXA **pboxa, PIX **ppixdb)
 
l_ok pixFindLargestRectangle (PIX *pixs, l_int32 polarity, BOX **pbox, PIX **ppixdb)
 
PIX * pixAutoPhotoinvert (PIX *pixs, l_int32 thresh, PIX **ppixm, PIXA *pixadb)
 

Variables

static const l_int32 MinWidth = 100
 
static const l_int32 MinHeight = 100
 

Detailed Description


     Top level page segmentation
         l_int32   pixGetRegionsBinary()

     Halftone region extraction
         PIX      *pixGenHalftoneMask()    **Deprecated wrapper**
         PIX      *pixGenerateHalftoneMask()


     Textline extraction
         PIX      *pixGenTextlineMask()

     Textblock extraction
         PIX      *pixGenTextblockMask()

     Location of page foreground
         BOX      *pixFindPageForeground()

     Extraction of characters from image with only text
         l_int32   pixSplitIntoCharacters()
         BOXA     *pixSplitComponentWithProfile()

     Extraction of lines of text
         PIXA     *pixExtractTextlines()
         PIXA     *pixExtractRawTextlines()

     How many text columns
         l_int32   pixCountTextColumns()

     Decision: text vs photo
         l_int32   pixDecideIfText()
         l_int32   pixFindThreshFgExtent()

     Decision: table vs text
         l_int32   pixDecideIfTable()
         PIX      *pixPrepare1bpp()

     Estimate the grayscale background value
         l_int32   pixEstimateBackground()

     Largest white or black rectangles in an image
         l_int32   pixFindLargeRectangles()
         l_int32   pixFindLargestRectangle()

     Generate rectangle inside connected component
         BOX      *pixFindRectangleInCC()

     Automatic photoinvert for OCR
         PIX      *pixAutoPhotoinvert()

Definition in file pageseg.c.

Function Documentation

◆ pixAutoPhotoinvert()

PIX* pixAutoPhotoinvert ( PIX *  pixs,
l_int32  thresh,
PIX **  ppixm,
PIXA *  pixadb 
)

pixFindRectangleInCC()

Parameters
[in]pixs1 bpp, with sufficient closings to make the fg be a single c.c. that is a convex hull
[in]boxs[optional] if NULL, pixs should be a minimum container of a single c.c.
[in]fractfirst and all consecutive lines found must be at least this fraction of the fast scan dimension
[in]dirL_SCAN_HORIZONTAL, L_SCAN_VERTICAL; direction of fast scan
[in]selectL_GEOMETRIC_UNION, L_GEOMETRIC_INTERSECTION, L_LARGEST_AREA, L_SMALLEST_AREA
[in]debugif 1, generates output pdf showing intermediate computation and final result
Returns
box of included rectangle, or NULL on error
   Notes:
        (1) Computation is similar to pixFindLargestRectangle(), but allows
            a different set of results to choose from.
        (2) Select the fast scan direction.  Then, scanning in the slow
            direction, find the longest run of ON pixels in the fast
            scan direction and look for the first run that is longer
            than fract of the dimension.  Continue until a shorter run
            is found.  This generates a box of ON pixels fitting into the c.c.
        (3) Do this from both slow scan directions and use select to get
            a resulting box from these two.
        (4) The extracted rectangle is not necessarily the largest that
            can fit in the c.c.  To get that, use pixFindLargestRectangle().
 */
/*
 *  pixFindRectangleInCC()
 *
 *  Finds a rectangle of ON pixels inside the (1 bpp) input by scanning
 *  for long horizontal runs, once from the top and once from the bottom,
 *  and then combining the two candidate boxes according to %select.
 *
 *      pixs     1 bpp
 *      boxs     [optional] region of pixs to use; if NULL, all of pixs
 *      fract    in (0.0, 1.0]; minimum run length as a fraction of the
 *               fast scan dimension
 *      dir      L_SCAN_HORIZONTAL or L_SCAN_VERTICAL
 *      select   L_GEOMETRIC_UNION, L_GEOMETRIC_INTERSECTION,
 *               L_LARGEST_AREA or L_SMALLEST_AREA
 *      debug    if nonzero, writes /tmp/lept/rect/fitrect.{pdf,png}
 *      Return:  box in the coordinates of pixs, or NULL on invalid
 *               input, if an intermediate image cannot be made, if no
 *               sufficiently long run exists, or if the selection
 *               yields no box.
 */
BOX *
pixFindRectangleInCC(PIX       *pixs,
                     BOX       *boxs,
                     l_float32  fract,
                     l_int32    dir,
                     l_int32    select,
                     l_int32    debug)
{
l_int32  x, y, i, w, h, w1, h1, w2, h2, found, res, minrun;
l_int32  xfirst, xlast, xstart, yfirst, ylast, length;
BOX     *box1, *box2, *box3, *box4, *box5;
PIX     *pix1, *pix2, *pixdb1, *pixdb2;
PIXA    *pixadb;

    PROCNAME("pixFindRectangleInCC");

    if (!pixs || pixGetDepth(pixs) != 1)
        return (BOX *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL);
    if (fract <= 0.0 || fract > 1.0)
        return (BOX *)ERROR_PTR("invalid fraction", procName, NULL);
    if (dir != L_SCAN_VERTICAL && dir != L_SCAN_HORIZONTAL)
        return (BOX *)ERROR_PTR("invalid scan direction", procName, NULL);
    if (select != L_GEOMETRIC_UNION && select != L_GEOMETRIC_INTERSECTION &&
        select != L_LARGEST_AREA && select != L_SMALLEST_AREA)
        return (BOX *)ERROR_PTR("invalid select", procName, NULL);

        /* Extract the c.c. if necessary.  (x, y) is the offset used
         * at the end to map the result back into pixs coordinates. */
    x = y = 0;
    if (boxs) {
        pix1 = pixClipRectangle(pixs, boxs, NULL);
        boxGetGeometry(boxs, &x, &y, NULL, NULL);
    } else {
        pix1 = pixClone(pixs);
    }
    if (!pix1)  /* fail here rather than with a misleading "no run" warning */
        return (BOX *)ERROR_PTR("pix1 not made", procName, NULL);

        /* All fast scans are horizontal; rotate 90 deg cw if necessary */
    if (dir == L_SCAN_VERTICAL)
        pix2 = pixRotate90(pix1, 1);
    else  /* L_SCAN_HORIZONTAL */
        pix2 = pixClone(pix1);
    if (!pix2) {
        pixDestroy(&pix1);
        return (BOX *)ERROR_PTR("pix2 not made", procName, NULL);
    }
    pixGetDimensions(pix2, &w, &h, NULL);

        /* Shortest run that qualifies a scanline; invariant in all loops */
    minrun = (l_int32)(fract * w + 0.5);

    pixadb = (debug) ? pixaCreate(0) : NULL;
    pixdb1 = NULL;
    if (pixadb) {
        lept_mkdir("lept/rect");
        pixaAddPix(pixadb, pix1, L_CLONE);
        pixdb1 = pixConvertTo32(pix2);
    }
    pixDestroy(&pix1);

        /* Scanning down, find the first scanline with a long enough run.
           That run goes from (xfirst, yfirst) to (xlast, yfirst).  */
    found = FALSE;
    xfirst = xlast = yfirst = ylast = 0;
    for (i = 0; i < h; i++) {
        pixFindMaxHorizontalRunOnLine(pix2, i, &xstart, &length);
        if (length >= minrun) {
            yfirst = i;
            xfirst = xstart;
            xlast = xfirst + length - 1;
            found = TRUE;
            break;
        }
    }
    if (!found) {
        L_WARNING("no run of sufficient size was found\n", procName);
        pixDestroy(&pix2);
        pixDestroy(&pixdb1);
        pixaDestroy(&pixadb);
        return NULL;
    }

        /* Continue down until the longest run on a line no longer
         * covers [xfirst, xlast].
         * NOTE(review): because of the (i == h - 1) test, the bottom
         * scanline is included in box1 only when yfirst == h - 1;
         * confirm whether that is intended before changing it. */
    w1 = xlast - xfirst + 1;
    h1 = h - yfirst;  /* init; used only if the loop body never runs */
    ylast = h - 1;  /* init */
    for (i = yfirst + 1; i < h; i++) {
        pixFindMaxHorizontalRunOnLine(pix2, i, &xstart, &length);
        if (xstart > xfirst || (xstart + length - 1 < xlast) ||
            i == h - 1) {
            ylast = i - 1;
            h1 = ylast - yfirst + 1;
            break;
        }
    }
    box1 = boxCreate(xfirst, yfirst, w1, h1);

        /* Scanning up, find the first scanline with a long enough run.
           That run goes from (xfirst, ylast) to (xlast, ylast).
           A qualifying line was found scanning down, so this loop
           also finds one. */
    for (i = h - 1; i >= 0; i--) {
        pixFindMaxHorizontalRunOnLine(pix2, i, &xstart, &length);
        if (length >= minrun) {
            ylast = i;
            xfirst = xstart;
            xlast = xfirst + length - 1;
            break;
        }
    }

        /* Continue up until the condition fails.
         * NOTE(review): symmetric to the downward scan, the (i == 0)
         * test keeps the top scanline out of box2 unless ylast == 0. */
    w2 = xlast - xfirst + 1;
    h2 = ylast + 1;  /* initialize */
    for (i = ylast - 1; i >= 0; i--) {
        pixFindMaxHorizontalRunOnLine(pix2, i, &xstart, &length);
        if (xstart > xfirst || (xstart + length - 1 < xlast) ||
            i == 0) {
            yfirst = i + 1;
            h2 = ylast - yfirst + 1;
            break;
        }
    }
    box2 = boxCreate(xfirst, yfirst, w2, h2);
    pixDestroy(&pix2);

    if (pixadb) {
            /* box1 in red, box2 in green; pixdb1 is then owned by pixadb */
        pixRenderBoxArb(pixdb1, box1, 2, 255, 0, 0);
        pixRenderBoxArb(pixdb1, box2, 2, 0, 255, 0);
        pixaAddPix(pixadb, pixdb1, L_INSERT);
    }

        /* Select the final result from the two boxes */
    if (select == L_GEOMETRIC_UNION)
        box3 = boxBoundingRegion(box1, box2);
    else if (select == L_GEOMETRIC_INTERSECTION)
        box3 = boxOverlapRegion(box1, box2);
    else if (select == L_LARGEST_AREA)
        box3 = (w1 * h1 >= w2 * h2) ? boxCopy(box1) : boxCopy(box2);
    else  /* select == L_SMALLEST_AREA */
        box3 = (w1 * h1 <= w2 * h2) ? boxCopy(box1) : boxCopy(box2);
    boxDestroy(&box1);
    boxDestroy(&box2);

        /* Rotate the box 90 degrees ccw if necessary */
    box4 = NULL;
    if (box3) {
        if (dir == L_SCAN_VERTICAL)
            box4 = boxRotateOrth(box3, w, h, 3);
        else
            box4 = boxCopy(box3);
    }

        /* Transform back to global coordinates if boxs exists */
    box5 = (box4) ? boxTransform(box4, x, y, 1.0, 1.0) : NULL;
    boxDestroy(&box3);
    boxDestroy(&box4);

        /* Debug output */
    if (pixadb) {
            /* The earlier pixdb1 was inserted into pixadb, so the
             * variable is reused here for a lightened copy of pixs. */
        pixdb1 = pixConvertTo8(pixs, 0);
        pixAddConstantGray(pixdb1, 190);
        pixdb2 = pixConvertTo32(pixdb1);
        if (box5) pixRenderBoxArb(pixdb2, box5, 4, 0, 0, 255);
        pixaAddPix(pixadb, pixdb2, L_INSERT);
        res = pixGetXRes(pixs);
        L_INFO("Writing debug files to /tmp/lept/rect/\n", procName);
        pixaConvertToPdf(pixadb, res, 1.0, L_DEFAULT_ENCODE, 75, NULL,
                        "/tmp/lept/rect/fitrect.pdf");
        pix1 = pixaDisplayTiledAndScaled(pixadb, 32, 800, 1, 0, 40, 2);
        pixWrite("/tmp/lept/rect/fitrect.png", pix1, IFF_PNG);
        pixDestroy(&pix1);
        pixDestroy(&pixdb1);
        pixaDestroy(&pixadb);
    }

    return box5;
}

/*------------------------------------------------------------------*
                      Automatic photoinvert for OCR                 *
 *------------------------------------------------------------------*/
/*!

pixAutoPhotoinvert()

Parameters
[in]pixs any depth, colormap ok
[in]thresh binarization threshold; use 0 for default
[out]ppixm [optional] image regions to be inverted
[out]pixadb [optional] debug; input NULL to skip
Returns
pixd 1 bpp image to be sent to OCR, or NULL on error
   Notes:
        (1) A 1 bpp image is returned, where pixels in image regions are
            photo-inverted.
        (2) If there is light text with a dark background, this will
            identify the region and photoinvert the pixels there if
            there are at least 60% fg pixels in the region.
        (3) For debug output, input a (typically empty) pixadb.
   

Definition at line 2393 of file pageseg.c.

References boxaDestroy(), boxaGetBox(), boxaGetCount(), boxDestroy(), boxGetGeometry(), L_CLONE, L_COPY, lept_stderr(), PIX_CLR, pixaAddPix(), pixClipRectangle(), pixCombineMasked(), pixConnCompBB(), pixConvertTo1(), pixDestroy(), pixFillHolesToBoundingRect(), pixForegroundFraction(), pixGenerateHalftoneMask(), pixInvert(), pixMorphSequence(), pixRasterop(), and pixZero().

◆ pixCountTextColumns()

l_ok pixCountTextColumns ( PIX *  pixs,
l_float32  deltafract,
l_float32  peakfract,
l_float32  clipfract,
l_int32 *  pncols,
PIXA *  pixadb 
)

pixCountTextColumns()

Parameters
[in]pixs1 bpp
[in]deltafractfraction of (max - min) to be used in the delta for extrema finding; typ 0.3
[in]peakfractfraction of (max - min) to be used to threshold the peak value; typ. 0.5
[in]clipfractfraction of image dimension removed on each side; typ. 0.1, which leaves w and h reduced to 0.8 of their original values
[out]pncolsnumber of columns; -1 if not determined
[in]pixadb[optional] pre-allocated, for showing intermediate computation; use null to skip
Returns
0 if OK, 1 on error
Notes:
     (1) It is assumed that pixs has the correct resolution set.
         If the resolution is 0, we set to 300 and issue a warning.
     (2) If necessary, the image is scaled to between 37 and 75 ppi;
         most of the processing is done at this resolution.
     (3) If no text is found (essentially a blank page),
         this returns ncols = 0.
     (4) For debug output, input a pre-allocated pixa.

Definition at line 1226 of file pageseg.c.

Referenced by dewarpaApplyInit().

◆ pixDecideIfTable()

l_ok pixDecideIfTable ( PIX *  pixs,
BOX *  box,
l_int32  orient,
l_int32 *  pscore,
PIXA *  pixadb 
)

pixDecideIfTable()

Parameters
[in]pixsany depth, any resolution >= 75 ppi
[in]box[optional] if null, use entire pixs
[in]orientL_PORTRAIT_MODE, L_LANDSCAPE_MODE
[out]pscore0 - 4; -1 if not determined
[in]pixadb[optional] pre-allocated, for showing intermediate computation; use NULL to skip
Returns
0 if OK, 1 on error
Notes:
     (1) It is assumed that pixs has the correct resolution set.
         If the resolution is 0, we assume it is 300 ppi and issue a warning.
     (2) If orient == L_LANDSCAPE_MODE, the image is rotated 90 degrees
         clockwise before being analyzed.
     (3) The interpretation of the returned score:
           -1     undetermined
            0     no table
            1     unlikely to have a table
            2     likely to have a table
            3     even more likely to have a table
            4     extremely likely to have a table
         * Setting the condition for finding a table at score >= 2 works
           well, except for false positives on kanji and landscape text.
         * These false positives can be removed by setting the condition
           at score >= 3, but recall is lowered because it will not find
           tables without either horizontal or vertical lines.
     (4) Most of the processing takes place at 75 ppi.
     (5) Internally, three numbers are determined, for horizontal and
         vertical fg lines, and for vertical bg lines.  From these,
         four tests are made to decide if there is a table occupying
         a significant part of the image.
     (6) Images have arbitrary content and would be likely to trigger
         this detector, so they are checked for first, and if found,
         return with a 0 (no table) score.
     (7) Musical scores (tablature) are likely to trigger the detector.
     (8) Tables of content with more than 2 columns are likely to
         trigger the detector.
     (9) For debug output, input a pre-allocated pixa.

Definition at line 1630 of file pageseg.c.

References L_COPY, L_INSERT, L_LANDSCAPE_MODE, L_SELECT_IF_GTE, L_SELECT_WIDTH, pixaAddPix(), pixClone(), pixCountConnComp(), pixDeskewBoth(), pixDestroy(), pixDilateBrick(), pixGenerateHalftoneMask(), pixInvert(), pixMorphSequence(), pixOr(), pixPrepare1bpp(), pixRotate90(), pixScale(), pixSeedfillBinary(), pixSelectBySize(), pixSubtract(), and pixZero().

◆ pixDecideIfText()

l_ok pixDecideIfText ( PIX *  pixs,
BOX *  box,
l_int32 *  pistext,
PIXA *  pixadb 
)

pixDecideIfText()

Parameters
[in]pixsany depth
[in]box[optional] if null, use entire pixs
[out]pistext1 if text; 0 if photo; -1 if not determined or empty
[in]pixadb[optional] pre-allocated, for showing intermediate computation; use NULL to skip
Returns
0 if OK, 1 on error
Notes:
     (1) It is assumed that pixs has the correct resolution set.
         If the resolution is 0, we set to 300 and issue a warning.
     (2) If necessary, the image is scaled to 300 ppi; most of the
         processing is done at this resolution.
     (3) Text is assumed to be in horizontal lines.
     (4) Because thin vertical lines are removed before filtering for
         text lines, this should identify tables as text.
     (5) If box is null and pixs contains both text lines and line art,
         this function might return istext == true.
     (6) If the input pixs is empty, or for some other reason the
         result can not be determined, return -1.
     (7) For debug output, input a pre-allocated pixa.

Definition at line 1374 of file pageseg.c.

References pixDestroy(), pixPrepare1bpp(), and pixZero().

◆ pixEstimateBackground()

l_ok pixEstimateBackground ( PIX *  pixs,
l_int32  darkthresh,
l_float32  edgecrop,
l_int32 *  pbg 
)

pixEstimateBackground()

Parameters
[in]pixs8 bpp, with or without colormap
[in]darkthreshpixels below this value are never considered part of the background; typ. 70; use 0 to skip
[in]edgecropfraction of half-width on each side, and of half-height at top and bottom, that are cropped
[out]pbgestimated background, or 0 on error
Returns
0 if OK, 1 on error
Notes:
     (1) Caller should check that return bg value is > 0.

Definition at line 1865 of file pageseg.c.

◆ pixExtractRawTextlines()

PIXA* pixExtractRawTextlines ( PIX *  pixs,
l_int32  maxw,
l_int32  maxh,
l_int32  adjw,
l_int32  adjh,
PIXA *  pixadb 
)

pixExtractRawTextlines()

Parameters
[in]pixsany depth, assumed to have nearly horizontal text
[in]maxw,maxhinitial filtering: remove any components in pixs with dimensions larger than maxw or maxh; use 0 for default values.
[in]adjw,adjhfinal adjustment of boxes representing each text line. If > 0, these increase the box size at each edge by this amount.
[in]pixadbpixa for saving intermediate steps; NULL to omit
Returns
pixa of textline images, including bounding boxes, or NULL on error
Notes:
     (1) This function assumes that textlines have sufficient
         vertical separation and small enough skew so that a
         horizontal dilation sufficient to join words will not join
         textlines.  It aggressively joins textlines across multiple
         columns, so if that is not desired, you must either (a) make
         sure that pixs is a single column of text or (b) use instead
         pixExtractTextlines(), which is more conservative
         about joining text fragments that have vertical overlap.
     (2) This first removes components from pixs that are either
         very wide (> maxw) or very tall (> maxh).
     (3) For reasonable accuracy, the resolution of pixs should be
         at least 100 ppi.  For reasonable efficiency, the resolution
         should not exceed 600 ppi.
     (4) This can be used to determine if some region of a scanned
         image is horizontal text.
     (5) As an example, for a pix with resolution 300 ppi, a reasonable
         set of parameters is:
            pixExtractRawTextlines(pix, 150, 150, 0, 0, NULL);
     (6) The output pixa is composed of subimages, one for each textline,
         and the boxa in the pixa tells where in pixs each textline goes.

Definition at line 1098 of file pageseg.c.

◆ pixExtractTextlines()

PIXA* pixExtractTextlines ( PIX *  pixs,
l_int32  maxw,
l_int32  maxh,
l_int32  minw,
l_int32  minh,
l_int32  adjw,
l_int32  adjh,
PIXA *  pixadb 
)

pixExtractTextlines()

Parameters
[in]pixsany depth, assumed to have nearly horizontal text
[in]maxw,maxhinitial filtering: remove any components in pixs with dimensions larger than maxw or maxh
[in]minw,minhfinal filtering: remove extracted 'lines' with sizes smaller than minw or minh; use 0 for default.
[in]adjw,adjhfinal adjustment of boxes representing each text line. If > 0, these increase the box size at each edge by this amount.
[in]pixadbpixa for saving intermediate steps; NULL to omit
Returns
pixa of textline images, including bounding boxes, or NULL on error
Notes:
     (1) This function assumes that textline fragments have sufficient
         vertical separation and small enough skew so that a
         horizontal dilation sufficient to join words will not join
         textlines.  It does not guarantee that horizontally adjacent
         textline fragments on the same line will be joined.
     (2) For images with multiple columns, it attempts to avoid joining
         textlines across the space between columns.  If that is not
         a concern, you can also use pixExtractRawTextlines(),
         which will join them with alacrity.
     (3) This first removes components from pixs that are either
         wide (> maxw) or tall (> maxh).
     (4) A final filtering operation removes small components, such
         that width < minw or height < minh.
     (5) For reasonable accuracy, the resolution of pixs should be
         at least 100 ppi.  For reasonable efficiency, the resolution
         should not exceed 600 ppi.
     (6) This can be used to determine if some region of a scanned
         image is horizontal text.
     (7) As an example, for a pix with resolution 300 ppi, a reasonable
         set of parameters is:
            pixExtractTextlines(pix, 150, 150, 36, 20, 5, 5, NULL);
         The defaults minw and minh for 300 ppi are about 36 and 20,
         so the same result is obtained with:
            pixExtractTextlines(pix, 150, 150, 0, 0, 5, 5, NULL);
     (8) The output pixa is composed of subimages, one for each textline,
         and the boxa in the pixa tells where in pixs each textline goes.

Definition at line 958 of file pageseg.c.

◆ pixFindLargeRectangles()

l_ok pixFindLargeRectangles ( PIX *  pixs,
l_int32  polarity,
l_int32  nrect,
BOXA **  pboxa,
PIX **  ppixdb 
)

pixFindLargeRectangles()

Parameters
[in]pixs1 bpp
[in]polarity0 within background, 1 within foreground
[in]nrectnumber of rectangles to be found
[out]pboxalargest rectangles, sorted by decreasing area
[in,out]ppixdboptional return output with rectangles drawn on it
Returns
0 if OK, 1 on error
Notes:
     (1) This does a greedy search to find the largest rectangles,
         either black or white and without overlaps, in pix.
     (2) See pixFindLargestRectangle(), which is called multiple
         times, for details.  On each call, the largest rectangle
         found is painted, so that none of its pixels can be
         used later, before calling it again.
     (3) This function is surprisingly fast.  Although
         pixFindLargestRectangle() runs at about 50 MPix/sec, when it
         is run multiple times by pixFindLargeRectangles(), it processes
         at 150 - 250 MPix/sec, and the time is approximately linear
         in nrect.  For example, for a 1 MPix image, searching for
         the largest 50 boxes takes about 0.2 seconds.

Definition at line 1949 of file pageseg.c.

◆ pixFindLargestRectangle()

l_ok pixFindLargestRectangle ( PIX *  pixs,
l_int32  polarity,
BOX **  pbox,
PIX **  ppixdb 
)

pixFindLargestRectangle()

Parameters
[in]pixs1 bpp
[in]polarity0 within background, 1 within foreground
[out]pboxlargest area rectangle
[in,out]ppixdboptional return output with rectangle drawn on it
Returns
0 if OK, 1 on error
Notes:
     (1) This is a simple and elegant solution to a problem in
         computational geometry that at first appears to be quite
         difficult: what is the largest rectangle that can be
         placed in the image, covering only pixels of one polarity
         (bg or fg)?  The solution is O(n), where n is the number
         of pixels in the image, and it requires nothing more than
         using a simple recursion relation in a single sweep of the image.
     (2) In a sweep from UL to LR with left-to-right being the fast
         direction, calculate the largest white rectangle at (x, y),
         using previously calculated values at pixels #1 and #2:
            #1:    (x, y - 1)
            #2:    (x - 1, y)
         We also need the most recent "black" pixels that were seen
         in the current row and column.
         Consider the largest area.  There are only two possibilities:
            (a)  Min(w(1), horizdist) * (h(1) + 1)
            (b)  Min(h(2), vertdist) * (w(2) + 1)
         where
            horizdist: the distance from the rightmost "black" pixel seen
                       in the current row across to the current pixel
            vertdist: the distance from the lowest "black" pixel seen
                      in the current column down to the current pixel
         and we choose the Max of (a) and (b).
     (3) To convince yourself that these recursion relations are correct,
         it helps to draw the maximum rectangles at #1 and #2.
         Then for #1, you try to extend the rectangle down one line,
         so that the height is h(1) + 1.  Do you get the full
         width of #1, w(1)?  It depends on where the black pixels are
         in the current row.  You know the final width is bounded by w(1)
         and w(2) + 1, but the actual value depends on the distribution
         of black pixels in the current row that are at a distance
         from the current pixel that is between these limits.
         We call that value "horizdist", and the area is then given
         by the expression (a) above.  Using similar reasoning for #2,
         where you attempt to extend the rectangle to the right
         by 1 pixel, you arrive at (b).  The largest rectangle is
         then found by taking the Max.

Definition at line 2052 of file pageseg.c.

References pixCreate(), pixGetData(), pixGetDimensions(), and pixGetLinePtrs().

◆ pixFindPageForeground()

BOX* pixFindPageForeground ( PIX *  pixs,
l_int32  threshold,
l_int32  mindist,
l_int32  erasedist,
l_int32  showmorph,
PIXAC *  pixac 
)

pixFindPageForeground()

Parameters
[in]pixsfull resolution (any type or depth)
[in]thresholdfor binarization; typically about 128
[in]mindistmin distance of text from border to allow cleaning near border; at 2x reduction, this should be larger than 50; typically about 70
[in]erasedistwhen conditions are satisfied, erase anything within this distance of the edge; typically 20-30 at 2x reduction
[in]showmorphdebug: set to a negative integer to show steps in generating masks; this is typically used for debugging region extraction
[in]pixacdebug: allocate outside and pass this in to accumulate results of each call to this function, which can be displayed in a mosaic or a pdf.
Returns
box region including foreground, with some pixel noise removed, or NULL if not found
Notes:
     (1) This doesn't simply crop to the fg.  It attempts to remove
         pixel noise and junk at the edge of the image before cropping.
         The input threshold is used if pixs is not 1 bpp.
     (2) This is not intended to work on small thumbnails.  The
         dimensions of pixs must be at least MinWidth x MinHeight.
     (3) Debug: set showmorph to display the intermediate image in
         the morphological operations on this page.
     (4) Debug: to get pdf output of results when called repeatedly,
         call with an existing pixac, which will add an image of this page,
         with the fg outlined.  If no foreground is found, there is
         no output for this page image.

Definition at line 571 of file pageseg.c.

References pixGetDimensions().

◆ pixFindThreshFgExtent()

l_ok pixFindThreshFgExtent ( PIX *  pixs,
l_int32  thresh,
l_int32 *  ptop,
l_int32 *  pbot 
)

pixFindThreshFgExtent()

Parameters
[in]pixs1 bpp
[in]threshthreshold number of pixels in row
[out]ptop[optional] location of top of region
[out]pbot[optional] location of bottom of region
Returns
0 if OK, 1 on error

Definition at line 1540 of file pageseg.c.

◆ pixGenerateHalftoneMask()

PIX* pixGenerateHalftoneMask ( PIX *  pixs,
PIX **  ppixtext,
l_int32 *  phtfound,
PIXA *  pixadb 
)

pixGenerateHalftoneMask()

Parameters
[in]pixs1 bpp, assumed to be 150 to 200 ppi
[out]ppixtext[optional] text part of pixs
[out]phtfound[optional] 1 if the mask is not empty
[in]pixadbinput for collecting debug pix; use NULL to skip
Returns
pixd halftone mask, or NULL on error
Notes:
     (1) This is not intended to work on small thumbnails.  The
         dimensions of pixs must be at least MinWidth x MinHeight.

Definition at line 306 of file pageseg.c.

Referenced by pixAutoPhotoinvert(), pixDecideIfTable(), and pixGenHalftoneMask().

◆ pixGenHalftoneMask()

PIX* pixGenHalftoneMask ( PIX *  pixs,
PIX **  ppixtext,
l_int32 *  phtfound,
l_int32  debug 
)

pixGenHalftoneMask()

Deprecated:
  This wrapper avoids an ABI change with tesseract 3.0.4.
  It should be removed when we no longer need to support 3.0.4.
  The debug parameter is ignored (assumed 0).

Definition at line 281 of file pageseg.c.

References pixGenerateHalftoneMask().

◆ pixGenTextblockMask()

PIX* pixGenTextblockMask ( PIX *  pixs,
PIX *  pixvws,
PIXA *  pixadb 
)

pixGenTextblockMask()

Parameters
[in]pixs1 bpp, textline mask, assumed to be 150 to 200 ppi
[in]pixvwsvertical white space mask
[in]pixadbinput for collecting debug pix; use NULL to skip
Returns
pixd textblock mask, or NULL if empty or on error
Notes:
     (1) Both the input masks (textline and vertical white space) and
         the returned textblock mask are at the same resolution.
     (2) This is not intended to work on small thumbnails.  The
         dimensions of pixs must be at least MinWidth x MinHeight.
     (3) The result is somewhat noisy, in that small "blocks" of
         text may be included.  These can be removed by post-processing,
         using, e.g.,
            pixSelectBySize(pix, 60, 60, 4, L_SELECT_IF_EITHER,
                            L_SELECT_IF_GTE, NULL);

Definition at line 481 of file pageseg.c.

◆ pixGenTextlineMask()

PIX* pixGenTextlineMask ( PIX *  pixs,
PIX **  ppixvws,
l_int32 *  ptlfound,
PIXA *  pixadb 
)

pixGenTextlineMask()

Parameters
[in]pixs1 bpp, assumed to be 150 to 200 ppi
[out]ppixvwsvertical whitespace mask
[out]ptlfound[optional] 1 if the mask is not empty
[in]pixadbinput for collecting debug pix; use NULL to skip
Returns
pixd textline mask, or NULL on error
Notes:
     (1) The input pixs should be deskewed.
     (2) pixs should have no halftone pixels.
     (3) This is not intended to work on small thumbnails.  The
         dimensions of pixs must be at least MinWidth x MinHeight.
     (4) Both the input image and the returned textline mask
         are at the same resolution.

Definition at line 389 of file pageseg.c.

◆ pixGetRegionsBinary()

l_ok pixGetRegionsBinary ( PIX *  pixs,
PIX **  ppixhm,
PIX **  ppixtm,
PIX **  ppixtb,
PIXA *  pixadb 
)

pixGetRegionsBinary()

Parameters
[in]pixs1 bpp, assumed to be 300 to 400 ppi
[out]ppixhm[optional] halftone mask
[out]ppixtm[optional] textline mask
[out]ppixtb[optional] textblock mask
[in]pixadbinput for collecting debug pix; use NULL to skip
Returns
0 if OK, 1 on error
Notes:
     (1) It is best to deskew the image before segmenting.
     (2) Passing in pixadb enables debug output.

Definition at line 113 of file pageseg.c.

◆ pixPrepare1bpp()

PIX* pixPrepare1bpp ( PIX *  pixs,
BOX *  box,
l_float32  cropfract,
l_int32  outres 
)

pixPrepare1bpp()

Parameters
[in]pixsany depth
[in]box[optional] if null, use entire pixs
[in]cropfractfraction to be removed from the boundary; use 0.0 to retain the entire image
[in]outresdesired resolution of output image; if the input image resolution is not set, assume 300 ppi; use 0 to skip scaling.
Returns
pixd if OK, NULL on error
Notes:
     (1) This handles some common pre-processing operations,
         where the page segmentation algorithm takes a 1 bpp image.

Definition at line 1780 of file pageseg.c.

References boxCreate(), boxDestroy(), pixClipRectangle(), and pixGetDimensions().

Referenced by pixDecideIfTable(), and pixDecideIfText().

◆ pixSplitComponentWithProfile()

BOXA* pixSplitComponentWithProfile ( PIX *  pixs,
l_int32  delta,
l_int32  mindel,
PIX **  ppixdebug 
)

pixSplitComponentWithProfile()

Parameters
[in]pixs1 bpp, exactly one connected component
[in]deltadistance used in extrema finding in a numa; typ. 10
[in]mindelminimum required difference between profile minimum and profile values +2 and -2 away; typ. 7
[out]ppixdebug[optional] debug image of splitting
Returns
boxa of c.c. after splitting, or NULL on error
Notes:
     (1) This will split the most obvious cases of touching characters.
         The split points it is searching for are narrow and deep
         minima in the vertical pixel projection profile, after a
         large vertical closing has been applied to the component.

Definition at line 802 of file pageseg.c.

◆ pixSplitIntoCharacters()

l_ok pixSplitIntoCharacters ( PIX pixs,
l_int32  minw,
l_int32  minh,
BOXA **  pboxa,
PIXA **  ppixa,
PIX **  ppixdebug 
)

pixSplitIntoCharacters()

Parameters
[in]pixs1 bpp, contains only deskewed text
[in]minwmin component width for initial filtering; typ. 4
[in]minhmin component height for initial filtering; typ. 4
[out]pboxa[optional] character bounding boxes
[out]ppixa[optional] character images
[out]ppixdebug[optional] showing splittings
Returns
0 if OK, 1 on error
Notes:
     (1) This is a simple function that attempts to find split points
         based on vertical pixel profiles.
     (2) It should be given an image that has an arbitrary number
         of text characters.
     (3) The returned pixa includes the boxes from which the
         (possibly split) components are extracted.

Definition at line 701 of file pageseg.c.