Leptonica  1.82.0
Image processing and image analysis suite
dewarp2.c File Reference
#include <math.h>
#include "allheaders.h"

Go to the source code of this file.

Macros

#define DEBUG_TEXTLINE_CENTERS   0 /* set this to 1 for debugging */
 
#define DEBUG_SHORT_LINES   0 /* ditto */
 

Functions

static PTA * dewarpGetMeanVerticals (PIX *pixs, l_int32 x, l_int32 y)
 
static l_int32 dewarpGetLineEndPoints (l_int32 h, PTAA *ptaa, PTA **pptal, PTA **pptar)
 
static l_int32 dewarpFilterLineEndPoints (L_DEWARP *dew, PTA *ptal, PTA *ptar, PTA **pptalf, PTA **pptarf)
 
static PTA * dewarpRemoveBadEndPoints (l_int32 w, PTA *ptas)
 
static l_int32 dewarpIsLineCoverageValid (PTAA *ptaa, l_int32 h, l_int32 *pntop, l_int32 *pnbot, l_int32 *pytop, l_int32 *pybot)
 
static l_int32 dewarpLinearLSF (PTA *ptad, l_float32 *pa, l_float32 *pb, l_float32 *pmederr)
 
static l_int32 dewarpQuadraticLSF (PTA *ptad, l_float32 *pa, l_float32 *pb, l_float32 *pc, l_float32 *pmederr)
 
static l_int32 pixRenderMidYs (PIX *pixs, NUMA *namidys, l_int32 linew)
 
static l_int32 pixRenderHorizEndPoints (PIX *pixs, PTA *ptal, PTA *ptar, l_uint32 color)
 
l_ok dewarpBuildPageModel (L_DEWARP *dew, const char *debugfile)
 
l_ok dewarpFindVertDisparity (L_DEWARP *dew, PTAA *ptaa, l_int32 rotflag)
 
l_ok dewarpFindHorizDisparity (L_DEWARP *dew, PTAA *ptaa)
 
PTAA * dewarpGetTextlineCenters (PIX *pixs, l_int32 debugflag)
 
PTAA * dewarpRemoveShortLines (PIX *pixs, PTAA *ptaas, l_float32 fract, l_int32 debugflag)
 
l_ok dewarpFindHorizSlopeDisparity (L_DEWARP *dew, PIX *pixb, l_float32 fractthresh, l_int32 parity)
 
l_ok dewarpBuildLineModel (L_DEWARP *dew, l_int32 opensize, const char *debugfile)
 
l_ok dewarpaModelStatus (L_DEWARPA *dewa, l_int32 pageno, l_int32 *pvsuccess, l_int32 *phsuccess)
 

Variables

static const l_float32 MinRatioLinesToHeight = 0.45
 
static const l_int32 MinLinesForHoriz1 = 10
 
static const l_int32 MinLinesForHoriz2 = 3
 
static const l_float32 AllowedWidthFract = 0.05
 

Detailed Description


   Build the page disparity model

     Build basic page disparity model
         l_int32            dewarpBuildPageModel()
         l_int32            dewarpFindVertDisparity()
         l_int32            dewarpFindHorizDisparity()
         PTAA              *dewarpGetTextlineCenters()
         static PTA        *dewarpGetMeanVerticals()
         PTAA              *dewarpRemoveShortLines()
         static l_int32     dewarpGetLineEndPoints()
         static l_int32     dewarpFilterLineEndPoints()
         static PTA        *dewarpRemoveBadEndPoints()
         static l_int32     dewarpIsLineCoverageValid()
         static l_int32     dewarpLinearLSF()
         static l_int32     dewarpQuadraticLSF()

     Build disparity model for slope near binding
         l_int32            dewarpFindHorizSlopeDisparity()

     Build the line disparity model
         l_int32            dewarpBuildLineModel()

     Query model status
         l_int32            dewarpaModelStatus()

     Rendering helpers
         static l_int32     pixRenderMidYs()
         static l_int32     pixRenderHorizEndPoints()

Definition in file dewarp2.c.

Function Documentation

◆ dewarpaModelStatus()

l_ok dewarpaModelStatus ( L_DEWARPA *  dewa,
l_int32  pageno,
l_int32 *  pvsuccess,
l_int32 *  phsuccess 
)

dewarpaModelStatus()

Parameters
[in] dewa
[in] pageno
[out] pvsuccess: [optional] 1 on success
[out] phsuccess: [optional] 1 on success
Returns
0 if OK, 1 on error
Notes:
     (1) This tests if a model has been built, not if it is valid.

Definition at line 1919 of file dewarp2.c.

References dewarpaGetDewarp(), L_Dewarp::hsuccess, and L_Dewarp::vsuccess.

Referenced by dewarpSinglePageRun().

◆ dewarpBuildLineModel()

l_ok dewarpBuildLineModel ( L_DEWARP *  dew,
l_int32  opensize,
const char *  debugfile 
)

dewarpBuildLineModel()

Parameters
[in] dew
[in] opensize: size of opening to remove perpendicular lines
[in] debugfile: use NULL to skip writing this
Returns
0 if OK, 1 if unable to build the model or on error
Notes:
     (1) This builds the horizontal and vertical disparity arrays
         for an input of ruled lines, typically for calibration.
         In book scanning, you could lay the ruled paper over a page.
         Then for that page and several below it, you can use the
         disparity correction of the line model to dewarp the pages.
     (2) The dew has been initialized with the image of ruled lines.
         These lines must be continuous, but we do a small amount
         of pre-processing here to ensure that.
     (3) opensize is typically about 8.  It must be larger than
         the thickness of the lines to be extracted.  This is the
         default value, which is applied if opensize < 3.
     (4) Sets vsuccess = 1 and hsuccess = 1 if the vertical and/or
         horizontal disparity arrays build.
     (5) Similar to dewarpBuildPageModel(), except here the vertical
         and horizontal disparity arrays are both built from ruled lines.
         See notes there.

Definition at line 1735 of file dewarp2.c.

References L_Dewarp::debug, L_Dewarp::hsuccess, lept_mkdir(), lept_rmdir(), L_Dewarp::pixs, and L_Dewarp::vsuccess.

◆ dewarpBuildPageModel()

l_ok dewarpBuildPageModel ( L_DEWARP *  dew,
const char *  debugfile 
)

dewarpBuildPageModel()

Parameters
[in] dew
[in] debugfile: use NULL to skip writing this
Returns
0 if OK, 1 if unable to build the model or on error
Notes:
     (1) This is the basic function that builds the horizontal and
         vertical disparity arrays, which allow determination of the
         src pixel in the input image corresponding to each
         dest pixel in the dewarped image.
     (2) Sets vsuccess = 1 if the vertical disparity array builds.
         Always attempts to build the horizontal disparity array,
         even if it will not be requested (useboth == 0).
         Sets hsuccess = 1 if horizontal disparity builds.
     (3) The method is as follows:
         (a) Estimate the points along the centers of all the
             long textlines.  If there are too few lines, no
             disparity models are built.
         (b) From the vertical deviation of the lines, estimate
             the vertical disparity.
         (c) From the ends of the lines, estimate the horizontal
             disparity, assuming that the text is made of lines
             that are close to left and right justified.
         (d) One can also compute an additional contribution to the
             horizontal disparity, inferred from slopes of the top
             and bottom lines.  We do not do this.
     (4) In more detail for the vertical disparity:
         (a) Fit a LS quadratic to center locations along each line.
             This smooths the curves.
         (b) Sample each curve at a regular interval, find the y-value
             of the mid-point on each curve, and subtract the sampled
             curve value from this value.  This is the vertical
             disparity at sampled points along each curve.
         (c) Fit a LS quadratic to each set of vertically aligned
             disparity samples.  This smooths the disparity values
             in the vertical direction.  Then resample at the same
             regular interval.  We now have a regular grid of smoothed
             vertical disparity values.
     (5) Once the sampled vertical disparity array is found, it can be
         interpolated to get a full resolution vertical disparity map.
         This can be applied directly to the src image pixels
         to dewarp the image in the vertical direction, making
         all textlines horizontal.  Likewise, the horizontal
         disparity array is used to left- and right-align the
         longest textlines.

Definition at line 156 of file dewarp2.c.

References L_Dewarp::debug, L_Dewarp::hsuccess, lept_mkdir(), lept_rmdir(), L_Dewarp::pixs, and L_Dewarp::vsuccess.

Referenced by dewarpSinglePageRun().

◆ dewarpFilterLineEndPoints()

static l_int32 dewarpFilterLineEndPoints ( L_DEWARP *  dew,
PTA *  ptal,
PTA *  ptar,
PTA **  pptalf,
PTA **  pptarf 
)
static

dewarpFilterLineEndPoints()

Parameters
[in] dew
[in] ptal: input left end points of each line
[in] ptar: input right end points of each line
[out] pptalf: filtered left end points
[out] pptarf: filtered right end points
Returns
0 if OK, 1 on error.
Notes:
     (1) Avoid confusion with multiple columns by requiring that line
         end points be close enough to leftmost and rightmost end points.
         Must have at least 8 points on left and right after this step.
     (2) Apply second filtering step, find the median positions in
         top and bottom halves, and removing end points that are
         displaced too much from these in the x direction.
         Must have at least 6 points on left and right after this step.
     (3) Reminder: x and y in the pta are transposed; think x = f(y).

Definition at line 1130 of file dewarp2.c.

◆ dewarpFindHorizDisparity()

l_ok dewarpFindHorizDisparity ( L_DEWARP *  dew,
PTAA *  ptaa 
)

dewarpFindHorizDisparity()

Parameters
[in] dew
[in] ptaa: unsmoothed lines, not vertically ordered
Returns
0 if OK, 1 if horizontal disparity array is not built, or on error
Notes:
     (1) This builds a horizontal disparity model (HDM), but
         does not check it against constraints for validity.
         Constraint checking is done at rendering time.
     (2) Horizontal disparity is not required for a successful model;
         only the vertical disparity is required.  This will not be
         called if the function to build the vertical disparity fails.
     (3) This sets the hsuccess flag to 1 on success.
     (4) Internally in ptal1, ptar1, ptal2, ptar2: x and y are reversed,
         so the 'y' value is horizontal distance across the image width.
     (5) Debug output goes to /tmp/lept/dewmod/ for collection into a pdf.

Definition at line 567 of file dewarp2.c.

References L_Dewarp::debug, and L_Dewarp::hsuccess.

◆ dewarpFindHorizSlopeDisparity()

l_ok dewarpFindHorizSlopeDisparity ( L_DEWARP *  dew,
PIX *  pixb,
l_float32  fractthresh,
l_int32  parity 
)

dewarpFindHorizSlopeDisparity()

Parameters
[in] dew
[in] pixb: 1 bpp, with vert and horiz disparity removed
[in] fractthresh: threshold fractional difference in density
[in] parity: 0 if even page, 1 if odd page
Returns
0 if OK, 1 on error
Notes:
     (1) fractthresh is a threshold on the fractional difference in stroke
         density between left and right sides.  Process this
         disparity only if the absolute value of the fractional
         difference equals or exceeds this threshold.
     (2) parity indicates where the binding is: on the left for
         parity == 0 and on the right for parity == 1.
     (3) This takes a 1 bpp pixb where both vertical and horizontal
         disparity have been applied, so the text lines are straight and,
         more importantly, the line end points are vertically aligned.
         It estimates the foreshortening of the characters on the
         binding side, and if significant, computes a one-dimensional
         horizontal disparity function to compensate.
     (4) The first attempt was to use the average width of the
         connected components (c.c.) in vertical slices.  This does not work
         reliably, because the horizontal compression of the text is
         often accompanied by horizontal joining of c.c.
     (5) We use the density of vertical strokes, measured by first using
         a vertical opening, which improves the signal.  The result
         is relatively insensitive to the size of the opening; we use
         a 10-pixel opening.  The relative density is measured by
         finding the number of c.c. in a full height sliding window
         of width 50 pixels, and compute every 25 pixels.  Similar results
         are obtained counting c.c. that either intersect the window
         or are fully contained within it.
     (6) Debug output goes to /tmp/lept/dewmod/ for collection into a pdf.

Definition at line 1505 of file dewarp2.c.

References L_Dewarp::hvalid, and L_Dewarp::vvalid.

◆ dewarpFindVertDisparity()

l_ok dewarpFindVertDisparity ( L_DEWARP *  dew,
PTAA *  ptaa,
l_int32  rotflag 
)

dewarpFindVertDisparity()

Parameters
[in] dew
[in] ptaa: unsmoothed lines, not vertically ordered
[in] rotflag: 0 if using dew->pixs; 1 if rotated by 90 degrees cw
Returns
0 if OK, 1 on error
Notes:
     (1) This starts with points along the centers of textlines.
         It does quadratic fitting (and smoothing), first along the
         lines and then in the vertical direction, to generate
         the sampled vertical disparity map.  This can then be
         interpolated to full resolution and used to remove
         the vertical line warping.
     (2) Use rotflag == 1 if you are dewarping vertical lines, as
         is done in dewarpBuildLineModel().  The usual case is for
         rotflag == 0.
     (3) Note that this builds a vertical disparity model (VDM), but
         does not check it against constraints for validity.
         Constraint checking is done after building the models,
         and before inserting reference models.
     (4) This sets the vsuccess flag to 1 on success.
     (5) Pix debug output goes to /tmp/dewvert/ for collection into
         a pdf.  Non-pix debug output goes to /tmp.

Definition at line 303 of file dewarp2.c.

References applyQuadraticFit(), L_Dewarp::debug, generatePtaFilledCircle(), L_CLONE, L_INSERT, lept_mkdir(), L_Dewarp::nlines, numaAddNumber(), numaCreate(), numaDestroy(), L_Dewarp::nx, L_Dewarp::ny, pixClone(), pixConvertTo32(), pixDisplayPtaaPattern(), pixGenerateFromPta(), pixRotateOrth(), L_Dewarp::pixs, ptaaAddPta(), ptaaCreate(), ptaAddPt(), ptaaGetCount(), ptaaGetPta(), ptaCreate(), ptaCreateFromNuma(), ptaDestroy(), ptaGetArrays(), ptaGetQuadraticLSF(), L_Dewarp::sampling, and L_Dewarp::vsuccess.

◆ dewarpGetLineEndPoints()

static l_int32 dewarpGetLineEndPoints ( l_int32  h,
PTAA *  ptaa,
PTA **  pptal,
PTA **  pptar 
)
static

dewarpGetLineEndPoints()

Parameters
[in] h: height of pixs
[in] ptaa: lines
[out] pptal: left end points of each line
[out] pptar: right end points of each line
Returns
0 if OK, 1 on error.
Notes:
     (1) We require that the set of end points extends over 45% of the
         height of the input image, to ensure good coverage and
         avoid extrapolating the curvature too far beyond the
         actual textlines.  Large extrapolations are particularly
         dangerous if used as a reference model.  We also require
         at least 10 lines of text.
     (2) We sort the lines from top to bottom (sort by x in the ptas).
     (3) For fitting the endpoints, x = f(y), we transpose x and y.
         Thus all these ptas have x and y swapped!

Definition at line 1051 of file dewarp2.c.

References ptaaGetCount().

◆ dewarpGetMeanVerticals()

static PTA * dewarpGetMeanVerticals ( PIX *  pixs,
l_int32  x,
l_int32  y 
)
static

dewarpGetMeanVerticals()

Parameters
[in] pixs: 1 bpp, single c.c.
[in] x,y: location of UL corner of pixs, relative to page image
Returns
pta (mean y-values in component for each x-value, both translated by (x,y))

Definition at line 922 of file dewarp2.c.

◆ dewarpGetTextlineCenters()

PTAA* dewarpGetTextlineCenters ( PIX *  pixs,
l_int32  debugflag 
)

dewarpGetTextlineCenters()

Parameters
[in] pixs: 1 bpp
[in] debugflag: 1 for debug output
Returns
ptaa of center values of textlines
Notes:
     (1) This in general does not have a point for each value
         of x, because there will be gaps between words.
         It doesn't matter because we will fit a quadratic to the
         points that we do have.

Definition at line 814 of file dewarp2.c.

◆ dewarpIsLineCoverageValid()

static l_int32 dewarpIsLineCoverageValid ( PTAA *  ptaa,
l_int32  h,
l_int32 *  pntop,
l_int32 *  pnbot,
l_int32 *  pytop,
l_int32 *  pybot 
)
static

dewarpIsLineCoverageValid()

Parameters
[in] ptaa: of validated lines
[in] h: height of pix
[out] pntop: number of lines in top half
[out] pnbot: number of lines in bottom half
[out] pytop: location of top line
[out] pybot: location of bottom line
Returns
1 if coverage is valid, 0 if not or on error.
Notes:
     (1) The criterion for valid coverage is:
         (a) there must be at least 4 lines in each half (top and bottom)
             of the image.
         (b) the coverage must be at least 50% of the image height

Definition at line 1294 of file dewarp2.c.

References L_SORT_INCREASING, numaAddNumber(), numaCreate(), numaDestroy(), numaGetIValue(), numaSort(), ptaaGetCount(), and ptaaGetPt().

◆ dewarpLinearLSF()

static l_int32 dewarpLinearLSF ( PTA *  ptad,
l_float32 *  pa,
l_float32 *  pb,
l_float32 *  pmederr 
)
static

dewarpLinearLSF()

Parameters
[in] ptad: left or right end points of longest lines
[out] pa: coeff a of LSF: y = ax + b
[out] pb: coeff b of LSF: y = ax + b
[out] pmederr: [optional] median error
Returns
0 if OK, 1 on error.
Notes:
     (1) This is used for finding the left or right sides of the text
         block, computed as a best-fit line.  Only the longest lines
         are input, so there are no outlier line ends.
     (2) The ptas for the end points all have x and y swapped.

Definition at line 1364 of file dewarp2.c.

References applyLinearFit(), numaAddNumber(), numaCreate(), numaDestroy(), numaGetMedian(), ptaGetCount(), ptaGetLinearLSF(), and ptaGetPt().

◆ dewarpQuadraticLSF()

static l_int32 dewarpQuadraticLSF ( PTA *  ptad,
l_float32 *  pa,
l_float32 *  pb,
l_float32 *  pc,
l_float32 *  pmederr 
)
static

dewarpQuadraticLSF()

Parameters
[in] ptad: left or right end points of longest lines
[out] pa: coeff a of LSF: y = ax^2 + bx + c
[out] pb: coeff b of LSF: y = ax^2 + bx + c
[out] pc: coeff c of LSF: y = ax^2 + bx + c
[out] pmederr: [optional] median error
Returns
0 if OK, 1 on error.
Notes:
     (1) This is used for finding the left or right sides of the text
         block, computed as a best-fit quadratic curve.  Only the
         longest lines are input, so there are no outlier line ends.
     (2) The ptas for the end points all have x and y swapped.

Definition at line 1422 of file dewarp2.c.

References applyQuadraticFit(), numaAddNumber(), numaCreate(), numaDestroy(), numaGetMedian(), ptaGetCount(), ptaGetPt(), and ptaGetQuadraticLSF().

◆ dewarpRemoveBadEndPoints()

static PTA * dewarpRemoveBadEndPoints ( l_int32  w,
PTA *  ptas 
)
static

dewarpRemoveBadEndPoints()

Parameters
[in] w: width of input image
[in] ptas: left or right line end points
Returns
ptad filtered left or right end points, or NULL on error.
Notes:
     (1) The input set is sorted by line position (x value).
         Break into two (upper and lower); for each find the median
         horizontal (y value), and remove all points farther than
         a fraction of the image width from this.  Make sure each
         part still has at least 3 points, and join the two sections
         before returning.
     (2) Reminder: x and y in the pta are transposed; think x = f(y).

Definition at line 1216 of file dewarp2.c.

◆ dewarpRemoveShortLines()

PTAA* dewarpRemoveShortLines ( PIX *  pixs,
PTAA *  ptaas,
l_float32  fract,
l_int32  debugflag 
)

dewarpRemoveShortLines()

Parameters
[in] pixs: 1 bpp
[in] ptaas: input lines
[in] fract: minimum fraction of longest line to keep
[in] debugflag
Returns
ptaad containing only lines of sufficient length, or NULL on error

Definition at line 968 of file dewarp2.c.

◆ pixRenderHorizEndPoints()

static l_int32 pixRenderHorizEndPoints ( PIX *  pixs,
PTA *  ptal,
PTA *  ptar,
l_uint32  color 
)
static

pixRenderHorizEndPoints()

Parameters
[in] pixs: 32 bpp
[in] ptal: left side line end points
[in] ptar: right side line end points
[in] color: 0xrrggbb00
Returns
0 if OK, 1 on error

Definition at line 1990 of file dewarp2.c.

References generatePtaFilledCircle(), pixDestroy(), pixDisplayPtaPattern(), pixGenerateFromPta(), ptaDestroy(), and ptaTranspose().

◆ pixRenderMidYs()

static l_int32 pixRenderMidYs ( PIX *  pixs,
NUMA *  namidys,
l_int32  linew 
)
static

pixRenderMidYs()

Parameters
[in] pixs: 32 bpp
[in] namidys: y location of reference lines for vertical disparity
[in] linew: width of rendered line; typ 2
Returns
0 if OK, 1 on error

Definition at line 1953 of file dewarp2.c.