53 #include <config_auto.h>
57 #include "allheaders.h"
59 #define L_BUF_SIZE 512
/* Forward declarations of the file-local (static) helpers used further down. */
/* testLineAlignmentX: takes two NUMA pointers plus shiftx, delx and nperline;
 * returns an l_int32. */
64 static l_int32 testLineAlignmentX(
NUMA *na1,
NUMA *na2, l_int32 shiftx,
65 l_int32 delx, l_int32 nperline);
/* countAlignedMatches: takes four NUMA pointers, two counts (n1, n2), two
 * tolerances (delx, dely), nreq and an output pointer psame.
 * NOTE(review): the end of this prototype is not visible in this listing. */
66 static l_int32 countAlignedMatches(
NUMA *nai1,
NUMA *nai2,
NUMA *nasx,
67 NUMA *nasy, l_int32 n1, l_int32 n2,
68 l_int32 delx, l_int32 dely,
69 l_int32 nreq, l_int32 *psame,
/* printRowIndices: takes two l_int32 arrays with their lengths; returns nothing. */
71 static void printRowIndices(l_int32 *index1, l_int32 n1,
72 l_int32 *index2, l_int32 n2);
/*
 * jbCorrelation() -- partial view of the definition; this listing omits
 * several lines (start of the signature, braces, some statements).
 *
 * What the visible lines do:
 *   - return 1 through ERROR_INT when dirin or rootname is reported as not
 *     defined, or when components is not one of JB_CONN_COMPS,
 *     JB_CHARACTERS, JB_WORDS
 *   - create a classifier with jbCorrelationInit(), pass safiles to
 *     jbAddPages(), then save the result and write it under rootname
 *   - render the saved data and write one PNG per rendered page, named
 *     "<rootname>.<4-digit index>"
 *   - destroy the classifier and the saved data
 */
103 const char *rootname,
109 l_int32 nfiles, i, numpages;
116 PROCNAME(
"jbCorrelation");
/* Argument checks: each failure reports a message and returns 1. */
119 return ERROR_INT(
"dirin not defined", procName, 1);
121 return ERROR_INT(
"rootname not defined", procName, 1);
/* components must be exactly one of the three JB_* selectors. */
122 if (components != JB_CONN_COMPS && components != JB_CHARACTERS &&
123 components != JB_WORDS)
124 return ERROR_INT(
"components invalid", procName, 1);
/* The two size arguments are passed as 0 here; their meaning is defined by
 * jbCorrelationInit(), which is not shown in this file view. */
130 classer = jbCorrelationInit(components, 0, 0, thresh, weight);
/* NOTE(review): the return value of jbAddPages() is not checked here, and
 * classer is not visibly tested for NULL before use -- confirm. */
131 jbAddPages(classer, safiles);
/* Snapshot the classifier state and write it out using rootname.
 * NOTE(review): the return value of jbDataWrite() is not checked. */
134 data = jbDataSave(classer);
135 jbDataWrite(rootname, data);
/* Render the saved data; FALSE is the second argument to jbDataRender(). */
139 pixa = jbDataRender(data, FALSE);
/* A page-count mismatch is only reported to stderr; execution continues. */
141 if (numpages != nfiles)
142 lept_stderr(
"numpages = %d, nfiles = %d, not equal!\n",
/* One output file per rendered page. */
144 for (i = 0; i < numpages; i++) {
/* Output name is rootname + "." + zero-padded 4-digit page index; the write
 * is bounded by L_BUF_SIZE. */
146 snprintf(filename,
L_BUF_SIZE,
"%s.%04d", rootname, i);
/* NOTE(review): the return value of pixWrite() is not checked. */
148 pixWrite(filename, pix, IFF_PNG);
/* Release the classifier and the saved data. */
155 jbClasserDestroy(&classer);
156 jbDataDestroy(&data);
/*
 * jbRankHaus() -- partial view of the definition; this listing omits
 * several lines (start of the signature, braces, some statements).
 *
 * What the visible lines do:
 *   - return 1 through ERROR_INT when dirin or rootname is reported as not
 *     defined, or when components is not one of JB_CONN_COMPS,
 *     JB_CHARACTERS, JB_WORDS
 *   - create a classifier with jbRankHausInit(), pass safiles to
 *     jbAddPages(), then save the result and write it under rootname
 *   - render the saved data and write one PNG per rendered page, named
 *     "<rootname>.<4-digit index>"
 *   - destroy the classifier and the saved data
 */
185 const char *rootname,
191 l_int32 nfiles, i, numpages;
198 PROCNAME(
"jbRankHaus");
/* Argument checks: each failure reports a message and returns 1. */
201 return ERROR_INT(
"dirin not defined", procName, 1);
203 return ERROR_INT(
"rootname not defined", procName, 1);
/* components must be exactly one of the three JB_* selectors. */
204 if (components != JB_CONN_COMPS && components != JB_CHARACTERS &&
205 components != JB_WORDS)
206 return ERROR_INT(
"components invalid", procName, 1);
/* The two size arguments are passed as 0 here; size and rank are forwarded
 * unchanged from the caller. */
212 classer = jbRankHausInit(components, 0, 0, size, rank);
/* NOTE(review): the return value of jbAddPages() is not checked here, and
 * classer is not visibly tested for NULL before use -- confirm. */
213 jbAddPages(classer, safiles);
/* Snapshot the classifier state and write it out using rootname.
 * NOTE(review): the return value of jbDataWrite() is not checked. */
216 data = jbDataSave(classer);
217 jbDataWrite(rootname, data);
/* Render the saved data; FALSE is the second argument to jbDataRender(). */
221 pixa = jbDataRender(data, FALSE);
/* A page-count mismatch is only reported to stderr; execution continues. */
223 if (numpages != nfiles)
224 lept_stderr(
"numpages = %d, nfiles = %d, not equal!\n",
/* One output file per rendered page. */
226 for (i = 0; i < numpages; i++) {
/* Output name is rootname + "." + zero-padded 4-digit page index; the write
 * is bounded by L_BUF_SIZE. */
228 snprintf(filename,
L_BUF_SIZE,
"%s.%04d", rootname, i);
/* NOTE(review): the return value of pixWrite() is not checked. */
230 pixWrite(filename, pix, IFF_PNG);
/* Release the classifier and the saved data. */
237 jbClasserDestroy(&classer);
238 jbDataDestroy(&data);
/*
 * jbWordsInTextlines() -- partial view of the definition; lines are missing
 * from this listing.
 *
 * What the visible lines do:
 *   - return NULL (cast to JBCLASSER *) through ERROR_PTR when &natl or
 *     dirin is reported as not defined, or when reduction is neither 1 nor 2
 *   - create a JB_WORDS correlation classifier with the caller's maxwidth,
 *     maxheight, thresh and weight
 *   - loop over nfiles input images; an image that cannot be read produces a
 *     warning carrying its index
 *   - hand each page's components (pix2, boxa, pixa) to
 *     jbAddPageComponents()
 */
280 l_int32 nfiles, i, w, h;
288 PROCNAME(
"jbWordsInTextlines");
/* Argument checks: each failure reports a message and returns NULL. */
291 return (
JBCLASSER *)ERROR_PTR(
"&natl not defined", procName, NULL);
294 return (
JBCLASSER *)ERROR_PTR(
"dirin not defined", procName, NULL);
/* Only reduction factors 1 and 2 are accepted. */
295 if (reduction != 1 && reduction != 2)
296 return (
JBCLASSER *)ERROR_PTR(
"reduction not in {1,2}", procName, NULL);
/* The classifier is always created for word components (JB_WORDS). */
302 classer = jbCorrelationInit(JB_WORDS, maxwidth, maxheight, thresh, weight);
/* Process the input files one at a time. */
306 for (i = 0; i < nfiles; i++) {
/* A failed read is reported as a warning; what follows the warning is not
 * visible in this listing. */
308 if ((pix1 =
pixRead(fname)) == NULL) {
309 L_WARNING(
"image file %d not read\n", procName, i);
/* Add this page's boxes and component images to the classifier. */
322 jbAddPageComponents(classer, pix2, boxa, pixa);
/*
 * pixGetWordsInTextlines() -- partial view; only the argument checks and the
 * start of the word-box extraction call are visible.
 */
399 PROCNAME(
"pixGetWordsInTextlines");
/* All three output pointers are required; a missing one returns 1. */
401 if (!pboxad || !ppixad || !pnai)
402 return ERROR_INT(
"&boxad, &pixad, &nai not all defined", procName, 1);
407 return ERROR_INT(
"pixs not defined", procName, 1);
/* The caller's min/max width and height limits are forwarded unchanged;
 * the remaining arguments of this call are not visible in this listing. */
410 pixWordBoxesByDilation(pixs, minwidth, minheight, maxwidth, maxheight,
/*
 * pixGetWordBoxesInTextlines() -- partial view; only the output reset, the
 * argument checks and the start of the word-box extraction call are visible.
 */
472 PROCNAME(
"pixGetWordBoxesInTextlines");
/* Clear the optional output first so it is NULL on the error returns below. */
474 if (pnai) *pnai = NULL;
/* Failure returns 1; the condition guarding this return is not visible. */
476 return ERROR_INT(
"&boxad and &nai not both defined", procName, 1);
479 return ERROR_INT(
"pixs not defined", procName, 1);
/* The caller's min/max width and height limits are forwarded unchanged;
 * the remaining arguments of this call are not visible in this listing. */
482 pixWordBoxesByDilation(pixs, minwidth, minheight, maxwidth, maxheight,
/*
 * pixFindWordAndCharacterBoxes() -- partial view of the definition; many
 * lines are missing from this listing.
 *
 * What the visible lines do:
 *   - reset both outputs to NULL, then require both output pointers
 *   - reject a missing pixs and a 1 bpp pixs (returns 1)
 *   - emit a warning that mentions the threshold value
 *   - rescale a set of boxes by 1/scalefact
 *   - write "words.png" and "chars.png" under debugdir, freeing the joined
 *     path string after each write
 */
531 const char *debugdir)
533 char *debugfile, *subdir;
534 l_int32 i, xs, ys, xb, yb, nb, loc;
537 BOXA *boxa1, *boxa1a, *boxa2, *boxa3, *boxa4, *boxa5, *boxaw;
539 PIX *pix1, *pix2, *pix3, *pix3a, *pix4, *pix5;
541 PROCNAME(
"pixFindWordAndCharacterBoxes");
/* Clear the outputs before validating, so they are NULL on any error return. */
543 if (pboxaw) *pboxaw = NULL;
544 if (pboxaac) *pboxaac = NULL;
/* Both output pointers are mandatory. */
545 if (!pboxaw || !pboxaac)
546 return ERROR_INT(
"&boxaw and &boxaac not defined", procName, 1);
/* The input must exist and must not be 1 bpp. */
547 if (!pixs || pixGetDepth(pixs) == 1)
548 return ERROR_INT(
"pixs not defined or 1 bpp", procName, 1);
/* Warning only; the condition that triggers it is not visible here. */
550 L_WARNING(
"threshold is %d; may be too high\n", procName, thresh);
554 return ERROR_INT(
"pix1 not made", procName, 1);
/* Apply the inverse of scalefact in both directions, with no translation. */
574 boxa1 =
boxaTransform(boxa1a, 0, 0, 1.0 / scalefact, 1.0 / scalefact);
/* Debug output: path is debugdir + "/words.png"; the joined string is freed
 * after the write.
 * NOTE(review): the guard around this debug section is not visible -- confirm
 * that debugdir is non-NULL on this path. */
582 debugfile =
stringJoin(debugdir,
"/words.png");
583 pixWrite(debugfile, pix4, IFF_PNG);
585 LEPT_FREE(debugfile);
/* Iterate over nb items; the loop body is not visible in this listing. */
594 for (i = 0; i < nb; i++) {
/* Debug output: path is debugdir + "/chars.png"; the joined string is freed
 * after the write. */
638 debugfile =
stringJoin(debugdir,
"/chars.png");
639 pixWrite(debugfile, pix4, IFF_PNG);
643 LEPT_FREE(debugfile);
/*
 * boxaExtractSortedPattern() -- partial view; only the locals, the argument
 * checks and the head of the main loop are visible.
 */
673 l_int32 index, nbox, row, prevrow, x, y, w, h;
678 PROCNAME(
"boxaExtractSortedPattern");
/* Both inputs are required; a missing one returns NULL (cast to NUMAA *). */
681 return (
NUMAA *)ERROR_PTR(
"boxa not defined", procName, NULL);
683 return (
NUMAA *)ERROR_PTR(
"na not defined", procName, NULL);
/* Visit nbox entries in index order; the loop body is not visible here. */
691 for (index = 0; index < nbox; index++) {
/*
 * numaaCompareImagesByBoxes() -- partial view of the definition; many lines
 * are missing from this listing.
 *
 * What the visible lines do:
 *   - validate psame, naa1, naa2, nperline (>= 1) and nreq (>= 1), returning
 *     1 on failure
 *   - allocate three per-line l_int32 arrays for each input (line*, yloc*,
 *     xleft*), sized n1 and n2 respectively
 *   - for every pair (i, j) of lines flagged non-zero in line1/line2, skip
 *     pairs whose y difference exceeds maxshifty or whose left-x difference
 *     exceeds maxshiftx, then test horizontal alignment of the pair
 *   - pass the collected candidates to countAlignedMatches(), which receives
 *     psame and debugflag
 */
769 l_int32 n1, n2, i, j, nbox, y1, y2, xl1, xl2;
770 l_int32 shiftx, shifty, match;
771 l_int32 *line1, *line2;
772 l_int32 *yloc1, *yloc2;
773 l_int32 *xleft1, *xleft2;
774 NUMA *na1, *na2, *nai1, *nai2, *nasx, *nasy;
776 PROCNAME(
"numaaCompareImagesByBoxes");
/* Argument checks: each failure reports a message and returns 1. */
779 return ERROR_INT(
"&same not defined", procName, 1);
782 return ERROR_INT(
"naa1 not defined", procName, 1);
784 return ERROR_INT(
"naa2 not defined", procName, 1);
786 return ERROR_INT(
"nperline < 1", procName, 1);
788 return ERROR_INT(
"nreq < 1", procName, 1);
/* Guard for inputs with fewer than nreq lines; the action taken is not
 * visible in this listing. */
792 if (n1 < nreq || n2 < nreq)
/* Per-line work arrays: the *1 arrays have n1 entries, the *2 arrays n2. */
798 line1 = (l_int32 *)LEPT_CALLOC(n1,
sizeof(l_int32));
799 line2 = (l_int32 *)LEPT_CALLOC(n2,
sizeof(l_int32));
800 yloc1 = (l_int32 *)LEPT_CALLOC(n1,
sizeof(l_int32));
801 yloc2 = (l_int32 *)LEPT_CALLOC(n2,
sizeof(l_int32));
802 xleft1 = (l_int32 *)LEPT_CALLOC(n1,
sizeof(l_int32));
803 xleft2 = (l_int32 *)LEPT_CALLOC(n2,
sizeof(l_int32));
/* NOTE(review): when only some of the six allocations succeed, this return
 * does not visibly free the ones that did -- possible leak; confirm.
 * NOTE(review): the message reads "callof"; presumably "calloc" was meant. */
804 if (!line1 || !line2 || !yloc1 || !yloc2 || !xleft1 || !xleft2)
805 return ERROR_INT(
"callof failure for an array", procName, 1);
/* First input: per-line pass; lines with at least nperline boxes get special
 * handling (the handling itself is not visible here). */
806 for (i = 0; i < n1; i++) {
811 if (nbox >= nperline)
/* Second input: same per-line pass. */
815 for (i = 0; i < n2; i++) {
820 if (nbox >= nperline)
/* Pairwise comparison: only lines flagged non-zero in line1/line2 take part. */
836 for (i = 0; i < n1; i++) {
837 if (line1[i] == 0)
continue;
841 for (j = 0; j < n2; j++) {
842 if (line2[j] == 0)
continue;
/* Reject pairs whose vertical offset is larger than maxshifty. */
844 if (L_ABS(y1 - y2) > maxshifty)
continue;
/* Reject pairs whose left-edge offset is larger than maxshiftx. */
846 if (L_ABS(xl1 - xl2) > maxshiftx)
continue;
/* Check the pair box by box for horizontal alignment under shiftx. */
852 match = testLineAlignmentX(na1, na2, shiftx, delx, nperline);
/* Final decision is delegated; psame and debugflag are passed through. */
868 countAlignedMatches(nai1, nai2, nasx, nasy, n1, n2, delx, dely,
869 nreq, psame, debugflag);
/*
 * testLineAlignmentX() -- partial view; the parameter list and parts of the
 * body are missing from this listing.
 *
 * Visible logic: for each of nperline positions, compare the left and right
 * x values of the two lines after removing shiftx, and test both residuals
 * against delx.
 */
886 testLineAlignmentX(
NUMA *na1,
892 l_int32 i, xl1, xr1, xl2, xr2, diffl, diffr;
894 PROCNAME(
"testLineAlignmentX");
/* Both inputs are required; a missing one returns 1. */
897 return ERROR_INT(
"na1 not defined", procName, 1);
899 return ERROR_INT(
"na2 not defined", procName, 1);
901 for (i = 0; i < nperline; i++) {
/* Residual left/right offsets once the expected shift is subtracted. */
906 diffl = L_ABS(xl1 - xl2 - shiftx);
907 diffr = L_ABS(xr1 - xr2 - shiftx);
/* Either residual beyond delx triggers the branch; its action is not
 * visible in this listing. */
908 if (diffl > delx || diffr > delx)
/*
 * countAlignedMatches() -- partial view; the parameter list and parts of the
 * body are missing from this listing.
 *
 * Visible logic: for each candidate i (0 .. nm-1), clear two per-row marker
 * arrays, mark the rows of candidate i, then scan every other candidate j.
 * A candidate j is skipped when either of its rows is already marked or when
 * its shift differs from (shiftx, shifty) by more than (delx, dely);
 * otherwise its rows are marked too. The running count nmatch is compared
 * against nreq.
 */
938 countAlignedMatches(
NUMA *nai1,
950 l_int32 i, j, nm, shiftx, shifty, nmatch, diffx, diffy;
951 l_int32 *ia1, *ia2, *iasx, *iasy, *index1, *index2;
953 PROCNAME(
"countAlignedMatches");
/* All four input arrays are required; failure returns 1. */
955 if (!nai1 || !nai2 || !nasx || !nasy)
956 return ERROR_INT(
"4 input numas not defined", procName, 1);
958 return ERROR_INT(
"&same not defined", procName, 1);
/* Row markers: index1 has n1 entries, index2 has n2 entries. */
975 index1 = (l_int32 *)LEPT_CALLOC(n1,
sizeof(l_int32));
976 index2 = (l_int32 *)LEPT_CALLOC(n2,
sizeof(l_int32));
/* NOTE(review): if exactly one allocation succeeded it is not visibly freed
 * on this return, and arrays obtained earlier (ia1, ia2, iasx, iasy) may also
 * be outstanding -- possible leak; confirm against the full source. */
977 if (!index1 || !index2)
978 return ERROR_INT(
"calloc fail for array", procName, 1);
/* Try each candidate in turn as the reference. */
979 for (i = 0; i < nm; i++) {
/* Reset the markers for this reference candidate.
 * NOTE(review): the byte count uses a literal 4 where the allocation above
 * uses sizeof(l_int32); this presumably assumes l_int32 is 4 bytes. */
984 memset(index1, 0, 4 * n1);
985 memset(index2, 0, 4 * n2);
/* Mark the reference candidate's own rows with the current match count. */
987 index1[ia1[i]] = nmatch;
988 index2[ia2[i]] = nmatch;
/* Compare every other candidate against the reference. */
995 for (j = 0; j < nm; j++) {
996 if (j == i)
continue;
/* A row may take part in at most one match per reference. */
998 if (index1[ia1[j]] > 0 || index2[ia2[j]] > 0)
continue;
/* The candidate's shift must agree with the reference within delx/dely. */
1000 diffx = L_ABS(shiftx - iasx[j]);
1001 diffy = L_ABS(shifty - iasy[j]);
1002 if (diffx > delx || diffy > dely)
continue;
/* Accept: mark both rows of candidate j. */
1005 index1[ia1[j]] = nmatch;
1006 index2[ia2[j]] = nmatch;
/* Enough aligned matches found; the rest of this branch is only partly
 * visible in this listing. */
1007 if (nmatch >= nreq) {
/* Dump both marker arrays (presumably only in debug mode -- confirm). */
1010 printRowIndices(index1, n1, index2, n2);
/*
 * printRowIndices() -- partial view; only the two loop heads and their
 * every-20th-entry test are visible. The output statements themselves are
 * missing from this listing.
 */
1027 printRowIndices(l_int32 *index1,
/* First array: n1 entries. */
1035 for (i = 0; i < n1; i++) {
/* True at i = 20, 40, ... (never at 0); presumably starts a new output
 * line -- confirm against the full source. */
1036 if (i && (i % 20 == 0))
/* Second array: n2 entries, same grouping test. */
1043 for (i = 0; i < n2; i++) {
1044 if (i && (i % 20 == 0))
PIX * pixReduceRankBinaryCascade(PIX *pixs, l_int32 level1, l_int32 level2, l_int32 level3, l_int32 level4)
pixReduceRankBinaryCascade()
void boxDestroy(BOX **pbox)
boxDestroy()
BOXAA * boxaaCreate(l_int32 n)
boxaaCreate()
l_int32 boxaGetCount(BOXA *boxa)
boxaGetCount()
l_ok boxaaAddBoxa(BOXAA *baa, BOXA *ba, l_int32 copyflag)
boxaaAddBoxa()
l_ok boxGetGeometry(BOX *box, l_int32 *px, l_int32 *py, l_int32 *pw, l_int32 *ph)
boxGetGeometry()
l_ok boxaAddBox(BOXA *boxa, BOX *box, l_int32 copyflag)
boxaAddBox()
void boxaDestroy(BOXA **pboxa)
boxaDestroy()
BOX * boxaGetBox(BOXA *boxa, l_int32 index, l_int32 accessflag)
boxaGetBox()
void boxaaDestroy(BOXAA **pbaa)
boxaaDestroy()
BOXA * boxaCreate(l_int32 n)
boxaCreate()
BOXA * boxaAdjustSides(BOXA *boxas, l_int32 delleft, l_int32 delright, l_int32 deltop, l_int32 delbot)
boxaAdjustSides()
BOX * boxTransform(BOX *box, l_int32 shiftx, l_int32 shifty, l_float32 scalex, l_float32 scaley)
boxTransform()
BOXA * boxaTransform(BOXA *boxas, l_int32 shiftx, l_int32 shifty, l_float32 scalex, l_float32 scaley)
boxaTransform()
BOXA * boxaaFlattenToBoxa(BOXAA *baa, NUMA **pnaindex, l_int32 copyflag)
boxaaFlattenToBoxa()
BOXA * boxaSort(BOXA *boxas, l_int32 sorttype, l_int32 sortorder, NUMA **pnaindex)
boxaSort()
BOXAA * boxaSort2d(BOXA *boxas, NUMAA **pnaad, l_int32 delta1, l_int32 delta2, l_int32 minh1)
boxaSort2d()
BOXA * boxaSelectBySize(BOXA *boxas, l_int32 width, l_int32 height, l_int32 type, l_int32 relation, l_int32 *pchanged)
boxaSelectBySize()
l_ok pixGetWordBoxesInTextlines(PIX *pixs, l_int32 minwidth, l_int32 minheight, l_int32 maxwidth, l_int32 maxheight, BOXA **pboxad, NUMA **pnai)
pixGetWordBoxesInTextlines()
JBCLASSER * jbWordsInTextlines(const char *dirin, l_int32 reduction, l_int32 maxwidth, l_int32 maxheight, l_float32 thresh, l_float32 weight, NUMA **pnatl, l_int32 firstpage, l_int32 npages)
jbWordsInTextlines()
l_ok jbRankHaus(const char *dirin, l_int32 size, l_float32 rank, l_int32 components, const char *rootname, l_int32 firstpage, l_int32 npages, l_int32 renderflag)
jbRankHaus()
static const l_int32 JB_WORDS_MIN_HEIGHT
NUMAA * boxaExtractSortedPattern(BOXA *boxa, NUMA *na)
boxaExtractSortedPattern()
static const l_int32 JB_WORDS_MIN_WIDTH
l_ok jbCorrelation(const char *dirin, l_float32 thresh, l_float32 weight, l_int32 components, const char *rootname, l_int32 firstpage, l_int32 npages, l_int32 renderflag)
jbCorrelation()
l_ok pixGetWordsInTextlines(PIX *pixs, l_int32 minwidth, l_int32 minheight, l_int32 maxwidth, l_int32 maxheight, BOXA **pboxad, PIXA **ppixad, NUMA **pnai)
pixGetWordsInTextlines()
l_ok pixFindWordAndCharacterBoxes(PIX *pixs, BOX *boxs, l_int32 thresh, BOXA **pboxaw, BOXAA **pboxaac, const char *debugdir)
pixFindWordAndCharacterBoxes()
l_ok numaaCompareImagesByBoxes(NUMAA *naa1, NUMAA *naa2, l_int32 nperline, l_int32 nreq, l_int32 maxshiftx, l_int32 maxshifty, l_int32 delx, l_int32 dely, l_int32 *psame, l_int32 debugflag)
numaaCompareImagesByBoxes()
BOXA * pixConnCompBB(PIX *pixs, l_int32 connectivity)
pixConnCompBB()
l_ok pixRenderBoxaArb(PIX *pix, BOXA *boxa, l_int32 width, l_uint8 rval, l_uint8 gval, l_uint8 bval)
pixRenderBoxaArb()
PIX * pixMorphSequence(PIX *pixs, const char *sequence, l_int32 dispsep)
pixMorphSequence()
l_ok numaAddNumber(NUMA *na, l_float32 val)
numaAddNumber()
NUMA * numaCreate(l_int32 n)
numaCreate()
l_int32 numaaGetCount(NUMAA *naa)
numaaGetCount()
NUMA * numaaGetNuma(NUMAA *naa, l_int32 index, l_int32 accessflag)
numaaGetNuma()
void numaDestroy(NUMA **pna)
numaDestroy()
NUMAA * numaaCreate(l_int32 n)
numaaCreate()
l_int32 numaGetCount(NUMA *na)
numaGetCount()
l_ok numaGetIValue(NUMA *na, l_int32 index, l_int32 *pival)
numaGetIValue()
l_ok numaaAddNuma(NUMAA *naa, NUMA *na, l_int32 copyflag)
numaaAddNuma()
l_int32 * numaGetIArray(NUMA *na)
numaGetIArray()
void numaaDestroy(NUMAA **pnaa)
numaaDestroy()
l_ok numaJoin(NUMA *nad, NUMA *nas, l_int32 istart, l_int32 iend)
numaJoin()
void pixDestroy(PIX **ppix)
pixDestroy()
l_ok pixGetDimensions(const PIX *pix, l_int32 *pw, l_int32 *ph, l_int32 *pd)
pixGetDimensions()
PIX * pixClone(PIX *pixs)
pixClone()
PIX * pixClipRectangle(PIX *pixs, BOX *box, BOX **pboxc)
pixClipRectangle()
void pixaDestroy(PIXA **ppixa)
pixaDestroy()
l_int32 pixaGetCount(PIXA *pixa)
pixaGetCount()
BOXA * pixaGetBoxa(PIXA *pixa, l_int32 accesstype)
pixaGetBoxa()
PIXA * pixaCreateFromBoxa(PIX *pixs, BOXA *boxa, l_int32 start, l_int32 num, l_int32 *pcropwarn)
pixaCreateFromBoxa()
void pixaaDestroy(PIXAA **ppaa)
pixaaDestroy()
PIX * pixaGetPix(PIXA *pixa, l_int32 index, l_int32 accesstype)
pixaGetPix()
PIXAA * pixaSort2dByIndex(PIXA *pixas, NUMAA *naa, l_int32 copyflag)
pixaSort2dByIndex()
PIXA * pixaaFlattenToPixa(PIXAA *paa, NUMA **pnaindex, l_int32 copyflag)
pixaaFlattenToPixa()
PIX * pixConvertTo1(PIX *pixs, l_int32 threshold)
pixConvertTo1()
PIX * pixConvertTo8(PIX *pixs, l_int32 cmapflag)
pixConvertTo8()
PIX * pixConvertTo32(PIX *pixs)
pixConvertTo32()
PIX * pixRead(const char *filename)
pixRead()
char * sarrayGetString(SARRAY *sa, l_int32 index, l_int32 copyflag)
sarrayGetString()
l_int32 sarrayGetCount(SARRAY *sa)
sarrayGetCount()
void sarrayDestroy(SARRAY **psa)
sarrayDestroy()
SARRAY * getSortedPathnamesInDirectory(const char *dirname, const char *substr, l_int32 first, l_int32 nfiles)
getSortedPathnamesInDirectory()
SARRAY * sarrayCopy(SARRAY *sa)
sarrayCopy()
PIX * pixScaleToResolution(PIX *pixs, l_float32 target, l_float32 assumed, l_float32 *pscalefact)
pixScaleToResolution()
void lept_stderr(const char *fmt,...)
lept_stderr()
char * stringReplaceSubstr(const char *src, const char *sub1, const char *sub2, l_int32 *ploc, l_int32 *pfound)
stringReplaceSubstr()
l_int32 lept_mkdir(const char *subdir)
lept_mkdir()
char * stringJoin(const char *src1, const char *src2)
stringJoin()