Leptonica  1.82.0
Image processing and image analysis suite
pdfio1.c
Go to the documentation of this file.
1 /*====================================================================*
2  - Copyright (C) 2001 Leptonica. All rights reserved.
3  -
4  - Redistribution and use in source and binary forms, with or without
5  - modification, are permitted provided that the following conditions
6  - are met:
7  - 1. Redistributions of source code must retain the above copyright
8  - notice, this list of conditions and the following disclaimer.
9  - 2. Redistributions in binary form must reproduce the above
10  - copyright notice, this list of conditions and the following
11  - disclaimer in the documentation and/or other materials
12  - provided with the distribution.
13  -
14  - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15  - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16  - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17  - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18  - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19  - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20  - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21  - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22  - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23  - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *====================================================================*/
26 
200 #ifdef HAVE_CONFIG_H
201 #include <config_auto.h>
202 #endif /* HAVE_CONFIG_H */
203 
204 #include <string.h>
205 #include <math.h>
206 #include "allheaders.h"
207 
208 /* --------------------------------------------*/
209 #if USE_PDFIO /* defined in environ.h */
210  /* --------------------------------------------*/
211 
212  /* Typical scan resolution in ppi (pixels/inch) */
213 static const l_int32 DefaultInputRes = 300;
214 
215 /*---------------------------------------------------------------------*
216  * Convert specified image files to pdf (one image file per page) *
217  *---------------------------------------------------------------------*/
252 l_ok
253 convertFilesToPdf(const char *dirname,
254  const char *substr,
255  l_int32 res,
256  l_float32 scalefactor,
257  l_int32 type,
258  l_int32 quality,
259  const char *title,
260  const char *fileout)
261 {
262 l_int32 ret;
263 SARRAY *sa;
264 
265  PROCNAME("convertFilesToPdf");
266 
267  if (!dirname)
268  return ERROR_INT("dirname not defined", procName, 1);
269  if (!fileout)
270  return ERROR_INT("fileout not defined", procName, 1);
271 
272  if ((sa = getSortedPathnamesInDirectory(dirname, substr, 0, 0)) == NULL)
273  return ERROR_INT("sa not made", procName, 1);
274  ret = saConvertFilesToPdf(sa, res, scalefactor, type, quality,
275  title, fileout);
276  sarrayDestroy(&sa);
277  return ret;
278 }
279 
280 
302 l_ok
304  l_int32 res,
305  l_float32 scalefactor,
306  l_int32 type,
307  l_int32 quality,
308  const char *title,
309  const char *fileout)
310 {
311 l_uint8 *data;
312 l_int32 ret;
313 size_t nbytes;
314 
315  PROCNAME("saConvertFilesToPdf");
316 
317  if (!sa)
318  return ERROR_INT("sa not defined", procName, 1);
319 
320  ret = saConvertFilesToPdfData(sa, res, scalefactor, type, quality,
321  title, &data, &nbytes);
322  if (ret) {
323  if (data) LEPT_FREE(data);
324  return ERROR_INT("pdf data not made", procName, 1);
325  }
326 
327  ret = l_binaryWrite(fileout, "w", data, nbytes);
328  LEPT_FREE(data);
329  if (ret)
330  L_ERROR("pdf data not written to file\n", procName);
331  return ret;
332 }
333 
334 
357 l_ok
359  l_int32 res,
360  l_float32 scalefactor,
361  l_int32 type,
362  l_int32 quality,
363  const char *title,
364  l_uint8 **pdata,
365  size_t *pnbytes)
366 {
367 char *fname;
368 const char *pdftitle;
369 l_uint8 *imdata;
370 l_int32 i, n, ret, pagetype, npages, scaledres;
371 size_t imbytes;
372 L_BYTEA *ba;
373 PIX *pixs, *pix;
374 L_PTRA *pa_data;
375 
376  PROCNAME("saConvertFilesToPdfData");
377 
378  if (!pdata)
379  return ERROR_INT("&data not defined", procName, 1);
380  *pdata = NULL;
381  if (!pnbytes)
382  return ERROR_INT("&nbytes not defined", procName, 1);
383  *pnbytes = 0;
384  if (!sa)
385  return ERROR_INT("sa not defined", procName, 1);
386  if (scalefactor <= 0.0) scalefactor = 1.0;
387  if (type != L_JPEG_ENCODE && type != L_G4_ENCODE &&
388  type != L_FLATE_ENCODE && type != L_JP2K_ENCODE) {
389  type = L_DEFAULT_ENCODE;
390  }
391 
392  /* Generate all the encoded pdf strings */
393  n = sarrayGetCount(sa);
394  pa_data = ptraCreate(n);
395  pdftitle = NULL;
396  for (i = 0; i < n; i++) {
397  if (i && (i % 10 == 0)) lept_stderr(".. %d ", i);
398  fname = sarrayGetString(sa, i, L_NOCOPY);
399  if ((pixs = pixRead(fname)) == NULL) {
400  L_ERROR("image not readable from file %s\n", procName, fname);
401  continue;
402  }
403  if (!pdftitle)
404  pdftitle = (title) ? title : fname;
405  if (scalefactor != 1.0)
406  pix = pixScale(pixs, scalefactor, scalefactor);
407  else
408  pix = pixClone(pixs);
409  pixDestroy(&pixs);
410  scaledres = (l_int32)(res * scalefactor);
411 
412  /* Select the encoding type */
413  if (type != L_DEFAULT_ENCODE) {
414  pagetype = type;
415  } else if (selectDefaultPdfEncoding(pix, &pagetype) != 0) {
416  pixDestroy(&pix);
417  L_ERROR("encoding type selection failed for file %s\n",
418  procName, fname);
419  continue;
420  }
421 
422  ret = pixConvertToPdfData(pix, pagetype, quality, &imdata, &imbytes,
423  0, 0, scaledres, pdftitle, NULL, 0);
424  pixDestroy(&pix);
425  if (ret) {
426  LEPT_FREE(imdata);
427  L_ERROR("pdf encoding failed for %s\n", procName, fname);
428  continue;
429  }
430  ba = l_byteaInitFromMem(imdata, imbytes);
431  LEPT_FREE(imdata);
432  ptraAdd(pa_data, ba);
433  }
434  ptraGetActualCount(pa_data, &npages);
435  if (npages == 0) {
436  L_ERROR("no pdf files made\n", procName);
437  ptraDestroy(&pa_data, FALSE, FALSE);
438  return 1;
439  }
440 
441  /* Concatenate them */
442  lept_stderr("\nconcatenating ... ");
443  ret = ptraConcatenatePdfToData(pa_data, NULL, pdata, pnbytes);
444  lept_stderr("done\n");
445 
446  ptraGetActualCount(pa_data, &npages); /* recalculate in case it changes */
447  for (i = 0; i < npages; i++) {
448  ba = (L_BYTEA *)ptraRemove(pa_data, i, L_NO_COMPACTION);
449  l_byteaDestroy(&ba);
450  }
451  ptraDestroy(&pa_data, FALSE, FALSE);
452  return ret;
453 }
454 
455 
476 l_ok
478  l_int32 *ptype)
479 {
480 l_int32 w, h, d, factor, ncolors;
481 PIXCMAP *cmap;
482 
483  PROCNAME("selectDefaultPdfEncoding");
484 
485  if (!ptype)
486  return ERROR_INT("&type not defined", procName, 1);
487  *ptype = L_FLATE_ENCODE; /* default universal encoding */
488  if (!pix)
489  return ERROR_INT("pix not defined", procName, 1);
490  pixGetDimensions(pix, &w, &h, &d);
491  cmap = pixGetColormap(pix);
492  if (d == 8 && !cmap) {
493  factor = L_MAX(1, (l_int32)sqrt((l_float64)(w * h) / 20000.));
494  pixNumColors(pix, factor, &ncolors);
495  if (ncolors < 20)
496  *ptype = L_FLATE_ENCODE;
497  else
498  *ptype = L_JPEG_ENCODE;
499  } else if (d == 1) {
500  *ptype = L_G4_ENCODE;
501  } else if (cmap || d == 2 || d == 4) {
502  *ptype = L_FLATE_ENCODE;
503  } else if (d == 8 || d == 32) {
504  *ptype = L_JPEG_ENCODE;
505  } else {
506  return ERROR_INT("type selection failure", procName, 1);
507  }
508 
509  return 0;
510 }
511 
512 
513 /*---------------------------------------------------------------------*
514  * Convert specified image files to pdf without scaling *
515  *---------------------------------------------------------------------*/
539 l_ok
540 convertUnscaledFilesToPdf(const char *dirname,
541  const char *substr,
542  const char *title,
543  const char *fileout)
544 {
545 l_int32 ret;
546 SARRAY *sa;
547 
548  PROCNAME("convertUnscaledFilesToPdf");
549 
550  if (!dirname)
551  return ERROR_INT("dirname not defined", procName, 1);
552  if (!fileout)
553  return ERROR_INT("fileout not defined", procName, 1);
554 
555  if ((sa = getSortedPathnamesInDirectory(dirname, substr, 0, 0)) == NULL)
556  return ERROR_INT("sa not made", procName, 1);
557  ret = saConvertUnscaledFilesToPdf(sa, title, fileout);
558  sarrayDestroy(&sa);
559  return ret;
560 }
561 
562 
577 l_ok
579  const char *title,
580  const char *fileout)
581 {
582 l_uint8 *data;
583 l_int32 ret;
584 size_t nbytes;
585 
586  PROCNAME("saConvertUnscaledFilesToPdf");
587 
588  if (!sa)
589  return ERROR_INT("sa not defined", procName, 1);
590 
591  ret = saConvertUnscaledFilesToPdfData(sa, title, &data, &nbytes);
592  if (ret) {
593  if (data) LEPT_FREE(data);
594  return ERROR_INT("pdf data not made", procName, 1);
595  }
596 
597  ret = l_binaryWrite(fileout, "w", data, nbytes);
598  LEPT_FREE(data);
599  if (ret)
600  L_ERROR("pdf data not written to file\n", procName);
601  return ret;
602 }
603 
604 
622 l_ok
624  const char *title,
625  l_uint8 **pdata,
626  size_t *pnbytes)
627 {
628 char *fname;
629 l_uint8 *imdata;
630 l_int32 i, n, ret, npages;
631 size_t imbytes;
632 L_BYTEA *ba;
633 L_PTRA *pa_data;
634 
635  PROCNAME("saConvertUnscaledFilesToPdfData");
636 
637  if (!pdata)
638  return ERROR_INT("&data not defined", procName, 1);
639  *pdata = NULL;
640  if (!pnbytes)
641  return ERROR_INT("&nbytes not defined", procName, 1);
642  *pnbytes = 0;
643  if (!sa)
644  return ERROR_INT("sa not defined", procName, 1);
645 
646  /* Generate all the encoded pdf strings */
647  n = sarrayGetCount(sa);
648  pa_data = ptraCreate(n);
649  for (i = 0; i < n; i++) {
650  if (i && (i % 10 == 0)) lept_stderr(".. %d ", i);
651  fname = sarrayGetString(sa, i, L_NOCOPY);
652 
653  /* Generate the pdf data */
654  if (convertUnscaledToPdfData(fname, title, &imdata, &imbytes))
655  continue;
656 
657  /* ... and add it to the array of single page data */
658  ba = l_byteaInitFromMem(imdata, imbytes);
659  if (imdata) LEPT_FREE(imdata);
660  ptraAdd(pa_data, ba);
661  }
662  ptraGetActualCount(pa_data, &npages);
663  if (npages == 0) {
664  L_ERROR("no pdf files made\n", procName);
665  ptraDestroy(&pa_data, FALSE, FALSE);
666  return 1;
667  }
668 
669  /* Concatenate to generate a multipage pdf */
670  lept_stderr("\nconcatenating ... ");
671  ret = ptraConcatenatePdfToData(pa_data, NULL, pdata, pnbytes);
672  lept_stderr("done\n");
673 
674  /* Clean up */
675  ptraGetActualCount(pa_data, &npages); /* maybe failed to read some files */
676  for (i = 0; i < npages; i++) {
677  ba = (L_BYTEA *)ptraRemove(pa_data, i, L_NO_COMPACTION);
678  l_byteaDestroy(&ba);
679  }
680  ptraDestroy(&pa_data, FALSE, FALSE);
681  return ret;
682 }
683 
684 
701 l_ok
702 convertUnscaledToPdfData(const char *fname,
703  const char *title,
704  l_uint8 **pdata,
705  size_t *pnbytes)
706 {
707 const char *pdftitle = NULL;
708 char *tail = NULL;
709 l_int32 format;
710 L_COMP_DATA *cid;
711 
712  PROCNAME("convertUnscaledToPdfData");
713 
714  if (!pdata)
715  return ERROR_INT("&data not defined", procName, 1);
716  *pdata = NULL;
717  if (!pnbytes)
718  return ERROR_INT("&nbytes not defined", procName, 1);
719  *pnbytes = 0;
720  if (!fname)
721  return ERROR_INT("fname not defined", procName, 1);
722 
723  findFileFormat(fname, &format);
724  if (format == IFF_UNKNOWN) {
725  L_WARNING("file %s format is unknown; skip\n", procName, fname);
726  return 1;
727  }
728  if (format == IFF_PS || format == IFF_LPDF) {
729  L_WARNING("file %s format is %d; skip\n", procName, fname, format);
730  return 1;
731  }
732 
733  /* Generate the image data required for pdf generation, always
734  * in binary (not ascii85) coding. Note that jpeg, jp2k and some
735  * png files are not transcoded. */
736  l_generateCIDataForPdf(fname, NULL, 0, &cid);
737  if (!cid) {
738  L_ERROR("file %s format is %d; unreadable\n", procName, fname, format);
739  return 1;
740  }
741 
742  /* If %title == NULL, use the tail of %fname. */
743  if (title) {
744  pdftitle = title;
745  } else {
746  splitPathAtDirectory(fname, NULL, &tail);
747  pdftitle = tail;
748  }
749 
750  /* Generate the pdf string for this page (image). This destroys
751  * the cid by attaching it to an lpd and destroying the lpd. */
752  cidConvertToPdfData(cid, pdftitle, pdata, pnbytes);
753  LEPT_FREE(tail);
754  return 0;
755 }
756 
757 
758 /*---------------------------------------------------------------------*
759  * Convert multiple images to pdf (one image per page) *
760  *---------------------------------------------------------------------*/
789 l_ok
791  l_int32 res,
792  l_float32 scalefactor,
793  l_int32 type,
794  l_int32 quality,
795  const char *title,
796  const char *fileout)
797 {
798 l_uint8 *data;
799 l_int32 ret;
800 size_t nbytes;
801 
802  PROCNAME("pixaConvertToPdf");
803 
804  if (!pixa)
805  return ERROR_INT("pixa not defined", procName, 1);
806 
807  ret = pixaConvertToPdfData(pixa, res, scalefactor, type, quality,
808  title, &data, &nbytes);
809  if (ret) {
810  LEPT_FREE(data);
811  return ERROR_INT("conversion to pdf failed", procName, 1);
812  }
813 
814  ret = l_binaryWrite(fileout, "w", data, nbytes);
815  LEPT_FREE(data);
816  if (ret)
817  L_ERROR("pdf data not written to file\n", procName);
818  return ret;
819 }
820 
821 
843 l_ok
845  l_int32 res,
846  l_float32 scalefactor,
847  l_int32 type,
848  l_int32 quality,
849  const char *title,
850  l_uint8 **pdata,
851  size_t *pnbytes)
852 {
853 l_uint8 *imdata;
854 l_int32 i, n, ret, scaledres, pagetype;
855 size_t imbytes;
856 L_BYTEA *ba;
857 PIX *pixs, *pix;
858 L_PTRA *pa_data;
859 
860  PROCNAME("pixaConvertToPdfData");
861 
862  if (!pdata)
863  return ERROR_INT("&data not defined", procName, 1);
864  *pdata = NULL;
865  if (!pnbytes)
866  return ERROR_INT("&nbytes not defined", procName, 1);
867  *pnbytes = 0;
868  if (!pixa)
869  return ERROR_INT("pixa not defined", procName, 1);
870  if (scalefactor <= 0.0) scalefactor = 1.0;
871  if (type != L_DEFAULT_ENCODE && type != L_JPEG_ENCODE &&
872  type != L_G4_ENCODE && type != L_FLATE_ENCODE &&
873  type != L_JP2K_ENCODE) {
874  L_WARNING("invalid compression type; using per-page default\n",
875  procName);
876  type = L_DEFAULT_ENCODE;
877  }
878 
879  /* Generate all the encoded pdf strings */
880  n = pixaGetCount(pixa);
881  pa_data = ptraCreate(n);
882  for (i = 0; i < n; i++) {
883  if ((pixs = pixaGetPix(pixa, i, L_CLONE)) == NULL) {
884  L_ERROR("pix[%d] not retrieved\n", procName, i);
885  continue;
886  }
887  if (scalefactor != 1.0)
888  pix = pixScale(pixs, scalefactor, scalefactor);
889  else
890  pix = pixClone(pixs);
891  pixDestroy(&pixs);
892  scaledres = (l_int32)(res * scalefactor);
893 
894  /* Select the encoding type */
895  if (type != L_DEFAULT_ENCODE) {
896  pagetype = type;
897  } else if (selectDefaultPdfEncoding(pix, &pagetype) != 0) {
898  L_ERROR("encoding type selection failed for pix[%d]\n",
899  procName, i);
900  pixDestroy(&pix);
901  continue;
902  }
903 
904  ret = pixConvertToPdfData(pix, pagetype, quality, &imdata, &imbytes,
905  0, 0, scaledres, title, NULL, 0);
906  pixDestroy(&pix);
907  if (ret) {
908  LEPT_FREE(imdata);
909  L_ERROR("pdf encoding failed for pix[%d]\n", procName, i);
910  continue;
911  }
912  ba = l_byteaInitFromMem(imdata, imbytes);
913  LEPT_FREE(imdata);
914  ptraAdd(pa_data, ba);
915  }
916  ptraGetActualCount(pa_data, &n);
917  if (n == 0) {
918  L_ERROR("no pdf files made\n", procName);
919  ptraDestroy(&pa_data, FALSE, FALSE);
920  return 1;
921  }
922 
923  /* Concatenate them */
924  ret = ptraConcatenatePdfToData(pa_data, NULL, pdata, pnbytes);
925 
926  ptraGetActualCount(pa_data, &n); /* recalculate in case it changes */
927  for (i = 0; i < n; i++) {
928  ba = (L_BYTEA *)ptraRemove(pa_data, i, L_NO_COMPACTION);
929  l_byteaDestroy(&ba);
930  }
931  ptraDestroy(&pa_data, FALSE, FALSE);
932  return ret;
933 }
934 
935 
936 /*---------------------------------------------------------------------*
937  * Single page, multi-image converters *
938  *---------------------------------------------------------------------*/
998 l_ok
999 convertToPdf(const char *filein,
1000  l_int32 type,
1001  l_int32 quality,
1002  const char *fileout,
1003  l_int32 x,
1004  l_int32 y,
1005  l_int32 res,
1006  const char *title,
1007  L_PDF_DATA **plpd,
1008  l_int32 position)
1009 {
1010 l_uint8 *data;
1011 l_int32 ret;
1012 size_t nbytes;
1013 
1014  PROCNAME("convertToPdf");
1015 
1016  if (!filein)
1017  return ERROR_INT("filein not defined", procName, 1);
1018  if (!plpd || (position == L_LAST_IMAGE)) {
1019  if (!fileout)
1020  return ERROR_INT("fileout not defined", procName, 1);
1021  }
1022 
1023  if (convertToPdfData(filein, type, quality, &data, &nbytes, x, y,
1024  res, title, plpd, position))
1025  return ERROR_INT("pdf data not made", procName, 1);
1026 
1027  if (!plpd || (position == L_LAST_IMAGE)) {
1028  ret = l_binaryWrite(fileout, "w", data, nbytes);
1029  LEPT_FREE(data);
1030  if (ret)
1031  return ERROR_INT("pdf data not written to file", procName, 1);
1032  }
1033 
1034  return 0;
1035 }
1036 
1037 
1070 l_ok
1071 convertImageDataToPdf(l_uint8 *imdata,
1072  size_t size,
1073  l_int32 type,
1074  l_int32 quality,
1075  const char *fileout,
1076  l_int32 x,
1077  l_int32 y,
1078  l_int32 res,
1079  const char *title,
1080  L_PDF_DATA **plpd,
1081  l_int32 position)
1082 {
1083 l_int32 ret;
1084 PIX *pix;
1085 
1086  PROCNAME("convertImageDataToPdf");
1087 
1088  if (!imdata)
1089  return ERROR_INT("image data not defined", procName, 1);
1090  if (!plpd || (position == L_LAST_IMAGE)) {
1091  if (!fileout)
1092  return ERROR_INT("fileout not defined", procName, 1);
1093  }
1094 
1095  if ((pix = pixReadMem(imdata, size)) == NULL)
1096  return ERROR_INT("pix not read", procName, 1);
1097  if (type != L_JPEG_ENCODE && type != L_G4_ENCODE &&
1098  type != L_FLATE_ENCODE && type != L_JP2K_ENCODE) {
1099  selectDefaultPdfEncoding(pix, &type);
1100  }
1101  ret = pixConvertToPdf(pix, type, quality, fileout, x, y, res,
1102  title, plpd, position);
1103  pixDestroy(&pix);
1104  return ret;
1105 }
1106 
1107 
1139 l_ok
1140 convertToPdfData(const char *filein,
1141  l_int32 type,
1142  l_int32 quality,
1143  l_uint8 **pdata,
1144  size_t *pnbytes,
1145  l_int32 x,
1146  l_int32 y,
1147  l_int32 res,
1148  const char *title,
1149  L_PDF_DATA **plpd,
1150  l_int32 position)
1151 {
1152 PIX *pix;
1153 
1154  PROCNAME("convertToPdfData");
1155 
1156  if (!pdata)
1157  return ERROR_INT("&data not defined", procName, 1);
1158  *pdata = NULL;
1159  if (!pnbytes)
1160  return ERROR_INT("&nbytes not defined", procName, 1);
1161  *pnbytes = 0;
1162  if (!filein)
1163  return ERROR_INT("filein not defined", procName, 1);
1164 
1165  if ((pix = pixRead(filein)) == NULL)
1166  return ERROR_INT("pix not made", procName, 1);
1167 
1168  pixConvertToPdfData(pix, type, quality, pdata, pnbytes,
1169  x, y, res, (title) ? title : filein, plpd, position);
1170  pixDestroy(&pix);
1171  return 0;
1172 }
1173 
1174 
1207 l_ok
1209  size_t size,
1210  l_int32 type,
1211  l_int32 quality,
1212  l_uint8 **pdata,
1213  size_t *pnbytes,
1214  l_int32 x,
1215  l_int32 y,
1216  l_int32 res,
1217  const char *title,
1218  L_PDF_DATA **plpd,
1219  l_int32 position)
1220 {
1221 l_int32 ret;
1222 PIX *pix;
1223 
1224  PROCNAME("convertImageDataToPdfData");
1225 
1226  if (!pdata)
1227  return ERROR_INT("&data not defined", procName, 1);
1228  *pdata = NULL;
1229  if (!pnbytes)
1230  return ERROR_INT("&nbytes not defined", procName, 1);
1231  *pnbytes = 0;
1232  if (!imdata)
1233  return ERROR_INT("image data not defined", procName, 1);
1234  if (plpd) { /* part of multi-page invocation */
1235  if (position == L_FIRST_IMAGE)
1236  *plpd = NULL;
1237  }
1238 
1239  if ((pix = pixReadMem(imdata, size)) == NULL)
1240  return ERROR_INT("pix not read", procName, 1);
1241  if (type != L_JPEG_ENCODE && type != L_G4_ENCODE &&
1242  type != L_FLATE_ENCODE && type != L_JP2K_ENCODE) {
1243  selectDefaultPdfEncoding(pix, &type);
1244  }
1245  ret = pixConvertToPdfData(pix, type, quality, pdata, pnbytes,
1246  x, y, res, title, plpd, position);
1247  pixDestroy(&pix);
1248  return ret;
1249 }
1250 
1251 
1285 l_ok
1287  l_int32 type,
1288  l_int32 quality,
1289  const char *fileout,
1290  l_int32 x,
1291  l_int32 y,
1292  l_int32 res,
1293  const char *title,
1294  L_PDF_DATA **plpd,
1295  l_int32 position)
1296 {
1297 l_uint8 *data;
1298 l_int32 ret;
1299 size_t nbytes;
1300 
1301  PROCNAME("pixConvertToPdf");
1302 
1303  if (!pix)
1304  return ERROR_INT("pix not defined", procName, 1);
1305  if (!plpd || (position == L_LAST_IMAGE)) {
1306  if (!fileout)
1307  return ERROR_INT("fileout not defined", procName, 1);
1308  }
1309 
1310  if (pixConvertToPdfData(pix, type, quality, &data, &nbytes,
1311  x, y, res, title, plpd, position)) {
1312  LEPT_FREE(data);
1313  return ERROR_INT("pdf data not made", procName, 1);
1314  }
1315 
1316  if (!plpd || (position == L_LAST_IMAGE)) {
1317  ret = l_binaryWrite(fileout, "w", data, nbytes);
1318  LEPT_FREE(data);
1319  if (ret)
1320  return ERROR_INT("pdf data not written to file", procName, 1);
1321  }
1322  return 0;
1323 }
1324 
1325 
1345 l_ok
1347  PIX *pix,
1348  l_int32 res,
1349  const char *title)
1350 {
1351 l_uint8 *data;
1352 size_t nbytes, nbytes_written;
1353 
1354  PROCNAME("pixWriteStreamPdf");
1355 
1356  if (!fp)
1357  return ERROR_INT("stream not opened", procName, 1);
1358  if (!pix)
1359  return ERROR_INT("pix not defined", procName, 1);
1360 
1361  if (pixWriteMemPdf(&data, &nbytes, pix, res, title) != 0) {
1362  LEPT_FREE(data);
1363  return ERROR_INT("pdf data not made", procName, 1);
1364  }
1365 
1366  nbytes_written = fwrite(data, 1, nbytes, fp);
1367  LEPT_FREE(data);
1368  if (nbytes != nbytes_written)
1369  return ERROR_INT("failure writing pdf data to stream", procName, 1);
1370  return 0;
1371 }
1372 
1373 
1394 l_ok
1395 pixWriteMemPdf(l_uint8 **pdata,
1396  size_t *pnbytes,
1397  PIX *pix,
1398  l_int32 res,
1399  const char *title)
1400 {
1401 l_int32 ret, type;
1402 
1403  PROCNAME("pixWriteMemPdf");
1404 
1405  if (pdata) *pdata = NULL;
1406  if (pnbytes) *pnbytes = 0;
1407  if (!pdata || !pnbytes)
1408  return ERROR_INT("&data or &nbytes not defined", procName, 1);
1409  if (!pix)
1410  return ERROR_INT("pix not defined", procName, 1);
1411 
1412  selectDefaultPdfEncoding(pix, &type);
1413  ret = pixConvertToPdfData(pix, type, 75, pdata, pnbytes,
1414  0, 0, res, title, NULL, 0);
1415  if (ret)
1416  return ERROR_INT("pdf data not made", procName, 1);
1417  return 0;
1418 }
1419 
1420 
1421 /*---------------------------------------------------------------------*
1422  * Segmented multi-page, multi-image converter *
1423  *---------------------------------------------------------------------*/
1468 l_ok
1469 convertSegmentedFilesToPdf(const char *dirname,
1470  const char *substr,
1471  l_int32 res,
1472  l_int32 type,
1473  l_int32 thresh,
1474  BOXAA *baa,
1475  l_int32 quality,
1476  l_float32 scalefactor,
1477  const char *title,
1478  const char *fileout)
1479 {
1480 char *fname;
1481 l_uint8 *imdata, *data;
1482 l_int32 i, npages, nboxa, nboxes, ret;
1483 size_t imbytes, databytes;
1484 BOXA *boxa;
1485 L_BYTEA *ba;
1486 L_PTRA *pa_data;
1487 SARRAY *sa;
1488 
1489  PROCNAME("convertSegmentedFilesToPdf");
1490 
1491  if (!dirname)
1492  return ERROR_INT("dirname not defined", procName, 1);
1493  if (!fileout)
1494  return ERROR_INT("fileout not defined", procName, 1);
1495 
1496  if ((sa = getNumberedPathnamesInDirectory(dirname, substr, 0, 0, 10000))
1497  == NULL)
1498  return ERROR_INT("sa not made", procName, 1);
1499 
1500  npages = sarrayGetCount(sa);
1501  /* If necessary, extend the boxaa, which is page-aligned with
1502  * the image files, to be as large as the set of images. */
1503  if (baa) {
1504  nboxa = boxaaGetCount(baa);
1505  if (nboxa < npages) {
1506  boxa = boxaCreate(1);
1507  boxaaExtendWithInit(baa, npages, boxa);
1508  boxaDestroy(&boxa);
1509  }
1510  }
1511 
1512  /* Generate and save all the encoded pdf strings */
1513  pa_data = ptraCreate(npages);
1514  for (i = 0; i < npages; i++) {
1515  fname = sarrayGetString(sa, i, L_NOCOPY);
1516  if (!strcmp(fname, "")) continue;
1517  boxa = NULL;
1518  if (baa) {
1519  boxa = boxaaGetBoxa(baa, i, L_CLONE);
1520  nboxes = boxaGetCount(boxa);
1521  if (nboxes == 0)
1522  boxaDestroy(&boxa);
1523  }
1524  ret = convertToPdfDataSegmented(fname, res, type, thresh, boxa,
1525  quality, scalefactor, title,
1526  &imdata, &imbytes);
1527  boxaDestroy(&boxa); /* safe; in case nboxes > 0 */
1528  if (ret) {
1529  L_ERROR("pdf encoding failed for %s\n", procName, fname);
1530  continue;
1531  }
1532  ba = l_byteaInitFromMem(imdata, imbytes);
1533  if (imdata) LEPT_FREE(imdata);
1534  ptraAdd(pa_data, ba);
1535  }
1536  sarrayDestroy(&sa);
1537 
1538  ptraGetActualCount(pa_data, &npages);
1539  if (npages == 0) {
1540  L_ERROR("no pdf files made\n", procName);
1541  ptraDestroy(&pa_data, FALSE, FALSE);
1542  return 1;
1543  }
1544 
1545  /* Concatenate */
1546  ret = ptraConcatenatePdfToData(pa_data, NULL, &data, &databytes);
1547 
1548  /* Clean up */
1549  ptraGetActualCount(pa_data, &npages); /* recalculate in case it changes */
1550  for (i = 0; i < npages; i++) {
1551  ba = (L_BYTEA *)ptraRemove(pa_data, i, L_NO_COMPACTION);
1552  l_byteaDestroy(&ba);
1553  }
1554  ptraDestroy(&pa_data, FALSE, FALSE);
1555 
1556  if (ret) {
1557  if (data) LEPT_FREE(data);
1558  return ERROR_INT("pdf data not made", procName, 1);
1559  }
1560 
1561  ret = l_binaryWrite(fileout, "w", data, databytes);
1562  LEPT_FREE(data);
1563  if (ret)
1564  L_ERROR("pdf data not written to file\n", procName);
1565  return ret;
1566 }
1567 
1568 
1587 BOXAA *
1588 convertNumberedMasksToBoxaa(const char *dirname,
1589  const char *substr,
1590  l_int32 numpre,
1591  l_int32 numpost)
1592 {
1593 char *fname;
1594 l_int32 i, n;
1595 BOXA *boxa;
1596 BOXAA *baa;
1597 PIX *pix;
1598 SARRAY *sa;
1599 
1600  PROCNAME("convertNumberedMasksToBoxaa");
1601 
1602  if (!dirname)
1603  return (BOXAA *)ERROR_PTR("dirname not defined", procName, NULL);
1604 
1605  if ((sa = getNumberedPathnamesInDirectory(dirname, substr, numpre,
1606  numpost, 10000)) == NULL)
1607  return (BOXAA *)ERROR_PTR("sa not made", procName, NULL);
1608 
1609  /* Generate and save all the encoded pdf strings */
1610  n = sarrayGetCount(sa);
1611  baa = boxaaCreate(n);
1612  boxa = boxaCreate(1);
1613  boxaaInitFull(baa, boxa);
1614  boxaDestroy(&boxa);
1615  for (i = 0; i < n; i++) {
1616  fname = sarrayGetString(sa, i, L_NOCOPY);
1617  if (!strcmp(fname, "")) continue;
1618  if ((pix = pixRead(fname)) == NULL) {
1619  L_WARNING("invalid image on page %d\n", procName, i);
1620  continue;
1621  }
1622  boxa = pixConnComp(pix, NULL, 8);
1623  boxaaReplaceBoxa(baa, i, boxa);
1624  pixDestroy(&pix);
1625  }
1626 
1627  sarrayDestroy(&sa);
1628  return baa;
1629 }
1630 
1631 
1632 /*---------------------------------------------------------------------*
1633  * Segmented single page, multi-image converters *
1634  *---------------------------------------------------------------------*/
1697 l_ok
1698 convertToPdfSegmented(const char *filein,
1699  l_int32 res,
1700  l_int32 type,
1701  l_int32 thresh,
1702  BOXA *boxa,
1703  l_int32 quality,
1704  l_float32 scalefactor,
1705  const char *title,
1706  const char *fileout)
1707 {
1708 l_int32 ret;
1709 PIX *pixs;
1710 
1711  PROCNAME("convertToPdfSegmented");
1712 
1713  if (!filein)
1714  return ERROR_INT("filein not defined", procName, 1);
1715  if (!fileout)
1716  return ERROR_INT("fileout not defined", procName, 1);
1717  if (type != L_G4_ENCODE && type != L_JPEG_ENCODE &&
1718  type != L_FLATE_ENCODE)
1719  return ERROR_INT("invalid conversion type", procName, 1);
1720  if (boxa && scalefactor > 1.0) {
1721  L_WARNING("setting scalefactor to 1.0\n", procName);
1722  scalefactor = 1.0;
1723  }
1724 
1725  if ((pixs = pixRead(filein)) == NULL)
1726  return ERROR_INT("pixs not made", procName, 1);
1727 
1728  ret = pixConvertToPdfSegmented(pixs, res, type, thresh, boxa, quality,
1729  scalefactor, (title) ? title : filein,
1730  fileout);
1731  pixDestroy(&pixs);
1732  return ret;
1733 }
1734 
1735 
1758 l_ok
1760  l_int32 res,
1761  l_int32 type,
1762  l_int32 thresh,
1763  BOXA *boxa,
1764  l_int32 quality,
1765  l_float32 scalefactor,
1766  const char *title,
1767  const char *fileout)
1768 {
1769 l_uint8 *data;
1770 l_int32 ret;
1771 size_t nbytes;
1772 
1773  PROCNAME("pixConvertToPdfSegmented");
1774 
1775  if (!pixs)
1776  return ERROR_INT("pixs not defined", procName, 1);
1777  if (!fileout)
1778  return ERROR_INT("fileout not defined", procName, 1);
1779  if (type != L_G4_ENCODE && type != L_JPEG_ENCODE &&
1780  type != L_FLATE_ENCODE)
1781  return ERROR_INT("invalid conversion type", procName, 1);
1782  if (boxa && scalefactor > 1.0) {
1783  L_WARNING("setting scalefactor to 1.0\n", procName);
1784  scalefactor = 1.0;
1785  }
1786 
1787  ret = pixConvertToPdfDataSegmented(pixs, res, type, thresh, boxa, quality,
1788  scalefactor, title, &data, &nbytes);
1789  if (ret)
1790  return ERROR_INT("pdf generation failure", procName, 1);
1791 
1792  ret = l_binaryWrite(fileout, "w", data, nbytes);
1793  if (data) LEPT_FREE(data);
1794  return ret;
1795 }
1796 
1797 
1822 l_ok
1823 convertToPdfDataSegmented(const char *filein,
1824  l_int32 res,
1825  l_int32 type,
1826  l_int32 thresh,
1827  BOXA *boxa,
1828  l_int32 quality,
1829  l_float32 scalefactor,
1830  const char *title,
1831  l_uint8 **pdata,
1832  size_t *pnbytes)
1833 {
1834 l_int32 ret;
1835 PIX *pixs;
1836 
1837  PROCNAME("convertToPdfDataSegmented");
1838 
1839  if (!pdata)
1840  return ERROR_INT("&data not defined", procName, 1);
1841  *pdata = NULL;
1842  if (!pnbytes)
1843  return ERROR_INT("&nbytes not defined", procName, 1);
1844  *pnbytes = 0;
1845  if (!filein)
1846  return ERROR_INT("filein not defined", procName, 1);
1847  if (type != L_G4_ENCODE && type != L_JPEG_ENCODE &&
1848  type != L_FLATE_ENCODE)
1849  return ERROR_INT("invalid conversion type", procName, 1);
1850  if (boxa && scalefactor > 1.0) {
1851  L_WARNING("setting scalefactor to 1.0\n", procName);
1852  scalefactor = 1.0;
1853  }
1854 
1855  if ((pixs = pixRead(filein)) == NULL)
1856  return ERROR_INT("pixs not made", procName, 1);
1857 
1858  ret = pixConvertToPdfDataSegmented(pixs, res, type, thresh, boxa,
1859  quality, scalefactor,
1860  (title) ? title : filein,
1861  pdata, pnbytes);
1862  pixDestroy(&pixs);
1863  return ret;
1864 }
1865 
1866 
1890 l_ok
1892  l_int32 res,
1893  l_int32 type,
1894  l_int32 thresh,
1895  BOXA *boxa,
1896  l_int32 quality,
1897  l_float32 scalefactor,
1898  const char *title,
1899  l_uint8 **pdata,
1900  size_t *pnbytes)
1901 {
1902 l_int32 i, nbox, seq, bx, by, bw, bh, upscale;
1903 l_float32 scale;
1904 BOX *box, *boxc, *box2;
1905 PIX *pix, *pixt1, *pixt2, *pixt3, *pixt4, *pixt5, *pixt6;
1906 PIXCMAP *cmap;
1907 L_PDF_DATA *lpd;
1908 
1909  PROCNAME("pixConvertToPdfDataSegmented");
1910 
1911  if (!pdata)
1912  return ERROR_INT("&data not defined", procName, 1);
1913  *pdata = NULL;
1914  if (!pnbytes)
1915  return ERROR_INT("&nbytes not defined", procName, 1);
1916  *pnbytes = 0;
1917  if (!pixs)
1918  return ERROR_INT("pixs not defined", procName, 1);
1919  if (type != L_G4_ENCODE && type != L_JPEG_ENCODE &&
1920  type != L_FLATE_ENCODE)
1921  return ERROR_INT("invalid conversion type", procName, 1);
1922  if (boxa && (scalefactor <= 0.0 || scalefactor > 1.0)) {
1923  L_WARNING("setting scalefactor to 1.0\n", procName);
1924  scalefactor = 1.0;
1925  }
1926 
1927  /* Adjust scalefactor so that the product with res gives an integer */
1928  if (res <= 0)
1929  res = DefaultInputRes;
1930  scale = (l_float32)((l_int32)(scalefactor * res + 0.5)) / (l_float32)res;
1931  cmap = pixGetColormap(pixs);
1932 
1933  /* Simple case: single image to be encoded */
1934  if (!boxa || boxaGetCount(boxa) == 0) {
1935  if (pixGetDepth(pixs) > 1 && type == L_G4_ENCODE) {
1936  if (cmap)
1938  else
1939  pixt1 = pixConvertTo8(pixs, FALSE);
1940  pixt2 = pixScaleGray2xLIThresh(pixt1, thresh);
1941  pixConvertToPdfData(pixt2, type, quality, pdata, pnbytes,
1942  0, 0, 2 * res, title, NULL, 0);
1943  pixDestroy(&pixt1);
1944  pixDestroy(&pixt2);
1945  } else {
1946  pixConvertToPdfData(pixs, type, quality, pdata, pnbytes,
1947  0, 0, res, title, NULL, 0);
1948  }
1949  return 0;
1950  }
1951 
1952  /* Multiple images to be encoded. If %type == L_G4_ENCODE,
1953  * jpeg encode a version of pixs that is blanked in the non-image
1954  * regions, and paint the scaled non-image part onto it through a mask.
1955  * Otherwise, we must put the non-image part down first and
1956  * then render all the image regions separately on top of it,
1957  * at their own resolution. */
1958  pixt1 = pixSetBlackOrWhiteBoxa(pixs, boxa, L_SET_WHITE); /* non-image */
1959  nbox = boxaGetCount(boxa);
1960  if (type == L_G4_ENCODE) {
1961  pixt2 = pixCreateTemplate(pixs); /* only image regions */
1963  for (i = 0; i < nbox; i++) {
1964  box = boxaGetBox(boxa, i, L_CLONE);
1965  pix = pixClipRectangle(pixs, box, &boxc);
1966  boxGetGeometry(boxc, &bx, &by, &bw, &bh);
1967  pixRasterop(pixt2, bx, by, bw, bh, PIX_SRC, pix, 0, 0);
1968  pixDestroy(&pix);
1969  boxDestroy(&box);
1970  boxDestroy(&boxc);
1971  }
1973  if (pixGetDepth(pixt3) == 1)
1974  pixt4 = pixScaleToGray(pixt3, scale);
1975  else
1976  pixt4 = pixScale(pixt3, scale, scale);
1977  pixConvertToPdfData(pixt4, L_JPEG_ENCODE, quality, pdata, pnbytes,
1978  0, 0, (l_int32)(scale * res), title,
1979  &lpd, L_FIRST_IMAGE);
1980 
1981  if (pixGetDepth(pixt1) == 1) {
1982  pixt5 = pixClone(pixt1);
1983  upscale = 1;
1984  } else {
1985  pixt6 = pixConvertTo8(pixt1, 0);
1986  pixt5 = pixScaleGray2xLIThresh(pixt6, thresh);
1987  pixDestroy(&pixt6);
1988  upscale = 2;
1989  }
1990  pixConvertToPdfData(pixt5, L_G4_ENCODE, quality, pdata, pnbytes,
1991  0, 0, upscale * res, title, &lpd, L_LAST_IMAGE);
1992  pixDestroy(&pixt2);
1993  pixDestroy(&pixt3);
1994  pixDestroy(&pixt4);
1995  pixDestroy(&pixt5);
1996  } else {
1997  /* Put the non-image part down first. This is the full
1998  size of the page, so we can use it to find the page
1999  height in pixels, which is required for determining
2000  the LL corner of the image relative to the LL corner
2001  of the page. */
2002  pixConvertToPdfData(pixt1, type, quality, pdata, pnbytes, 0, 0,
2003  res, title, &lpd, L_FIRST_IMAGE);
2004  for (i = 0; i < nbox; i++) {
2005  box = boxaGetBox(boxa, i, L_CLONE);
2006  pixt2 = pixClipRectangle(pixs, box, &boxc);
2008  if (pixGetDepth(pixt3) == 1)
2009  pixt4 = pixScaleToGray(pixt3, scale);
2010  else
2011  pixt4 = pixScale(pixt3, scale, scale);
2012  box2 = boxTransform(boxc, 0, 0, scale, scale);
2013  boxGetGeometry(box2, &bx, &by, NULL, &bh);
2014  seq = (i == nbox - 1) ? L_LAST_IMAGE : L_NEXT_IMAGE;
2015  pixConvertToPdfData(pixt4, L_JPEG_ENCODE, quality, pdata, pnbytes,
2016  bx, by, (l_int32)(scale * res), title,
2017  &lpd, seq);
2018  pixDestroy(&pixt2);
2019  pixDestroy(&pixt3);
2020  pixDestroy(&pixt4);
2021  boxDestroy(&box);
2022  boxDestroy(&boxc);
2023  boxDestroy(&box2);
2024  }
2025  }
2026 
2027  pixDestroy(&pixt1);
2028  return 0;
2029 }
2030 
2031 
2032 /*---------------------------------------------------------------------*
2033  * Multi-page concatenation *
2034  *---------------------------------------------------------------------*/
2054 l_ok
2055 concatenatePdf(const char *dirname,
2056  const char *substr,
2057  const char *fileout)
2058 {
2059 l_int32 ret;
2060 SARRAY *sa;
2061 
2062  PROCNAME("concatenatePdf");
2063 
2064  if (!dirname)
2065  return ERROR_INT("dirname not defined", procName, 1);
2066  if (!fileout)
2067  return ERROR_INT("fileout not defined", procName, 1);
2068 
2069  if ((sa = getSortedPathnamesInDirectory(dirname, substr, 0, 0)) == NULL)
2070  return ERROR_INT("sa not made", procName, 1);
2071  ret = saConcatenatePdf(sa, fileout);
2072  sarrayDestroy(&sa);
2073  return ret;
2074 }
2075 
2076 
2089 l_ok
2091  const char *fileout)
2092 {
2093 l_uint8 *data;
2094 l_int32 ret;
2095 size_t nbytes;
2096 
2097  PROCNAME("saConcatenatePdf");
2098 
2099  if (!sa)
2100  return ERROR_INT("sa not defined", procName, 1);
2101  if (!fileout)
2102  return ERROR_INT("fileout not defined", procName, 1);
2103 
2104  ret = saConcatenatePdfToData(sa, &data, &nbytes);
2105  if (ret)
2106  return ERROR_INT("pdf data not made", procName, 1);
2107  ret = l_binaryWrite(fileout, "w", data, nbytes);
2108  LEPT_FREE(data);
2109  return ret;
2110 }
2111 
2112 
2125 l_ok
2127  const char *fileout)
2128 {
2129 l_uint8 *data;
2130 l_int32 ret;
2131 size_t nbytes;
2132 
2133  PROCNAME("ptraConcatenatePdf");
2134 
2135  if (!pa)
2136  return ERROR_INT("pa not defined", procName, 1);
2137  if (!fileout)
2138  return ERROR_INT("fileout not defined", procName, 1);
2139 
2140  ret = ptraConcatenatePdfToData(pa, NULL, &data, &nbytes);
2141  if (ret)
2142  return ERROR_INT("pdf data not made", procName, 1);
2143  ret = l_binaryWrite(fileout, "w", data, nbytes);
2144  LEPT_FREE(data);
2145  return ret;
2146 }
2147 
2148 
2169 l_ok
2170 concatenatePdfToData(const char *dirname,
2171  const char *substr,
2172  l_uint8 **pdata,
2173  size_t *pnbytes)
2174 {
2175 l_int32 ret;
2176 SARRAY *sa;
2177 
2178  PROCNAME("concatenatePdfToData");
2179 
2180  if (!pdata)
2181  return ERROR_INT("&data not defined", procName, 1);
2182  *pdata = NULL;
2183  if (!pnbytes)
2184  return ERROR_INT("&nbytes not defined", procName, 1);
2185  *pnbytes = 0;
2186  if (!dirname)
2187  return ERROR_INT("dirname not defined", procName, 1);
2188 
2189  if ((sa = getSortedPathnamesInDirectory(dirname, substr, 0, 0)) == NULL)
2190  return ERROR_INT("sa not made", procName, 1);
2191  ret = saConcatenatePdfToData(sa, pdata, pnbytes);
2192  sarrayDestroy(&sa);
2193  return ret;
2194 }
2195 
2196 
2210 l_ok
2212  l_uint8 **pdata,
2213  size_t *pnbytes)
2214 {
2215 char *fname;
2216 l_int32 i, npages, ret;
2217 L_BYTEA *bas;
2218 L_PTRA *pa_data; /* input pdf data for each page */
2219 
2220  PROCNAME("saConcatenatePdfToData");
2221 
2222  if (!pdata)
2223  return ERROR_INT("&data not defined", procName, 1);
2224  *pdata = NULL;
2225  if (!pnbytes)
2226  return ERROR_INT("&nbytes not defined", procName, 1);
2227  *pnbytes = 0;
2228  if (!sa)
2229  return ERROR_INT("sa not defined", procName, 1);
2230 
2231  /* Read the pdf files into memory */
2232  if ((npages = sarrayGetCount(sa)) == 0)
2233  return ERROR_INT("no filenames found", procName, 1);
2234  pa_data = ptraCreate(npages);
2235  for (i = 0; i < npages; i++) {
2236  fname = sarrayGetString(sa, i, L_NOCOPY);
2237  bas = l_byteaInitFromFile(fname);
2238  ptraAdd(pa_data, bas);
2239  }
2240 
2241  ret = ptraConcatenatePdfToData(pa_data, sa, pdata, pnbytes);
2242 
2243  /* Cleanup: some pages could have been removed */
2244  ptraGetActualCount(pa_data, &npages);
2245  for (i = 0; i < npages; i++) {
2246  bas = (L_BYTEA *)ptraRemove(pa_data, i, L_NO_COMPACTION);
2247  l_byteaDestroy(&bas);
2248  }
2249  ptraDestroy(&pa_data, FALSE, FALSE);
2250  return ret;
2251 }
2252 
2253 /* --------------------------------------------*/
2254 #endif /* USE_PDFIO */
2255 /* --------------------------------------------*/
l_ok boxaaInitFull(BOXAA *baa, BOXA *boxa)
boxaaInitFull()
Definition: boxbasic.c:1583
l_ok boxaaReplaceBoxa(BOXAA *baa, l_int32 index, BOXA *boxa)
boxaaReplaceBoxa()
Definition: boxbasic.c:1665
void boxDestroy(BOX **pbox)
boxDestroy()
Definition: boxbasic.c:282
BOXAA * boxaaCreate(l_int32 n)
boxaaCreate()
Definition: boxbasic.c:1244
l_int32 boxaaGetCount(BOXAA *baa)
boxaaGetCount()
Definition: boxbasic.c:1454
l_int32 boxaGetCount(BOXA *boxa)
boxaGetCount()
Definition: boxbasic.c:734
l_ok boxaaExtendWithInit(BOXAA *baa, l_int32 maxindex, BOXA *boxa)
boxaaExtendWithInit()
Definition: boxbasic.c:1623
l_ok boxGetGeometry(BOX *box, l_int32 *px, l_int32 *py, l_int32 *pw, l_int32 *ph)
boxGetGeometry()
Definition: boxbasic.c:313
void boxaDestroy(BOXA **pboxa)
boxaDestroy()
Definition: boxbasic.c:583
BOXA * boxaaGetBoxa(BOXAA *baa, l_int32 index, l_int32 accessflag)
boxaaGetBoxa()
Definition: boxbasic.c:1501
BOX * boxaGetBox(BOXA *boxa, l_int32 index, l_int32 accessflag)
boxaGetBox()
Definition: boxbasic.c:779
BOXA * boxaCreate(l_int32 n)
boxaCreate()
Definition: boxbasic.c:502
BOX * boxTransform(BOX *box, l_int32 shiftx, l_int32 shifty, l_float32 scalex, l_float32 scaley)
boxTransform()
Definition: boxfunc2.c:152
PIX * pixSetBlackOrWhiteBoxa(PIX *pixs, BOXA *boxa, l_int32 op)
pixSetBlackOrWhiteBoxa()
Definition: boxfunc3.c:286
void l_byteaDestroy(L_BYTEA **pba)
l_byteaDestroy()
Definition: bytearray.c:250
L_BYTEA * l_byteaInitFromMem(const l_uint8 *data, size_t size)
l_byteaInitFromMem()
Definition: bytearray.c:125
L_BYTEA * l_byteaInitFromFile(const char *fname)
l_byteaInitFromFile()
Definition: bytearray.c:154
l_ok pixNumColors(PIX *pixs, l_int32 factor, l_int32 *pncolors)
pixNumColors()
BOXA * pixConnComp(PIX *pixs, PIXA **ppixa, l_int32 connectivity)
pixConnComp()
Definition: conncomp.c:151
@ L_DEFAULT_ENCODE
Definition: imageio.h:158
@ L_FLATE_ENCODE
Definition: imageio.h:161
@ L_G4_ENCODE
Definition: imageio.h:160
@ L_JP2K_ENCODE
Definition: imageio.h:162
@ L_JPEG_ENCODE
Definition: imageio.h:159
@ L_FIRST_IMAGE
Definition: imageio.h:208
@ L_NEXT_IMAGE
Definition: imageio.h:209
@ L_LAST_IMAGE
Definition: imageio.h:210
l_ok concatenatePdf(const char *dirname, const char *substr, const char *fileout)
concatenatePdf()
Definition: pdfio1.c:2055
l_ok concatenatePdfToData(const char *dirname, const char *substr, l_uint8 **pdata, size_t *pnbytes)
concatenatePdfToData()
Definition: pdfio1.c:2170
l_ok saConvertFilesToPdf(SARRAY *sa, l_int32 res, l_float32 scalefactor, l_int32 type, l_int32 quality, const char *title, const char *fileout)
saConvertFilesToPdf()
Definition: pdfio1.c:303
l_ok convertUnscaledFilesToPdf(const char *dirname, const char *substr, const char *title, const char *fileout)
convertUnscaledFilesToPdf()
Definition: pdfio1.c:540
l_ok pixWriteMemPdf(l_uint8 **pdata, size_t *pnbytes, PIX *pix, l_int32 res, const char *title)
pixWriteMemPdf()
Definition: pdfio1.c:1395
l_ok convertToPdf(const char *filein, l_int32 type, l_int32 quality, const char *fileout, l_int32 x, l_int32 y, l_int32 res, const char *title, L_PDF_DATA **plpd, l_int32 position)
convertToPdf()
Definition: pdfio1.c:999
l_ok saConvertFilesToPdfData(SARRAY *sa, l_int32 res, l_float32 scalefactor, l_int32 type, l_int32 quality, const char *title, l_uint8 **pdata, size_t *pnbytes)
saConvertFilesToPdfData()
Definition: pdfio1.c:358
l_ok convertToPdfData(const char *filein, l_int32 type, l_int32 quality, l_uint8 **pdata, size_t *pnbytes, l_int32 x, l_int32 y, l_int32 res, const char *title, L_PDF_DATA **plpd, l_int32 position)
convertToPdfData()
Definition: pdfio1.c:1140
l_ok ptraConcatenatePdf(L_PTRA *pa, const char *fileout)
ptraConcatenatePdf()
Definition: pdfio1.c:2126
l_ok convertSegmentedFilesToPdf(const char *dirname, const char *substr, l_int32 res, l_int32 type, l_int32 thresh, BOXAA *baa, l_int32 quality, l_float32 scalefactor, const char *title, const char *fileout)
convertSegmentedFilesToPdf()
Definition: pdfio1.c:1469
l_ok pixaConvertToPdfData(PIXA *pixa, l_int32 res, l_float32 scalefactor, l_int32 type, l_int32 quality, const char *title, l_uint8 **pdata, size_t *pnbytes)
pixaConvertToPdfData()
Definition: pdfio1.c:844
l_ok saConcatenatePdfToData(SARRAY *sa, l_uint8 **pdata, size_t *pnbytes)
saConcatenatePdfToData()
Definition: pdfio1.c:2211
l_ok pixConvertToPdf(PIX *pix, l_int32 type, l_int32 quality, const char *fileout, l_int32 x, l_int32 y, l_int32 res, const char *title, L_PDF_DATA **plpd, l_int32 position)
pixConvertToPdf()
Definition: pdfio1.c:1286
l_ok convertToPdfSegmented(const char *filein, l_int32 res, l_int32 type, l_int32 thresh, BOXA *boxa, l_int32 quality, l_float32 scalefactor, const char *title, const char *fileout)
convertToPdfSegmented()
Definition: pdfio1.c:1698
l_ok pixaConvertToPdf(PIXA *pixa, l_int32 res, l_float32 scalefactor, l_int32 type, l_int32 quality, const char *title, const char *fileout)
pixaConvertToPdf()
Definition: pdfio1.c:790
l_ok convertImageDataToPdf(l_uint8 *imdata, size_t size, l_int32 type, l_int32 quality, const char *fileout, l_int32 x, l_int32 y, l_int32 res, const char *title, L_PDF_DATA **plpd, l_int32 position)
convertImageDataToPdf()
Definition: pdfio1.c:1071
l_ok pixWriteStreamPdf(FILE *fp, PIX *pix, l_int32 res, const char *title)
pixWriteStreamPdf()
Definition: pdfio1.c:1346
l_ok convertImageDataToPdfData(l_uint8 *imdata, size_t size, l_int32 type, l_int32 quality, l_uint8 **pdata, size_t *pnbytes, l_int32 x, l_int32 y, l_int32 res, const char *title, L_PDF_DATA **plpd, l_int32 position)
convertImageDataToPdfData()
Definition: pdfio1.c:1208
l_ok saConvertUnscaledFilesToPdf(SARRAY *sa, const char *title, const char *fileout)
saConvertUnscaledFilesToPdf()
Definition: pdfio1.c:578
l_ok convertToPdfDataSegmented(const char *filein, l_int32 res, l_int32 type, l_int32 thresh, BOXA *boxa, l_int32 quality, l_float32 scalefactor, const char *title, l_uint8 **pdata, size_t *pnbytes)
convertToPdfDataSegmented()
Definition: pdfio1.c:1823
l_ok convertUnscaledToPdfData(const char *fname, const char *title, l_uint8 **pdata, size_t *pnbytes)
convertUnscaledToPdfData()
Definition: pdfio1.c:702
l_ok saConcatenatePdf(SARRAY *sa, const char *fileout)
saConcatenatePdf()
Definition: pdfio1.c:2090
l_ok pixConvertToPdfSegmented(PIX *pixs, l_int32 res, l_int32 type, l_int32 thresh, BOXA *boxa, l_int32 quality, l_float32 scalefactor, const char *title, const char *fileout)
pixConvertToPdfSegmented()
Definition: pdfio1.c:1759
BOXAA * convertNumberedMasksToBoxaa(const char *dirname, const char *substr, l_int32 numpre, l_int32 numpost)
convertNumberedMasksToBoxaa()
Definition: pdfio1.c:1588
l_ok selectDefaultPdfEncoding(PIX *pix, l_int32 *ptype)
selectDefaultPdfEncoding()
Definition: pdfio1.c:477
l_ok convertFilesToPdf(const char *dirname, const char *substr, l_int32 res, l_float32 scalefactor, l_int32 type, l_int32 quality, const char *title, const char *fileout)
convertFilesToPdf()
Definition: pdfio1.c:253
l_ok saConvertUnscaledFilesToPdfData(SARRAY *sa, const char *title, l_uint8 **pdata, size_t *pnbytes)
saConvertUnscaledFilesToPdfData()
Definition: pdfio1.c:623
l_ok pixConvertToPdfDataSegmented(PIX *pixs, l_int32 res, l_int32 type, l_int32 thresh, BOXA *boxa, l_int32 quality, l_float32 scalefactor, const char *title, l_uint8 **pdata, size_t *pnbytes)
pixConvertToPdfDataSegmented()
Definition: pdfio1.c:1891
l_ok pixConvertToPdfData(PIX *pix, l_int32 type, l_int32 quality, l_uint8 **pdata, size_t *pnbytes, l_int32 x, l_int32 y, l_int32 res, const char *title, L_PDF_DATA **plpd, l_int32 position)
pixConvertToPdfData()
Definition: pdfio2.c:190
l_ok ptraConcatenatePdfToData(L_PTRA *pa_data, SARRAY *sa, l_uint8 **pdata, size_t *pnbytes)
ptraConcatenatePdfToData()
Definition: pdfio2.c:321
l_ok cidConvertToPdfData(L_COMP_DATA *cid, const char *title, l_uint8 **pdata, size_t *pnbytes)
cidConvertToPdfData()
Definition: pdfio2.c:1607
l_ok l_generateCIDataForPdf(const char *fname, PIX *pix, l_int32 quality, L_COMP_DATA **pcid)
l_generateCIDataForPdf()
Definition: pdfio2.c:539
void pixDestroy(PIX **ppix)
pixDestroy()
Definition: pix1.c:621
l_ok pixGetDimensions(const PIX *pix, l_int32 *pw, l_int32 *ph, l_int32 *pd)
pixGetDimensions()
Definition: pix1.c:1113
PIX * pixCreateTemplate(const PIX *pixs)
pixCreateTemplate()
Definition: pix1.c:383
PIX * pixClone(PIX *pixs)
pixClone()
Definition: pix1.c:593
l_ok pixSetBlackOrWhite(PIX *pixs, l_int32 op)
pixSetBlackOrWhite()
Definition: pix2.c:1021
PIX * pixClipRectangle(PIX *pixs, BOX *box, BOX **pboxc)
pixClipRectangle()
Definition: pix5.c:1026
@ REMOVE_CMAP_TO_GRAYSCALE
Definition: pix.h:257
@ REMOVE_CMAP_BASED_ON_SRC
Definition: pix.h:260
@ L_CLONE
Definition: pix.h:713
@ L_NOCOPY
Definition: pix.h:710
@ L_SET_WHITE
Definition: pix.h:906
#define PIX_SRC
Definition: pix.h:330
l_int32 pixaGetCount(PIXA *pixa)
pixaGetCount()
Definition: pixabasic.c:650
PIX * pixaGetPix(PIXA *pixa, l_int32 index, l_int32 accesstype)
pixaGetPix()
Definition: pixabasic.c:691
PIX * pixRemoveColormap(PIX *pixs, l_int32 type)
pixRemoveColormap()
Definition: pixconv.c:328
PIX * pixConvertTo8(PIX *pixs, l_int32 cmapflag)
pixConvertTo8()
Definition: pixconv.c:3133
L_PTRA * ptraCreate(l_int32 n)
ptraCreate()
Definition: ptra.c:144
l_ok ptraGetActualCount(L_PTRA *pa, l_int32 *pcount)
ptraGetActualCount()
Definition: ptra.c:735
l_ok ptraAdd(L_PTRA *pa, void *item)
ptraAdd()
Definition: ptra.c:250
void ptraDestroy(L_PTRA **ppa, l_int32 freeflag, l_int32 warnflag)
ptraDestroy()
Definition: ptra.c:194
void * ptraRemove(L_PTRA *pa, l_int32 index, l_int32 flag)
ptraRemove()
Definition: ptra.c:442
@ L_NO_COMPACTION
Definition: ptra.h:79
l_ok findFileFormat(const char *filename, l_int32 *pformat)
findFileFormat()
Definition: readfile.c:584
PIX * pixReadMem(const l_uint8 *data, size_t size)
pixReadMem()
Definition: readfile.c:844
PIX * pixRead(const char *filename)
pixRead()
Definition: readfile.c:193
l_ok pixRasterop(PIX *pixd, l_int32 dx, l_int32 dy, l_int32 dw, l_int32 dh, l_int32 op, PIX *pixs, l_int32 sx, l_int32 sy)
pixRasterop()
Definition: rop.c:204
char * sarrayGetString(SARRAY *sa, l_int32 index, l_int32 copyflag)
sarrayGetString()
Definition: sarray1.c:703
l_int32 sarrayGetCount(SARRAY *sa)
sarrayGetCount()
Definition: sarray1.c:643
void sarrayDestroy(SARRAY **psa)
sarrayDestroy()
Definition: sarray1.c:362
SARRAY * getSortedPathnamesInDirectory(const char *dirname, const char *substr, l_int32 first, l_int32 nfiles)
getSortedPathnamesInDirectory()
Definition: sarray1.c:1848
SARRAY * getNumberedPathnamesInDirectory(const char *dirname, const char *substr, l_int32 numpre, l_int32 numpost, l_int32 maxnum)
getNumberedPathnamesInDirectory()
Definition: sarray1.c:1800
PIX * pixScale(PIX *pixs, l_float32 scalex, l_float32 scaley)
pixScale()
Definition: scale1.c:250
PIX * pixScaleGray2xLIThresh(PIX *pixs, l_int32 thresh)
pixScaleGray2xLIThresh()
Definition: scale1.c:927
PIX * pixScaleToGray(PIX *pixs, l_float32 scalefactor)
pixScaleToGray()
Definition: scale2.c:208
Definition: pix.h:481
Definition: pix.h:492
Definition: pix.h:502
Definition: array.h:137
Definition: ptra.h:54
Definition: pix.h:139
Definition: pix.h:456
Definition: array.h:127
void lept_stderr(const char *fmt,...)
lept_stderr()
Definition: utils1.c:306
l_ok splitPathAtDirectory(const char *pathname, char **pdir, char **ptail)
splitPathAtDirectory()
Definition: utils2.c:2824
l_ok l_binaryWrite(const char *filename, const char *operation, const void *data, size_t nbytes)
l_binaryWrite()
Definition: utils2.c:1569