Leptonica  1.82.0
Image processing and image analysis suite
psio1.c
Go to the documentation of this file.
1 /*====================================================================*
2  - Copyright (C) 2001 Leptonica. All rights reserved.
3  -
4  - Redistribution and use in source and binary forms, with or without
5  - modification, are permitted provided that the following conditions
6  - are met:
7  - 1. Redistributions of source code must retain the above copyright
8  - notice, this list of conditions and the following disclaimer.
9  - 2. Redistributions in binary form must reproduce the above
10  - copyright notice, this list of conditions and the following
11  - disclaimer in the documentation and/or other materials
12  - provided with the distribution.
13  -
14  - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15  - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16  - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17  - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18  - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19  - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20  - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21  - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22  - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23  - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *====================================================================*/
26 
110 #ifdef HAVE_CONFIG_H
111 #include <config_auto.h>
112 #endif /* HAVE_CONFIG_H */
113 
114 #include <string.h>
115 #include "allheaders.h"
116 
117 /* --------------------------------------------*/
118 #if USE_PSIO /* defined in environ.h */
119  /* --------------------------------------------*/
120 
121 /*-------------------------------------------------------------*
122  * Convert files in a directory to PS *
123  *-------------------------------------------------------------*/
124 /*
125  * \brief convertFilesToPS()
126  *
127  * \param[in] dirin input directory
128  * \param[in] substr [optional] substring filter on filenames; can be NULL
129  * \param[in] res typ. 300 or 600 ppi
130  * \param[in] fileout output ps file
131  * \return 0 if OK, 1 on error
132  *
133  * <pre>
134  * Notes:
135  * (1) This generates a PS file for all image files in a specified
136  * directory that contain the substr pattern to be matched.
137  * (2) Each image is written to a separate page in the output PS file.
138  * (3) All images are written compressed:
139  * * if tiffg4 --> use ccittg4
140  * * if jpeg --> use dct
141  * * all others --> use flate
142  * If the image is jpeg or tiffg4, we use the existing compressed
143  * strings for the encoding; otherwise, we read the image into
144  * a pix and flate-encode the pieces.
145  * (4) The resolution is often confusing. It is interpreted
146  * as the resolution of the output display device: "If the
147  * input image were digitized at 300 ppi, what would it
148  * look like when displayed at res ppi." So, for example,
149  * if res = 100 ppi, then the display pixels are 3x larger
150  * than the 300 ppi pixels, and the image will be rendered
151  * 3x larger.
152  * (5) The size of the PostScript file is independent of the resolution,
153  * because the entire file is encoded. The res parameter just
154  * tells the PS decomposer how to render the page. Therefore,
155  * for minimum file size without loss of visual information,
156  * if the output res is less than 300, you should downscale
157  * the image to the output resolution before wrapping in PS.
158  * (6) The "canvas" on which the image is rendered, at the given
159  * output resolution, is a standard page size (8.5 x 11 in).
160  * </pre>
161  */
162 l_ok
163 convertFilesToPS(const char *dirin,
164  const char *substr,
165  l_int32 res,
166  const char *fileout)
167 {
168 SARRAY *sa;
169 
170  PROCNAME("convertFilesToPS");
171 
172  if (!dirin)
173  return ERROR_INT("dirin not defined", procName, 1);
174  if (!fileout)
175  return ERROR_INT("fileout not defined", procName, 1);
176  if (res <= 0) {
177  L_INFO("setting res to 300 ppi\n", procName);
178  res = 300;
179  }
180  if (res < 10 || res > 4000)
181  L_WARNING("res is typically in the range 300-600 ppi\n", procName);
182 
183  /* Get all filtered and sorted full pathnames. */
184  sa = getSortedPathnamesInDirectory(dirin, substr, 0, 0);
185 
186  /* Generate the PS file. Don't use bounding boxes. */
187  l_psWriteBoundingBox(FALSE);
188  sarrayConvertFilesToPS(sa, res, fileout);
189  l_psWriteBoundingBox(TRUE);
190  sarrayDestroy(&sa);
191  return 0;
192 }
193 
194 
195 /*
196 
197  * \brief sarrayConvertFilesToPS()
198  *
199  * \param[in] sa sarray of full path names
200  * \param[in] res typ. 300 or 600 ppi
201  * \param[in] fileout output ps file
202  * \return 0 if OK, 1 on error
203  *
204  * <pre>
205  * Notes:
206  * (1) See convertFilesToPS()
207  * </pre>
208  */
209 l_ok
210 sarrayConvertFilesToPS(SARRAY *sa,
211  l_int32 res,
212  const char *fileout)
213 {
214 char *fname;
215 l_int32 i, nfiles, index, ret, format;
216 
217  PROCNAME("sarrayConvertFilesToPS");
218 
219  if (!sa)
220  return ERROR_INT("sa not defined", procName, 1);
221  if (!fileout)
222  return ERROR_INT("fileout not defined", procName, 1);
223  if (res <= 0) {
224  L_INFO("setting res to 300 ppi\n", procName);
225  res = 300;
226  }
227  if (res < 10 || res > 4000)
228  L_WARNING("res is typically in the range 300-600 ppi\n", procName);
229 
230  nfiles = sarrayGetCount(sa);
231  for (i = 0, index = 0; i < nfiles; i++) {
232  fname = sarrayGetString(sa, i, L_NOCOPY);
233  ret = pixReadHeader(fname, &format, NULL, NULL, NULL, NULL, NULL);
234  if (ret) continue;
235  if (format == IFF_UNKNOWN)
236  continue;
237 
238  writeImageCompressedToPSFile(fname, fileout, res, &index);
239  }
240 
241  return 0;
242 }
243 
244 
245 /*
246  * \brief convertFilesFittedToPS()
247  *
248  * \param[in] dirin input directory
249  * \param[in] substr [optional] substring filter on filenames; can be NULL
250  * \param[in] xpts desired size in printer points; use 0 for default
251  * \param[in] ypts desired size in printer points; use 0 for default
252  * \param[in] fileout output ps file
253  * \return 0 if OK, 1 on error
254  *
255  * <pre>
256  * Notes:
257  * (1) This generates a PS file for all files in a specified directory
258  * that contain the substr pattern to be matched.
259  * (2) Each image is written to a separate page in the output PS file.
260  * (3) All images are written compressed:
261  * * if tiffg4 --> use ccittg4
262  * * if jpeg --> use dct
263  * * all others --> use flate
264  * If the image is jpeg or tiffg4, we use the existing compressed
265  * strings for the encoding; otherwise, we read the image into
266  * a pix and flate-encode the pieces.
267  * (4) The resolution is internally determined such that the images
268  * are rendered, in at least one direction, at 100% of the given
269  * size in printer points. Use 0.0 for xpts or ypts to get
270  * the default value, which is 612.0 or 792.0, rsp.
271  * (5) The size of the PostScript file is independent of the resolution,
272  * because the entire file is encoded. The %xpts and %ypts
273  * parameters tell the PS decomposer how to render the page.
274  * </pre>
275  */
276 l_ok
277 convertFilesFittedToPS(const char *dirin,
278  const char *substr,
279  l_float32 xpts,
280  l_float32 ypts,
281  const char *fileout)
282 {
283 SARRAY *sa;
284 
285  PROCNAME("convertFilesFittedToPS");
286 
287  if (!dirin)
288  return ERROR_INT("dirin not defined", procName, 1);
289  if (!fileout)
290  return ERROR_INT("fileout not defined", procName, 1);
291  if (xpts <= 0.0) {
292  L_INFO("setting xpts to 612.0 ppi\n", procName);
293  xpts = 612.0;
294  }
295  if (ypts <= 0.0) {
296  L_INFO("setting ypts to 792.0 ppi\n", procName);
297  ypts = 792.0;
298  }
299  if (xpts < 100.0 || xpts > 2000.0 || ypts < 100.0 || ypts > 2000.0)
300  L_WARNING("xpts,ypts are typically in the range 500-800\n", procName);
301 
302  /* Get all filtered and sorted full pathnames. */
303  sa = getSortedPathnamesInDirectory(dirin, substr, 0, 0);
304 
305  /* Generate the PS file. Don't use bounding boxes. */
306  l_psWriteBoundingBox(FALSE);
307  sarrayConvertFilesFittedToPS(sa, xpts, ypts, fileout);
308  l_psWriteBoundingBox(TRUE);
309  sarrayDestroy(&sa);
310  return 0;
311 }
312 
313 
314 /*
315  * \brief sarrayConvertFilesFittedToPS()
316  *
317  * \param[in] sa sarray of full path names
318  * \param[in] xpts desired size in printer points; use 0 for default
319  * \param[in] ypts desired size in printer points; use 0 for default
320  * \param[in] fileout output ps file
321  * \return 0 if OK, 1 on error
322  *
323  * <pre>
324  * Notes:
325  * (1) See convertFilesFittedToPS()
326  * </pre>
327  */
328 l_ok
329 sarrayConvertFilesFittedToPS(SARRAY *sa,
330  l_float32 xpts,
331  l_float32 ypts,
332  const char *fileout)
333 {
334 char *fname;
335 l_int32 ret, i, w, h, nfiles, index, format, res;
336 
337  PROCNAME("sarrayConvertFilesFittedToPS");
338 
339  if (!sa)
340  return ERROR_INT("sa not defined", procName, 1);
341  if (!fileout)
342  return ERROR_INT("fileout not defined", procName, 1);
343  if (xpts <= 0.0) {
344  L_INFO("setting xpts to 612.0\n", procName);
345  xpts = 612.0;
346  }
347  if (ypts <= 0.0) {
348  L_INFO("setting ypts to 792.0\n", procName);
349  ypts = 792.0;
350  }
351  if (xpts < 100.0 || xpts > 2000.0 || ypts < 100.0 || ypts > 2000.0)
352  L_WARNING("xpts,ypts are typically in the range 500-800\n", procName);
353 
354  nfiles = sarrayGetCount(sa);
355  for (i = 0, index = 0; i < nfiles; i++) {
356  fname = sarrayGetString(sa, i, L_NOCOPY);
357  ret = pixReadHeader(fname, &format, &w, &h, NULL, NULL, NULL);
358  if (ret) continue;
359  if (format == IFF_UNKNOWN)
360  continue;
361 
362  /* Be sure the entire image is wrapped */
363  if (xpts * h < ypts * w)
364  res = (l_int32)((l_float32)w * 72.0 / xpts);
365  else
366  res = (l_int32)((l_float32)h * 72.0 / ypts);
367 
368  writeImageCompressedToPSFile(fname, fileout, res, &index);
369  }
370 
371  return 0;
372 }
373 
374 
375 /*
376  * \brief writeImageCompressedToPSFile()
377  *
378  * \param[in] filein input image file
379  * \param[in] fileout output ps file
380  * \param[in] res output printer resolution
381  * \param[in,out] pindex index of image in output ps file
382  * \return 0 if OK, 1 on error
383  *
384  * <pre>
385  * Notes:
386  * (1) This wraps a single page image in PS.
387  * (2) The input file can be in any format. It is compressed as follows:
388  * * if in tiffg4 --> use ccittg4
389  * * if in jpeg --> use dct
390  * * all others --> use flate
391  * (3) Before the first call, set %index = 0. %index is incremented
392  * if the page is successfully written. It is used to decide
393  * whether to write (index == 0) or append (index > 0) to the file.
394  * </pre>
395  */
396 l_ok
397 writeImageCompressedToPSFile(const char *filein,
398  const char *fileout,
399  l_int32 res,
400  l_int32 *pindex)
401 {
402 const char *op;
403 l_int32 format, retval;
404 
405  PROCNAME("writeImageCompressedToPSFile");
406 
407  if (!pindex)
408  return ERROR_INT("&index not defined", procName, 1);
409 
410  findFileFormat(filein, &format);
411  if (format == IFF_UNKNOWN) {
412  L_ERROR("format of %s not known\n", procName, filein);
413  return 1;
414  }
415 
416  op = (*pindex == 0) ? "w" : "a";
417  if (format == IFF_JFIF_JPEG) {
418  retval = convertJpegToPS(filein, fileout, op, 0, 0,
419  res, 1.0, *pindex + 1, TRUE);
420  } else if (format == IFF_TIFF_G4) {
421  retval = convertG4ToPS(filein, fileout, op, 0, 0,
422  res, 1.0, *pindex + 1, FALSE, TRUE);
423  } else { /* all other image formats */
424  retval = convertFlateToPS(filein, fileout, op, 0, 0,
425  res, 1.0, *pindex + 1, TRUE);
426  }
427  if (retval == 0) (*pindex)++;
428 
429  return retval;
430 }
431 
432 
433 /*-------------------------------------------------------------*
434  * Convert mixed text/image files to PS *
435  *-------------------------------------------------------------*/
436 /*
437  * \brief convertSegmentedPagesToPS()
438  *
439  * \param[in] pagedir input page image directory
440  * \param[in] pagestr [optional] substring filter on page filenames;
441  * can be NULL
442  * \param[in] page_numpre number of characters in page name before number
443  * \param[in] maskdir input mask image directory
444  * \param[in] maskstr [optional] substring filter on mask filenames;
445  * can be NULL
446  * \param[in] mask_numpre number of characters in mask name before number
447  * \param[in] numpost number of characters in names after number
448  * \param[in] maxnum only consider page numbers up to this value
449  * \param[in] textscale scale of text output relative to pixs
450  * \param[in] imagescale scale of image output relative to pixs
451  * \param[in] threshold for binarization; typ. about 190; 0 for default
452  * \param[in] fileout output ps file
453  * \return 0 if OK, 1 on error
454  *
455  * <pre>
456  * Notes:
457  * (1) This generates a PS file for all page image and mask files in two
458  * specified directories and that contain the page numbers as
459  * specified below. The two directories can be the same, in which
460  * case the page and mask files are differentiated by the two
461  * substrings for string matches.
462  * (2) The page images are taken in lexicographic order.
463  * Mask images whose numbers match the page images are used to
464  * segment the page images. Page images without a matching
465  * mask image are scaled, thresholded and rendered entirely as text.
466  * (3) Each PS page is generated as a compressed representation of
467  * the page image, where the part of the image under the mask
468  * is suitably scaled and compressed as DCT (i.e., jpeg), and
469  * the remaining part of the page is suitably scaled, thresholded,
470  * compressed as G4 (i.e., tiff g4), and rendered by painting
471  * black through the resulting text mask.
472  * (4) The scaling is typically 2x down for the DCT component
473  * (%imagescale = 0.5) and 2x up for the G4 component
474  * (%textscale = 2.0).
475  * (5) The resolution is automatically set to fit to a
476  * letter-size (8.5 x 11 inch) page.
477  * (6) Both the DCT and the G4 encoding are PostScript level 2.
478  * (7) It is assumed that the page number is contained within
479  * the basename (the filename without directory or extension).
480  * %page_numpre is the number of characters in the page basename
481  * preceding the actual page number; %mask_numpre is likewise for
482  * the mask basename; %numpost is the number of characters
483  * following the page number. For example, for mask name
484  * mask_006.tif, mask_numpre = 5 ("mask_").
485  * (8) To render a page as is -- that is, with no thresholding
486  * of any pixels -- use a mask in the mask directory that is
487  * full size with all pixels set to 1. If the page is 1 bpp,
488  * it is not necessary to have a mask.
489  * </pre>
490  */
491 l_ok
492 convertSegmentedPagesToPS(const char *pagedir,
493  const char *pagestr,
494  l_int32 page_numpre,
495  const char *maskdir,
496  const char *maskstr,
497  l_int32 mask_numpre,
498  l_int32 numpost,
499  l_int32 maxnum,
500  l_float32 textscale,
501  l_float32 imagescale,
502  l_int32 threshold,
503  const char *fileout)
504 {
505 l_int32 pageno, i, npages;
506 PIX *pixs, *pixm;
507 SARRAY *sapage, *samask;
508 
509  PROCNAME("convertSegmentedPagesToPS");
510 
511  if (!pagedir)
512  return ERROR_INT("pagedir not defined", procName, 1);
513  if (!maskdir)
514  return ERROR_INT("maskdir not defined", procName, 1);
515  if (!fileout)
516  return ERROR_INT("fileout not defined", procName, 1);
517  if (threshold <= 0) {
518  L_INFO("setting threshold to 190\n", procName);
519  threshold = 190;
520  }
521 
522  /* Get numbered full pathnames; max size of sarray is maxnum */
523  sapage = getNumberedPathnamesInDirectory(pagedir, pagestr,
524  page_numpre, numpost, maxnum);
525  samask = getNumberedPathnamesInDirectory(maskdir, maskstr,
526  mask_numpre, numpost, maxnum);
527  sarrayPadToSameSize(sapage, samask, "");
528  if ((npages = sarrayGetCount(sapage)) == 0) {
529  sarrayDestroy(&sapage);
530  sarrayDestroy(&samask);
531  return ERROR_INT("no matching pages found", procName, 1);
532  }
533 
534  /* Generate the PS file */
535  pageno = 1;
536  for (i = 0; i < npages; i++) {
537  if ((pixs = pixReadIndexed(sapage, i)) == NULL)
538  continue;
539  pixm = pixReadIndexed(samask, i);
540  pixWriteSegmentedPageToPS(pixs, pixm, textscale, imagescale,
541  threshold, pageno, fileout);
542  pixDestroy(&pixs);
543  pixDestroy(&pixm);
544  pageno++;
545  }
546 
547  sarrayDestroy(&sapage);
548  sarrayDestroy(&samask);
549  return 0;
550 }
551 
552 
553 /*
554  * \brief pixWriteSegmentedPageToPS()
555  *
556  * \param[in] pixs all depths; colormap ok
557  * \param[in] pixm [optional] 1 bpp segmentation mask over image region
558  * \param[in] textscale scale of text output relative to pixs
559  * \param[in] imagescale scale of image output relative to pixs
560  * \param[in] threshold for binarization; typ. about 190; 0 for default
561  * \param[in] pageno page number in set; use 1 for new output file
562  * \param[in] fileout output ps file
563  * \return 0 if OK, 1 on error
564  *
565  * <pre>
566  * Notes:
567  * (1) This generates the PS string for a mixed text/image page,
568  * and adds it to an existing file if %pageno > 1.
569  * The PS output is determined by fitting the result to
570  * a letter-size (8.5 x 11 inch) page.
571  * (2) The two images (pixs and pixm) are at the same resolution
572  * (typically 300 ppi). They are used to generate two compressed
573  * images, pixb and pixc, that are put directly into the output
574  * PS file.
575  * (3) pixb is the text component. In the PostScript world, we think of
576  * it as a mask through which we paint black. It is produced by
577  * scaling pixs by %textscale, and thresholding to 1 bpp.
578  * (4) pixc is the image component, which is that part of pixs under
579  * the mask pixm. It is scaled from pixs by %imagescale.
580  * (5) Typical values are textscale = 2.0 and imagescale = 0.5.
581  * (6) If pixm == NULL, the page has only text. If it is all black,
582  * the page is all image and has no text.
583  * (7) This can be used to write a multi-page PS file, by using
584  * sequential page numbers with the same output file. It can
585  * also be used to write separate PS files for each page,
586  * by using different output files with %pageno = 0 or 1.
587  * </pre>
588  */
589 l_ok
590 pixWriteSegmentedPageToPS(PIX *pixs,
591  PIX *pixm,
592  l_float32 textscale,
593  l_float32 imagescale,
594  l_int32 threshold,
595  l_int32 pageno,
596  const char *fileout)
597 {
598 l_int32 alltext, notext, d, ret;
599 l_uint32 val;
600 l_float32 scaleratio;
601 PIX *pixmi, *pixmis, *pixt, *pixg, *pixsc, *pixb, *pixc;
602 
603  PROCNAME("pixWriteSegmentedPageToPS");
604 
605  if (!pixs)
606  return ERROR_INT("pixs not defined", procName, 1);
607  if (!fileout)
608  return ERROR_INT("fileout not defined", procName, 1);
609  if (imagescale <= 0.0 || textscale <= 0.0)
610  return ERROR_INT("relative scales must be > 0.0", procName, 1);
611 
612  /* Analyze the page. Determine the ratio by which the
613  * binary text mask is scaled relative to the image part.
614  * If there is no image region (alltext == TRUE), the
615  * text mask will be rendered directly to fit the page,
616  * and scaleratio = 1.0. */
617  alltext = TRUE;
618  notext = FALSE;
619  scaleratio = 1.0;
620  if (pixm) {
621  pixZero(pixm, &alltext); /* pixm empty: all text */
622  if (alltext) {
623  pixm = NULL; /* treat it as not existing here */
624  } else {
625  pixmi = pixInvert(NULL, pixm);
626  pixZero(pixmi, &notext); /* pixm full; no text */
627  pixDestroy(&pixmi);
628  scaleratio = textscale / imagescale;
629  }
630  }
631 
632  if (pixGetDepth(pixs) == 1) { /* render tiff g4 */
633  pixb = pixClone(pixs);
634  pixc = NULL;
635  } else {
636  pixt = pixConvertTo8Or32(pixs, L_CLONE, 0); /* clone if possible */
637 
638  /* Get the binary text mask. Note that pixg cannot be a
639  * clone of pixs, because it may be altered by pixSetMasked(). */
640  pixb = NULL;
641  if (notext == FALSE) {
642  d = pixGetDepth(pixt);
643  if (d == 8)
644  pixg = pixCopy(NULL, pixt);
645  else /* d == 32 */
646  pixg = pixConvertRGBToLuminance(pixt);
647  if (pixm) /* clear out the image parts */
648  pixSetMasked(pixg, pixm, 255);
649  if (textscale == 1.0)
650  pixsc = pixClone(pixg);
651  else if (textscale >= 0.7)
652  pixsc = pixScaleGrayLI(pixg, textscale, textscale);
653  else
654  pixsc = pixScaleAreaMap(pixg, textscale, textscale);
655  pixb = pixThresholdToBinary(pixsc, threshold);
656  pixDestroy(&pixg);
657  pixDestroy(&pixsc);
658  }
659 
660  /* Get the scaled image region */
661  pixc = NULL;
662  if (pixm) {
663  if (imagescale == 1.0)
664  pixsc = pixClone(pixt); /* can possibly be a clone of pixs */
665  else
666  pixsc = pixScale(pixt, imagescale, imagescale);
667 
668  /* If pixm is not full, clear the pixels in pixsc
669  * corresponding to bg in pixm, where there can be text
670  * that is written through the mask pixb. Note that
671  * we could skip this and use pixsc directly in
672  * pixWriteMixedToPS(); however, clearing these
673  * non-image regions to a white background will reduce
674  * the size of pixc (relative to pixsc), and hence
675  * reduce the size of the PS file that is generated.
676  * Use a copy so that we don't accidentally alter pixs. */
677  if (notext == FALSE) {
678  pixmis = pixScale(pixm, imagescale, imagescale);
679  pixmi = pixInvert(NULL, pixmis);
680  val = (d == 8) ? 0xff : 0xffffff00;
681  pixc = pixCopy(NULL, pixsc);
682  pixSetMasked(pixc, pixmi, val); /* clear non-image part */
683  pixDestroy(&pixmis);
684  pixDestroy(&pixmi);
685  } else {
686  pixc = pixClone(pixsc);
687  }
688  pixDestroy(&pixsc);
689  }
690  pixDestroy(&pixt);
691  }
692 
693  /* Generate the PS file. Don't use bounding boxes. */
694  l_psWriteBoundingBox(FALSE);
695  ret = pixWriteMixedToPS(pixb, pixc, scaleratio, pageno, fileout);
696  l_psWriteBoundingBox(TRUE);
697  pixDestroy(&pixb);
698  pixDestroy(&pixc);
699  return ret;
700 }
701 
702 
703 /*
704  * \brief pixWriteMixedToPS()
705  *
706  * \param[in] pixb [optional] 1 bpp mask; typically for text
707  * \param[in] pixc [optional] 8 or 32 bpp image regions
708  * \param[in] scale scale factor for rendering pixb, relative to pixc;
709  * typ. 4.0
710  * \param[in] pageno page number in set; use 1 for new output file
711  * \param[in] fileout output ps file
712  * \return 0 if OK, 1 on error
713  *
714  * <pre>
715  * Notes:
716  * (1) This low level function generates the PS string for a mixed
717  * text/image page, and adds it to an existing file if
718  * %pageno > 1.
719  * (2) The two images (pixb and pixc) are typically generated at the
720  * resolution that they will be rendered in the PS file.
721  * (3) pixb is the text component. In the PostScript world, we think of
722  * it as a mask through which we paint black.
723  * (4) pixc is the (typically halftone) image component. It is
724  * white in the rest of the page. To minimize the size of the
725  * PS file, it should be rendered at a resolution that is at
726  * least equal to its actual resolution.
727  * (5) %scale gives the ratio of resolution of pixb to pixc.
728  * Typical resolutions are: 600 ppi for pixb, 150 ppi for pixc;
729  * so %scale = 4.0. If one of the images is not defined,
730  * the value of %scale is ignored.
731  * (6) We write pixc with DCT compression (jpeg). This is followed
732  * by painting the text as black through the mask pixb. If
733  * pixc doesn't exist (alltext), we write the text with the
734  * PS "image" operator instead of the "imagemask" operator,
735  * because ghostscript's ps2pdf is flaky when the latter is used.
736  * (7) The actual output resolution is determined by fitting the
737  * result to a letter-size (8.5 x 11 inch) page.
738  * </pre>
739  */
740 l_ok
741 pixWriteMixedToPS(PIX *pixb,
742  PIX *pixc,
743  l_float32 scale,
744  l_int32 pageno,
745  const char *fileout)
746 {
747 char *tname;
748 const char *op;
749 l_int32 resb, resc, endpage, maskop, ret;
750 
751  PROCNAME("pixWriteMixedToPS");
752 
753  if (!pixb && !pixc)
754  return ERROR_INT("pixb and pixc both undefined", procName, 1);
755  if (!fileout)
756  return ERROR_INT("fileout not defined", procName, 1);
757 
758  /* Compute the resolution that fills a letter-size page. */
759  if (!pixc) {
760  resb = getResLetterPage(pixGetWidth(pixb), pixGetHeight(pixb), 0);
761  } else {
762  resc = getResLetterPage(pixGetWidth(pixc), pixGetHeight(pixc), 0);
763  if (pixb)
764  resb = (l_int32)(scale * resc);
765  }
766 
767  /* Write the jpeg image first */
768  if (pixc) {
769  tname = l_makeTempFilename();
770  pixWrite(tname, pixc, IFF_JFIF_JPEG);
771  endpage = (pixb) ? FALSE : TRUE;
772  op = (pageno <= 1) ? "w" : "a";
773  ret = convertJpegToPS(tname, fileout, op, 0, 0, resc, 1.0,
774  pageno, endpage);
775  lept_rmfile(tname);
776  LEPT_FREE(tname);
777  if (ret)
778  return ERROR_INT("jpeg data not written", procName, 1);
779  }
780 
781  /* Write the binary data, either directly or, if there is
782  * a jpeg image on the page, through the mask. */
783  if (pixb) {
784  tname = l_makeTempFilename();
785  pixWrite(tname, pixb, IFF_TIFF_G4);
786  op = (pageno <= 1 && !pixc) ? "w" : "a";
787  maskop = (pixc) ? 1 : 0;
788  ret = convertG4ToPS(tname, fileout, op, 0, 0, resb, 1.0,
789  pageno, maskop, 1);
790  lept_rmfile(tname);
791  LEPT_FREE(tname);
792  if (ret)
793  return ERROR_INT("tiff data not written", procName, 1);
794  }
795 
796  return 0;
797 }
798 
799 
800 /*-------------------------------------------------------------*
801  * Convert any image file to PS for embedding *
802  *-------------------------------------------------------------*/
803 /*
804  * \brief convertToPSEmbed()
805  *
806  * \param[in] filein input image file, any format
807  * \param[in] fileout output ps file
808  * \param[in] level PostScript compression: 1 (uncompressed), 2 or 3
809  * \return 0 if OK, 1 on error
810  *
811  * <pre>
812  * Notes:
813  * (1) This is a wrapper function that generates a PS file with
814  * a bounding box, from any input image file.
815  * (2) Do the best job of compression given the specified level.
816  * %level=3 does flate compression on anything that is not
817  * tiffg4 (1 bpp) or jpeg (8 bpp or rgb).
818  * (3) If %level=2 and the file is not tiffg4 or jpeg, it will
819  * first be written to file as jpeg with quality = 75.
820  * This will remove the colormap and cause some degradation
821  * in the image.
822  * (4) The bounding box is required when a program such as TeX
823  * (through epsf) places and rescales the image. It is
824  * sized for fitting the image to an 8.5 x 11.0 inch page.
825  * </pre>
826  */
827 l_ok
828 convertToPSEmbed(const char *filein,
829  const char *fileout,
830  l_int32 level)
831 {
832 char *tname;
833 l_int32 d, format;
834 PIX *pix, *pixs;
835 
836  PROCNAME("convertToPSEmbed");
837 
838  if (!filein)
839  return ERROR_INT("filein not defined", procName, 1);
840  if (!fileout)
841  return ERROR_INT("fileout not defined", procName, 1);
842  if (level != 1 && level != 2 && level != 3) {
843  L_ERROR("invalid level specified; using level 2\n", procName);
844  level = 2;
845  }
846 
847  if (level == 1) { /* no compression */
848  pixWritePSEmbed(filein, fileout);
849  return 0;
850  }
851 
852  /* Find the format and write out directly if in jpeg or tiff g4 */
853  findFileFormat(filein, &format);
854  if (format == IFF_JFIF_JPEG) {
855  convertJpegToPSEmbed(filein, fileout);
856  return 0;
857  } else if (format == IFF_TIFF_G4) {
858  convertG4ToPSEmbed(filein, fileout);
859  return 0;
860  } else if (format == IFF_UNKNOWN) {
861  L_ERROR("format of %s not known\n", procName, filein);
862  return 1;
863  }
864 
865  /* If level 3, flate encode. */
866  if (level == 3) {
867  convertFlateToPSEmbed(filein, fileout);
868  return 0;
869  }
870 
871  /* OK, it's level 2, so we must convert to jpeg or tiff g4 */
872  if ((pixs = pixRead(filein)) == NULL)
873  return ERROR_INT("image not read from file", procName, 1);
874  d = pixGetDepth(pixs);
875  if ((d == 2 || d == 4) && !pixGetColormap(pixs))
876  pix = pixConvertTo8(pixs, 0);
877  else if (d == 16)
878  pix = pixConvert16To8(pixs, L_MS_BYTE);
879  else
881  pixDestroy(&pixs);
882  if (!pix)
883  return ERROR_INT("converted pix not made", procName, 1);
884 
885  d = pixGetDepth(pix);
886  tname = l_makeTempFilename();
887  if (d == 1) {
888  if (pixWrite(tname, pix, IFF_TIFF_G4)) {
889  LEPT_FREE(tname);
890  pixDestroy(&pix);
891  return ERROR_INT("g4 tiff not written", procName, 1);
892  }
893  convertG4ToPSEmbed(tname, fileout);
894  } else {
895  if (pixWrite(tname, pix, IFF_JFIF_JPEG)) {
896  LEPT_FREE(tname);
897  pixDestroy(&pix);
898  return ERROR_INT("jpeg not written", procName, 1);
899  }
900  convertJpegToPSEmbed(tname, fileout);
901  }
902 
903  lept_rmfile(tname);
904  LEPT_FREE(tname);
905  pixDestroy(&pix);
906  return 0;
907 }
908 
909 
910 /*-------------------------------------------------------------*
911  * Write all images in a pixa out to PS *
912  *-------------------------------------------------------------*/
913 /*
914  * \brief pixaWriteCompressedToPS()
915  *
916  * \param[in] pixa any set of images
917  * \param[in] fileout output ps file
918  * \param[in] res resolution for the set of input images
919  * \param[in] level PostScript compression capability: 2 or 3
920  * \return 0 if OK, 1 on error
921  *
922  * <pre>
923  * Notes:
924  * (1) This generates a PostScript file of multiple page images,
925  * all with bounding boxes.
926  * (2) See pixWriteCompressedToPS() for details.
927  * (3) To generate a pdf from %fileout, use:
928  * ps2pdf <infile.ps> <outfile.pdf>
929  * </pre>
930  */
931 l_ok
932 pixaWriteCompressedToPS(PIXA *pixa,
933  const char *fileout,
934  l_int32 res,
935  l_int32 level)
936 {
937 l_int32 i, n, index, ret;
938 PIX *pix;
939 
940  PROCNAME("pixaWriteCompressedToPS");
941 
942  if (!pixa)
943  return ERROR_INT("pixa not defined", procName, 1);
944  if (!fileout)
945  return ERROR_INT("fileout not defined", procName, 1);
946  if (level != 2 && level != 3) {
947  L_ERROR("only levels 2 and 3 permitted; using level 2\n", procName);
948  level = 2;
949  }
950 
951  index = 0;
952  n = pixaGetCount(pixa);
953  for (i = 0; i < n; i++) {
954  pix = pixaGetPix(pixa, i, L_CLONE);
955  ret = pixWriteCompressedToPS(pix, fileout, res, level, &index);
956  if (ret) L_ERROR("PS string not written for image %d\n", procName, i);
957  pixDestroy(&pix);
958  }
959  return 0;
960 }
961 
962 
963 /*
964  * \brief pixWriteCompressedToPS()
965  *
966  * \param[in] pix any depth; colormap OK
967  * \param[in] fileout output ps file
968  * \param[in] res of input image
969  * \param[in] level PostScript compression capability: 2 or 3
970  * \param[in,out] pindex index of image in output ps file
971  * \return 0 if OK, 1 on error
972  *
973  * <pre>
974  * Notes:
975  * (1) This generates a PostScript string for %pix, and writes it
976  * to a file, with a bounding box.
977  * (2) *pindex keeps track of the number of images that have been
978  * written to %fileout. If this is the first image to be
979  * converted, set *pindex == 0 before passing it in. If the
980  * PostScript string is successfully generated, this will increment
981  * *pindex. If *pindex > 0, the PostScript string will be
982  * appended to %fileout.
983  * (3) PostScript level 2 enables lossless tiffg4 and lossy jpeg
984  * compression. Level 3 adds lossless flate (essentially gzip)
985  * compression.
986  * * For images with a colormap, lossless flate is often better in
987  * both quality and size than jpeg.
988  * * The decision for images without a colormap affects compression
989  * efficiency: %level2 (jpeg) is usually better than %level3 (flate)
990  * * Because jpeg does not handle 16 bpp, if %level == 2, the image
991  * is converted to 8 bpp (using MSB) and compressed with jpeg,
992  * cmap + level2: jpeg
993  * cmap + level3: flate
994  * 1 bpp: tiffg4
995  * 2 or 4 bpp + level2: jpeg
996  * 2 or 4 bpp + level3: flate
997  * 8 bpp + level2: jpeg
998  * 8 bpp + level3: flate
999  * 16 bpp + level2: jpeg [converted to 8 bpp, with warning]
1000  * 16 bpp + level3: flate
1001  * 32 bpp + level2: jpeg
1002  * 32 bpp + level3: flate
1003  * </pre>
1004  */
1005 l_ok
1006 pixWriteCompressedToPS(PIX *pix,
1007  const char *fileout,
1008  l_int32 res,
1009  l_int32 level,
1010  l_int32 *pindex)
1011 {
1012 char *tname;
1013 l_int32 writeout, d;
1014 PIX *pixt;
1015 PIXCMAP *cmap;
1016 
1017  PROCNAME("pixWriteCompressedToPS");
1018 
1019  if (!pix)
1020  return ERROR_INT("pix not defined", procName, 1);
1021  if (!fileout)
1022  return ERROR_INT("fileout not defined", procName, 1);
1023  if (level != 2 && level != 3) {
1024  L_ERROR("only levels 2 and 3 permitted; using level 2\n", procName);
1025  level = 2;
1026  }
1027  if (!pindex)
1028  return ERROR_INT("&index not defined", procName, 1);
1029 
1030  tname = l_makeTempFilename();
1031  writeout = TRUE;
1032  d = pixGetDepth(pix);
1033  cmap = pixGetColormap(pix);
1034  if (d == 1) {
1035  if (pixWrite(tname, pix, IFF_TIFF_G4))
1036  writeout = FALSE;
1037  } else if (level == 3) {
1038  if (pixWrite(tname, pix, IFF_PNG))
1039  writeout = FALSE;
1040  } else { /* level == 2 */
1041  if (cmap) {
1042  pixt = pixConvertForPSWrap(pix);
1043  if (pixWrite(tname, pixt, IFF_JFIF_JPEG))
1044  writeout = FALSE;
1045  pixDestroy(&pixt);
1046  } else if (d == 16) {
1047  L_WARNING("d = 16; converting to 8 bpp for jpeg\n", procName);
1048  pixt = pixConvert16To8(pix, L_MS_BYTE);
1049  if (pixWrite(tname, pixt, IFF_JFIF_JPEG))
1050  writeout = FALSE;
1051  pixDestroy(&pixt);
1052  } else if (d == 2 || d == 4) {
1053  pixt = pixConvertTo8(pix, 0);
1054  if (pixWrite(tname, pixt, IFF_JFIF_JPEG))
1055  writeout = FALSE;
1056  pixDestroy(&pixt);
1057  } else if (d == 8 || d == 32) {
1058  if (pixWrite(tname, pix, IFF_JFIF_JPEG))
1059  writeout = FALSE;
1060  } else { /* shouldn't happen */
1061  L_ERROR("invalid depth with level 2: %d\n", procName, d);
1062  writeout = FALSE;
1063  }
1064  }
1065 
1066  if (writeout)
1067  writeImageCompressedToPSFile(tname, fileout, res, pindex);
1068 
1069  if (lept_rmfile(tname) != 0)
1070  L_ERROR("temp file %s was not deleted\n", procName, tname);
1071  LEPT_FREE(tname);
1072  return (writeout) ? 0 : 1;
1073 }
1074 
1075 /* --------------------------------------------*/
1076 #endif /* USE_PSIO */
1077 /* --------------------------------------------*/
PIX * pixThresholdToBinary(PIX *pixs, l_int32 thresh)
pixThresholdToBinary()
Definition: grayquant.c:447
void pixDestroy(PIX **ppix)
pixDestroy()
Definition: pix1.c:621
PIX * pixCopy(PIX *pixd, const PIX *pixs)
pixCopy()
Definition: pix1.c:705
PIX * pixClone(PIX *pixs)
pixClone()
Definition: pix1.c:593
l_ok pixZero(PIX *pix, l_int32 *pempty)
pixZero()
Definition: pix3.c:1815
PIX * pixInvert(PIX *pixd, PIX *pixs)
pixInvert()
Definition: pix3.c:1509
l_ok pixSetMasked(PIX *pixd, PIX *pixm, l_uint32 val)
pixSetMasked()
Definition: pix3.c:163
@ REMOVE_CMAP_BASED_ON_SRC
Definition: pix.h:260
@ L_CLONE
Definition: pix.h:713
@ L_NOCOPY
Definition: pix.h:710
@ L_MS_BYTE
Definition: pix.h:849
l_int32 pixaGetCount(PIXA *pixa)
pixaGetCount()
Definition: pixabasic.c:650
PIX * pixaGetPix(PIXA *pixa, l_int32 index, l_int32 accesstype)
pixaGetPix()
Definition: pixabasic.c:691
PIX * pixConvertTo8Or32(PIX *pixs, l_int32 copyflag, l_int32 warnflag)
pixConvertTo8Or32()
Definition: pixconv.c:3492
PIX * pixRemoveColormap(PIX *pixs, l_int32 type)
pixRemoveColormap()
Definition: pixconv.c:328
PIX * pixConvertForPSWrap(PIX *pixs)
pixConvertForPSWrap()
Definition: pixconv.c:3931
PIX * pixConvertTo8(PIX *pixs, l_int32 cmapflag)
pixConvertTo8()
Definition: pixconv.c:3133
PIX * pixConvertRGBToLuminance(PIX *pixs)
pixConvertRGBToLuminance()
Definition: pixconv.c:742
PIX * pixConvert16To8(PIX *pixs, l_int32 type)
pixConvert16To8()
Definition: pixconv.c:1762
l_ok convertFlateToPS(const char *filein, const char *fileout, const char *operation, l_int32 x, l_int32 y, l_int32 res, l_float32 scale, l_int32 pageno, l_int32 endpage)
convertFlateToPS()
Definition: psio2.c:1667
l_ok convertJpegToPS(const char *filein, const char *fileout, const char *operation, l_int32 x, l_int32 y, l_int32 res, l_float32 scale, l_int32 pageno, l_int32 endpage)
convertJpegToPS()
Definition: psio2.c:794
l_ok pixWritePSEmbed(const char *filein, const char *fileout)
pixWritePSEmbed()
Definition: psio2.c:188
l_ok convertFlateToPSEmbed(const char *filein, const char *fileout)
convertFlateToPSEmbed()
Definition: psio2.c:1553
l_ok convertG4ToPSEmbed(const char *filein, const char *fileout)
convertG4ToPSEmbed()
Definition: psio2.c:1076
l_ok convertJpegToPSEmbed(const char *filein, const char *fileout)
convertJpegToPSEmbed()
Definition: psio2.c:678
l_ok convertG4ToPS(const char *filein, const char *fileout, const char *operation, l_int32 x, l_int32 y, l_int32 res, l_float32 scale, l_int32 pageno, l_int32 maskflag, l_int32 endpage)
convertG4ToPS()
Definition: psio2.c:1182
l_int32 getResLetterPage(l_int32 w, l_int32 h, l_float32 fillfract)
getResLetterPage()
Definition: psio2.c:1992
l_ok pixReadHeader(const char *filename, l_int32 *pformat, l_int32 *pw, l_int32 *ph, l_int32 *pbps, l_int32 *pspp, l_int32 *piscmap)
pixReadHeader()
Definition: readfile.c:446
l_ok findFileFormat(const char *filename, l_int32 *pformat)
findFileFormat()
Definition: readfile.c:584
PIX * pixRead(const char *filename)
pixRead()
Definition: readfile.c:193
PIX * pixReadIndexed(SARRAY *sa, l_int32 index)
pixReadIndexed()
Definition: readfile.c:281
char * sarrayGetString(SARRAY *sa, l_int32 index, l_int32 copyflag)
sarrayGetString()
Definition: sarray1.c:703
l_ok sarrayPadToSameSize(SARRAY *sa1, SARRAY *sa2, const char *padstring)
sarrayPadToSameSize()
Definition: sarray1.c:1064
l_int32 sarrayGetCount(SARRAY *sa)
sarrayGetCount()
Definition: sarray1.c:643
void sarrayDestroy(SARRAY **psa)
sarrayDestroy()
Definition: sarray1.c:362
SARRAY * getSortedPathnamesInDirectory(const char *dirname, const char *substr, l_int32 first, l_int32 nfiles)
getSortedPathnamesInDirectory()
Definition: sarray1.c:1848
SARRAY * getNumberedPathnamesInDirectory(const char *dirname, const char *substr, l_int32 numpre, l_int32 numpost, l_int32 maxnum)
getNumberedPathnamesInDirectory()
Definition: sarray1.c:1800
PIX * pixScale(PIX *pixs, l_float32 scalex, l_float32 scaley)
pixScale()
Definition: scale1.c:250
PIX * pixScaleGrayLI(PIX *pixs, l_float32 scalex, l_float32 scaley)
pixScaleGrayLI()
Definition: scale1.c:780
PIX * pixScaleAreaMap(PIX *pix, l_float32 scalex, l_float32 scaley)
pixScaleAreaMap()
Definition: scale1.c:1914
Pix
Definition: pix.h:139
Pixa
Definition: pix.h:456
Sarray
Definition: array.h:127
l_int32 lept_rmfile(const char *filepath)
lept_rmfile()
Definition: utils2.c:2517
char * l_makeTempFilename(void)
l_makeTempFilename()
Definition: utils2.c:3397