Libcroco
cr-utils.c
Go to the documentation of this file.
1 /* -*- Mode: C; indent-tabs-mode: nil; c-basic-offset: 8 -*- */
2 
3 /*
4  * This file is part of The Croco Library
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of version 2.1 of the GNU Lesser General Public
8  * License as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
18  * USA
19  *
20  * Author: Dodji Seketeli
21  * See COPYRIGHTS file for copyright information.
22  */
23 
24 #include "cr-utils.h"
25 #include "cr-string.h"
26 
27 /**
28  *@file:
29  *Some misc utility functions used
30  *in the libcroco.
31  *Note that throughout this file I will
32  *refer to the CSS SPECIFICATIONS DOCUMENTATION
33  *written by the w3c guys. You can find that document
34  *at http://www.w3.org/TR/REC-CSS2/ .
35  */
36 
37 /****************************
38  *Encoding transformations and
39  *encoding helpers
40  ****************************/
41 
42 /*
43  *Here is the correspondence between the ucs-4 character codes
44  *and their matching utf-8 encoding patterns as described by RFC 2279:
45  *
46  *UCS-4 range (hex.) UTF-8 octet sequence (binary)
47  *------------------ -----------------------------
48  *0000 0000-0000 007F 0xxxxxxx
49  *0000 0080-0000 07FF 110xxxxx 10xxxxxx
50  *0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
51  *0001 0000-001F FFFF 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
52  *0020 0000-03FF FFFF 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
53  *0400 0000-7FFF FFFF 1111110x 10xxxxxx ... 10xxxxxx
54  */
55 
56 /**
57  *Given an utf8 string buffer, calculates
58  *the length of this string if it was encoded
59  *in ucs4.
60  *@param a_in_start a pointer to the begining of
61  *the input utf8 string.
62  *@param a_in_end a pointre to the end of the input
63  *utf8 string (points to the last byte of the buffer)
64  *@param a_len out parameter the calculated length.
65  *@return CR_OK upon succesfull completion, an error code
66  *otherwise.
67  */
68 enum CRStatus
69 cr_utils_utf8_str_len_as_ucs4 (const guchar * a_in_start,
70  const guchar * a_in_end, gulong * a_len)
71 {
72  guchar *byte_ptr = NULL;
73  gint len = 0;
74 
75  /*
76  *to store the final decoded
77  *unicode char
78  */
79  guint c = 0;
80 
81  g_return_val_if_fail (a_in_start && a_in_end && a_len,
83  *a_len = 0;
84 
85  for (byte_ptr = (guchar *) a_in_start;
86  byte_ptr <= a_in_end; byte_ptr++) {
87  gint nb_bytes_2_decode = 0;
88 
89  if (*byte_ptr <= 0x7F) {
90  /*
91  *7 bits long char
92  *encoded over 1 byte:
93  * 0xxx xxxx
94  */
95  c = *byte_ptr;
96  nb_bytes_2_decode = 1;
97 
98  } else if ((*byte_ptr & 0xE0) == 0xC0) {
99  /*
100  *up to 11 bits long char.
101  *encoded over 2 bytes:
102  *110x xxxx 10xx xxxx
103  */
104  c = *byte_ptr & 0x1F;
105  nb_bytes_2_decode = 2;
106 
107  } else if ((*byte_ptr & 0xF0) == 0xE0) {
108  /*
109  *up to 16 bit long char
110  *encoded over 3 bytes:
111  *1110 xxxx 10xx xxxx 10xx xxxx
112  */
113  c = *byte_ptr & 0x0F;
114  nb_bytes_2_decode = 3;
115 
116  } else if ((*byte_ptr & 0xF8) == 0xF0) {
117  /*
118  *up to 21 bits long char
119  *encoded over 4 bytes:
120  *1111 0xxx 10xx xxxx 10xx xxxx 10xx xxxx
121  */
122  c = *byte_ptr & 0x7;
123  nb_bytes_2_decode = 4;
124 
125  } else if ((*byte_ptr & 0xFC) == 0xF8) {
126  /*
127  *up to 26 bits long char
128  *encoded over 5 bytes.
129  *1111 10xx 10xx xxxx 10xx xxxx
130  *10xx xxxx 10xx xxxx
131  */
132  c = *byte_ptr & 3;
133  nb_bytes_2_decode = 5;
134 
135  } else if ((*byte_ptr & 0xFE) == 0xFC) {
136  /*
137  *up to 31 bits long char
138  *encoded over 6 bytes:
139  *1111 110x 10xx xxxx 10xx xxxx
140  *10xx xxxx 10xx xxxx 10xx xxxx
141  */
142  c = *byte_ptr & 1;
143  nb_bytes_2_decode = 6;
144 
145  } else {
146  /*
147  *BAD ENCODING
148  */
149  return CR_ENCODING_ERROR;
150  }
151 
152  /*
153  *Go and decode the remaining byte(s)
154  *(if any) to get the current character.
155  */
156  for (; nb_bytes_2_decode > 1; nb_bytes_2_decode--) {
157  /*decode the next byte */
158  byte_ptr++;
159 
160  /*byte pattern must be: 10xx xxxx */
161  if ((*byte_ptr & 0xC0) != 0x80) {
162  return CR_ENCODING_ERROR;
163  }
164 
165  c = (c << 6) | (*byte_ptr & 0x3F);
166  }
167 
168  len++;
169  }
170 
171  *a_len = len;
172 
173  return CR_OK;
174 }
175 
176 /**
177  *Given an ucs4 string, this function
178  *returns the size (in bytes) this string
179  *would have occupied if it was encoded in utf-8.
180  *@param a_in_start a pointer to the beginning of the input
181  *buffer.
182  *@param a_in_end a pointer to the end of the input buffer.
183  *@param a_len out parameter. The computed length.
184  *@return CR_OK upon successfull completion, an error code otherwise.
185  */
186 enum CRStatus
187 cr_utils_ucs4_str_len_as_utf8 (const guint32 * a_in_start,
188  const guint32 * a_in_end, gulong * a_len)
189 {
190  gint len = 0;
191  guint32 *char_ptr = NULL;
192 
193  g_return_val_if_fail (a_in_start && a_in_end && a_len,
195 
196  for (char_ptr = (guint32 *) a_in_start;
197  char_ptr <= a_in_end; char_ptr++) {
198  if (*char_ptr <= 0x7F) {
199  /*the utf-8 char would take 1 byte */
200  len += 1;
201  } else if (*char_ptr <= 0x7FF) {
202  /*the utf-8 char would take 2 bytes */
203  len += 2;
204  } else if (*char_ptr <= 0xFFFF) {
205  len += 3;
206  } else if (*char_ptr <= 0x1FFFFF) {
207  len += 4;
208  } else if (*char_ptr <= 0x3FFFFFF) {
209  len += 5;
210  } else if (*char_ptr <= 0x7FFFFFFF) {
211  len += 6;
212  }
213  }
214 
215  *a_len = len;
216  return CR_OK;
217 }
218 
219 /**
220  *Given an ucsA string, this function
221  *returns the size (in bytes) this string
222  *would have occupied if it was encoded in utf-8.
223  *@param a_in_start a pointer to the beginning of the input
224  *buffer.
225  *@param a_in_end a pointer to the end of the input buffer.
226  *@param a_len out parameter. The computed length.
227  *@return CR_OK upon successfull completion, an error code otherwise.
228  */
229 enum CRStatus
230 cr_utils_ucs1_str_len_as_utf8 (const guchar * a_in_start,
231  const guchar * a_in_end, gulong * a_len)
232 {
233  gint len = 0;
234  guchar *char_ptr = NULL;
235 
236  g_return_val_if_fail (a_in_start && a_in_end && a_len,
238 
239  for (char_ptr = (guchar *) a_in_start;
240  char_ptr <= a_in_end; char_ptr++) {
241  if (*char_ptr <= 0x7F) {
242  /*the utf-8 char would take 1 byte */
243  len += 1;
244  } else {
245  /*the utf-8 char would take 2 bytes */
246  len += 2;
247  }
248  }
249 
250  *a_len = len;
251  return CR_OK;
252 }
253 
254 /**
255  *Converts an utf8 buffer into an ucs4 buffer.
256  *
257  *@param a_in the input utf8 buffer to convert.
258  *@param a_in_len in/out parameter. The size of the
259  *input buffer to convert. After return, this parameter contains
260  *the actual number of bytes consumed.
261  *@param a_out the output converted ucs4 buffer. Must be allocated by
262  *the caller.
263  *@param a_out_len in/out parameter. The size of the output buffer.
264  *If this size is actually smaller than the real needed size, the function
265  *just converts what it can and returns a success status. After return,
266  *this param points to the actual number of characters decoded.
267  *@return CR_OK upon successfull completion, an error code otherwise.
268  */
269 enum CRStatus
270 cr_utils_utf8_to_ucs4 (const guchar * a_in,
271  gulong * a_in_len, guint32 * a_out, gulong * a_out_len)
272 {
273  gulong in_len = 0,
274  out_len = 0,
275  in_index = 0,
276  out_index = 0;
277  enum CRStatus status = CR_OK;
278 
279  /*
280  *to store the final decoded
281  *unicode char
282  */
283  guint c = 0;
284 
285  g_return_val_if_fail (a_in && a_in_len
286  && a_out && a_out_len, CR_BAD_PARAM_ERROR);
287 
288  if (*a_in_len < 1) {
289  status = CR_OK;
290  goto end;
291  }
292 
293  in_len = *a_in_len;
294  out_len = *a_out_len;
295 
296  for (in_index = 0, out_index = 0;
297  (in_index < in_len) && (out_index < out_len);
298  in_index++, out_index++) {
299  gint nb_bytes_2_decode = 0;
300 
301  if (a_in[in_index] <= 0x7F) {
302  /*
303  *7 bits long char
304  *encoded over 1 byte:
305  * 0xxx xxxx
306  */
307  c = a_in[in_index];
308  nb_bytes_2_decode = 1;
309 
310  } else if ((a_in[in_index] & 0xE0) == 0xC0) {
311  /*
312  *up to 11 bits long char.
313  *encoded over 2 bytes:
314  *110x xxxx 10xx xxxx
315  */
316  c = a_in[in_index] & 0x1F;
317  nb_bytes_2_decode = 2;
318 
319  } else if ((a_in[in_index] & 0xF0) == 0xE0) {
320  /*
321  *up to 16 bit long char
322  *encoded over 3 bytes:
323  *1110 xxxx 10xx xxxx 10xx xxxx
324  */
325  c = a_in[in_index] & 0x0F;
326  nb_bytes_2_decode = 3;
327 
328  } else if ((a_in[in_index] & 0xF8) == 0xF0) {
329  /*
330  *up to 21 bits long char
331  *encoded over 4 bytes:
332  *1111 0xxx 10xx xxxx 10xx xxxx 10xx xxxx
333  */
334  c = a_in[in_index] & 0x7;
335  nb_bytes_2_decode = 4;
336 
337  } else if ((a_in[in_index] & 0xFC) == 0xF8) {
338  /*
339  *up to 26 bits long char
340  *encoded over 5 bytes.
341  *1111 10xx 10xx xxxx 10xx xxxx
342  *10xx xxxx 10xx xxxx
343  */
344  c = a_in[in_index] & 3;
345  nb_bytes_2_decode = 5;
346 
347  } else if ((a_in[in_index] & 0xFE) == 0xFC) {
348  /*
349  *up to 31 bits long char
350  *encoded over 6 bytes:
351  *1111 110x 10xx xxxx 10xx xxxx
352  *10xx xxxx 10xx xxxx 10xx xxxx
353  */
354  c = a_in[in_index] & 1;
355  nb_bytes_2_decode = 6;
356 
357  } else {
358  /*BAD ENCODING */
359  goto end;
360  }
361 
362  /*
363  *Go and decode the remaining byte(s)
364  *(if any) to get the current character.
365  */
366  for (; nb_bytes_2_decode > 1; nb_bytes_2_decode--) {
367  /*decode the next byte */
368  in_index++;
369 
370  /*byte pattern must be: 10xx xxxx */
371  if ((a_in[in_index] & 0xC0) != 0x80) {
372  goto end;
373  }
374 
375  c = (c << 6) | (a_in[in_index] & 0x3F);
376  }
377 
378  /*
379  *The decoded ucs4 char is now
380  *in c.
381  */
382 
383  /************************
384  *Some security tests
385  ***********************/
386 
387  /*be sure c is a char */
388  if (c == 0xFFFF || c == 0xFFFE)
389  goto end;
390 
391  /*be sure c is inferior to the max ucs4 char value */
392  if (c > 0x10FFFF)
393  goto end;
394 
395  /*
396  *c must be less than UTF16 "lower surrogate begin"
397  *or higher than UTF16 "High surrogate end"
398  */
399  if (c >= 0xD800 && c <= 0xDFFF)
400  goto end;
401 
402  /*Avoid characters that equals zero */
403  if (c == 0)
404  goto end;
405 
406  a_out[out_index] = c;
407  }
408 
409  end:
410  *a_out_len = out_index + 1;
411  *a_in_len = in_index + 1;
412 
413  return status;
414 }
415 
416 /**
417  *Reads a character from an utf8 buffer.
418  *Actually decode the next character code (unicode character code)
419  *and returns it.
420  *@param a_in the starting address of the utf8 buffer.
421  *@param a_in_len the length of the utf8 buffer.
422  *@param a_out output parameter. The resulting read char.
423  *@param a_consumed the number of the bytes consumed to
424  *decode the returned character code.
425  *@return CR_OK upon successfull completion, an error code otherwise.
426  */
427 enum CRStatus
428 cr_utils_read_char_from_utf8_buf (const guchar * a_in,
429  gulong a_in_len,
430  guint32 * a_out, gulong * a_consumed)
431 {
432  gulong in_index = 0,
433  nb_bytes_2_decode = 0;
434  enum CRStatus status = CR_OK;
435 
436  /*
437  *to store the final decoded
438  *unicode char
439  */
440  guint32 c = 0;
441 
442  g_return_val_if_fail (a_in && a_out && a_out
443  && a_consumed, CR_BAD_PARAM_ERROR);
444 
445  if (a_in_len < 1) {
446  status = CR_OK;
447  goto end;
448  }
449 
450  if (*a_in <= 0x7F) {
451  /*
452  *7 bits long char
453  *encoded over 1 byte:
454  * 0xxx xxxx
455  */
456  c = *a_in;
457  nb_bytes_2_decode = 1;
458 
459  } else if ((*a_in & 0xE0) == 0xC0) {
460  /*
461  *up to 11 bits long char.
462  *encoded over 2 bytes:
463  *110x xxxx 10xx xxxx
464  */
465  c = *a_in & 0x1F;
466  nb_bytes_2_decode = 2;
467 
468  } else if ((*a_in & 0xF0) == 0xE0) {
469  /*
470  *up to 16 bit long char
471  *encoded over 3 bytes:
472  *1110 xxxx 10xx xxxx 10xx xxxx
473  */
474  c = *a_in & 0x0F;
475  nb_bytes_2_decode = 3;
476 
477  } else if ((*a_in & 0xF8) == 0xF0) {
478  /*
479  *up to 21 bits long char
480  *encoded over 4 bytes:
481  *1111 0xxx 10xx xxxx 10xx xxxx 10xx xxxx
482  */
483  c = *a_in & 0x7;
484  nb_bytes_2_decode = 4;
485 
486  } else if ((*a_in & 0xFC) == 0xF8) {
487  /*
488  *up to 26 bits long char
489  *encoded over 5 bytes.
490  *1111 10xx 10xx xxxx 10xx xxxx
491  *10xx xxxx 10xx xxxx
492  */
493  c = *a_in & 3;
494  nb_bytes_2_decode = 5;
495 
496  } else if ((*a_in & 0xFE) == 0xFC) {
497  /*
498  *up to 31 bits long char
499  *encoded over 6 bytes:
500  *1111 110x 10xx xxxx 10xx xxxx
501  *10xx xxxx 10xx xxxx 10xx xxxx
502  */
503  c = *a_in & 1;
504  nb_bytes_2_decode = 6;
505 
506  } else {
507  /*BAD ENCODING */
508  goto end;
509  }
510 
511  if (nb_bytes_2_decode > a_in_len) {
512  status = CR_END_OF_INPUT_ERROR;
513  goto end;
514  }
515 
516  /*
517  *Go and decode the remaining byte(s)
518  *(if any) to get the current character.
519  */
520  for (in_index = 1; in_index < nb_bytes_2_decode; in_index++) {
521  /*byte pattern must be: 10xx xxxx */
522  if ((a_in[in_index] & 0xC0) != 0x80) {
523  goto end;
524  }
525 
526  c = (c << 6) | (a_in[in_index] & 0x3F);
527  }
528 
529  /*
530  *The decoded ucs4 char is now
531  *in c.
532  */
533 
534  /************************
535  *Some security tests
536  ***********************/
537 
538  /*be sure c is a char */
539  if (c == 0xFFFF || c == 0xFFFE)
540  goto end;
541 
542  /*be sure c is inferior to the max ucs4 char value */
543  if (c > 0x10FFFF)
544  goto end;
545 
546  /*
547  *c must be less than UTF16 "lower surrogate begin"
548  *or higher than UTF16 "High surrogate end"
549  */
550  if (c >= 0xD800 && c <= 0xDFFF)
551  goto end;
552 
553  /*Avoid characters that equals zero */
554  if (c == 0)
555  goto end;
556 
557  *a_out = c;
558 
559  end:
560  *a_consumed = nb_bytes_2_decode;
561 
562  return status;
563 }
564 
565 /**
566  *
567  */
568 enum CRStatus
569 cr_utils_utf8_str_len_as_ucs1 (const guchar * a_in_start,
570  const guchar * a_in_end, gulong * a_len)
571 {
572  /*
573  *Note: this function can be made shorter
574  *but it considers all the cases of the utf8 encoding
575  *to ease further extensions ...
576  */
577 
578  guchar *byte_ptr = NULL;
579  gint len = 0;
580 
581  /*
582  *to store the final decoded
583  *unicode char
584  */
585  guint c = 0;
586 
587  g_return_val_if_fail (a_in_start && a_in_end && a_len,
589  *a_len = 0;
590 
591  for (byte_ptr = (guchar *) a_in_start;
592  byte_ptr <= a_in_end; byte_ptr++) {
593  gint nb_bytes_2_decode = 0;
594 
595  if (*byte_ptr <= 0x7F) {
596  /*
597  *7 bits long char
598  *encoded over 1 byte:
599  * 0xxx xxxx
600  */
601  c = *byte_ptr;
602  nb_bytes_2_decode = 1;
603 
604  } else if ((*byte_ptr & 0xE0) == 0xC0) {
605  /*
606  *up to 11 bits long char.
607  *encoded over 2 bytes:
608  *110x xxxx 10xx xxxx
609  */
610  c = *byte_ptr & 0x1F;
611  nb_bytes_2_decode = 2;
612 
613  } else if ((*byte_ptr & 0xF0) == 0xE0) {
614  /*
615  *up to 16 bit long char
616  *encoded over 3 bytes:
617  *1110 xxxx 10xx xxxx 10xx xxxx
618  */
619  c = *byte_ptr & 0x0F;
620  nb_bytes_2_decode = 3;
621 
622  } else if ((*byte_ptr & 0xF8) == 0xF0) {
623  /*
624  *up to 21 bits long char
625  *encoded over 4 bytes:
626  *1111 0xxx 10xx xxxx 10xx xxxx 10xx xxxx
627  */
628  c = *byte_ptr & 0x7;
629  nb_bytes_2_decode = 4;
630 
631  } else if ((*byte_ptr & 0xFC) == 0xF8) {
632  /*
633  *up to 26 bits long char
634  *encoded over 5 bytes.
635  *1111 10xx 10xx xxxx 10xx xxxx
636  *10xx xxxx 10xx xxxx
637  */
638  c = *byte_ptr & 3;
639  nb_bytes_2_decode = 5;
640 
641  } else if ((*byte_ptr & 0xFE) == 0xFC) {
642  /*
643  *up to 31 bits long char
644  *encoded over 6 bytes:
645  *1111 110x 10xx xxxx 10xx xxxx
646  *10xx xxxx 10xx xxxx 10xx xxxx
647  */
648  c = *byte_ptr & 1;
649  nb_bytes_2_decode = 6;
650 
651  } else {
652  /*
653  *BAD ENCODING
654  */
655  return CR_ENCODING_ERROR;
656  }
657 
658  /*
659  *Go and decode the remaining byte(s)
660  *(if any) to get the current character.
661  */
662  for (; nb_bytes_2_decode > 1; nb_bytes_2_decode--) {
663  /*decode the next byte */
664  byte_ptr++;
665 
666  /*byte pattern must be: 10xx xxxx */
667  if ((*byte_ptr & 0xC0) != 0x80) {
668  return CR_ENCODING_ERROR;
669  }
670 
671  c = (c << 6) | (*byte_ptr & 0x3F);
672  }
673 
674  /*
675  *The decoded ucs4 char is now
676  *in c.
677  */
678 
679  if (c <= 0xFF) { /*Add other conditions to support
680  *other char sets (ucs2, ucs3, ucs4).
681  */
682  len++;
683  } else {
684  /*the char is too long to fit
685  *into the supposed charset len.
686  */
687  return CR_ENCODING_ERROR;
688  }
689  }
690 
691  *a_len = len;
692 
693  return CR_OK;
694 }
695 
696 /**
697  *Converts an utf8 string into an ucs4 string.
698  *@param a_in the input string to convert.
699  *@param a_in_len in/out parameter. The length of the input
700  *string. After return, points to the actual number of bytes
701  *consumed. This can be usefull to debug the input stream in case
702  *of encoding error.
703  *@param a_out out parameter. Points to the output string. It is allocated
704  *by this function and must be freed by the caller.
705  *@param a_out_len out parameter. The length of the output string.
706  *@return CR_OK upon successfull completion, an error code otherwise.
707  *
708  */
709 enum CRStatus
710 cr_utils_utf8_str_to_ucs4 (const guchar * a_in,
711  gulong * a_in_len,
712  guint32 ** a_out, gulong * a_out_len)
713 {
714  enum CRStatus status = CR_OK;
715 
716  g_return_val_if_fail (a_in && a_in_len
717  && a_out && a_out_len, CR_BAD_PARAM_ERROR);
718 
719  status = cr_utils_utf8_str_len_as_ucs4 (a_in,
720  &a_in[*a_in_len - 1],
721  a_out_len);
722 
723  g_return_val_if_fail (status == CR_OK, status);
724 
725  *a_out = g_malloc0 (*a_out_len * sizeof (guint32));
726 
727  status = cr_utils_utf8_to_ucs4 (a_in, a_in_len, *a_out, a_out_len);
728 
729  return status;
730 }
731 
732 /**
733  *Converts an ucs4 buffer into an utf8 buffer.
734  *
735  *@param a_in the input ucs4 buffer to convert.
736  *@param a_in_len in/out parameter. The size of the
737  *input buffer to convert. After return, this parameter contains
738  *the actual number of characters consumed.
739  *@param a_out the output converted utf8 buffer. Must be allocated by
740  *the caller.
741  *@param a_out_len in/out parameter. The size of the output buffer.
742  *If this size is actually smaller than the real needed size, the function
743  *just converts what it can and returns a success status. After return,
744  *this param points to the actual number of bytes in the buffer.
745  *@return CR_OK upon successfull completion, an error code otherwise.
746  */
747 enum CRStatus
748 cr_utils_ucs4_to_utf8 (const guint32 * a_in,
749  gulong * a_in_len, guchar * a_out, gulong * a_out_len)
750 {
751  gulong in_len = 0,
752  in_index = 0,
753  out_index = 0;
754  enum CRStatus status = CR_OK;
755 
756  g_return_val_if_fail (a_in && a_in_len && a_out && a_out_len,
758 
759  if (*a_in_len < 1) {
760  status = CR_OK;
761  goto end;
762  }
763 
764  in_len = *a_in_len;
765 
766  for (in_index = 0; in_index < in_len; in_index++) {
767  /*
768  *FIXME: return whenever we encounter forbidden char values.
769  */
770 
771  if (a_in[in_index] <= 0x7F) {
772  a_out[out_index] = a_in[in_index];
773  out_index++;
774  } else if (a_in[in_index] <= 0x7FF) {
775  a_out[out_index] = (0xC0 | (a_in[in_index] >> 6));
776  a_out[out_index + 1] =
777  (0x80 | (a_in[in_index] & 0x3F));
778  out_index += 2;
779  } else if (a_in[in_index] <= 0xFFFF) {
780  a_out[out_index] = (0xE0 | (a_in[in_index] >> 12));
781  a_out[out_index + 1] =
782  (0x80 | ((a_in[in_index] >> 6) & 0x3F));
783  a_out[out_index + 2] =
784  (0x80 | (a_in[in_index] & 0x3F));
785  out_index += 3;
786  } else if (a_in[in_index] <= 0x1FFFFF) {
787  a_out[out_index] = (0xF0 | (a_in[in_index] >> 18));
788  a_out[out_index + 1]
789  = (0x80 | ((a_in[in_index] >> 12) & 0x3F));
790  a_out[out_index + 2]
791  = (0x80 | ((a_in[in_index] >> 6) & 0x3F));
792  a_out[out_index + 3]
793  = (0x80 | (a_in[in_index] & 0x3F));
794  out_index += 4;
795  } else if (a_in[in_index] <= 0x3FFFFFF) {
796  a_out[out_index] = (0xF8 | (a_in[in_index] >> 24));
797  a_out[out_index + 1] =
798  (0x80 | (a_in[in_index] >> 18));
799  a_out[out_index + 2]
800  = (0x80 | ((a_in[in_index] >> 12) & 0x3F));
801  a_out[out_index + 3]
802  = (0x80 | ((a_in[in_index] >> 6) & 0x3F));
803  a_out[out_index + 4]
804  = (0x80 | (a_in[in_index] & 0x3F));
805  out_index += 5;
806  } else if (a_in[in_index] <= 0x7FFFFFFF) {
807  a_out[out_index] = (0xFC | (a_in[in_index] >> 30));
808  a_out[out_index + 1] =
809  (0x80 | (a_in[in_index] >> 24));
810  a_out[out_index + 2]
811  = (0x80 | ((a_in[in_index] >> 18) & 0x3F));
812  a_out[out_index + 3]
813  = (0x80 | ((a_in[in_index] >> 12) & 0x3F));
814  a_out[out_index + 4]
815  = (0x80 | ((a_in[in_index] >> 6) & 0x3F));
816  a_out[out_index + 4]
817  = (0x80 | (a_in[in_index] & 0x3F));
818  out_index += 6;
819  } else {
820  status = CR_ENCODING_ERROR;
821  goto end;
822  }
823  } /*end for */
824 
825  end:
826  *a_in_len = in_index + 1;
827  *a_out_len = out_index + 1;
828 
829  return status;
830 }
831 
832 /**
833  *Converts an ucs4 string into an utf8 string.
834  *@param a_in the input string to convert.
835  *@param a_in_len in/out parameter. The length of the input
836  *string. After return, points to the actual number of characters
837  *consumed. This can be usefull to debug the input string in case
838  *of encoding error.
839  *@param a_out out parameter. Points to the output string. It is allocated
840  *by this function and must be freed by the caller.
841  *@param a_out_len out parameter. The length (in bytes) of the output string.
842  *@return CR_OK upon successfull completion, an error code otherwise.
843  */
844 enum CRStatus
845 cr_utils_ucs4_str_to_utf8 (const guint32 * a_in,
846  gulong * a_in_len,
847  guchar ** a_out, gulong * a_out_len)
848 {
849  enum CRStatus status = CR_OK;
850 
851  g_return_val_if_fail (a_in && a_in_len && a_out
852  && a_out_len, CR_BAD_PARAM_ERROR);
853 
854  status = cr_utils_ucs4_str_len_as_utf8 (a_in,
855  &a_in[*a_out_len - 1],
856  a_out_len);
857 
858  g_return_val_if_fail (status == CR_OK, status);
859 
860  status = cr_utils_ucs4_to_utf8 (a_in, a_in_len, *a_out, a_out_len);
861 
862  return status;
863 }
864 
865 /**
866  *Converts an ucs1 buffer into an utf8 buffer.
867  *The caller must know the size of the resulting buffer and
868  *allocate it prior to calling this function.
869  *
870  *@param a_in the input ucs1 buffer.
871  *
872  *@param a_in_len in/out parameter. The length of the input buffer.
873  *After return, points to the number of bytes actually consumed even
874  *in case of encoding error.
875  *
876  *@param a_out out parameter. The output utf8 converted buffer.
877  *
878  *@param a_out_len in/out parameter. The size of the output buffer.
879  *If the output buffer size is shorter than the actual needed size,
880  *this function just convert what it can.
881  *
882  *@return CR_OK upon successfull completion, an error code otherwise.
883  *
884  */
885 enum CRStatus
886 cr_utils_ucs1_to_utf8 (const guchar * a_in,
887  gulong * a_in_len, guchar * a_out, gulong * a_out_len)
888 {
889  gulong out_index = 0,
890  in_index = 0,
891  in_len = 0,
892  out_len = 0;
893  enum CRStatus status = CR_OK;
894 
895  g_return_val_if_fail (a_in && a_in_len
896  && a_out_len,
898 
899  if (*a_in_len == 0) {
900  *a_out_len = 0 ;
901  return status;
902  }
903  g_return_val_if_fail (a_out, CR_BAD_PARAM_ERROR) ;
904 
905  in_len = *a_in_len;
906  out_len = *a_out_len;
907 
908  for (in_index = 0, out_index = 0;
909  (in_index < in_len) && (out_index < out_len); in_index++) {
910  /*
911  *FIXME: return whenever we encounter forbidden char values.
912  */
913 
914  if (a_in[in_index] <= 0x7F) {
915  a_out[out_index] = a_in[in_index];
916  out_index++;
917  } else {
918  a_out[out_index] = (0xC0 | (a_in[in_index] >> 6));
919  a_out[out_index + 1] =
920  (0x80 | (a_in[in_index] & 0x3F));
921  out_index += 2;
922  }
923  } /*end for */
924 
925  *a_in_len = in_index;
926  *a_out_len = out_index;
927 
928  return status;
929 }
930 
931 /**
932  *Converts an ucs1 string into an utf8 string.
933  *@param a_in_start the beginning of the input string to convert.
934  *@param a_in_end the end of the input string to convert.
935  *@param a_out out parameter. The converted string.
936  *@param a_out out parameter. The length of the converted string.
937  *@return CR_OK upon successfull completion, an error code otherwise.
938  *
939  */
940 enum CRStatus
941 cr_utils_ucs1_str_to_utf8 (const guchar * a_in,
942  gulong * a_in_len,
943  guchar ** a_out, gulong * a_out_len)
944 {
945  gulong out_len = 0;
946  enum CRStatus status = CR_OK;
947 
948  g_return_val_if_fail (a_in && a_in_len && a_out
949  && a_out_len, CR_BAD_PARAM_ERROR);
950 
951  if (*a_in_len < 1) {
952  *a_out_len = 0;
953  *a_out = NULL;
954  return CR_OK;
955  }
956 
957  status = cr_utils_ucs1_str_len_as_utf8 (a_in, &a_in[*a_in_len - 1],
958  &out_len);
959 
960  g_return_val_if_fail (status == CR_OK, status);
961 
962  *a_out = g_malloc0 (out_len);
963 
964  status = cr_utils_ucs1_to_utf8 (a_in, a_in_len, *a_out, &out_len);
965 
966  *a_out_len = out_len;
967 
968  return status;
969 }
970 
971 /**
972  *Converts an utf8 buffer into an ucs1 buffer.
973  *The caller must know the size of the resulting
974  *converted buffer, and allocated it prior to calling this
975  *function.
976  *
977  *@param a_in the input utf8 buffer to convert.
978  *
979  *@param a_in_len in/out parameter. The size of the input utf8 buffer.
980  *After return, points to the number of bytes consumed
981  *by the function even in case of encoding error.
982  *
983  *@param a_out out parameter. Points to the resulting buffer.
984  *Must be allocated by the caller. If the size of a_out is shorter
985  *than its required size, this function converts what it can and return
986  *a successfull status.
987  *
988  *@param a_out_len in/out parameter. The size of the output buffer.
989  *After return, points to the number of bytes consumed even in case of
990  *encoding error.
991  *
992  *@return CR_OK upon successfull completion, an error code otherwise.
993  */
994 enum CRStatus
995 cr_utils_utf8_to_ucs1 (const guchar * a_in,
996  gulong * a_in_len, guchar * a_out, gulong * a_out_len)
997 {
998  gulong in_index = 0,
999  out_index = 0,
1000  in_len = 0,
1001  out_len = 0;
1002  enum CRStatus status = CR_OK;
1003 
1004  /*
1005  *to store the final decoded
1006  *unicode char
1007  */
1008  guint32 c = 0;
1009 
1010  g_return_val_if_fail (a_in && a_in_len
1011  && a_out && a_out_len, CR_BAD_PARAM_ERROR);
1012 
1013  if (*a_in_len < 1) {
1014  goto end;
1015  }
1016 
1017  in_len = *a_in_len;
1018  out_len = *a_out_len;
1019 
1020  for (in_index = 0, out_index = 0;
1021  (in_index < in_len) && (out_index < out_len);
1022  in_index++, out_index++) {
1023  gint nb_bytes_2_decode = 0;
1024 
1025  if (a_in[in_index] <= 0x7F) {
1026  /*
1027  *7 bits long char
1028  *encoded over 1 byte:
1029  * 0xxx xxxx
1030  */
1031  c = a_in[in_index];
1032  nb_bytes_2_decode = 1;
1033 
1034  } else if ((a_in[in_index] & 0xE0) == 0xC0) {
1035  /*
1036  *up to 11 bits long char.
1037  *encoded over 2 bytes:
1038  *110x xxxx 10xx xxxx
1039  */
1040  c = a_in[in_index] & 0x1F;
1041  nb_bytes_2_decode = 2;
1042 
1043  } else if ((a_in[in_index] & 0xF0) == 0xE0) {
1044  /*
1045  *up to 16 bit long char
1046  *encoded over 3 bytes:
1047  *1110 xxxx 10xx xxxx 10xx xxxx
1048  */
1049  c = a_in[in_index] & 0x0F;
1050  nb_bytes_2_decode = 3;
1051 
1052  } else if ((a_in[in_index] & 0xF8) == 0xF0) {
1053  /*
1054  *up to 21 bits long char
1055  *encoded over 4 bytes:
1056  *1111 0xxx 10xx xxxx 10xx xxxx 10xx xxxx
1057  */
1058  c = a_in[in_index] & 0x7;
1059  nb_bytes_2_decode = 4;
1060 
1061  } else if ((a_in[in_index] & 0xFC) == 0xF8) {
1062  /*
1063  *up to 26 bits long char
1064  *encoded over 5 bytes.
1065  *1111 10xx 10xx xxxx 10xx xxxx
1066  *10xx xxxx 10xx xxxx
1067  */
1068  c = a_in[in_index] & 3;
1069  nb_bytes_2_decode = 5;
1070 
1071  } else if ((a_in[in_index] & 0xFE) == 0xFC) {
1072  /*
1073  *up to 31 bits long char
1074  *encoded over 6 bytes:
1075  *1111 110x 10xx xxxx 10xx xxxx
1076  *10xx xxxx 10xx xxxx 10xx xxxx
1077  */
1078  c = a_in[in_index] & 1;
1079  nb_bytes_2_decode = 6;
1080 
1081  } else {
1082  /*BAD ENCODING */
1083  status = CR_ENCODING_ERROR;
1084  goto end;
1085  }
1086 
1087  /*
1088  *Go and decode the remaining byte(s)
1089  *(if any) to get the current character.
1090  */
1091  if (in_index + nb_bytes_2_decode - 1 >= in_len) {
1092  goto end;
1093  }
1094 
1095  for (; nb_bytes_2_decode > 1; nb_bytes_2_decode--) {
1096  /*decode the next byte */
1097  in_index++;
1098 
1099  /*byte pattern must be: 10xx xxxx */
1100  if ((a_in[in_index] & 0xC0) != 0x80) {
1101  status = CR_ENCODING_ERROR;
1102  goto end;
1103  }
1104 
1105  c = (c << 6) | (a_in[in_index] & 0x3F);
1106  }
1107 
1108  /*
1109  *The decoded ucs4 char is now
1110  *in c.
1111  */
1112 
1113  if (c > 0xFF) {
1114  status = CR_ENCODING_ERROR;
1115  goto end;
1116  }
1117 
1118  a_out[out_index] = c;
1119  }
1120 
1121  end:
1122  *a_out_len = out_index;
1123  *a_in_len = in_index;
1124 
1125  return status;
1126 }
1127 
1128 /**
1129  *Converts an utf8 buffer into an
1130  *ucs1 buffer.
1131  *@param a_in_start the start of the input buffer.
1132  *@param a_in_end the end of the input buffer.
1133  *@param a_out out parameter. The resulting converted ucs4 buffer.
1134  *Must be freed by the caller.
1135  *@param a_out_len out parameter. The length of the converted buffer.
1136  *@return CR_OK upon successfull completion, an error code otherwise.
1137  *Note that out parameters are valid if and only if this function
1138  *returns CR_OK.
1139  */
1140 enum CRStatus
1141 cr_utils_utf8_str_to_ucs1 (const guchar * a_in,
1142  gulong * a_in_len,
1143  guchar ** a_out, gulong * a_out_len)
1144 {
1145  enum CRStatus status = CR_OK;
1146 
1147  g_return_val_if_fail (a_in && a_in_len
1148  && a_out && a_out_len, CR_BAD_PARAM_ERROR);
1149 
1150  if (*a_in_len < 1) {
1151  *a_out_len = 0;
1152  *a_out = NULL;
1153  return CR_OK;
1154  }
1155 
1156  status = cr_utils_utf8_str_len_as_ucs4 (a_in, &a_in[*a_in_len - 1],
1157  a_out_len);
1158 
1159  g_return_val_if_fail (status == CR_OK, status);
1160 
1161  *a_out = g_malloc0 (*a_out_len * sizeof (guint32));
1162 
1163  status = cr_utils_utf8_to_ucs1 (a_in, a_in_len, *a_out, a_out_len);
1164  return status;
1165 }
1166 
1167 /*****************************************
1168  *CSS basic types identification utilities
1169  *****************************************/
1170 
1171 /**
1172  *Returns TRUE if a_char is a white space as
1173  *defined in the css spec in chap 4.1.1.
1174  *
1175  *white-space ::= ' '| \t|\r|\n|\f
1176  *
1177  *@param a_char the character to test.
1178  *return TRUE if is a white space, false otherwise.
1179  */
1180 gboolean
1181 cr_utils_is_white_space (guint32 a_char)
1182 {
1183  switch (a_char) {
1184  case ' ':
1185  case '\t':
1186  case '\r':
1187  case '\n':
1188  case '\f':
1189  return TRUE;
1190  break;
1191  default:
1192  return FALSE;
1193  }
1194 }
1195 
1196 /**
1197  *Returns true if the character is a newline
1198  *as defined in the css spec in the chap 4.1.1.
1199  *
1200  *nl ::= \n|\r\n|\r|\f
1201  *
1202  *@param a_char the character to test.
1203  *@return TRUE if the character is a newline, FALSE otherwise.
1204  */
1205 gboolean
1206 cr_utils_is_newline (guint32 a_char)
1207 {
1208  switch (a_char) {
1209  case '\n':
1210  case '\r':
1211  case '\f':
1212  return TRUE;
1213  break;
1214  default:
1215  return FALSE;
1216  }
1217 }
1218 
1219 /**
1220  *returns TRUE if the char is part of an hexa num char:
1221  *i.e hexa_char ::= [0-9A-F]
1222  */
1223 gboolean
1224 cr_utils_is_hexa_char (guint32 a_char)
1225 {
1226  if ((a_char >= '0' && a_char <= '9')
1227  || (a_char >= 'A' && a_char <= 'F')) {
1228  return TRUE;
1229  }
1230  return FALSE;
1231 }
1232 
1233 /**
1234  *Returns true if the character is a nonascii
1235  *character (as defined in the css spec chap 4.1.1):
1236  *
1237  *nonascii ::= [^\0-\177]
1238  *
1239  *@param a_char the character to test.
1240  *@return TRUE if the character is a nonascii char,
1241  *FALSE otherwise.
1242  */
1243 gboolean
1244 cr_utils_is_nonascii (guint32 a_char)
1245 {
1246  if (a_char <= 177) {
1247  return FALSE;
1248  }
1249 
1250  return TRUE;
1251 }
1252 
1253 /**
1254  *Dumps a character a_nb times on a file.
1255  *@param a_char the char to dump
1256  *@param a_fp the destination file pointer
1257  *@param a_nb the number of times a_char is to be dumped.
1258  */
1259 void
1260 cr_utils_dump_n_chars (guchar a_char, FILE * a_fp, glong a_nb)
1261 {
1262  glong i = 0;
1263 
1264  for (i = 0; i < a_nb; i++) {
1265  fprintf (a_fp, "%c", a_char);
1266  }
1267 }
1268 
1269 void
1270 cr_utils_dump_n_chars2 (guchar a_char, GString * a_string, glong a_nb)
1271 {
1272  glong i = 0;
1273 
1274  g_return_if_fail (a_string);
1275 
1276  for (i = 0; i < a_nb; i++) {
1277  g_string_append_printf (a_string, "%c", a_char);
1278  }
1279 }
1280 
1281 /**
1282  *Duplicates a list of GString instances.
1283  *@return the duplicated list of GString instances or NULL if
1284  *something bad happened.
1285  *@param a_list_of_strings the list of strings to be duplicated.
1286  */
1287 GList *
1288 cr_utils_dup_glist_of_string (GList const * a_list_of_strings)
1289 {
1290  GList const *cur = NULL;
1291  GList *result = NULL;
1292 
1293  g_return_val_if_fail (a_list_of_strings, NULL);
1294 
1295  for (cur = a_list_of_strings; cur; cur = cur->next) {
1296  GString *str = NULL;
1297 
1298  str = g_string_new_len (((GString *) cur->data)->str,
1299  ((GString *) cur->data)->len);
1300  if (str)
1301  result = g_list_append (result, str);
1302  }
1303 
1304  return result;
1305 }
1306 
1307 /**
1308  *Duplicate a GList where the GList::data is a CRString.
1309  *@param a_list_of_strings the list to duplicate
1310  *@return the duplicated list, or NULL if something bad
1311  *happened.
1312  */
1313 GList *
1314 cr_utils_dup_glist_of_cr_string (GList const * a_list_of_strings)
1315 {
1316  GList const *cur = NULL;
1317  GList *result = NULL;
1318 
1319  g_return_val_if_fail (a_list_of_strings, NULL);
1320 
1321  for (cur = a_list_of_strings; cur; cur = cur->next) {
1322  CRString *str = NULL;
1323 
1324  str = cr_string_dup ((CRString const *) cur->data) ;
1325  if (str)
1326  result = g_list_append (result, str);
1327  }
1328 
1329  return result;
1330 }
cr_utils_utf8_to_ucs4
enum CRStatus cr_utils_utf8_to_ucs4(const guchar *a_in, gulong *a_in_len, guint32 *a_out, gulong *a_out_len)
Converts an utf8 buffer into an ucs4 buffer.
Definition: cr-utils.c:270
cr_utils_ucs1_str_to_utf8
enum CRStatus cr_utils_ucs1_str_to_utf8(const guchar *a_in, gulong *a_in_len, guchar **a_out, gulong *a_out_len)
Converts an ucs1 string into an utf8 string.
Definition: cr-utils.c:941
cr_utils_utf8_str_len_as_ucs1
enum CRStatus cr_utils_utf8_str_len_as_ucs1(const guchar *a_in_start, const guchar *a_in_end, gulong *a_len)
Definition: cr-utils.c:569
cr_utils_ucs1_str_len_as_utf8
enum CRStatus cr_utils_ucs1_str_len_as_utf8(const guchar *a_in_start, const guchar *a_in_end, gulong *a_len)
Given a ucs1 string, this function returns the size (in bytes) this string would have occupied if it...
Definition: cr-utils.c:230
cr_utils_read_char_from_utf8_buf
enum CRStatus cr_utils_read_char_from_utf8_buf(const guchar *a_in, gulong a_in_len, guint32 *a_out, gulong *a_consumed)
Reads a character from an utf8 buffer.
Definition: cr-utils.c:428
CR_BAD_PARAM_ERROR
@ CR_BAD_PARAM_ERROR
Definition: cr-utils.h:45
cr_utils_utf8_str_len_as_ucs4
enum CRStatus cr_utils_utf8_str_len_as_ucs4(const guchar *a_in_start, const guchar *a_in_end, gulong *a_len)
Given an utf8 string buffer, calculates the length of this string if it was encoded in ucs4.
Definition: cr-utils.c:69
cr_utils_utf8_to_ucs1
enum CRStatus cr_utils_utf8_to_ucs1(const guchar *a_in, gulong *a_in_len, guchar *a_out, gulong *a_out_len)
Converts an utf8 buffer into an ucs1 buffer.
Definition: cr-utils.c:995
cr_utils_ucs4_str_to_utf8
enum CRStatus cr_utils_ucs4_str_to_utf8(const guint32 *a_in, gulong *a_in_len, guchar **a_out, gulong *a_out_len)
Converts an ucs4 string into an utf8 string.
Definition: cr-utils.c:845
CRString
typedef struct _CRString CRString
Definition: cr-string.h:37
cr_utils_ucs1_to_utf8
enum CRStatus cr_utils_ucs1_to_utf8(const guchar *a_in, gulong *a_in_len, guchar *a_out, gulong *a_out_len)
Converts an ucs1 buffer into an utf8 buffer.
Definition: cr-utils.c:886
cr_utils_ucs4_to_utf8
enum CRStatus cr_utils_ucs4_to_utf8(const guint32 *a_in, gulong *a_in_len, guchar *a_out, gulong *a_out_len)
Converts an ucs4 buffer into an utf8 buffer.
Definition: cr-utils.c:748
cr_utils_utf8_str_to_ucs4
enum CRStatus cr_utils_utf8_str_to_ucs4(const guchar *a_in, gulong *a_in_len, guint32 **a_out, gulong *a_out_len)
Converts an utf8 string into an ucs4 string.
Definition: cr-utils.c:710
cr_utils_is_white_space
gboolean cr_utils_is_white_space(guint32 a_char)
Returns TRUE if a_char is a white space as defined in the css spec in chap 4.1.1.
Definition: cr-utils.c:1181
cr_utils_is_hexa_char
gboolean cr_utils_is_hexa_char(guint32 a_char)
Returns TRUE if the char is a hexadecimal digit character, i.e. hexa_char ::= [0-9A-F]
Definition: cr-utils.c:1224
CR_OK
@ CR_OK
Definition: cr-utils.h:44
cr-string.h
cr_utils_is_newline
gboolean cr_utils_is_newline(guint32 a_char)
Returns true if the character is a newline as defined in the css spec in the chap 4....
Definition: cr-utils.c:1206
cr_utils_is_nonascii
gboolean cr_utils_is_nonascii(guint32 a_char)
Returns true if the character is a nonascii character (as defined in the css spec chap 4....
Definition: cr-utils.c:1244
cr_string_dup
CRString * cr_string_dup(CRString const *a_this)
Definition: cr-string.c:94
cr_utils_ucs4_str_len_as_utf8
enum CRStatus cr_utils_ucs4_str_len_as_utf8(const guint32 *a_in_start, const guint32 *a_in_end, gulong *a_len)
Given an ucs4 string, this function returns the size (in bytes) this string would have occupied if it...
Definition: cr-utils.c:187
cr_utils_dup_glist_of_cr_string
GList * cr_utils_dup_glist_of_cr_string(GList const *a_list_of_strings)
Duplicate a GList where the GList::data is a CRString.
Definition: cr-utils.c:1314
cr_utils_dump_n_chars
void cr_utils_dump_n_chars(guchar a_char, FILE *a_fp, glong a_nb)
Dumps a character a_nb times on a file.
Definition: cr-utils.c:1260
cr_utils_dup_glist_of_string
GList * cr_utils_dup_glist_of_string(GList const *a_list_of_strings)
Duplicates a list of GString instances.
Definition: cr-utils.c:1288
CRStatus
CRStatus
The status type returned by the methods of the croco library.
Definition: cr-utils.h:43
cr_utils_utf8_str_to_ucs1
enum CRStatus cr_utils_utf8_str_to_ucs1(const guchar *a_in, gulong *a_in_len, guchar **a_out, gulong *a_out_len)
Converts an utf8 buffer into an ucs1 buffer.
Definition: cr-utils.c:1141
cr-utils.h
cr_utils_dump_n_chars2
void cr_utils_dump_n_chars2(guchar a_char, GString *a_string, glong a_nb)
Definition: cr-utils.c:1270
CR_END_OF_INPUT_ERROR
@ CR_END_OF_INPUT_ERROR
Definition: cr-utils.h:52
CR_ENCODING_ERROR
@ CR_ENCODING_ERROR
Definition: cr-utils.h:57