Ruby  3.1.4p223 (2023-03-30 revision HEAD)
pack.c
1 /**********************************************************************
2 
3  pack.c -
4 
5  $Author$
6  created at: Thu Feb 10 15:17:05 JST 1994
7 
8  Copyright (C) 1993-2007 Yukihiro Matsumoto
9 
10 **********************************************************************/
11 
12 #include "ruby/internal/config.h"
13 
14 #include <ctype.h>
15 #include <errno.h>
16 #include <float.h>
17 #include <sys/types.h>
18 
19 #include "internal.h"
20 #include "internal/array.h"
21 #include "internal/bits.h"
22 #include "internal/string.h"
23 #include "internal/symbol.h"
24 #include "internal/variable.h"
25 #include "ruby/util.h"
26 
27 #include "builtin.h"
28 
29 /*
30  * It is intentional that the condition for natstr is HAVE_TRUE_LONG_LONG
31  * instead of HAVE_LONG_LONG or LONG_LONG.
32  * This means that q! and Q! always mean the standard long long type and
33  * raise ArgumentError on platforms which have no long long type,
34  * even if the platform has an implementation-specific 64-bit type.
35  * This behavior is consistent with the documentation of pack/unpack.
36  */
37 #ifdef HAVE_TRUE_LONG_LONG
38 static const char natstr[] = "sSiIlLqQjJ";
39 #else
40 static const char natstr[] = "sSiIlLjJ";
41 #endif
42 static const char endstr[] = "sSiIlLqQjJ";
43 
44 #ifdef HAVE_TRUE_LONG_LONG
45 /* It is intentional to use long long instead of LONG_LONG. */
46 # define NATINT_LEN_Q NATINT_LEN(long long, 8)
47 #else
48 # define NATINT_LEN_Q 8
49 #endif
50 
51 #if SIZEOF_SHORT != 2 || SIZEOF_LONG != 4 || (defined(HAVE_TRUE_LONG_LONG) && SIZEOF_LONG_LONG != 8)
52 # define NATINT_PACK
53 #endif
54 
55 #ifdef DYNAMIC_ENDIAN
56 /* for universal binary of NEXTSTEP and MacOS X */
57 /* useless since autoconf 2.63? */
/*
 * Run-time byte-order probe, compiled only when DYNAMIC_ENDIAN is defined.
 *
 * Returns 1 on a big-endian host and 0 on a little-endian one.  The answer
 * is computed on the first call and served from a static cache afterwards.
 */
static int
is_bigendian(void)
{
    static int probed = 0;
    static int cached;

    if (!probed) {
        probed = 1;
        /*
         * `probed` now holds the integer 1.  Its lowest-addressed byte is
         * zero only when the most significant byte is stored first.
         */
        cached = (*(const char *)&probed == 0);
    }
    return cached;
}
70 # define BIGENDIAN_P() (is_bigendian())
71 #elif defined(WORDS_BIGENDIAN)
72 # define BIGENDIAN_P() 1
73 #else
74 # define BIGENDIAN_P() 0
75 #endif
76 
/*
 * NATINT_LEN(type, len): byte width to use for an integer directive.
 * When NATINT_PACK is defined the expansion reads a local variable named
 * `natint` and yields sizeof(type) if it is non-zero, otherwise the fixed
 * width `len`.  Without NATINT_PACK it is always sizeof(type) and `len`
 * is ignored.
 */
77 #ifdef NATINT_PACK
78 # define NATINT_LEN(type,len) (natint?(int)sizeof(type):(int)(len))
79 #else
80 # define NATINT_LEN(type,len) ((int)sizeof(type))
81 #endif
82 
/*
 * Overlay of a float with a same-sized unsigned integer and a raw byte
 * buffer.  The byte-order macros below swap the `.u` view, and the packing
 * code copies the `.buf` view into the output string.
 */
typedef union {
    float f;
    uint32_t u;
    char buf[4];
} FLOAT_SWAPPER;

/* Same overlay for double: 64-bit integer view and 8-byte buffer. */
typedef union {
    double d;
    uint64_t u;
    char buf[8];
} DOUBLE_SWAPPER;
/*
 * Byte swapping of the integer view of a float/double.
 * swap32/swap64 are not defined in this part of the file
 * (presumably provided by internal/bits.h -- confirm).
 */
93 #define swapf(x) swap32(x)
94 #define swapd(x) swap64(x)
95 
/*
 * Host <-> wire conversions on the integer view.
 *   n = network order: the value is swapped only when BIGENDIAN_P() is false.
 *   v = VAX order:     the value is swapped only when BIGENDIAN_P() is true.
 * The "to host" and "from host" variants of each pair expand identically,
 * because a byte swap is its own inverse.
 */
96 #define rb_ntohf(x) (BIGENDIAN_P()?(x):swapf(x))
97 #define rb_ntohd(x) (BIGENDIAN_P()?(x):swapd(x))
98 #define rb_htonf(x) (BIGENDIAN_P()?(x):swapf(x))
99 #define rb_htond(x) (BIGENDIAN_P()?(x):swapd(x))
100 #define rb_htovf(x) (BIGENDIAN_P()?swapf(x):(x))
101 #define rb_htovd(x) (BIGENDIAN_P()?swapd(x):(x))
102 #define rb_vtohf(x) (BIGENDIAN_P()?swapf(x):(x))
103 #define rb_vtohd(x) (BIGENDIAN_P()?swapd(x):(x))
104 
/*
 * FLOAT_CONVWITH(x) declares a FLOAT_SWAPPER variable named x (the macro
 * already supplies the semicolon).  The four macros after it convert that
 * variable in place by rewriting its .u member.
 */
105 #define FLOAT_CONVWITH(x) FLOAT_SWAPPER x;
106 #define HTONF(x) ((x).u = rb_htonf((x).u))
107 #define HTOVF(x) ((x).u = rb_htovf((x).u))
108 #define NTOHF(x) ((x).u = rb_ntohf((x).u))
109 #define VTOHF(x) ((x).u = rb_vtohf((x).u))
110 
/* Double-precision counterparts, operating on a DOUBLE_SWAPPER variable. */
111 #define DOUBLE_CONVWITH(x) DOUBLE_SWAPPER x;
112 #define HTOND(x) ((x).u = rb_htond((x).u))
113 #define HTOVD(x) ((x).u = rb_htovd((x).u))
114 #define NTOHD(x) ((x).u = rb_ntohd((x).u))
115 #define VTOHD(x) ((x).u = rb_vtohd((x).u))
116 
117 #define MAX_INTEGER_PACK_SIZE 8
118 
119 static const char toofew[] = "too few arguments";
120 
121 static void encodes(VALUE,const char*,long,int,int);
122 static void qpencode(VALUE,VALUE,long);
123 
124 static unsigned long utf8_to_uv(const char*,long*);
125 
126 static ID id_associated;
127 
128 static void
129 str_associate(VALUE str, VALUE add)
130 {
131  /* assert(NIL_P(rb_attr_get(str, id_associated))); */
132  rb_ivar_set(str, id_associated, add);
133 }
134 
135 static VALUE
136 str_associated(VALUE str)
137 {
138  VALUE associates = rb_ivar_lookup(str, id_associated, Qfalse);
139  if (!associates)
140  rb_raise(rb_eArgError, "no associated pointer");
141  return associates;
142 }
143 
144 static VALUE
145 associated_pointer(VALUE associates, const char *t)
146 {
147  const VALUE *p = RARRAY_CONST_PTR(associates);
148  const VALUE *pend = p + RARRAY_LEN(associates);
149  for (; p < pend; p++) {
150  VALUE tmp = *p;
151  if (RB_TYPE_P(tmp, T_STRING) && RSTRING_PTR(tmp) == t) return tmp;
152  }
153  rb_raise(rb_eArgError, "non associated pointer");
155 }
156 
157 static void
158 unknown_directive(const char *mode, char type, VALUE fmt)
159 {
160  char unknown[5];
161 
162  if (ISPRINT(type)) {
163  unknown[0] = type;
164  unknown[1] = '\0';
165  }
166  else {
167  snprintf(unknown, sizeof(unknown), "\\x%.2x", type & 0xff);
168  }
169  fmt = rb_str_quote_unprintable(fmt);
170  rb_warning("unknown %s directive '%s' in '%"PRIsVALUE"'",
171  mode, unknown, fmt);
172 }
173 
174 static float
175 VALUE_to_float(VALUE obj)
176 {
177  VALUE v = rb_to_float(obj);
178  double d = RFLOAT_VALUE(v);
179 
180  if (isnan(d)) {
181  return NAN;
182  }
183  else if (d < -FLT_MAX) {
184  return -INFINITY;
185  }
186  else if (d <= FLT_MAX) {
187  return d;
188  }
189  else {
190  return INFINITY;
191  }
192 }
193 
194 static VALUE
195 pack_pack(rb_execution_context_t *ec, VALUE ary, VALUE fmt, VALUE buffer)
196 {
197  static const char nul10[] = "\0\0\0\0\0\0\0\0\0\0";
198  static const char spc10[] = " ";
199  const char *p, *pend;
200  VALUE res, from, associates = 0;
201  char type;
202  long len, idx, plen;
203  const char *ptr;
204  int enc_info = 1; /* 0 - BINARY, 1 - US-ASCII, 2 - UTF-8 */
205 #ifdef NATINT_PACK
206  int natint; /* native integer */
207 #endif
208  int integer_size, bigendian_p;
209 
210  StringValue(fmt);
211  p = RSTRING_PTR(fmt);
212  pend = p + RSTRING_LEN(fmt);
213 
214  if (NIL_P(buffer)) {
215  res = rb_str_buf_new(0);
216  }
217  else {
218  if (!RB_TYPE_P(buffer, T_STRING))
219  rb_raise(rb_eTypeError, "buffer must be String, not %s", rb_obj_classname(buffer));
220  rb_str_modify(buffer);
221  res = buffer;
222  }
223 
224  idx = 0;
225 
226 #define TOO_FEW (rb_raise(rb_eArgError, toofew), 0)
227 #define MORE_ITEM (idx < RARRAY_LEN(ary))
228 #define THISFROM (MORE_ITEM ? RARRAY_AREF(ary, idx) : TOO_FEW)
229 #define NEXTFROM (MORE_ITEM ? RARRAY_AREF(ary, idx++) : TOO_FEW)
230 
231  while (p < pend) {
232  int explicit_endian = 0;
233  if (RSTRING_PTR(fmt) + RSTRING_LEN(fmt) != pend) {
234  rb_raise(rb_eRuntimeError, "format string modified");
235  }
236  type = *p++; /* get data type */
237 #ifdef NATINT_PACK
238  natint = 0;
239 #endif
240 
241  if (ISSPACE(type)) continue;
242  if (type == '#') {
243  while ((p < pend) && (*p != '\n')) {
244  p++;
245  }
246  continue;
247  }
248 
249  {
250  modifiers:
251  switch (*p) {
252  case '_':
253  case '!':
254  if (strchr(natstr, type)) {
255 #ifdef NATINT_PACK
256  natint = 1;
257 #endif
258  p++;
259  }
260  else {
261  rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr);
262  }
263  goto modifiers;
264 
265  case '<':
266  case '>':
267  if (!strchr(endstr, type)) {
268  rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, endstr);
269  }
270  if (explicit_endian) {
271  rb_raise(rb_eRangeError, "Can't use both '<' and '>'");
272  }
273  explicit_endian = *p++;
274  goto modifiers;
275  }
276  }
277 
278  if (*p == '*') { /* set data length */
279  len = strchr("@Xxu", type) ? 0
280  : strchr("PMm", type) ? 1
281  : RARRAY_LEN(ary) - idx;
282  p++;
283  }
284  else if (ISDIGIT(*p)) {
285  errno = 0;
286  len = STRTOUL(p, (char**)&p, 10);
287  if (errno) {
288  rb_raise(rb_eRangeError, "pack length too big");
289  }
290  }
291  else {
292  len = 1;
293  }
294 
295  switch (type) {
296  case 'U':
297  /* if encoding is US-ASCII, upgrade to UTF-8 */
298  if (enc_info == 1) enc_info = 2;
299  break;
300  case 'm': case 'M': case 'u':
301  /* keep US-ASCII (do nothing) */
302  break;
303  default:
304  /* fall back to BINARY */
305  enc_info = 0;
306  break;
307  }
308  switch (type) {
309  case 'A': case 'a': case 'Z':
310  case 'B': case 'b':
311  case 'H': case 'h':
312  from = NEXTFROM;
313  if (NIL_P(from)) {
314  ptr = "";
315  plen = 0;
316  }
317  else {
318  StringValue(from);
319  ptr = RSTRING_PTR(from);
320  plen = RSTRING_LEN(from);
321  }
322 
323  if (p[-1] == '*')
324  len = plen;
325 
326  switch (type) {
327  case 'a': /* arbitrary binary string (null padded) */
328  case 'A': /* arbitrary binary string (ASCII space padded) */
329  case 'Z': /* null terminated string */
330  if (plen >= len) {
331  rb_str_buf_cat(res, ptr, len);
332  if (p[-1] == '*' && type == 'Z')
333  rb_str_buf_cat(res, nul10, 1);
334  }
335  else {
336  rb_str_buf_cat(res, ptr, plen);
337  len -= plen;
338  while (len >= 10) {
339  rb_str_buf_cat(res, (type == 'A')?spc10:nul10, 10);
340  len -= 10;
341  }
342  rb_str_buf_cat(res, (type == 'A')?spc10:nul10, len);
343  }
344  break;
345 
346 #define castchar(from) (char)((from) & 0xff)
347 
348  case 'b': /* bit string (ascending) */
349  {
350  int byte = 0;
351  long i, j = 0;
352 
353  if (len > plen) {
354  j = (len - plen + 1)/2;
355  len = plen;
356  }
357  for (i=0; i++ < len; ptr++) {
358  if (*ptr & 1)
359  byte |= 128;
360  if (i & 7)
361  byte >>= 1;
362  else {
363  char c = castchar(byte);
364  rb_str_buf_cat(res, &c, 1);
365  byte = 0;
366  }
367  }
368  if (len & 7) {
369  char c;
370  byte >>= 7 - (len & 7);
371  c = castchar(byte);
372  rb_str_buf_cat(res, &c, 1);
373  }
374  len = j;
375  goto grow;
376  }
377  break;
378 
379  case 'B': /* bit string (descending) */
380  {
381  int byte = 0;
382  long i, j = 0;
383 
384  if (len > plen) {
385  j = (len - plen + 1)/2;
386  len = plen;
387  }
388  for (i=0; i++ < len; ptr++) {
389  byte |= *ptr & 1;
390  if (i & 7)
391  byte <<= 1;
392  else {
393  char c = castchar(byte);
394  rb_str_buf_cat(res, &c, 1);
395  byte = 0;
396  }
397  }
398  if (len & 7) {
399  char c;
400  byte <<= 7 - (len & 7);
401  c = castchar(byte);
402  rb_str_buf_cat(res, &c, 1);
403  }
404  len = j;
405  goto grow;
406  }
407  break;
408 
409  case 'h': /* hex string (low nibble first) */
410  {
411  int byte = 0;
412  long i, j = 0;
413 
414  if (len > plen) {
415  j = (len + 1) / 2 - (plen + 1) / 2;
416  len = plen;
417  }
418  for (i=0; i++ < len; ptr++) {
419  if (ISALPHA(*ptr))
420  byte |= (((*ptr & 15) + 9) & 15) << 4;
421  else
422  byte |= (*ptr & 15) << 4;
423  if (i & 1)
424  byte >>= 4;
425  else {
426  char c = castchar(byte);
427  rb_str_buf_cat(res, &c, 1);
428  byte = 0;
429  }
430  }
431  if (len & 1) {
432  char c = castchar(byte);
433  rb_str_buf_cat(res, &c, 1);
434  }
435  len = j;
436  goto grow;
437  }
438  break;
439 
440  case 'H': /* hex string (high nibble first) */
441  {
442  int byte = 0;
443  long i, j = 0;
444 
445  if (len > plen) {
446  j = (len + 1) / 2 - (plen + 1) / 2;
447  len = plen;
448  }
449  for (i=0; i++ < len; ptr++) {
450  if (ISALPHA(*ptr))
451  byte |= ((*ptr & 15) + 9) & 15;
452  else
453  byte |= *ptr & 15;
454  if (i & 1)
455  byte <<= 4;
456  else {
457  char c = castchar(byte);
458  rb_str_buf_cat(res, &c, 1);
459  byte = 0;
460  }
461  }
462  if (len & 1) {
463  char c = castchar(byte);
464  rb_str_buf_cat(res, &c, 1);
465  }
466  len = j;
467  goto grow;
468  }
469  break;
470  }
471  break;
472 
473  case 'c': /* signed char */
474  case 'C': /* unsigned char */
475  integer_size = 1;
476  bigendian_p = BIGENDIAN_P(); /* not effective */
477  goto pack_integer;
478 
479  case 's': /* s for int16_t, s! for signed short */
480  integer_size = NATINT_LEN(short, 2);
481  bigendian_p = BIGENDIAN_P();
482  goto pack_integer;
483 
484  case 'S': /* S for uint16_t, S! for unsigned short */
485  integer_size = NATINT_LEN(short, 2);
486  bigendian_p = BIGENDIAN_P();
487  goto pack_integer;
488 
489  case 'i': /* i and i! for signed int */
490  integer_size = (int)sizeof(int);
491  bigendian_p = BIGENDIAN_P();
492  goto pack_integer;
493 
494  case 'I': /* I and I! for unsigned int */
495  integer_size = (int)sizeof(int);
496  bigendian_p = BIGENDIAN_P();
497  goto pack_integer;
498 
499  case 'l': /* l for int32_t, l! for signed long */
500  integer_size = NATINT_LEN(long, 4);
501  bigendian_p = BIGENDIAN_P();
502  goto pack_integer;
503 
504  case 'L': /* L for uint32_t, L! for unsigned long */
505  integer_size = NATINT_LEN(long, 4);
506  bigendian_p = BIGENDIAN_P();
507  goto pack_integer;
508 
509  case 'q': /* q for int64_t, q! for signed long long */
510  integer_size = NATINT_LEN_Q;
511  bigendian_p = BIGENDIAN_P();
512  goto pack_integer;
513 
514  case 'Q': /* Q for uint64_t, Q! for unsigned long long */
515  integer_size = NATINT_LEN_Q;
516  bigendian_p = BIGENDIAN_P();
517  goto pack_integer;
518 
519  case 'j': /* j for intptr_t */
520  integer_size = sizeof(intptr_t);
521  bigendian_p = BIGENDIAN_P();
522  goto pack_integer;
523 
524  case 'J': /* J for uintptr_t */
525  integer_size = sizeof(uintptr_t);
526  bigendian_p = BIGENDIAN_P();
527  goto pack_integer;
528 
529  case 'n': /* 16 bit (2 bytes) integer (network byte-order) */
530  integer_size = 2;
531  bigendian_p = 1;
532  goto pack_integer;
533 
534  case 'N': /* 32 bit (4 bytes) integer (network byte-order) */
535  integer_size = 4;
536  bigendian_p = 1;
537  goto pack_integer;
538 
539  case 'v': /* 16 bit (2 bytes) integer (VAX byte-order) */
540  integer_size = 2;
541  bigendian_p = 0;
542  goto pack_integer;
543 
544  case 'V': /* 32 bit (4 bytes) integer (VAX byte-order) */
545  integer_size = 4;
546  bigendian_p = 0;
547  goto pack_integer;
548 
549  pack_integer:
550  if (explicit_endian) {
551  bigendian_p = explicit_endian == '>';
552  }
553  if (integer_size > MAX_INTEGER_PACK_SIZE)
554  rb_bug("unexpected intger size for pack: %d", integer_size);
555  while (len-- > 0) {
556  char intbuf[MAX_INTEGER_PACK_SIZE];
557 
558  from = NEXTFROM;
559  rb_integer_pack(from, intbuf, integer_size, 1, 0,
562  rb_str_buf_cat(res, intbuf, integer_size);
563  }
564  break;
565 
566  case 'f': /* single precision float in native format */
567  case 'F': /* ditto */
568  while (len-- > 0) {
569  float f;
570 
571  from = NEXTFROM;
572  f = VALUE_to_float(from);
573  rb_str_buf_cat(res, (char*)&f, sizeof(float));
574  }
575  break;
576 
577  case 'e': /* single precision float in VAX byte-order */
578  while (len-- > 0) {
579  FLOAT_CONVWITH(tmp);
580 
581  from = NEXTFROM;
582  tmp.f = VALUE_to_float(from);
583  HTOVF(tmp);
584  rb_str_buf_cat(res, tmp.buf, sizeof(float));
585  }
586  break;
587 
588  case 'E': /* double precision float in VAX byte-order */
589  while (len-- > 0) {
590  DOUBLE_CONVWITH(tmp);
591  from = NEXTFROM;
592  tmp.d = RFLOAT_VALUE(rb_to_float(from));
593  HTOVD(tmp);
594  rb_str_buf_cat(res, tmp.buf, sizeof(double));
595  }
596  break;
597 
598  case 'd': /* double precision float in native format */
599  case 'D': /* ditto */
600  while (len-- > 0) {
601  double d;
602 
603  from = NEXTFROM;
604  d = RFLOAT_VALUE(rb_to_float(from));
605  rb_str_buf_cat(res, (char*)&d, sizeof(double));
606  }
607  break;
608 
609  case 'g': /* single precision float in network byte-order */
610  while (len-- > 0) {
611  FLOAT_CONVWITH(tmp);
612  from = NEXTFROM;
613  tmp.f = VALUE_to_float(from);
614  HTONF(tmp);
615  rb_str_buf_cat(res, tmp.buf, sizeof(float));
616  }
617  break;
618 
619  case 'G': /* double precision float in network byte-order */
620  while (len-- > 0) {
621  DOUBLE_CONVWITH(tmp);
622 
623  from = NEXTFROM;
624  tmp.d = RFLOAT_VALUE(rb_to_float(from));
625  HTOND(tmp);
626  rb_str_buf_cat(res, tmp.buf, sizeof(double));
627  }
628  break;
629 
630  case 'x': /* null byte */
631  grow:
632  while (len >= 10) {
633  rb_str_buf_cat(res, nul10, 10);
634  len -= 10;
635  }
636  rb_str_buf_cat(res, nul10, len);
637  break;
638 
639  case 'X': /* back up byte */
640  shrink:
641  plen = RSTRING_LEN(res);
642  if (plen < len)
643  rb_raise(rb_eArgError, "X outside of string");
644  rb_str_set_len(res, plen - len);
645  break;
646 
647  case '@': /* null fill to absolute position */
648  len -= RSTRING_LEN(res);
649  if (len > 0) goto grow;
650  len = -len;
651  if (len > 0) goto shrink;
652  break;
653 
654  case '%':
655  rb_raise(rb_eArgError, "%% is not supported");
656  break;
657 
658  case 'U': /* Unicode character */
659  while (len-- > 0) {
660  SIGNED_VALUE l;
661  char buf[8];
662  int le;
663 
664  from = NEXTFROM;
665  from = rb_to_int(from);
666  l = NUM2LONG(from);
667  if (l < 0) {
668  rb_raise(rb_eRangeError, "pack(U): value out of range");
669  }
670  le = rb_uv_to_utf8(buf, l);
671  rb_str_buf_cat(res, (char*)buf, le);
672  }
673  break;
674 
675  case 'u': /* uuencoded string */
676  case 'm': /* base64 encoded string */
677  from = NEXTFROM;
678  StringValue(from);
679  ptr = RSTRING_PTR(from);
680  plen = RSTRING_LEN(from);
681 
682  if (len == 0 && type == 'm') {
683  encodes(res, ptr, plen, type, 0);
684  ptr += plen;
685  break;
686  }
687  if (len <= 2)
688  len = 45;
689  else if (len > 63 && type == 'u')
690  len = 63;
691  else
692  len = len / 3 * 3;
693  while (plen > 0) {
694  long todo;
695 
696  if (plen > len)
697  todo = len;
698  else
699  todo = plen;
700  encodes(res, ptr, todo, type, 1);
701  plen -= todo;
702  ptr += todo;
703  }
704  break;
705 
706  case 'M': /* quoted-printable encoded string */
707  from = rb_obj_as_string(NEXTFROM);
708  if (len <= 1)
709  len = 72;
710  qpencode(res, from, len);
711  break;
712 
713  case 'P': /* pointer to packed byte string */
714  from = THISFROM;
715  if (!NIL_P(from)) {
716  StringValue(from);
717  if (RSTRING_LEN(from) < len) {
718  rb_raise(rb_eArgError, "too short buffer for P(%ld for %ld)",
719  RSTRING_LEN(from), len);
720  }
721  }
722  len = 1;
723  /* FALL THROUGH */
724  case 'p': /* pointer to string */
725  while (len-- > 0) {
726  char *t;
727  from = NEXTFROM;
728  if (NIL_P(from)) {
729  t = 0;
730  }
731  else {
732  t = StringValuePtr(from);
733  }
734  if (!associates) {
735  associates = rb_ary_new();
736  }
737  rb_ary_push(associates, from);
738  rb_str_buf_cat(res, (char*)&t, sizeof(char*));
739  }
740  break;
741 
742  case 'w': /* BER compressed integer */
743  while (len-- > 0) {
744  VALUE buf = rb_str_new(0, 0);
745  size_t numbytes;
746  int sign;
747  char *cp;
748 
749  from = NEXTFROM;
750  from = rb_to_int(from);
751  numbytes = rb_absint_numwords(from, 7, NULL);
752  if (numbytes == 0)
753  numbytes = 1;
754  buf = rb_str_new(NULL, numbytes);
755 
756  sign = rb_integer_pack(from, RSTRING_PTR(buf), RSTRING_LEN(buf), 1, 1, INTEGER_PACK_BIG_ENDIAN);
757 
758  if (sign < 0)
759  rb_raise(rb_eArgError, "can't compress negative numbers");
760  if (sign == 2)
761  rb_bug("buffer size problem?");
762 
763  cp = RSTRING_PTR(buf);
764  while (1 < numbytes) {
765  *cp |= 0x80;
766  cp++;
767  numbytes--;
768  }
769 
770  rb_str_buf_cat(res, RSTRING_PTR(buf), RSTRING_LEN(buf));
771  }
772  break;
773 
774  default: {
775  unknown_directive("pack", type, fmt);
776  break;
777  }
778  }
779  }
780 
781  if (associates) {
782  str_associate(res, associates);
783  }
784  switch (enc_info) {
785  case 1:
787  break;
788  case 2:
790  break;
791  default:
792  /* do nothing, keep ASCII-8BIT */
793  break;
794  }
795  return res;
796 }
797 
/*
 * 6-bit value -> output character tables used by encodes().
 * uu_table is selected when the directive type is 'u'; note that it maps
 * value 0 to '`'.  b64_table is used otherwise.
 */
798 static const char uu_table[] =
799 "`!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_";
800 static const char b64_table[] =
801 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
802 
803 static void
804 encodes(VALUE str, const char *s0, long len, int type, int tail_lf)
805 {
806  enum {buff_size = 4096, encoded_unit = 4, input_unit = 3};
807  char buff[buff_size + 1]; /* +1 for tail_lf */
808  long i = 0;
809  const char *const trans = type == 'u' ? uu_table : b64_table;
810  char padding;
811  const unsigned char *s = (const unsigned char *)s0;
812 
813  if (type == 'u') {
814  buff[i++] = (char)len + ' ';
815  padding = '`';
816  }
817  else {
818  padding = '=';
819  }
820  while (len >= input_unit) {
821  while (len >= input_unit && buff_size-i >= encoded_unit) {
822  buff[i++] = trans[077 & (*s >> 2)];
823  buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))];
824  buff[i++] = trans[077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03))];
825  buff[i++] = trans[077 & s[2]];
826  s += input_unit;
827  len -= input_unit;
828  }
829  if (buff_size-i < encoded_unit) {
830  rb_str_buf_cat(str, buff, i);
831  i = 0;
832  }
833  }
834 
835  if (len == 2) {
836  buff[i++] = trans[077 & (*s >> 2)];
837  buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))];
838  buff[i++] = trans[077 & (((s[1] << 2) & 074) | (('\0' >> 6) & 03))];
839  buff[i++] = padding;
840  }
841  else if (len == 1) {
842  buff[i++] = trans[077 & (*s >> 2)];
843  buff[i++] = trans[077 & (((*s << 4) & 060) | (('\0' >> 4) & 017))];
844  buff[i++] = padding;
845  buff[i++] = padding;
846  }
847  if (tail_lf) buff[i++] = '\n';
848  rb_str_buf_cat(str, buff, i);
849  if ((size_t)i > sizeof(buff)) rb_bug("encodes() buffer overrun");
850 }
851 
852 static const char hex_table[] = "0123456789ABCDEF";
853 
854 static void
855 qpencode(VALUE str, VALUE from, long len)
856 {
857  char buff[1024];
858  long i = 0, n = 0, prev = EOF;
859  unsigned char *s = (unsigned char*)RSTRING_PTR(from);
860  unsigned char *send = s + RSTRING_LEN(from);
861 
862  while (s < send) {
863  if ((*s > 126) ||
864  (*s < 32 && *s != '\n' && *s != '\t') ||
865  (*s == '=')) {
866  buff[i++] = '=';
867  buff[i++] = hex_table[*s >> 4];
868  buff[i++] = hex_table[*s & 0x0f];
869  n += 3;
870  prev = EOF;
871  }
872  else if (*s == '\n') {
873  if (prev == ' ' || prev == '\t') {
874  buff[i++] = '=';
875  buff[i++] = *s;
876  }
877  buff[i++] = *s;
878  n = 0;
879  prev = *s;
880  }
881  else {
882  buff[i++] = *s;
883  n++;
884  prev = *s;
885  }
886  if (n > len) {
887  buff[i++] = '=';
888  buff[i++] = '\n';
889  n = 0;
890  prev = '\n';
891  }
892  if (i > 1024 - 5) {
893  rb_str_buf_cat(str, buff, i);
894  i = 0;
895  }
896  s++;
897  }
898  if (n > 0) {
899  buff[i++] = '=';
900  buff[i++] = '\n';
901  }
902  if (i > 0) {
903  rb_str_buf_cat(str, buff, i);
904  }
905 }
906 
907 static inline int
908 hex2num(char c)
909 {
910  int n;
911  n = ruby_digit36_to_number_table[(unsigned char)c];
912  if (16 <= n)
913  n = -1;
914  return n;
915 }
916 
/*
 * PACK_LENGTH_ADJUST_SIZE(sz): clamp `len` to the number of whole sz-byte
 * items left between `s` and `send`.  When the count was not '*' (`star`
 * is zero), the number of items that could not be supplied is recorded in
 * `tmp_len`; otherwise `tmp_len` stays 0.
 * Expands in place and relies on the caller's locals len, s, send, star
 * and tmp_len.
 */
917 #define PACK_LENGTH_ADJUST_SIZE(sz) do { \
918  tmp_len = 0; \
919  if (len > (long)((send-s)/(sz))) { \
920  if (!star) { \
921  tmp_len = len-(send-s)/(sz); \
922  } \
923  len = (send-s)/(sz); \
924  } \
925 } while (0)
926 
/*
 * PACK_ITEM_ADJUST(): in UNPACK_ARRAY mode, if items were missing
 * (tmp_len > 0), store Qnil at index RARRAY_LEN(ary)+tmp_len-1 so that the
 * result array grows by tmp_len entries (presumably the gap is nil-filled
 * by rb_ary_store -- confirm).  Relies on the caller's locals tmp_len,
 * mode and ary.
 */
927 #define PACK_ITEM_ADJUST() do { \
928  if (tmp_len > 0 && mode == UNPACK_ARRAY) \
929  rb_ary_store(ary, RARRAY_LEN(ary)+tmp_len-1, Qnil); \
930 } while (0)
931 
932 /* Workaround for Oracle Developer Studio (Oracle Solaris Studio)
933  * 12.4/12.5/12.6 C compiler optimization bug
934  * with "-xO4" optimization option.
935  */
936 #if defined(__SUNPRO_C) && 0x5130 <= __SUNPRO_C && __SUNPRO_C <= 0x5150
937 # define AVOID_CC_BUG volatile
938 #else
939 # define AVOID_CC_BUG
940 #endif
941 
942 /* unpack mode */
943 #define UNPACK_ARRAY 0
944 #define UNPACK_BLOCK 1
945 #define UNPACK_1 2
946 
947 static VALUE
948 pack_unpack_internal(VALUE str, VALUE fmt, int mode, long offset)
949 {
950 #define hexdigits ruby_hexdigits
951  char *s, *send;
952  char *p, *pend;
953  VALUE ary, associates = Qfalse;
954  char type;
955  long len;
956  AVOID_CC_BUG long tmp_len;
957  int star;
958 #ifdef NATINT_PACK
959  int natint; /* native integer */
960 #endif
961  int signed_p, integer_size, bigendian_p;
962 #define UNPACK_PUSH(item) do {\
963  VALUE item_val = (item);\
964  if ((mode) == UNPACK_BLOCK) {\
965  rb_yield(item_val);\
966  }\
967  else if ((mode) == UNPACK_ARRAY) {\
968  rb_ary_push(ary, item_val);\
969  }\
970  else /* if ((mode) == UNPACK_1) { */ {\
971  return item_val; \
972  }\
973  } while (0)
974 
975  StringValue(str);
976  StringValue(fmt);
977 
978  if (offset < 0) rb_raise(rb_eArgError, "offset can't be negative");
979  len = RSTRING_LEN(str);
980  if (offset > len) rb_raise(rb_eArgError, "offset outside of string");
981 
982  s = RSTRING_PTR(str);
983  send = s + len;
984  s += offset;
985 
986  p = RSTRING_PTR(fmt);
987  pend = p + RSTRING_LEN(fmt);
988 
989 #define UNPACK_FETCH(var, type) (memcpy((var), s, sizeof(type)), s += sizeof(type))
990 
991  ary = mode == UNPACK_ARRAY ? rb_ary_new() : Qnil;
992  while (p < pend) {
993  int explicit_endian = 0;
994  type = *p++;
995 #ifdef NATINT_PACK
996  natint = 0;
997 #endif
998 
999  if (ISSPACE(type)) continue;
1000  if (type == '#') {
1001  while ((p < pend) && (*p != '\n')) {
1002  p++;
1003  }
1004  continue;
1005  }
1006 
1007  star = 0;
1008  {
1009  modifiers:
1010  switch (*p) {
1011  case '_':
1012  case '!':
1013 
1014  if (strchr(natstr, type)) {
1015 #ifdef NATINT_PACK
1016  natint = 1;
1017 #endif
1018  p++;
1019  }
1020  else {
1021  rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr);
1022  }
1023  goto modifiers;
1024 
1025  case '<':
1026  case '>':
1027  if (!strchr(endstr, type)) {
1028  rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, endstr);
1029  }
1030  if (explicit_endian) {
1031  rb_raise(rb_eRangeError, "Can't use both '<' and '>'");
1032  }
1033  explicit_endian = *p++;
1034  goto modifiers;
1035  }
1036  }
1037 
1038  if (p >= pend)
1039  len = 1;
1040  else if (*p == '*') {
1041  star = 1;
1042  len = send - s;
1043  p++;
1044  }
1045  else if (ISDIGIT(*p)) {
1046  errno = 0;
1047  len = STRTOUL(p, (char**)&p, 10);
1048  if (len < 0 || errno) {
1049  rb_raise(rb_eRangeError, "pack length too big");
1050  }
1051  }
1052  else {
1053  len = (type != '@');
1054  }
1055 
1056  switch (type) {
1057  case '%':
1058  rb_raise(rb_eArgError, "%% is not supported");
1059  break;
1060 
1061  case 'A':
1062  if (len > send - s) len = send - s;
1063  {
1064  long end = len;
1065  char *t = s + len - 1;
1066 
1067  while (t >= s) {
1068  if (*t != ' ' && *t != '\0') break;
1069  t--; len--;
1070  }
1071  UNPACK_PUSH(rb_str_new(s, len));
1072  s += end;
1073  }
1074  break;
1075 
1076  case 'Z':
1077  {
1078  char *t = s;
1079 
1080  if (len > send-s) len = send-s;
1081  while (t < s+len && *t) t++;
1082  UNPACK_PUSH(rb_str_new(s, t-s));
1083  if (t < send) t++;
1084  s = star ? t : s+len;
1085  }
1086  break;
1087 
1088  case 'a':
1089  if (len > send - s) len = send - s;
1090  UNPACK_PUSH(rb_str_new(s, len));
1091  s += len;
1092  break;
1093 
1094  case 'b':
1095  {
1096  VALUE bitstr;
1097  char *t;
1098  int bits;
1099  long i;
1100 
1101  if (p[-1] == '*' || len > (send - s) * 8)
1102  len = (send - s) * 8;
1103  bits = 0;
1104  bitstr = rb_usascii_str_new(0, len);
1105  t = RSTRING_PTR(bitstr);
1106  for (i=0; i<len; i++) {
1107  if (i & 7) bits >>= 1;
1108  else bits = (unsigned char)*s++;
1109  *t++ = (bits & 1) ? '1' : '0';
1110  }
1111  UNPACK_PUSH(bitstr);
1112  }
1113  break;
1114 
1115  case 'B':
1116  {
1117  VALUE bitstr;
1118  char *t;
1119  int bits;
1120  long i;
1121 
1122  if (p[-1] == '*' || len > (send - s) * 8)
1123  len = (send - s) * 8;
1124  bits = 0;
1125  bitstr = rb_usascii_str_new(0, len);
1126  t = RSTRING_PTR(bitstr);
1127  for (i=0; i<len; i++) {
1128  if (i & 7) bits <<= 1;
1129  else bits = (unsigned char)*s++;
1130  *t++ = (bits & 128) ? '1' : '0';
1131  }
1132  UNPACK_PUSH(bitstr);
1133  }
1134  break;
1135 
1136  case 'h':
1137  {
1138  VALUE bitstr;
1139  char *t;
1140  int bits;
1141  long i;
1142 
1143  if (p[-1] == '*' || len > (send - s) * 2)
1144  len = (send - s) * 2;
1145  bits = 0;
1146  bitstr = rb_usascii_str_new(0, len);
1147  t = RSTRING_PTR(bitstr);
1148  for (i=0; i<len; i++) {
1149  if (i & 1)
1150  bits >>= 4;
1151  else
1152  bits = (unsigned char)*s++;
1153  *t++ = hexdigits[bits & 15];
1154  }
1155  UNPACK_PUSH(bitstr);
1156  }
1157  break;
1158 
1159  case 'H':
1160  {
1161  VALUE bitstr;
1162  char *t;
1163  int bits;
1164  long i;
1165 
1166  if (p[-1] == '*' || len > (send - s) * 2)
1167  len = (send - s) * 2;
1168  bits = 0;
1169  bitstr = rb_usascii_str_new(0, len);
1170  t = RSTRING_PTR(bitstr);
1171  for (i=0; i<len; i++) {
1172  if (i & 1)
1173  bits <<= 4;
1174  else
1175  bits = (unsigned char)*s++;
1176  *t++ = hexdigits[(bits >> 4) & 15];
1177  }
1178  UNPACK_PUSH(bitstr);
1179  }
1180  break;
1181 
1182  case 'c':
1183  signed_p = 1;
1184  integer_size = 1;
1185  bigendian_p = BIGENDIAN_P(); /* not effective */
1186  goto unpack_integer;
1187 
1188  case 'C':
1189  signed_p = 0;
1190  integer_size = 1;
1191  bigendian_p = BIGENDIAN_P(); /* not effective */
1192  goto unpack_integer;
1193 
1194  case 's':
1195  signed_p = 1;
1196  integer_size = NATINT_LEN(short, 2);
1197  bigendian_p = BIGENDIAN_P();
1198  goto unpack_integer;
1199 
1200  case 'S':
1201  signed_p = 0;
1202  integer_size = NATINT_LEN(short, 2);
1203  bigendian_p = BIGENDIAN_P();
1204  goto unpack_integer;
1205 
1206  case 'i':
1207  signed_p = 1;
1208  integer_size = (int)sizeof(int);
1209  bigendian_p = BIGENDIAN_P();
1210  goto unpack_integer;
1211 
1212  case 'I':
1213  signed_p = 0;
1214  integer_size = (int)sizeof(int);
1215  bigendian_p = BIGENDIAN_P();
1216  goto unpack_integer;
1217 
1218  case 'l':
1219  signed_p = 1;
1220  integer_size = NATINT_LEN(long, 4);
1221  bigendian_p = BIGENDIAN_P();
1222  goto unpack_integer;
1223 
1224  case 'L':
1225  signed_p = 0;
1226  integer_size = NATINT_LEN(long, 4);
1227  bigendian_p = BIGENDIAN_P();
1228  goto unpack_integer;
1229 
1230  case 'q':
1231  signed_p = 1;
1232  integer_size = NATINT_LEN_Q;
1233  bigendian_p = BIGENDIAN_P();
1234  goto unpack_integer;
1235 
1236  case 'Q':
1237  signed_p = 0;
1238  integer_size = NATINT_LEN_Q;
1239  bigendian_p = BIGENDIAN_P();
1240  goto unpack_integer;
1241 
1242  case 'j':
1243  signed_p = 1;
1244  integer_size = sizeof(intptr_t);
1245  bigendian_p = BIGENDIAN_P();
1246  goto unpack_integer;
1247 
1248  case 'J':
1249  signed_p = 0;
1250  integer_size = sizeof(uintptr_t);
1251  bigendian_p = BIGENDIAN_P();
1252  goto unpack_integer;
1253 
1254  case 'n':
1255  signed_p = 0;
1256  integer_size = 2;
1257  bigendian_p = 1;
1258  goto unpack_integer;
1259 
1260  case 'N':
1261  signed_p = 0;
1262  integer_size = 4;
1263  bigendian_p = 1;
1264  goto unpack_integer;
1265 
1266  case 'v':
1267  signed_p = 0;
1268  integer_size = 2;
1269  bigendian_p = 0;
1270  goto unpack_integer;
1271 
1272  case 'V':
1273  signed_p = 0;
1274  integer_size = 4;
1275  bigendian_p = 0;
1276  goto unpack_integer;
1277 
1278  unpack_integer:
1279  if (explicit_endian) {
1280  bigendian_p = explicit_endian == '>';
1281  }
1282  PACK_LENGTH_ADJUST_SIZE(integer_size);
1283  while (len-- > 0) {
1284  int flags = bigendian_p ? INTEGER_PACK_BIG_ENDIAN : INTEGER_PACK_LITTLE_ENDIAN;
1285  VALUE val;
1286  if (signed_p)
1287  flags |= INTEGER_PACK_2COMP;
1288  val = rb_integer_unpack(s, integer_size, 1, 0, flags);
1289  UNPACK_PUSH(val);
1290  s += integer_size;
1291  }
1292  PACK_ITEM_ADJUST();
1293  break;
1294 
1295  case 'f':
1296  case 'F':
1297  PACK_LENGTH_ADJUST_SIZE(sizeof(float));
1298  while (len-- > 0) {
1299  float tmp;
1300  UNPACK_FETCH(&tmp, float);
1301  UNPACK_PUSH(DBL2NUM((double)tmp));
1302  }
1303  PACK_ITEM_ADJUST();
1304  break;
1305 
1306  case 'e':
1307  PACK_LENGTH_ADJUST_SIZE(sizeof(float));
1308  while (len-- > 0) {
1309  FLOAT_CONVWITH(tmp);
1310  UNPACK_FETCH(tmp.buf, float);
1311  VTOHF(tmp);
1312  UNPACK_PUSH(DBL2NUM(tmp.f));
1313  }
1314  PACK_ITEM_ADJUST();
1315  break;
1316 
1317  case 'E':
1318  PACK_LENGTH_ADJUST_SIZE(sizeof(double));
1319  while (len-- > 0) {
1320  DOUBLE_CONVWITH(tmp);
1321  UNPACK_FETCH(tmp.buf, double);
1322  VTOHD(tmp);
1323  UNPACK_PUSH(DBL2NUM(tmp.d));
1324  }
1325  PACK_ITEM_ADJUST();
1326  break;
1327 
1328  case 'D':
1329  case 'd':
1330  PACK_LENGTH_ADJUST_SIZE(sizeof(double));
1331  while (len-- > 0) {
1332  double tmp;
1333  UNPACK_FETCH(&tmp, double);
1334  UNPACK_PUSH(DBL2NUM(tmp));
1335  }
1336  PACK_ITEM_ADJUST();
1337  break;
1338 
1339  case 'g':
1340  PACK_LENGTH_ADJUST_SIZE(sizeof(float));
1341  while (len-- > 0) {
1342  FLOAT_CONVWITH(tmp);
1343  UNPACK_FETCH(tmp.buf, float);
1344  NTOHF(tmp);
1345  UNPACK_PUSH(DBL2NUM(tmp.f));
1346  }
1347  PACK_ITEM_ADJUST();
1348  break;
1349 
1350  case 'G':
1351  PACK_LENGTH_ADJUST_SIZE(sizeof(double));
1352  while (len-- > 0) {
1353  DOUBLE_CONVWITH(tmp);
1354  UNPACK_FETCH(tmp.buf, double);
1355  NTOHD(tmp);
1356  UNPACK_PUSH(DBL2NUM(tmp.d));
1357  }
1358  PACK_ITEM_ADJUST();
1359  break;
1360 
1361  case 'U':
1362  if (len > send - s) len = send - s;
1363  while (len > 0 && s < send) {
1364  long alen = send - s;
1365  unsigned long l;
1366 
1367  l = utf8_to_uv(s, &alen);
1368  s += alen; len--;
1369  UNPACK_PUSH(ULONG2NUM(l));
1370  }
1371  break;
1372 
1373  case 'u':
1374  {
1375  VALUE buf = rb_str_new(0, (send - s)*3/4);
1376  char *ptr = RSTRING_PTR(buf);
1377  long total = 0;
1378 
1379  while (s < send && (unsigned char)*s > ' ' && (unsigned char)*s < 'a') {
1380  long a,b,c,d;
1381  char hunk[3];
1382 
1383  len = ((unsigned char)*s++ - ' ') & 077;
1384 
1385  total += len;
1386  if (total > RSTRING_LEN(buf)) {
1387  len -= total - RSTRING_LEN(buf);
1388  total = RSTRING_LEN(buf);
1389  }
1390 
1391  while (len > 0) {
1392  long mlen = len > 3 ? 3 : len;
1393 
1394  if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
1395  a = ((unsigned char)*s++ - ' ') & 077;
1396  else
1397  a = 0;
1398  if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
1399  b = ((unsigned char)*s++ - ' ') & 077;
1400  else
1401  b = 0;
1402  if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
1403  c = ((unsigned char)*s++ - ' ') & 077;
1404  else
1405  c = 0;
1406  if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
1407  d = ((unsigned char)*s++ - ' ') & 077;
1408  else
1409  d = 0;
1410  hunk[0] = (char)(a << 2 | b >> 4);
1411  hunk[1] = (char)(b << 4 | c >> 2);
1412  hunk[2] = (char)(c << 6 | d);
1413  memcpy(ptr, hunk, mlen);
1414  ptr += mlen;
1415  len -= mlen;
1416  }
1417  if (s < send && (unsigned char)*s != '\r' && *s != '\n')
1418  s++; /* possible checksum byte */
1419  if (s < send && *s == '\r') s++;
1420  if (s < send && *s == '\n') s++;
1421  }
1422 
1423  rb_str_set_len(buf, total);
1424  UNPACK_PUSH(buf);
1425  }
1426  break;
1427 
1428  case 'm':
1429  {
1430  VALUE buf = rb_str_new(0, (send - s + 3)*3/4); /* +3 is for skipping paddings */
1431  char *ptr = RSTRING_PTR(buf);
1432  int a = -1,b = -1,c = 0,d = 0;
1433  static signed char b64_xtable[256];
1434 
1435  if (b64_xtable['/'] <= 0) {
1436  int i;
1437 
1438  for (i = 0; i < 256; i++) {
1439  b64_xtable[i] = -1;
1440  }
1441  for (i = 0; i < 64; i++) {
1442  b64_xtable[(unsigned char)b64_table[i]] = (char)i;
1443  }
1444  }
1445  if (len == 0) {
1446  while (s < send) {
1447  a = b = c = d = -1;
1448  a = b64_xtable[(unsigned char)*s++];
1449  if (s >= send || a == -1) rb_raise(rb_eArgError, "invalid base64");
1450  b = b64_xtable[(unsigned char)*s++];
1451  if (s >= send || b == -1) rb_raise(rb_eArgError, "invalid base64");
1452  if (*s == '=') {
1453  if (s + 2 == send && *(s + 1) == '=') break;
1454  rb_raise(rb_eArgError, "invalid base64");
1455  }
1456  c = b64_xtable[(unsigned char)*s++];
1457  if (s >= send || c == -1) rb_raise(rb_eArgError, "invalid base64");
1458  if (s + 1 == send && *s == '=') break;
1459  d = b64_xtable[(unsigned char)*s++];
1460  if (d == -1) rb_raise(rb_eArgError, "invalid base64");
1461  *ptr++ = castchar(a << 2 | b >> 4);
1462  *ptr++ = castchar(b << 4 | c >> 2);
1463  *ptr++ = castchar(c << 6 | d);
1464  }
1465  if (c == -1) {
1466  *ptr++ = castchar(a << 2 | b >> 4);
1467  if (b & 0xf) rb_raise(rb_eArgError, "invalid base64");
1468  }
1469  else if (d == -1) {
1470  *ptr++ = castchar(a << 2 | b >> 4);
1471  *ptr++ = castchar(b << 4 | c >> 2);
1472  if (c & 0x3) rb_raise(rb_eArgError, "invalid base64");
1473  }
1474  }
1475  else {
1476  while (s < send) {
1477  a = b = c = d = -1;
1478  while ((a = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;}
1479  if (s >= send) break;
1480  s++;
1481  while ((b = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;}
1482  if (s >= send) break;
1483  s++;
1484  while ((c = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;}
1485  if (*s == '=' || s >= send) break;
1486  s++;
1487  while ((d = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;}
1488  if (*s == '=' || s >= send) break;
1489  s++;
1490  *ptr++ = castchar(a << 2 | b >> 4);
1491  *ptr++ = castchar(b << 4 | c >> 2);
1492  *ptr++ = castchar(c << 6 | d);
1493  a = -1;
1494  }
1495  if (a != -1 && b != -1) {
1496  if (c == -1)
1497  *ptr++ = castchar(a << 2 | b >> 4);
1498  else {
1499  *ptr++ = castchar(a << 2 | b >> 4);
1500  *ptr++ = castchar(b << 4 | c >> 2);
1501  }
1502  }
1503  }
1504  rb_str_set_len(buf, ptr - RSTRING_PTR(buf));
1505  UNPACK_PUSH(buf);
1506  }
1507  break;
1508 
1509  case 'M':
1510  {
1511  VALUE buf = rb_str_new(0, send - s);
1512  char *ptr = RSTRING_PTR(buf), *ss = s;
1513  int csum = 0;
1514  int c1, c2;
1515 
1516  while (s < send) {
1517  if (*s == '=') {
1518  if (++s == send) break;
1519  if (s+1 < send && *s == '\r' && *(s+1) == '\n')
1520  s++;
1521  if (*s != '\n') {
1522  if ((c1 = hex2num(*s)) == -1) break;
1523  if (++s == send) break;
1524  if ((c2 = hex2num(*s)) == -1) break;
1525  csum |= *ptr++ = castchar(c1 << 4 | c2);
1526  }
1527  }
1528  else {
1529  csum |= *ptr++ = *s;
1530  }
1531  s++;
1532  ss = s;
1533  }
1534  rb_str_set_len(buf, ptr - RSTRING_PTR(buf));
1535  rb_str_buf_cat(buf, ss, send-ss);
1538  UNPACK_PUSH(buf);
1539  }
1540  break;
1541 
1542  case '@':
1543  if (len > RSTRING_LEN(str))
1544  rb_raise(rb_eArgError, "@ outside of string");
1545  s = RSTRING_PTR(str) + len;
1546  break;
1547 
1548  case 'X':
1549  if (len > s - RSTRING_PTR(str))
1550  rb_raise(rb_eArgError, "X outside of string");
1551  s -= len;
1552  break;
1553 
1554  case 'x':
1555  if (len > send - s)
1556  rb_raise(rb_eArgError, "x outside of string");
1557  s += len;
1558  break;
1559 
1560  case 'P':
1561  if (sizeof(char *) <= (size_t)(send - s)) {
1562  VALUE tmp = Qnil;
1563  char *t;
1564 
1565  UNPACK_FETCH(&t, char *);
1566  if (t) {
1567  if (!associates) associates = str_associated(str);
1568  tmp = associated_pointer(associates, t);
1569  if (len < RSTRING_LEN(tmp)) {
1570  tmp = rb_str_new(t, len);
1571  str_associate(tmp, associates);
1572  }
1573  }
1574  UNPACK_PUSH(tmp);
1575  }
1576  break;
1577 
1578  case 'p':
1579  if (len > (long)((send - s) / sizeof(char *)))
1580  len = (send - s) / sizeof(char *);
1581  while (len-- > 0) {
1582  if ((size_t)(send - s) < sizeof(char *))
1583  break;
1584  else {
1585  VALUE tmp = Qnil;
1586  char *t;
1587 
1588  UNPACK_FETCH(&t, char *);
1589  if (t) {
1590  if (!associates) associates = str_associated(str);
1591  tmp = associated_pointer(associates, t);
1592  }
1593  UNPACK_PUSH(tmp);
1594  }
1595  }
1596  break;
1597 
1598  case 'w':
1599  {
1600  char *s0 = s;
1601  while (len > 0 && s < send) {
1602  if (*s & 0x80) {
1603  s++;
1604  }
1605  else {
1606  s++;
1607  UNPACK_PUSH(rb_integer_unpack(s0, s-s0, 1, 1, INTEGER_PACK_BIG_ENDIAN));
1608  len--;
1609  s0 = s;
1610  }
1611  }
1612  }
1613  break;
1614 
1615  default:
1616  unknown_directive("unpack", type, fmt);
1617  break;
1618  }
1619  }
1620 
1621  return ary;
1622 }
1623 
1624 static VALUE
1625 pack_unpack(rb_execution_context_t *ec, VALUE str, VALUE fmt, VALUE offset)
1626 {
1627  int mode = rb_block_given_p() ? UNPACK_BLOCK : UNPACK_ARRAY;
1628  return pack_unpack_internal(str, fmt, mode, RB_NUM2LONG(offset));
1629 }
1630 
1631 static VALUE
1632 pack_unpack1(rb_execution_context_t *ec, VALUE str, VALUE fmt, VALUE offset)
1633 {
1634  return pack_unpack_internal(str, fmt, UNPACK_1, RB_NUM2LONG(offset));
1635 }
1636 
1637 int
1638 rb_uv_to_utf8(char buf[6], unsigned long uv)
1639 {
1640  if (uv <= 0x7f) {
1641  buf[0] = (char)uv;
1642  return 1;
1643  }
1644  if (uv <= 0x7ff) {
1645  buf[0] = castchar(((uv>>6)&0xff)|0xc0);
1646  buf[1] = castchar((uv&0x3f)|0x80);
1647  return 2;
1648  }
1649  if (uv <= 0xffff) {
1650  buf[0] = castchar(((uv>>12)&0xff)|0xe0);
1651  buf[1] = castchar(((uv>>6)&0x3f)|0x80);
1652  buf[2] = castchar((uv&0x3f)|0x80);
1653  return 3;
1654  }
1655  if (uv <= 0x1fffff) {
1656  buf[0] = castchar(((uv>>18)&0xff)|0xf0);
1657  buf[1] = castchar(((uv>>12)&0x3f)|0x80);
1658  buf[2] = castchar(((uv>>6)&0x3f)|0x80);
1659  buf[3] = castchar((uv&0x3f)|0x80);
1660  return 4;
1661  }
1662  if (uv <= 0x3ffffff) {
1663  buf[0] = castchar(((uv>>24)&0xff)|0xf8);
1664  buf[1] = castchar(((uv>>18)&0x3f)|0x80);
1665  buf[2] = castchar(((uv>>12)&0x3f)|0x80);
1666  buf[3] = castchar(((uv>>6)&0x3f)|0x80);
1667  buf[4] = castchar((uv&0x3f)|0x80);
1668  return 5;
1669  }
1670  if (uv <= 0x7fffffff) {
1671  buf[0] = castchar(((uv>>30)&0xff)|0xfc);
1672  buf[1] = castchar(((uv>>24)&0x3f)|0x80);
1673  buf[2] = castchar(((uv>>18)&0x3f)|0x80);
1674  buf[3] = castchar(((uv>>12)&0x3f)|0x80);
1675  buf[4] = castchar(((uv>>6)&0x3f)|0x80);
1676  buf[5] = castchar((uv&0x3f)|0x80);
1677  return 6;
1678  }
1679  rb_raise(rb_eRangeError, "pack(U): value out of range");
1680 
1682 }
1683 
/*
 * utf8_limits[n - 1] is the smallest value that needs an n-byte sequence.
 * utf8_to_uv() rejects an n-byte sequence that decodes to less than this
 * entry as a "redundant UTF-8 sequence".
 */
static const unsigned long utf8_limits[] = {
    0UL,        /* 1 byte */
    1UL << 7,   /* 2 bytes: 0x80 */
    1UL << 11,  /* 3 bytes: 0x800 */
    1UL << 16,  /* 4 bytes: 0x10000 */
    1UL << 21,  /* 5 bytes: 0x200000 */
    1UL << 26,  /* 6 bytes: 0x4000000 */
    1UL << 31,  /* 7 bytes: 0x80000000 */
};
1693 
1694 static unsigned long
1695 utf8_to_uv(const char *p, long *lenp)
1696 {
1697  int c = *p++ & 0xff;
1698  unsigned long uv = c;
1699  long n;
1700 
1701  if (!(uv & 0x80)) {
1702  *lenp = 1;
1703  return uv;
1704  }
1705  if (!(uv & 0x40)) {
1706  *lenp = 1;
1707  rb_raise(rb_eArgError, "malformed UTF-8 character");
1708  }
1709 
1710  if (!(uv & 0x20)) { n = 2; uv &= 0x1f; }
1711  else if (!(uv & 0x10)) { n = 3; uv &= 0x0f; }
1712  else if (!(uv & 0x08)) { n = 4; uv &= 0x07; }
1713  else if (!(uv & 0x04)) { n = 5; uv &= 0x03; }
1714  else if (!(uv & 0x02)) { n = 6; uv &= 0x01; }
1715  else {
1716  *lenp = 1;
1717  rb_raise(rb_eArgError, "malformed UTF-8 character");
1718  }
1719  if (n > *lenp) {
1720  rb_raise(rb_eArgError, "malformed UTF-8 character (expected %ld bytes, given %ld bytes)",
1721  n, *lenp);
1722  }
1723  *lenp = n--;
1724  if (n != 0) {
1725  while (n--) {
1726  c = *p++ & 0xff;
1727  if ((c & 0xc0) != 0x80) {
1728  *lenp -= n + 1;
1729  rb_raise(rb_eArgError, "malformed UTF-8 character");
1730  }
1731  else {
1732  c &= 0x3f;
1733  uv = uv << 6 | c;
1734  }
1735  }
1736  }
1737  n = *lenp - 1;
1738  if (uv < utf8_limits[n]) {
1739  rb_raise(rb_eArgError, "redundant UTF-8 sequence");
1740  }
1741  return uv;
1742 }
1743 
1744 #include "pack.rbinc"
1745 
1746 void
1747 Init_pack(void)
1748 {
1749  id_associated = rb_make_internal_id();
1750 }
int rb_block_given_p(void)
Determines if the current method is given a block.
Definition: eval.c:854
#define ENC_CODERANGE_7BIT
Old name of RUBY_ENC_CODERANGE_7BIT.
Definition: coderange.h:180
#define ENC_CODERANGE_VALID
Old name of RUBY_ENC_CODERANGE_VALID.
Definition: coderange.h:181
#define ISSPACE
Old name of rb_isspace.
Definition: ctype.h:88
#define RFLOAT_VALUE
Old name of rb_float_value.
Definition: double.h:28
#define T_STRING
Old name of RUBY_T_STRING.
Definition: value_type.h:78
#define ULONG2NUM
Old name of RB_ULONG2NUM.
Definition: long.h:60
#define UNREACHABLE_RETURN
Old name of RBIMPL_UNREACHABLE_RETURN.
Definition: assume.h:31
#define STRTOUL
Old name of ruby_strtoul.
Definition: ctype.h:104
#define ISDIGIT
Old name of rb_isdigit.
Definition: ctype.h:93
#define ISALPHA
Old name of rb_isalpha.
Definition: ctype.h:92
#define ISASCII
Old name of rb_isascii.
Definition: ctype.h:85
#define Qnil
Old name of RUBY_Qnil.
#define Qfalse
Old name of RUBY_Qfalse.
#define NIL_P
Old name of RB_NIL_P.
#define DBL2NUM
Old name of rb_float_new.
Definition: double.h:29
#define ISPRINT
Old name of rb_isprint.
Definition: ctype.h:86
#define NUM2LONG
Old name of RB_NUM2LONG.
Definition: long.h:51
#define ENCODING_CODERANGE_SET(obj, encindex, cr)
Old name of RB_ENCODING_CODERANGE_SET.
Definition: coderange.h:189
void rb_raise(VALUE exc, const char *fmt,...)
Exception entry point.
Definition: error.c:3025
void rb_bug(const char *fmt,...)
Interpreter panic switch.
Definition: error.c:802
VALUE rb_eRangeError
RangeError exception.
Definition: error.c:1103
VALUE rb_eTypeError
TypeError exception.
Definition: error.c:1099
VALUE rb_eRuntimeError
RuntimeError exception.
Definition: error.c:1097
VALUE rb_eArgError
ArgumentError exception.
Definition: error.c:1100
void rb_warning(const char *fmt,...)
Issues a warning.
Definition: error.c:449
VALUE rb_to_float(VALUE val)
Identical to rb_check_to_float(), except it raises on error.
Definition: object.c:3470
VALUE rb_to_int(VALUE val)
Identical to rb_check_to_int(), except it raises in case of conversion mismatch.
Definition: object.c:2998
int rb_utf8_encindex(void)
Identical to rb_utf8_encoding(), except it returns the encoding's index instead of the encoding itself.
Definition: encoding.c:1533
int rb_ascii8bit_encindex(void)
Identical to rb_ascii8bit_encoding(), except it returns the encoding's index instead of the encoding itself.
Definition: encoding.c:1521
void rb_enc_set_index(VALUE obj, int encindex)
Destructively assigns an encoding (via its index) to an object.
Definition: encoding.c:1030
int rb_usascii_encindex(void)
Identical to rb_usascii_encoding(), except it returns the encoding's index instead of the encoding itself.
Definition: encoding.c:1545
Defines RBIMPL_HAS_BUILTIN.
VALUE rb_ary_new(void)
Allocates a new, empty array.
Definition: array.c:750
VALUE rb_ary_push(VALUE ary, VALUE elem)
Special case of rb_ary_cat() that it adds only one element.
Definition: array.c:1308
int rb_integer_pack(VALUE val, void *words, size_t numwords, size_t wordsize, size_t nails, int flags)
Exports an integer into a buffer.
Definition: bignum.c:3559
size_t rb_absint_numwords(VALUE val, size_t word_numbits, size_t *nlz_bits_ret)
Calculates the number of words needed represent the absolute value of the passed integer.
Definition: bignum.c:3393
VALUE rb_integer_unpack(const void *words, size_t numwords, size_t wordsize, size_t nails, int flags)
Import an integer from a buffer.
Definition: bignum.c:3645
#define INTEGER_PACK_LITTLE_ENDIAN
Little endian combination.
Definition: bignum.h:567
#define INTEGER_PACK_BIG_ENDIAN
Big endian combination.
Definition: bignum.h:572
int rb_uv_to_utf8(char buf[6], unsigned long uv)
Encodes a Unicode codepoint into its UTF-8 representation.
Definition: pack.c:1638
#define INTEGER_PACK_2COMP
Uses 2's complement representation.
Definition: bignum.h:549
VALUE rb_str_buf_cat(VALUE, const char *, long)
Just another name of rb_str_cat.
void rb_str_modify(VALUE str)
Declares that the string is about to be modified.
Definition: string.c:2459
VALUE rb_usascii_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "US ASCII" encoding.
Definition: string.c:924
void rb_str_set_len(VALUE str, long len)
Overwrites the length of the string.
Definition: string.c:3039
VALUE rb_str_new(const char *ptr, long len)
Allocates an instance of rb_cString.
Definition: string.c:918
VALUE rb_str_buf_new(long capa)
Allocates a "string buffer".
Definition: string.c:1506
VALUE rb_obj_as_string(VALUE obj)
Try converting an object to its stringised representation using its to_s method, if any.
Definition: string.c:1657
VALUE rb_ivar_set(VALUE obj, ID name, VALUE val)
Identical to rb_iv_set(), except it accepts the name as an ID instead of a C string.
Definition: variable.c:1575
const signed char ruby_digit36_to_number_table[]
Character to number mapping like 'a'->10, 'b'->11, etc.
Definition: util.c:76
#define RB_NUM2LONG
Just another name of rb_num2long_inline.
Definition: long.h:57
VALUE type(ANYARGS)
ANYARGS-ed function type.
Definition: cxxanyargs.hpp:56
#define RARRAY_LEN
Just another name of rb_array_len.
Definition: rarray.h:68
#define RARRAY_CONST_PTR
Just another name of rb_array_const_ptr.
Definition: rarray.h:69
#define StringValue(v)
Ensures that the parameter object is a String.
Definition: rstring.h:72
#define StringValuePtr(v)
Identical to StringValue, except it returns a char*.
Definition: rstring.h:82
static char * RSTRING_PTR(VALUE str)
Queries the contents pointer of the string.
Definition: rstring.h:497
static long RSTRING_LEN(VALUE str)
Queries the length of the string.
Definition: rstring.h:483
const char * rb_obj_classname(VALUE obj)
Queries the name of the class of the passed object.
Definition: variable.c:309
intptr_t SIGNED_VALUE
A signed integer type that has the same width with VALUE.
Definition: value.h:63
uintptr_t ID
Type that represents a Ruby identifier such as a variable name.
Definition: value.h:52
uintptr_t VALUE
Type that represents a Ruby object.
Definition: value.h:40
static bool RB_TYPE_P(VALUE obj, enum ruby_value_type t)
Queries if the given object is of given type.
Definition: value_type.h:375