Ruby  3.1.4p223 (2023-03-30 revision HEAD)
re.c
1 /**********************************************************************
2 
3  re.c -
4 
5  $Author$
6  created at: Mon Aug 9 18:24:49 JST 1993
7 
8  Copyright (C) 1993-2007 Yukihiro Matsumoto
9 
10 **********************************************************************/
11 
12 #include "ruby/internal/config.h"
13 
14 #include <ctype.h>
15 
16 #include "encindex.h"
17 #include "internal.h"
18 #include "internal/hash.h"
19 #include "internal/imemo.h"
20 #include "internal/re.h"
21 #include "internal/string.h"
22 #include "internal/variable.h"
23 #include "regint.h"
24 #include "ruby/encoding.h"
25 #include "ruby/re.h"
26 #include "ruby/util.h"
27 
29 
/* Fixed-size character buffer sized for an Onigmo error message. */
30 typedef char onig_errmsg_buffer[ONIG_MAX_ERROR_MESSAGE_LEN];
/* Bounded copy of msg into err, limited to the error-message buffer size. */
31 #define errcpy(err, msg) strlcpy((err), (msg), ONIG_MAX_ERROR_MESSAGE_LEN)
32 
/* Start/end position of capture group `no`; both expand to an expression
 * that requires a pointer variable named `regs` to be in scope. */
33 #define BEG(no) (regs->beg[(no)])
34 #define END(no) (regs->end[(no)])
35 
/*
 * Byte-to-byte translation table used by rb_memcicmp(): every entry maps to
 * itself except 'A'..'Z', which map to 'a'..'z'.  Only built when the
 * execution character set is ASCII; otherwise compilation stops with #error.
 */
36 #if 'a' == 97 /* it's ascii */
37 static const char casetable[] = {
38  '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
39  '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
40  '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
41  '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
42  /* ' ' '!' '"' '#' '$' '%' '&' ''' */
43  '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
44  /* '(' ')' '*' '+' ',' '-' '.' '/' */
45  '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
46  /* '0' '1' '2' '3' '4' '5' '6' '7' */
47  '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
48  /* '8' '9' ':' ';' '<' '=' '>' '?' */
49  '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
50  /* '@' 'A' 'B' 'C' 'D' 'E' 'F' 'G' */
51  '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
52  /* 'H' 'I' 'J' 'K' 'L' 'M' 'N' 'O' */
53  '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
54  /* 'P' 'Q' 'R' 'S' 'T' 'U' 'V' 'W' */
55  '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
56  /* 'X' 'Y' 'Z' '[' '\' ']' '^' '_' */
57  '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
58  /* '`' 'a' 'b' 'c' 'd' 'e' 'f' 'g' */
59  '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
60  /* 'h' 'i' 'j' 'k' 'l' 'm' 'n' 'o' */
61  '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
62  /* 'p' 'q' 'r' 's' 't' 'u' 'v' 'w' */
63  '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
64  /* 'x' 'y' 'z' '{' '|' '}' '~' */
65  '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
/* Bytes 0x80..0xFF are left untouched (identity mapping). */
66  '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
67  '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
68  '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
69  '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
70  '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
71  '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
72  '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
73  '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
74  '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
75  '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
76  '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
77  '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
78  '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
79  '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
80  '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
81  '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
82 };
83 #else
84 # error >>> "You lose. You will need a translation table for your character set." <<<
85 #endif
86 
87 int
88 rb_memcicmp(const void *x, const void *y, long len)
89 {
90  const unsigned char *p1 = x, *p2 = y;
91  int tmp;
92 
93  while (len--) {
94  if ((tmp = casetable[(unsigned)*p1++] - casetable[(unsigned)*p2++]))
95  return tmp;
96  }
97  return 0;
98 }
99 
100 #ifdef HAVE_MEMMEM
101 static inline long
102 rb_memsearch_ss(const unsigned char *xs, long m, const unsigned char *ys, long n)
103 {
104  const unsigned char *y;
105 
106  if ((y = memmem(ys, n, xs, m)) != NULL)
107  return y - ys;
108  else
109  return -1;
110 }
111 #else
112 static inline long
113 rb_memsearch_ss(const unsigned char *xs, long m, const unsigned char *ys, long n)
114 {
115  const unsigned char *x = xs, *xe = xs + m;
116  const unsigned char *y = ys, *ye = ys + n;
117 #define VALUE_MAX ((VALUE)~(VALUE)0)
118  VALUE hx, hy, mask = VALUE_MAX >> ((SIZEOF_VALUE - m) * CHAR_BIT);
119 
120  if (m > SIZEOF_VALUE)
121  rb_bug("!!too long pattern string!!");
122 
123  if (!(y = memchr(y, *x, n - m + 1)))
124  return -1;
125 
126  /* Prepare hash value */
127  for (hx = *x++, hy = *y++; x < xe; ++x, ++y) {
128  hx <<= CHAR_BIT;
129  hy <<= CHAR_BIT;
130  hx |= *x;
131  hy |= *y;
132  }
133  /* Searching */
134  while (hx != hy) {
135  if (y == ye)
136  return -1;
137  hy <<= CHAR_BIT;
138  hy |= *y;
139  hy &= mask;
140  y++;
141  }
142  return y - ys - m;
143 }
144 #endif
145 
146 static inline long
147 rb_memsearch_qs(const unsigned char *xs, long m, const unsigned char *ys, long n)
148 {
149  const unsigned char *x = xs, *xe = xs + m;
150  const unsigned char *y = ys;
151  VALUE i, qstable[256];
152 
153  /* Preprocessing */
154  for (i = 0; i < 256; ++i)
155  qstable[i] = m + 1;
156  for (; x < xe; ++x)
157  qstable[*x] = xe - x;
158  /* Searching */
159  for (; y + m <= ys + n; y += *(qstable + y[m])) {
160  if (*xs == *y && memcmp(xs, y, m) == 0)
161  return y - ys;
162  }
163  return -1;
164 }
165 
/*
 * Map the byte sequence starting at x to a slot in the 512-entry shift table
 * used by rb_memsearch_qs_utf8().
 *
 * Lead bytes below 0xC0 or from 0xF5 upwards map to 256 + byte value.
 * Lead bytes in 0xC0..0xF4 are mixed with the following 1, 2 or 3 bytes and
 * the result is reduced to 0..255.
 */
static inline unsigned int
rb_memsearch_qs_utf8_hash(const unsigned char *x)
{
    const unsigned int mix = 8353;
    unsigned int h = *x;
    int trailing, k;

    if (h < 0xC0 || h >= 0xF5)
        return h + 256;

    trailing = (h < 0xE0) ? 1 : (h < 0xF0) ? 2 : 3;
    for (k = 1; k <= trailing; k++)
        h = h * mix + x[k];

    return (unsigned char)h;
}
197 
198 static inline long
199 rb_memsearch_qs_utf8(const unsigned char *xs, long m, const unsigned char *ys, long n)
200 {
201  const unsigned char *x = xs, *xe = xs + m;
202  const unsigned char *y = ys;
203  VALUE i, qstable[512];
204 
205  /* Preprocessing */
206  for (i = 0; i < 512; ++i) {
207  qstable[i] = m + 1;
208  }
209  for (; x < xe; ++x) {
210  qstable[rb_memsearch_qs_utf8_hash(x)] = xe - x;
211  }
212  /* Searching */
213  for (; y + m <= ys + n; y += qstable[rb_memsearch_qs_utf8_hash(y+m)]) {
214  if (*xs == *y && memcmp(xs, y, m) == 0)
215  return y - ys;
216  }
217  return -1;
218 }
219 
/*
 * Search for xs (m bytes) in ys (n bytes), testing only offsets that are a
 * multiple of 2 bytes, so a match can never start in the middle of a
 * 2-byte unit.
 *
 * Returns the byte offset of the first match, or -1.
 */
static inline long
rb_memsearch_wchar(const unsigned char *xs, long m, const unsigned char *ys, long n)
{
    enum { unit = 2 };
    const unsigned char first = *xs;
    const long last_start = n - m;
    long off;

    for (off = 0; off <= last_start; off += unit) {
        const unsigned char *cand = ys + off;

        if (first == *cand && memcmp(xs + 1, cand + 1, m - 1) == 0)
            return off;
    }
    return -1;
}
232 
/*
 * Search for xs (m bytes) in ys (n bytes), testing only offsets that are a
 * multiple of 4 bytes, so a match can never start in the middle of a
 * 4-byte unit.
 *
 * Returns the byte offset of the first match, or -1.
 */
static inline long
rb_memsearch_qchar(const unsigned char *xs, long m, const unsigned char *ys, long n)
{
    enum { unit = 4 };
    const unsigned char lead = *xs;
    const unsigned char *cand = ys;
    long remaining = n - m;

    while (remaining >= 0) {
        if (lead == *cand && memcmp(xs + 1, cand + 1, m - 1) == 0)
            return cand - ys;
        remaining -= unit;
        cand += unit;
    }
    return -1;
}
245 
246 long
247 rb_memsearch(const void *x0, long m, const void *y0, long n, rb_encoding *enc)
248 {
249  const unsigned char *x = x0, *y = y0;
250 
251  if (m > n) return -1;
252  else if (m == n) {
253  return memcmp(x0, y0, m) == 0 ? 0 : -1;
254  }
255  else if (m < 1) {
256  return 0;
257  }
258  else if (m == 1) {
259  const unsigned char *ys = memchr(y, *x, n);
260 
261  if (ys)
262  return ys - y;
263  else
264  return -1;
265  }
266  else if (LIKELY(rb_enc_mbminlen(enc) == 1)) {
267  if (m <= SIZEOF_VALUE) {
268  return rb_memsearch_ss(x0, m, y0, n);
269  }
270  else if (enc == rb_utf8_encoding()){
271  return rb_memsearch_qs_utf8(x0, m, y0, n);
272  }
273  }
274  else if (LIKELY(rb_enc_mbminlen(enc) == 2)) {
275  return rb_memsearch_wchar(x0, m, y0, n);
276  }
277  else if (LIKELY(rb_enc_mbminlen(enc) == 4)) {
278  return rb_memsearch_qchar(x0, m, y0, n);
279  }
280  return rb_memsearch_qs(x0, m, y0, n);
281 }
282 
/* Object flag bits used on Regexp objects in this file. */
283 #define REG_LITERAL FL_USER5
284 #define REG_ENCODING_NONE FL_USER6
285 
286 #define KCODE_FIXED FL_USER4
287 
/* The three Onigmo option bits that have a one-letter form (i, m, x). */
288 #define ARG_REG_OPTION_MASK \
289  (ONIG_OPTION_IGNORECASE|ONIG_OPTION_MULTILINE|ONIG_OPTION_EXTEND)
/* Extra option values reported by rb_char_to_option_kcode() for the
 * encoding letters ('e', 's', 'u' -> FIXED; 'n' -> NONE). */
290 #define ARG_ENCODING_FIXED 16
291 #define ARG_ENCODING_NONE 32
292 
293 static int
294 char_to_option(int c)
295 {
296  int val;
297 
298  switch (c) {
299  case 'i':
300  val = ONIG_OPTION_IGNORECASE;
301  break;
302  case 'x':
303  val = ONIG_OPTION_EXTEND;
304  break;
305  case 'm':
306  val = ONIG_OPTION_MULTILINE;
307  break;
308  default:
309  val = 0;
310  break;
311  }
312  return val;
313 }
314 
315 enum { OPTBUF_SIZE = 4 };
316 
317 static char *
318 option_to_str(char str[OPTBUF_SIZE], int options)
319 {
320  char *p = str;
321  if (options & ONIG_OPTION_MULTILINE) *p++ = 'm';
322  if (options & ONIG_OPTION_IGNORECASE) *p++ = 'i';
323  if (options & ONIG_OPTION_EXTEND) *p++ = 'x';
324  *p = 0;
325  return str;
326 }
327 
328 extern int
329 rb_char_to_option_kcode(int c, int *option, int *kcode)
330 {
331  *option = 0;
332 
333  switch (c) {
334  case 'n':
335  *kcode = rb_ascii8bit_encindex();
336  return (*option = ARG_ENCODING_NONE);
337  case 'e':
338  *kcode = ENCINDEX_EUC_JP;
339  break;
340  case 's':
341  *kcode = ENCINDEX_Windows_31J;
342  break;
343  case 'u':
344  *kcode = rb_utf8_encindex();
345  break;
346  default:
347  *kcode = -1;
348  return (*option = char_to_option(c));
349  }
350  *option = ARG_ENCODING_FIXED;
351  return 1;
352 }
353 
354 static void
355 rb_reg_check(VALUE re)
356 {
357  if (!RREGEXP_PTR(re) || !RREGEXP_SRC(re) || !RREGEXP_SRC_PTR(re)) {
358  rb_raise(rb_eTypeError, "uninitialized Regexp");
359  }
360 }
361 
/*
 * Append a displayable form of the pattern source s (len bytes, encoded in
 * enc) to str.
 *
 * A first pass decides whether the source can be copied verbatim: this
 * requires an ASCII-compatible encoding with a clean code range, every ASCII
 * character to be printable and different from term, and every non-ASCII
 * character to occur only when enc == resenc.
 *
 * Otherwise a second pass copies the source piece by piece:
 *   - a backslash and the character after it are copied untouched;
 *   - invalid bytes are written as \xHH;
 *   - a valid non-ASCII character is written through
 *     rb_str_buf_cat_escaped_char() when resenc is non-NULL, raw otherwise;
 *   - term gets a backslash in front of it;
 *   - non-printable, non-space ASCII is written as \xHH;
 *   - everything else is copied as is.
 */
362 static void
363 rb_reg_expr_str(VALUE str, const char *s, long len,
364  rb_encoding *enc, rb_encoding *resenc, int term)
365 {
366  const char *p, *pend;
367  int cr = ENC_CODERANGE_UNKNOWN;
368  int need_escape = 0;
369  int c, clen;
370 
371  p = s; pend = p + len;
372  rb_str_coderange_scan_restartable(p, pend, enc, &cr);
/* Pass 1: look for anything that forces the escaping path. */
373  if (rb_enc_asciicompat(enc) && ENC_CODERANGE_CLEAN_P(cr)) {
374  while (p < pend) {
375  c = rb_enc_ascget(p, pend, &clen, enc);
376  if (c == -1) {
/* Not an ASCII character: acceptable only if no conversion is involved. */
377  if (enc == resenc) {
378  p += mbclen(p, pend, enc);
379  }
380  else {
381  need_escape = 1;
382  break;
383  }
384  }
385  else if (c != term && rb_enc_isprint(c, enc)) {
386  p += clen;
387  }
388  else {
389  need_escape = 1;
390  break;
391  }
392  }
393  }
394  else {
395  need_escape = 1;
396  }
397 
398  if (!need_escape) {
399  rb_str_buf_cat(str, s, len);
400  }
401  else {
/* Pass 2: restart from the beginning and escape as needed. */
402  int unicode_p = rb_enc_unicode_p(enc);
403  p = s;
404  while (p<pend) {
405  c = rb_enc_ascget(p, pend, &clen, enc);
406  if (c == '\\' && p+clen < pend) {
/* Keep an existing escape sequence (backslash + next character) intact. */
407  int n = clen + mbclen(p+clen, pend, enc);
408  rb_str_buf_cat(str, p, n);
409  p += n;
410  continue;
411  }
412  else if (c == -1) {
413  clen = rb_enc_precise_mbclen(p, pend, enc);
414  if (!MBCLEN_CHARFOUND_P(clen)) {
/* Invalid byte: emit it alone as \xHH via the shared label below. */
415  c = (unsigned char)*p;
416  clen = 1;
417  goto hex;
418  }
419  if (resenc) {
420  unsigned int c = rb_enc_mbc_to_codepoint(p, pend, enc);
421  rb_str_buf_cat_escaped_char(str, c, unicode_p);
422  }
423  else {
424  clen = MBCLEN_CHARFOUND_LEN(clen);
425  rb_str_buf_cat(str, p, clen);
426  }
427  }
428  else if (c == term) {
/* The local `c` below shadows the outer one and only holds the backslash. */
429  char c = '\\';
430  rb_str_buf_cat(str, &c, 1);
431  rb_str_buf_cat(str, p, clen);
432  }
433  else if (rb_enc_isprint(c, enc)) {
434  rb_str_buf_cat(str, p, clen);
435  }
436  else if (!rb_enc_isspace(c, enc)) {
437  char b[8];
438 
/* Also the jump target for invalid bytes found above. */
439  hex:
440  snprintf(b, sizeof(b), "\\x%02X", c);
441  rb_str_buf_cat(str, b, 4);
442  }
443  else {
444  rb_str_buf_cat(str, p, clen);
445  }
446  p += clen;
447  }
448  }
449 }
450 
451 static VALUE
452 rb_reg_desc(const char *s, long len, VALUE re)
453 {
454  rb_encoding *enc = rb_enc_get(re);
455  VALUE str = rb_str_buf_new2("/");
457  if (resenc == NULL) resenc = rb_default_external_encoding();
458 
459  if (re && rb_enc_asciicompat(enc)) {
460  rb_enc_copy(str, re);
461  }
462  else {
464  }
465  rb_reg_expr_str(str, s, len, enc, resenc, '/');
466  rb_str_buf_cat2(str, "/");
467  if (re) {
468  char opts[OPTBUF_SIZE];
469  rb_reg_check(re);
470  if (*option_to_str(opts, RREGEXP_PTR(re)->options))
471  rb_str_buf_cat2(str, opts);
472  if (RBASIC(re)->flags & REG_ENCODING_NONE)
473  rb_str_buf_cat2(str, "n");
474  }
475  return str;
476 }
477 
478 
479 /*
480  * call-seq:
481  * rxp.source -> str
482  *
483  * Returns the original string of the pattern.
484  *
485  * /ab+c/ix.source #=> "ab+c"
486  *
487  * Note that escape sequences are retained as is.
488  *
489  * /\x20\+/.source #=> "\\x20\\+"
490  *
491  */
492 
493 static VALUE
494 rb_reg_source(VALUE re)
495 {
496  VALUE str;
497 
498  rb_reg_check(re);
499  str = rb_str_dup(RREGEXP_SRC(re));
500  return str;
501 }
502 
503 /*
504  * call-seq:
505  * rxp.inspect -> string
506  *
507  * Produce a nicely formatted string-version of _rxp_. Perhaps surprisingly,
508  * <code>#inspect</code> actually produces the more natural version of
509  * the string than <code>#to_s</code>.
510  *
511  * /ab+c/ix.inspect #=> "/ab+c/ix"
512  *
513  */
514 
515 static VALUE
516 rb_reg_inspect(VALUE re)
517 {
518  if (!RREGEXP_PTR(re) || !RREGEXP_SRC(re) || !RREGEXP_SRC_PTR(re)) {
519  return rb_any_to_s(re);
520  }
521  return rb_reg_desc(RREGEXP_SRC_PTR(re), RREGEXP_SRC_LEN(re), re);
522 }
523 
524 static VALUE rb_reg_str_with_term(VALUE re, int term);
525 
526 /*
527  * call-seq:
528  * rxp.to_s -> str
529  *
530  * Returns a string containing the regular expression and its options (using the
531  * <code>(?opts:source)</code> notation). This string can be fed back in to
532  * Regexp::new to create a regular expression with the same semantics as the
533  * original. (However, <code>Regexp#==</code> may not return true
534  * when comparing the two, as the source of the regular expression
535  * itself may differ, as the example shows). Regexp#inspect produces
536  * a generally more readable version of <i>rxp</i>.
537  *
538  * r1 = /ab+c/ix #=> /ab+c/ix
539  * s1 = r1.to_s #=> "(?ix-m:ab+c)"
540  * r2 = Regexp.new(s1) #=> /(?ix-m:ab+c)/
541  * r1 == r2 #=> false
542  * r1.source #=> "ab+c"
543  * r2.source #=> "(?ix-m:ab+c)"
544  */
545 
546 static VALUE
547 rb_reg_to_s(VALUE re)
548 {
549  return rb_reg_str_with_term(re, '/');
550 }
551 
552 static VALUE
553 rb_reg_str_with_term(VALUE re, int term)
554 {
555  int options, opt;
556  const int embeddable = ONIG_OPTION_MULTILINE|ONIG_OPTION_IGNORECASE|ONIG_OPTION_EXTEND;
557  long len;
558  const UChar* ptr;
559  VALUE str = rb_str_buf_new2("(?");
560  char optbuf[OPTBUF_SIZE + 1]; /* for '-' */
561  rb_encoding *enc = rb_enc_get(re);
562 
563  rb_reg_check(re);
564 
565  rb_enc_copy(str, re);
566  options = RREGEXP_PTR(re)->options;
567  ptr = (UChar*)RREGEXP_SRC_PTR(re);
568  len = RREGEXP_SRC_LEN(re);
569  again:
570  if (len >= 4 && ptr[0] == '(' && ptr[1] == '?') {
571  int err = 1;
572  ptr += 2;
573  if ((len -= 2) > 0) {
574  do {
575  opt = char_to_option((int )*ptr);
576  if (opt != 0) {
577  options |= opt;
578  }
579  else {
580  break;
581  }
582  ++ptr;
583  } while (--len > 0);
584  }
585  if (len > 1 && *ptr == '-') {
586  ++ptr;
587  --len;
588  do {
589  opt = char_to_option((int )*ptr);
590  if (opt != 0) {
591  options &= ~opt;
592  }
593  else {
594  break;
595  }
596  ++ptr;
597  } while (--len > 0);
598  }
599  if (*ptr == ')') {
600  --len;
601  ++ptr;
602  goto again;
603  }
604  if (*ptr == ':' && ptr[len-1] == ')') {
605  Regexp *rp;
606  VALUE verbose = ruby_verbose;
608 
609  ++ptr;
610  len -= 2;
611  err = onig_new(&rp, ptr, ptr + len, options,
612  enc, OnigDefaultSyntax, NULL);
613  onig_free(rp);
614  ruby_verbose = verbose;
615  }
616  if (err) {
617  options = RREGEXP_PTR(re)->options;
618  ptr = (UChar*)RREGEXP_SRC_PTR(re);
619  len = RREGEXP_SRC_LEN(re);
620  }
621  }
622 
623  if (*option_to_str(optbuf, options)) rb_str_buf_cat2(str, optbuf);
624 
625  if ((options & embeddable) != embeddable) {
626  optbuf[0] = '-';
627  option_to_str(optbuf + 1, ~options);
628  rb_str_buf_cat2(str, optbuf);
629  }
630 
631  rb_str_buf_cat2(str, ":");
632  if (rb_enc_asciicompat(enc)) {
633  rb_reg_expr_str(str, (char*)ptr, len, enc, NULL, term);
634  rb_str_buf_cat2(str, ")");
635  }
636  else {
637  const char *s, *e;
638  char *paren;
639  ptrdiff_t n;
640  rb_str_buf_cat2(str, ")");
642  str = rb_str_encode(str, rb_enc_from_encoding(enc), 0, Qnil);
643 
644  /* backup encoded ")" to paren */
645  s = RSTRING_PTR(str);
646  e = RSTRING_END(str);
647  s = rb_enc_left_char_head(s, e-1, e, enc);
648  n = e - s;
649  paren = ALLOCA_N(char, n);
650  memcpy(paren, s, n);
651  rb_str_resize(str, RSTRING_LEN(str) - n);
652 
653  rb_reg_expr_str(str, (char*)ptr, len, enc, NULL, term);
654  rb_str_buf_cat(str, paren, n);
655  }
656  rb_enc_copy(str, re);
657 
658  return str;
659 }
660 
661 NORETURN(static void rb_reg_raise(const char *s, long len, const char *err, VALUE re));
662 
663 static void
664 rb_reg_raise(const char *s, long len, const char *err, VALUE re)
665 {
666  VALUE desc = rb_reg_desc(s, len, re);
667 
668  rb_raise(rb_eRegexpError, "%s: %"PRIsVALUE, err, desc);
669 }
670 
671 static VALUE
672 rb_enc_reg_error_desc(const char *s, long len, rb_encoding *enc, int options, const char *err)
673 {
674  char opts[OPTBUF_SIZE + 1]; /* for '/' */
675  VALUE desc = rb_str_buf_new2(err);
677  if (resenc == NULL) resenc = rb_default_external_encoding();
678 
679  rb_enc_associate(desc, enc);
680  rb_str_buf_cat2(desc, ": /");
681  rb_reg_expr_str(desc, s, len, enc, resenc, '/');
682  opts[0] = '/';
683  option_to_str(opts + 1, options);
684  rb_str_buf_cat2(desc, opts);
685  return rb_exc_new3(rb_eRegexpError, desc);
686 }
687 
688 NORETURN(static void rb_enc_reg_raise(const char *s, long len, rb_encoding *enc, int options, const char *err));
689 
690 static void
691 rb_enc_reg_raise(const char *s, long len, rb_encoding *enc, int options, const char *err)
692 {
693  rb_exc_raise(rb_enc_reg_error_desc(s, len, enc, options, err));
694 }
695 
696 static VALUE
697 rb_reg_error_desc(VALUE str, int options, const char *err)
698 {
699  return rb_enc_reg_error_desc(RSTRING_PTR(str), RSTRING_LEN(str),
700  rb_enc_get(str), options, err);
701 }
702 
703 NORETURN(static void rb_reg_raise_str(VALUE str, int options, const char *err));
704 
705 static void
706 rb_reg_raise_str(VALUE str, int options, const char *err)
707 {
708  rb_exc_raise(rb_reg_error_desc(str, options, err));
709 }
710 
711 
712 /*
713  * call-seq:
714  * rxp.casefold? -> true or false
715  *
716  * Returns the value of the case-insensitive flag.
717  *
718  * /a/.casefold? #=> false
719  * /a/i.casefold? #=> true
720  * /(?i:a)/.casefold? #=> false
721  */
722 
723 static VALUE
724 rb_reg_casefold_p(VALUE re)
725 {
726  rb_reg_check(re);
727  return RBOOL(RREGEXP_PTR(re)->options & ONIG_OPTION_IGNORECASE);
728 }
729 
730 
731 /*
732  * call-seq:
733  * rxp.options -> integer
734  *
735  * Returns the set of bits corresponding to the options used when
736  * creating this Regexp (see Regexp::new for details). Note that
737  * additional bits may be set in the returned options: these are used
738  * internally by the regular expression code. These extra bits are
739  * ignored if the options are passed to Regexp::new.
740  *
741  * Regexp::IGNORECASE #=> 1
742  * Regexp::EXTENDED #=> 2
743  * Regexp::MULTILINE #=> 4
744  *
745  * /cat/.options #=> 0
746  * /cat/ix.options #=> 3
747  * Regexp.new('cat', true).options #=> 1
748  * /\xa1\xa2/e.options #=> 16
749  *
750  * r = /cat/ix
751  * Regexp.new(r.source, r.options) #=> /cat/ix
752  */
753 
754 static VALUE
755 rb_reg_options_m(VALUE re)
756 {
757  int options = rb_reg_options(re);
758  return INT2NUM(options);
759 }
760 
761 static int
762 reg_names_iter(const OnigUChar *name, const OnigUChar *name_end,
763  int back_num, int *back_refs, OnigRegex regex, void *arg)
764 {
765  VALUE ary = (VALUE)arg;
766  rb_ary_push(ary, rb_enc_str_new((const char *)name, name_end-name, regex->enc));
767  return 0;
768 }
769 
770 /*
771  * call-seq:
772  * rxp.names -> [name1, name2, ...]
773  *
774  * Returns a list of names of captures as an array of strings.
775  *
776  * /(?<foo>.)(?<bar>.)(?<baz>.)/.names
777  * #=> ["foo", "bar", "baz"]
778  *
779  * /(?<foo>.)(?<foo>.)/.names
780  * #=> ["foo"]
781  *
782  * /(.)(.)/.names
783  * #=> []
784  */
785 
786 static VALUE
787 rb_reg_names(VALUE re)
788 {
789  VALUE ary;
790  rb_reg_check(re);
791  ary = rb_ary_new_capa(onig_number_of_names(RREGEXP_PTR(re)));
792  onig_foreach_name(RREGEXP_PTR(re), reg_names_iter, (void*)ary);
793  return ary;
794 }
795 
796 static int
797 reg_named_captures_iter(const OnigUChar *name, const OnigUChar *name_end,
798  int back_num, int *back_refs, OnigRegex regex, void *arg)
799 {
800  VALUE hash = (VALUE)arg;
801  VALUE ary = rb_ary_new2(back_num);
802  int i;
803 
804  for (i = 0; i < back_num; i++)
805  rb_ary_store(ary, i, INT2NUM(back_refs[i]));
806 
807  rb_hash_aset(hash, rb_str_new((const char*)name, name_end-name),ary);
808 
809  return 0;
810 }
811 
812 /*
813  * call-seq:
814  * rxp.named_captures -> hash
815  *
816  * Returns a hash representing information about named captures of <i>rxp</i>.
817  *
818  * A key of the hash is a name of the named captures.
819  * A value of the hash is an array which is list of indexes of corresponding
820  * named captures.
821  *
822  * /(?<foo>.)(?<bar>.)/.named_captures
823  * #=> {"foo"=>[1], "bar"=>[2]}
824  *
825  * /(?<foo>.)(?<foo>.)/.named_captures
826  * #=> {"foo"=>[1, 2]}
827  *
828  * If there are no named captures, an empty hash is returned.
829  *
830  * /(.)(.)/.named_captures
831  * #=> {}
832  */
833 
834 static VALUE
835 rb_reg_named_captures(VALUE re)
836 {
837  regex_t *reg = (rb_reg_check(re), RREGEXP_PTR(re));
838  VALUE hash = rb_hash_new_with_size(onig_number_of_names(reg));
839  onig_foreach_name(reg, reg_named_captures_iter, (void*)hash);
840  return hash;
841 }
842 
843 static int
844 onig_new_with_source(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
845  OnigOptionType option, OnigEncoding enc, const OnigSyntaxType* syntax,
846  OnigErrorInfo* einfo, const char *sourcefile, int sourceline)
847 {
848  int r;
849 
850  *reg = (regex_t* )malloc(sizeof(regex_t));
851  if (IS_NULL(*reg)) return ONIGERR_MEMORY;
852 
853  r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
854  if (r) goto err;
855 
856  r = onig_compile_ruby(*reg, pattern, pattern_end, einfo, sourcefile, sourceline);
857  if (r) {
858  err:
859  onig_free(*reg);
860  *reg = NULL;
861  }
862  return r;
863 }
864 
865 static Regexp*
866 make_regexp(const char *s, long len, rb_encoding *enc, int flags, onig_errmsg_buffer err,
867  const char *sourcefile, int sourceline)
868 {
869  Regexp *rp;
870  int r;
871  OnigErrorInfo einfo;
872 
873  /* Handle escaped characters first. */
874 
875  /* Build a copy of the string (in dest) with the
876  escaped characters translated, and generate the regex
877  from that.
878  */
879 
880  r = onig_new_with_source(&rp, (UChar*)s, (UChar*)(s + len), flags,
881  enc, OnigDefaultSyntax, &einfo, sourcefile, sourceline);
882  if (r) {
883  onig_error_code_to_str((UChar*)err, r, &einfo);
884  return 0;
885  }
886  return rp;
887 }
888 
889 
890 /*
891  * Document-class: MatchData
892  *
893  * MatchData encapsulates the result of matching a Regexp against
894  * string. It is returned by Regexp#match and String#match, and also
895  * stored in a global variable returned by Regexp.last_match.
896  *
897  * Usage:
898  *
899  * url = 'https://docs.ruby-lang.org/en/2.5.0/MatchData.html'
900  * m = url.match(/(\d\.?)+/) # => #<MatchData "2.5.0" 1:"0">
901  * m.string # => "https://docs.ruby-lang.org/en/2.5.0/MatchData.html"
902  * m.regexp # => /(\d\.?)+/
903  * # entire matched substring:
904  * m[0] # => "2.5.0"
905  *
906  * # Working with unnamed captures
907  * m = url.match(%r{([^/]+)/([^/]+)\.html$})
908  * m.captures # => ["2.5.0", "MatchData"]
909  * m[1] # => "2.5.0"
910  * m.values_at(1, 2) # => ["2.5.0", "MatchData"]
911  *
912  * # Working with named captures
913  * m = url.match(%r{(?<version>[^/]+)/(?<module>[^/]+)\.html$})
914  * m.captures # => ["2.5.0", "MatchData"]
915  * m.named_captures # => {"version"=>"2.5.0", "module"=>"MatchData"}
916  * m[:version] # => "2.5.0"
917  * m.values_at(:version, :module)
918  * # => ["2.5.0", "MatchData"]
919  * # Numerical indexes are working, too
920  * m[1] # => "2.5.0"
921  * m.values_at(1, 2) # => ["2.5.0", "MatchData"]
922  *
923  * == Global variables equivalence
924  *
925  * Parts of last MatchData (returned by Regexp.last_match) are also
926  * aliased as global variables:
927  *
928  * * <code>$~</code> is Regexp.last_match;
929  * * <code>$&</code> is Regexp.last_match<code>[ 0 ]</code>;
930  * * <code>$1</code>, <code>$2</code>, and so on are
931  * Regexp.last_match<code>[ i ]</code> (captures by number);
932  * * <code>$`</code> is Regexp.last_match<code>.pre_match</code>;
933  * * <code>$'</code> is Regexp.last_match<code>.post_match</code>;
934  * * <code>$+</code> is Regexp.last_match<code>[ -1 ]</code> (the last capture).
935  *
936  * See also "Special global variables" section in Regexp documentation.
937  */
938 
940 
941 static VALUE
942 match_alloc(VALUE klass)
943 {
944  NEWOBJ_OF(match, struct RMatch, klass, T_MATCH);
945 
946  match->str = 0;
947  match->rmatch = 0;
948  match->regexp = 0;
949  match->rmatch = ZALLOC(struct rmatch);
950 
951  return (VALUE)match;
952 }
953 
954 int
955 rb_reg_region_copy(struct re_registers *to, const struct re_registers *from)
956 {
957  onig_region_copy(to, (OnigRegion *)from);
958  if (to->allocated) return 0;
959  rb_gc();
960  onig_region_copy(to, (OnigRegion *)from);
961  if (to->allocated) return 0;
962  return ONIGERR_MEMORY;
963 }
964 
/* A byte position together with the character position computed for it. */
typedef struct {
    long byte_pos;
    long char_pos;
} pair_t;

/*
 * qsort()/bsearch() comparator ordering pair_t entries by byte_pos.
 * Where long is wider than int the result is clamped to -1/0/1 so that the
 * difference is not truncated by the conversion to int.
 */
static int
pair_byte_cmp(const void *pair1, const void *pair2)
{
    const pair_t *lhs = (pair_t*)pair1;
    const pair_t *rhs = (pair_t*)pair2;
    long diff = lhs->byte_pos - rhs->byte_pos;
#if SIZEOF_LONG > SIZEOF_INT
    if (diff > 0)
        return 1;
    if (diff < 0)
        return -1;
    return 0;
#else
    return (int)diff;
#endif
}
980 
981 static void
982 update_char_offset(VALUE match)
983 {
984  struct rmatch *rm = RMATCH(match)->rmatch;
985  struct re_registers *regs;
986  int i, num_regs, num_pos;
987  long c;
988  char *s, *p, *q;
989  rb_encoding *enc;
990  pair_t *pairs;
991 
993  return;
994 
995  regs = &rm->regs;
996  num_regs = rm->regs.num_regs;
997 
998  if (rm->char_offset_num_allocated < num_regs) {
999  REALLOC_N(rm->char_offset, struct rmatch_offset, num_regs);
1000  rm->char_offset_num_allocated = num_regs;
1001  }
1002 
1003  enc = rb_enc_get(RMATCH(match)->str);
1004  if (rb_enc_mbmaxlen(enc) == 1) {
1005  for (i = 0; i < num_regs; i++) {
1006  rm->char_offset[i].beg = BEG(i);
1007  rm->char_offset[i].end = END(i);
1008  }
1009  return;
1010  }
1011 
1012  pairs = ALLOCA_N(pair_t, num_regs*2);
1013  num_pos = 0;
1014  for (i = 0; i < num_regs; i++) {
1015  if (BEG(i) < 0)
1016  continue;
1017  pairs[num_pos++].byte_pos = BEG(i);
1018  pairs[num_pos++].byte_pos = END(i);
1019  }
1020  qsort(pairs, num_pos, sizeof(pair_t), pair_byte_cmp);
1021 
1022  s = p = RSTRING_PTR(RMATCH(match)->str);
1023  c = 0;
1024  for (i = 0; i < num_pos; i++) {
1025  q = s + pairs[i].byte_pos;
1026  c += rb_enc_strlen(p, q, enc);
1027  pairs[i].char_pos = c;
1028  p = q;
1029  }
1030 
1031  for (i = 0; i < num_regs; i++) {
1032  pair_t key, *found;
1033  if (BEG(i) < 0) {
1034  rm->char_offset[i].beg = -1;
1035  rm->char_offset[i].end = -1;
1036  continue;
1037  }
1038 
1039  key.byte_pos = BEG(i);
1040  found = bsearch(&key, pairs, num_pos, sizeof(pair_t), pair_byte_cmp);
1041  rm->char_offset[i].beg = found->char_pos;
1042 
1043  key.byte_pos = END(i);
1044  found = bsearch(&key, pairs, num_pos, sizeof(pair_t), pair_byte_cmp);
1045  rm->char_offset[i].end = found->char_pos;
1046  }
1047 }
1048 
1049 static void
1050 match_check(VALUE match)
1051 {
1052  if (!RMATCH(match)->regexp) {
1053  rb_raise(rb_eTypeError, "uninitialized MatchData");
1054  }
1055 }
1056 
1057 /* :nodoc: */
1058 static VALUE
1059 match_init_copy(VALUE obj, VALUE orig)
1060 {
1061  struct rmatch *rm;
1062 
1063  if (!OBJ_INIT_COPY(obj, orig)) return obj;
1064 
1065  RMATCH(obj)->str = RMATCH(orig)->str;
1066  RMATCH(obj)->regexp = RMATCH(orig)->regexp;
1067 
1068  rm = RMATCH(obj)->rmatch;
1069  if (rb_reg_region_copy(&rm->regs, RMATCH_REGS(orig)))
1070  rb_memerror();
1071 
1072  if (RMATCH(orig)->rmatch->char_offset_num_allocated) {
1073  if (rm->char_offset_num_allocated < rm->regs.num_regs) {
1074  REALLOC_N(rm->char_offset, struct rmatch_offset, rm->regs.num_regs);
1075  rm->char_offset_num_allocated = rm->regs.num_regs;
1076  }
1078  struct rmatch_offset, rm->regs.num_regs);
1079  RB_GC_GUARD(orig);
1080  }
1081 
1082  return obj;
1083 }
1084 
1085 
1086 /*
1087  * call-seq:
1088  * mtch.regexp -> regexp
1089  *
1090  * Returns the regexp.
1091  *
1092  * m = /a.*b/.match("abc")
1093  * m.regexp #=> /a.*b/
1094  */
1095 
1096 static VALUE
1097 match_regexp(VALUE match)
1098 {
1099  VALUE regexp;
1100  match_check(match);
1101  regexp = RMATCH(match)->regexp;
1102  if (NIL_P(regexp)) {
1103  VALUE str = rb_reg_nth_match(0, match);
1104  regexp = rb_reg_regcomp(rb_reg_quote(str));
1105  RMATCH(match)->regexp = regexp;
1106  }
1107  return regexp;
1108 }
1109 
1110 /*
1111  * call-seq:
1112  * mtch.names -> [name1, name2, ...]
1113  *
1114  * Returns a list of names of captures as an array of strings.
1115  * This is the same as mtch.regexp.names.
1116  *
1117  * /(?<foo>.)(?<bar>.)(?<baz>.)/.match("hoge").names
1118  * #=> ["foo", "bar", "baz"]
1119  *
1120  * m = /(?<x>.)(?<y>.)?/.match("a") #=> #<MatchData "a" x:"a" y:nil>
1121  * m.names #=> ["x", "y"]
1122  */
1123 
1124 static VALUE
1125 match_names(VALUE match)
1126 {
1127  match_check(match);
1128  if (NIL_P(RMATCH(match)->regexp))
1129  return rb_ary_new_capa(0);
1130  return rb_reg_names(RMATCH(match)->regexp);
1131 }
1132 
1133 /*
1134  * call-seq:
1135  * mtch.length -> integer
1136  * mtch.size -> integer
1137  *
1138  * Returns the number of elements in the match array.
1139  *
1140  * m = /(.)(.)(\d+)(\d)/.match("THX1138.")
1141  * m.length #=> 5
1142  * m.size #=> 5
1143  */
1144 
1145 static VALUE
1146 match_size(VALUE match)
1147 {
1148  match_check(match);
1149  return INT2FIX(RMATCH_REGS(match)->num_regs);
1150 }
1151 
1152 static int name_to_backref_number(struct re_registers *, VALUE, const char*, const char*);
1153 NORETURN(static void name_to_backref_error(VALUE name));
1154 
1155 static void
1156 name_to_backref_error(VALUE name)
1157 {
1158  rb_raise(rb_eIndexError, "undefined group name reference: % "PRIsVALUE,
1159  name);
1160 }
1161 
1162 static void
1163 backref_number_check(struct re_registers *regs, int i)
1164 {
1165  if (i < 0 || regs->num_regs <= i)
1166  rb_raise(rb_eIndexError, "index %d out of matches", i);
1167 }
1168 
1169 static int
1170 match_backref_number(VALUE match, VALUE backref)
1171 {
1172  const char *name;
1173  int num;
1174 
1175  struct re_registers *regs = RMATCH_REGS(match);
1176  VALUE regexp = RMATCH(match)->regexp;
1177 
1178  match_check(match);
1179  if (SYMBOL_P(backref)) {
1180  backref = rb_sym2str(backref);
1181  }
1182  else if (!RB_TYPE_P(backref, T_STRING)) {
1183  return NUM2INT(backref);
1184  }
1185  name = StringValueCStr(backref);
1186 
1187  num = name_to_backref_number(regs, regexp, name, name + RSTRING_LEN(backref));
1188 
1189  if (num < 1) {
1190  name_to_backref_error(backref);
1191  }
1192 
1193  return num;
1194 }
1195 
1196 int
1198 {
1199  return match_backref_number(match, backref);
1200 }
1201 
1202 /*
1203  * call-seq:
1204  * mtch.offset(n) -> array
1205  *
1206  * Returns a two-element array containing the beginning and ending offsets of
1207  * the <em>n</em>th match.
1208  * <em>n</em> can be a string or symbol to reference a named capture.
1209  *
1210  * m = /(.)(.)(\d+)(\d)/.match("THX1138.")
1211  * m.offset(0) #=> [1, 7]
1212  * m.offset(4) #=> [6, 7]
1213  *
1214  * m = /(?<foo>.)(.)(?<bar>.)/.match("hoge")
1215  * p m.offset(:foo) #=> [0, 1]
1216  * p m.offset(:bar) #=> [2, 3]
1217  *
1218  */
1219 
1220 static VALUE
1221 match_offset(VALUE match, VALUE n)
1222 {
1223  int i = match_backref_number(match, n);
1224  struct re_registers *regs = RMATCH_REGS(match);
1225 
1226  match_check(match);
1227  backref_number_check(regs, i);
1228 
1229  if (BEG(i) < 0)
1230  return rb_assoc_new(Qnil, Qnil);
1231 
1232  update_char_offset(match);
1233  return rb_assoc_new(INT2FIX(RMATCH(match)->rmatch->char_offset[i].beg),
1234  INT2FIX(RMATCH(match)->rmatch->char_offset[i].end));
1235 }
1236 
1237 
1238 /*
1239  * call-seq:
1240  * mtch.begin(n) -> integer
1241  *
1242  * Returns the offset of the start of the <em>n</em>th element of the match
1243  * array in the string.
1244  * <em>n</em> can be a string or symbol to reference a named capture.
1245  *
1246  * m = /(.)(.)(\d+)(\d)/.match("THX1138.")
1247  * m.begin(0) #=> 1
1248  * m.begin(2) #=> 2
1249  *
1250  * m = /(?<foo>.)(.)(?<bar>.)/.match("hoge")
1251  * p m.begin(:foo) #=> 0
1252  * p m.begin(:bar) #=> 2
1253  */
1254 
1255 static VALUE
1256 match_begin(VALUE match, VALUE n)
1257 {
1258  int i = match_backref_number(match, n);
1259  struct re_registers *regs = RMATCH_REGS(match);
1260 
1261  match_check(match);
1262  backref_number_check(regs, i);
1263 
1264  if (BEG(i) < 0)
1265  return Qnil;
1266 
1267  update_char_offset(match);
1268  return INT2FIX(RMATCH(match)->rmatch->char_offset[i].beg);
1269 }
1270 
1271 
1272 /*
1273  * call-seq:
1274  * mtch.end(n) -> integer
1275  *
1276  * Returns the offset of the character immediately following the end of the
1277  * <em>n</em>th element of the match array in the string.
1278  * <em>n</em> can be a string or symbol to reference a named capture.
1279  *
1280  * m = /(.)(.)(\d+)(\d)/.match("THX1138.")
1281  * m.end(0) #=> 7
1282  * m.end(2) #=> 3
1283  *
1284  * m = /(?<foo>.)(.)(?<bar>.)/.match("hoge")
1285  * p m.end(:foo) #=> 1
1286  * p m.end(:bar) #=> 3
1287  */
1288 
1289 static VALUE
1290 match_end(VALUE match, VALUE n)
1291 {
1292  int i = match_backref_number(match, n);
1293  struct re_registers *regs = RMATCH_REGS(match);
1294 
1295  match_check(match);
1296  backref_number_check(regs, i);
1297 
1298  if (BEG(i) < 0)
1299  return Qnil;
1300 
1301  update_char_offset(match);
1302  return INT2FIX(RMATCH(match)->rmatch->char_offset[i].end);
1303 }
1304 
1305 /*
1306  * call-seq:
1307  * mtch.match(n) -> string or nil
1308  *
1309  * Returns the captured substring corresponding to the argument.
1310  * <em>n</em> can be a string or symbol to reference a named capture.
1311  *
1312  * m = /(.)(.)(\d+)(\d)(\w)?/.match("THX1138.")
1313  * m.match(0) #=> "HX1138"
1314  * m.match(4) #=> "8"
1315  * m.match(5) #=> nil
1316  *
1317  * m = /(?<foo>.)(.)(?<bar>.+)/.match("hoge")
1318  * m.match(:foo) #=> "h"
1319  * m.match(:bar) #=> "ge"
1320  *
1321  */
1322 
1323 static VALUE
1324 match_nth(VALUE match, VALUE n)
1325 {
1326  int i = match_backref_number(match, n);
1327  struct re_registers *regs = RMATCH_REGS(match);
1328 
1329  backref_number_check(regs, i);
1330 
1331  long start = BEG(i), end = END(i);
1332  if (start < 0)
1333  return Qnil;
1334 
1335  return rb_str_subseq(RMATCH(match)->str, start, end - start);
1336 }
1337 
1338 /*
1339  * call-seq:
 *     mtch.match_length(n)   -> integer or nil
1341  *
1342  * Returns the length of the captured substring corresponding to the argument.
1343  * <em>n</em> can be a string or symbol to reference a named capture.
1344  *
1345  * m = /(.)(.)(\d+)(\d)(\w)?/.match("THX1138.")
1346  * m.match_length(0) #=> 6
1347  * m.match_length(4) #=> 1
1348  * m.match_length(5) #=> nil
1349  *
1350  * m = /(?<foo>.)(.)(?<bar>.+)/.match("hoge")
1351  * m.match_length(:foo) #=> 1
1352  * m.match_length(:bar) #=> 2
1353  *
1354  */
1355 
1356 static VALUE
1357 match_nth_length(VALUE match, VALUE n)
1358 {
1359  int i = match_backref_number(match, n);
1360  struct re_registers *regs = RMATCH_REGS(match);
1361 
1362  match_check(match);
1363  backref_number_check(regs, i);
1364 
1365  if (BEG(i) < 0)
1366  return Qnil;
1367 
1368  update_char_offset(match);
1369  const struct rmatch_offset *const ofs =
1370  &RMATCH(match)->rmatch->char_offset[i];
1371  return LONG2NUM(ofs->end - ofs->beg);
1372 }
1373 
1374 #define MATCH_BUSY FL_USER2
1375 
1376 void
1378 {
1379  FL_SET(match, MATCH_BUSY);
1380 }
1381 
1382 void
1383 rb_match_unbusy(VALUE match)
1384 {
1385  FL_UNSET(match, MATCH_BUSY);
1386 }
1387 
1388 int
1389 rb_match_count(VALUE match)
1390 {
1391  struct re_registers *regs;
1392  if (NIL_P(match)) return -1;
1393  regs = RMATCH_REGS(match);
1394  if (!regs) return -1;
1395  return regs->num_regs;
1396 }
1397 
1398 int
1399 rb_match_nth_defined(int nth, VALUE match)
1400 {
1401  struct re_registers *regs;
1402  if (NIL_P(match)) return FALSE;
1403  regs = RMATCH_REGS(match);
1404  if (!regs) return FALSE;
1405  if (nth >= regs->num_regs) {
1406  return FALSE;
1407  }
1408  if (nth < 0) {
1409  nth += regs->num_regs;
1410  if (nth <= 0) return FALSE;
1411  }
1412  return (BEG(nth) != -1);
1413 }
1414 
1415 static void
1416 match_set_string(VALUE m, VALUE string, long pos, long len)
1417 {
1418  struct RMatch *match = (struct RMatch *)m;
1419  struct rmatch *rmatch = match->rmatch;
1420 
1421  match->str = string;
1422  match->regexp = Qnil;
1423  int err = onig_region_resize(&rmatch->regs, 1);
1424  if (err) rb_memerror();
1425  rmatch->regs.beg[0] = pos;
1426  rmatch->regs.end[0] = pos + len;
1427 }
1428 
1429 void
1430 rb_backref_set_string(VALUE string, long pos, long len)
1431 {
1432  VALUE match = rb_backref_get();
1433  if (NIL_P(match) || FL_TEST(match, MATCH_BUSY)) {
1434  match = match_alloc(rb_cMatch);
1435  }
1436  match_set_string(match, string, pos, len);
1437  rb_backref_set(match);
1438 }
1439 
1440 /*
1441  * call-seq:
1442  * rxp.fixed_encoding? -> true or false
1443  *
1444  * Returns false if rxp is applicable to
1445  * a string with any ASCII compatible encoding.
1446  * Returns true otherwise.
1447  *
1448  * r = /a/
1449  * r.fixed_encoding? #=> false
1450  * r =~ "\u{6666} a" #=> 2
1451  * r =~ "\xa1\xa2 a".force_encoding("euc-jp") #=> 2
1452  * r =~ "abc".force_encoding("euc-jp") #=> 0
1453  *
1454  * r = /a/u
1455  * r.fixed_encoding? #=> true
1456  * r.encoding #=> #<Encoding:UTF-8>
1457  * r =~ "\u{6666} a" #=> 2
1458  * r =~ "\xa1\xa2".force_encoding("euc-jp") #=> Encoding::CompatibilityError
1459  * r =~ "abc".force_encoding("euc-jp") #=> 0
1460  *
1461  * r = /\u{6666}/
1462  * r.fixed_encoding? #=> true
1463  * r.encoding #=> #<Encoding:UTF-8>
1464  * r =~ "\u{6666} a" #=> 0
1465  * r =~ "\xa1\xa2".force_encoding("euc-jp") #=> Encoding::CompatibilityError
1466  * r =~ "abc".force_encoding("euc-jp") #=> nil
1467  */
1468 
1469 static VALUE
1470 rb_reg_fixed_encoding_p(VALUE re)
1471 {
1472  return RBOOL(FL_TEST(re, KCODE_FIXED));
1473 }
1474 
1475 static VALUE
1476 rb_reg_preprocess(const char *p, const char *end, rb_encoding *enc,
1477  rb_encoding **fixed_enc, onig_errmsg_buffer err);
1478 
1479 NORETURN(static void reg_enc_error(VALUE re, VALUE str));
1480 
1481 static void
1482 reg_enc_error(VALUE re, VALUE str)
1483 {
1485  "incompatible encoding regexp match (%s regexp with %s string)",
1486  rb_enc_name(rb_enc_get(re)),
1487  rb_enc_name(rb_enc_get(str)));
1488 }
1489 
1490 static inline int
1491 str_coderange(VALUE str)
1492 {
1493  int cr = ENC_CODERANGE(str);
1494  if (cr == ENC_CODERANGE_UNKNOWN) {
1495  cr = rb_enc_str_coderange(str);
1496  }
1497  return cr;
1498 }
1499 
1500 static rb_encoding*
1501 rb_reg_prepare_enc(VALUE re, VALUE str, int warn)
1502 {
1503  rb_encoding *enc = 0;
1504  int cr = str_coderange(str);
1505 
1506  if (cr == ENC_CODERANGE_BROKEN) {
1508  "invalid byte sequence in %s",
1509  rb_enc_name(rb_enc_get(str)));
1510  }
1511 
1512  rb_reg_check(re);
1513  enc = rb_enc_get(str);
1514  if (RREGEXP_PTR(re)->enc == enc) {
1515  }
1516  else if (cr == ENC_CODERANGE_7BIT &&
1517  RREGEXP_PTR(re)->enc == rb_usascii_encoding()) {
1518  enc = RREGEXP_PTR(re)->enc;
1519  }
1520  else if (!rb_enc_asciicompat(enc)) {
1521  reg_enc_error(re, str);
1522  }
1523  else if (rb_reg_fixed_encoding_p(re)) {
1524  if ((!rb_enc_asciicompat(RREGEXP_PTR(re)->enc) ||
1525  cr != ENC_CODERANGE_7BIT)) {
1526  reg_enc_error(re, str);
1527  }
1528  enc = RREGEXP_PTR(re)->enc;
1529  }
1530  else if (warn && (RBASIC(re)->flags & REG_ENCODING_NONE) &&
1531  enc != rb_ascii8bit_encoding() &&
1532  cr != ENC_CODERANGE_7BIT) {
1533  rb_warn("historical binary regexp match /.../n against %s string",
1534  rb_enc_name(enc));
1535  }
1536  return enc;
1537 }
1538 
1539 regex_t *
1540 rb_reg_prepare_re0(VALUE re, VALUE str, onig_errmsg_buffer err)
1541 {
1542  regex_t *reg = RREGEXP_PTR(re);
1543  int r;
1544  OnigErrorInfo einfo;
1545  const char *pattern;
1546  VALUE unescaped;
1547  rb_encoding *fixed_enc = 0;
1548  rb_encoding *enc = rb_reg_prepare_enc(re, str, 1);
1549 
1550  if (reg->enc == enc) return reg;
1551 
1552  rb_reg_check(re);
1553  reg = RREGEXP_PTR(re);
1554  pattern = RREGEXP_SRC_PTR(re);
1555 
1556  unescaped = rb_reg_preprocess(
1557  pattern, pattern + RREGEXP_SRC_LEN(re), enc,
1558  &fixed_enc, err);
1559 
1560  if (NIL_P(unescaped)) {
1561  rb_raise(rb_eArgError, "regexp preprocess failed: %s", err);
1562  }
1563 
1564  const char *ptr;
1565  long len;
1566  RSTRING_GETMEM(unescaped, ptr, len);
1567  r = onig_new(&reg, (UChar *)ptr, (UChar *)(ptr + len),
1568  reg->options, enc,
1569  OnigDefaultSyntax, &einfo);
1570  if (r) {
1571  onig_error_code_to_str((UChar*)err, r, &einfo);
1572  rb_reg_raise(pattern, RREGEXP_SRC_LEN(re), err, re);
1573  }
1574 
1575  RB_GC_GUARD(unescaped);
1576  return reg;
1577 }
1578 
1579 regex_t *
1581 {
1582  onig_errmsg_buffer err = "";
1583  return rb_reg_prepare_re0(re, str, err);
1584 }
1585 
1586 long
1587 rb_reg_adjust_startpos(VALUE re, VALUE str, long pos, int reverse)
1588 {
1589  long range;
1590  rb_encoding *enc;
1591  UChar *p, *string;
1592 
1593  enc = rb_reg_prepare_enc(re, str, 0);
1594 
1595  if (reverse) {
1596  range = -pos;
1597  }
1598  else {
1599  range = RSTRING_LEN(str) - pos;
1600  }
1601 
1602  if (pos > 0 && ONIGENC_MBC_MAXLEN(enc) != 1 && pos < RSTRING_LEN(str)) {
1603  string = (UChar*)RSTRING_PTR(str);
1604 
1605  if (range > 0) {
1606  p = onigenc_get_right_adjust_char_head(enc, string, string + pos, string + RSTRING_LEN(str));
1607  }
1608  else {
1609  p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, string, string + pos, string + RSTRING_LEN(str));
1610  }
1611  return p - string;
1612  }
1613 
1614  return pos;
1615 }
1616 
1617 /* returns byte offset */
1618 static long
1619 rb_reg_search_set_match(VALUE re, VALUE str, long pos, int reverse, int set_backref_str, VALUE *set_match)
1620 {
1621  long result;
1622  VALUE match;
1623  struct re_registers regi, *regs = &regi;
1624  char *start, *range;
1625  long len;
1626  regex_t *reg;
1627  int tmpreg;
1628  onig_errmsg_buffer err = "";
1629 
1630  RSTRING_GETMEM(str, start, len);
1631  range = start;
1632  if (pos > len || pos < 0) {
1634  return -1;
1635  }
1636 
1637  reg = rb_reg_prepare_re0(re, str, err);
1638  tmpreg = reg != RREGEXP_PTR(re);
1639  if (!tmpreg) RREGEXP(re)->usecnt++;
1640 
1641  MEMZERO(regs, struct re_registers, 1);
1642  if (!reverse) {
1643  range += len;
1644  }
1645  result = onig_search(reg,
1646  (UChar*)start,
1647  ((UChar*)(start + len)),
1648  ((UChar*)(start + pos)),
1649  ((UChar*)range),
1650  regs, ONIG_OPTION_NONE);
1651  if (!tmpreg) RREGEXP(re)->usecnt--;
1652  if (tmpreg) {
1653  if (RREGEXP(re)->usecnt) {
1654  onig_free(reg);
1655  }
1656  else {
1657  onig_free(RREGEXP_PTR(re));
1658  RREGEXP_PTR(re) = reg;
1659  }
1660  }
1661  if (result < 0) {
1662  if (regs == &regi)
1663  onig_region_free(regs, 0);
1664  if (result == ONIG_MISMATCH) {
1666  return result;
1667  }
1668  else {
1669  onig_error_code_to_str((UChar*)err, (int)result);
1670  rb_reg_raise(RREGEXP_SRC_PTR(re), RREGEXP_SRC_LEN(re), err, re);
1671  }
1672  }
1673 
1674  match = match_alloc(rb_cMatch);
1675  int copy_err = rb_reg_region_copy(RMATCH_REGS(match), regs);
1676  onig_region_free(regs, 0);
1677  if (copy_err) rb_memerror();
1678 
1679  if (set_backref_str) {
1680  RMATCH(match)->str = rb_str_new4(str);
1681  }
1682 
1683  RMATCH(match)->regexp = re;
1684  rb_backref_set(match);
1685  if (set_match) *set_match = match;
1686 
1687  return result;
1688 }
1689 
/*
 * Same as rb_reg_search_set_match() but without handing the MatchData back
 * to the caller (NULL is passed for set_match).
 */
long
rb_reg_search0(VALUE re, VALUE str, long pos, int reverse, int set_backref_str)
{
    VALUE *const no_match_out = NULL;

    return rb_reg_search_set_match(re, str, pos, reverse, set_backref_str,
                                   no_match_out);
}
1695 
1696 long
1697 rb_reg_search(VALUE re, VALUE str, long pos, int reverse)
1698 {
1699  return rb_reg_search0(re, str, pos, reverse, 1);
1700 }
1701 
/*
 * Reports whether +re+ matches +str+ at its very beginning (onig_match()
 * anchored at the start of the string) and updates the current backref.
 *
 * An existing backref MatchData is reused for the registers unless it is
 * nil or flagged MATCH_BUSY; otherwise local registers are used and copied
 * into a newly allocated MatchData on success.
 *
 * Returns true on a match and false on ONIG_MISMATCH.  Any other negative
 * result from onig_match() is raised via rb_reg_raise().
 *
 * NOTE(review): the first statement of the mismatch branch was absent from
 * this listing; restored as rb_backref_set(Qnil) -- confirm against
 * upstream.
 */
bool
rb_reg_start_with_p(VALUE re, VALUE str)
{
    long result;
    VALUE match;
    struct re_registers regi, *regs = &regi;
    regex_t *reg;
    int tmpreg;
    onig_errmsg_buffer err = "";

    reg = rb_reg_prepare_re0(re, str, err);
    /* tmpreg: a regex was compiled just for this call (encoding differed). */
    tmpreg = reg != RREGEXP_PTR(re);
    if (!tmpreg) RREGEXP(re)->usecnt++;

    match = rb_backref_get();
    if (!NIL_P(match)) {
        if (FL_TEST(match, MATCH_BUSY)) {
            match = Qnil;
        }
        else {
            regs = RMATCH_REGS(match);
        }
    }
    if (NIL_P(match)) {
        MEMZERO(regs, struct re_registers, 1);
    }
    const char *ptr;
    long len;
    RSTRING_GETMEM(str, ptr, len);
    result = onig_match(reg,
                        (UChar*)(ptr),
                        ((UChar*)(ptr + len)),
                        (UChar*)(ptr),
                        regs, ONIG_OPTION_NONE);
    if (!tmpreg) RREGEXP(re)->usecnt--;
    if (tmpreg) {
        /* Keep the temporary regex as the regexp's compiled form only when
         * the current one is not in use; otherwise discard the temporary. */
        if (RREGEXP(re)->usecnt) {
            onig_free(reg);
        }
        else {
            onig_free(RREGEXP_PTR(re));
            RREGEXP_PTR(re) = reg;
        }
    }
    if (result < 0) {
        /* Only the local register set is ours to free. */
        if (regs == &regi)
            onig_region_free(regs, 0);
        if (result == ONIG_MISMATCH) {
            rb_backref_set(Qnil);
            return false;
        }
        else {
            onig_error_code_to_str((UChar*)err, (int)result);
            rb_reg_raise(RREGEXP_SRC_PTR(re), RREGEXP_SRC_LEN(re), err, re);
        }
    }

    if (NIL_P(match)) {
        /* Named copy_err so it does not shadow the err message buffer. */
        int copy_err;
        match = match_alloc(rb_cMatch);
        copy_err = rb_reg_region_copy(RMATCH_REGS(match), regs);
        onig_region_free(regs, 0);
        if (copy_err) rb_memerror();
    }

    RMATCH(match)->str = rb_str_new4(str);

    RMATCH(match)->regexp = re;
    rb_backref_set(match);

    return true;
}
1774 
1775 VALUE
1776 rb_reg_nth_defined(int nth, VALUE match)
1777 {
1778  struct re_registers *regs;
1779  if (NIL_P(match)) return Qnil;
1780  match_check(match);
1781  regs = RMATCH_REGS(match);
1782  if (nth >= regs->num_regs) {
1783  return Qnil;
1784  }
1785  if (nth < 0) {
1786  nth += regs->num_regs;
1787  if (nth <= 0) return Qnil;
1788  }
1789  return RBOOL(BEG(nth) != -1);
1790 }
1791 
1792 VALUE
1793 rb_reg_nth_match(int nth, VALUE match)
1794 {
1795  VALUE str;
1796  long start, end, len;
1797  struct re_registers *regs;
1798 
1799  if (NIL_P(match)) return Qnil;
1800  match_check(match);
1801  regs = RMATCH_REGS(match);
1802  if (nth >= regs->num_regs) {
1803  return Qnil;
1804  }
1805  if (nth < 0) {
1806  nth += regs->num_regs;
1807  if (nth <= 0) return Qnil;
1808  }
1809  start = BEG(nth);
1810  if (start == -1) return Qnil;
1811  end = END(nth);
1812  len = end - start;
1813  str = rb_str_subseq(RMATCH(match)->str, start, len);
1814  return str;
1815 }
1816 
1817 VALUE
1819 {
1820  return rb_reg_nth_match(0, match);
1821 }
1822 
1823 
1824 /*
1825  * call-seq:
1826  * mtch.pre_match -> str
1827  *
1828  * Returns the portion of the original string before the current match.
1829  * Equivalent to the special variable <code>$`</code>.
1830  *
1831  * m = /(.)(.)(\d+)(\d)/.match("THX1138.")
1832  * m.pre_match #=> "T"
1833  */
1834 
1835 VALUE
1837 {
1838  VALUE str;
1839  struct re_registers *regs;
1840 
1841  if (NIL_P(match)) return Qnil;
1842  match_check(match);
1843  regs = RMATCH_REGS(match);
1844  if (BEG(0) == -1) return Qnil;
1845  str = rb_str_subseq(RMATCH(match)->str, 0, BEG(0));
1846  return str;
1847 }
1848 
1849 
1850 /*
1851  * call-seq:
1852  * mtch.post_match -> str
1853  *
1854  * Returns the portion of the original string after the current match.
1855  * Equivalent to the special variable <code>$'</code>.
1856  *
1857  * m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie")
1858  * m.post_match #=> ": The Movie"
1859  */
1860 
1861 VALUE
1863 {
1864  VALUE str;
1865  long pos;
1866  struct re_registers *regs;
1867 
1868  if (NIL_P(match)) return Qnil;
1869  match_check(match);
1870  regs = RMATCH_REGS(match);
1871  if (BEG(0) == -1) return Qnil;
1872  str = RMATCH(match)->str;
1873  pos = END(0);
1874  str = rb_str_subseq(str, pos, RSTRING_LEN(str) - pos);
1875  return str;
1876 }
1877 
1878 VALUE
1880 {
1881  int i;
1882  struct re_registers *regs;
1883 
1884  if (NIL_P(match)) return Qnil;
1885  match_check(match);
1886  regs = RMATCH_REGS(match);
1887  if (BEG(0) == -1) return Qnil;
1888 
1889  for (i=regs->num_regs-1; BEG(i) == -1 && i > 0; i--)
1890  ;
1891  if (i == 0) return Qnil;
1892  return rb_reg_nth_match(i, match);
1893 }
1894 
1895 static VALUE
1896 last_match_getter(ID _x, VALUE *_y)
1897 {
1899 }
1900 
1901 static VALUE
1902 prematch_getter(ID _x, VALUE *_y)
1903 {
1904  return rb_reg_match_pre(rb_backref_get());
1905 }
1906 
1907 static VALUE
1908 postmatch_getter(ID _x, VALUE *_y)
1909 {
1911 }
1912 
1913 static VALUE
1914 last_paren_match_getter(ID _x, VALUE *_y)
1915 {
1917 }
1918 
1919 static VALUE
1920 match_array(VALUE match, int start)
1921 {
1922  struct re_registers *regs;
1923  VALUE ary;
1924  VALUE target;
1925  int i;
1926 
1927  match_check(match);
1928  regs = RMATCH_REGS(match);
1929  ary = rb_ary_new2(regs->num_regs);
1930  target = RMATCH(match)->str;
1931 
1932  for (i=start; i<regs->num_regs; i++) {
1933  if (regs->beg[i] == -1) {
1934  rb_ary_push(ary, Qnil);
1935  }
1936  else {
1937  VALUE str = rb_str_subseq(target, regs->beg[i], regs->end[i]-regs->beg[i]);
1938  rb_ary_push(ary, str);
1939  }
1940  }
1941  return ary;
1942 }
1943 
1944 
1945 /*
1946  * call-seq:
1947  * mtch.to_a -> anArray
1948  *
1949  * Returns the array of matches.
1950  *
1951  * m = /(.)(.)(\d+)(\d)/.match("THX1138.")
1952  * m.to_a #=> ["HX1138", "H", "X", "113", "8"]
1953  *
1954  * Because <code>to_a</code> is called when expanding
1955  * <code>*</code><em>variable</em>, there's a useful assignment
1956  * shortcut for extracting matched fields. This is slightly slower than
1957  * accessing the fields directly (as an intermediate array is
1958  * generated).
1959  *
1960  * all,f1,f2,f3 = * /(.)(.)(\d+)(\d)/.match("THX1138.")
1961  * all #=> "HX1138"
1962  * f1 #=> "H"
1963  * f2 #=> "X"
1964  * f3 #=> "113"
1965  */
1966 
1967 static VALUE
1968 match_to_a(VALUE match)
1969 {
1970  return match_array(match, 0);
1971 }
1972 
1973 
1974 /*
1975  * call-seq:
1976  * mtch.captures -> array
1977  *
1978  * Returns the array of captures; equivalent to <code>mtch.to_a[1..-1]</code>.
1979  *
1980  * f1,f2,f3,f4 = /(.)(.)(\d+)(\d)/.match("THX1138.").captures
1981  * f1 #=> "H"
1982  * f2 #=> "X"
1983  * f3 #=> "113"
1984  * f4 #=> "8"
1985  */
1986 static VALUE
1987 match_captures(VALUE match)
1988 {
1989  return match_array(match, 1);
1990 }
1991 
1992 static int
1993 name_to_backref_number(struct re_registers *regs, VALUE regexp, const char* name, const char* name_end)
1994 {
1995  if (NIL_P(regexp)) return -1;
1996  return onig_name_to_backref_number(RREGEXP_PTR(regexp),
1997  (const unsigned char *)name, (const unsigned char *)name_end, regs);
1998 }
1999 
2000 #define NAME_TO_NUMBER(regs, re, name, name_ptr, name_end) \
2001  (NIL_P(re) ? 0 : \
2002  !rb_enc_compatible(RREGEXP_SRC(re), (name)) ? 0 : \
2003  name_to_backref_number((regs), (re), (name_ptr), (name_end)))
2004 
2005 static int
2006 namev_to_backref_number(struct re_registers *regs, VALUE re, VALUE name)
2007 {
2008  int num;
2009 
2010  if (SYMBOL_P(name)) {
2011  name = rb_sym2str(name);
2012  }
2013  else if (!RB_TYPE_P(name, T_STRING)) {
2014  return -1;
2015  }
2016  num = NAME_TO_NUMBER(regs, re, name,
2017  RSTRING_PTR(name), RSTRING_END(name));
2018  if (num < 1) {
2019  name_to_backref_error(name);
2020  }
2021  return num;
2022 }
2023 
2024 static VALUE
2025 match_ary_subseq(VALUE match, long beg, long len, VALUE result)
2026 {
2027  long olen = RMATCH_REGS(match)->num_regs;
2028  long j, end = olen < beg+len ? olen : beg+len;
2029  if (NIL_P(result)) result = rb_ary_new_capa(len);
2030  if (len == 0) return result;
2031 
2032  for (j = beg; j < end; j++) {
2033  rb_ary_push(result, rb_reg_nth_match((int)j, match));
2034  }
2035  if (beg + len > j) {
2036  rb_ary_resize(result, RARRAY_LEN(result) + (beg + len) - j);
2037  }
2038  return result;
2039 }
2040 
2041 static VALUE
2042 match_ary_aref(VALUE match, VALUE idx, VALUE result)
2043 {
2044  long beg, len;
2045  int num_regs = RMATCH_REGS(match)->num_regs;
2046 
2047  /* check if idx is Range */
2048  switch (rb_range_beg_len(idx, &beg, &len, (long)num_regs, !NIL_P(result))) {
2049  case Qfalse:
2050  if (NIL_P(result)) return rb_reg_nth_match(NUM2INT(idx), match);
2051  rb_ary_push(result, rb_reg_nth_match(NUM2INT(idx), match));
2052  return result;
2053  case Qnil:
2054  return Qnil;
2055  default:
2056  return match_ary_subseq(match, beg, len, result);
2057  }
2058 }
2059 
2060 /*
2061  * call-seq:
2062  * mtch[i] -> str or nil
2063  * mtch[start, length] -> array
2064  * mtch[range] -> array
2065  * mtch[name] -> str or nil
2066  *
2067  * Match Reference -- MatchData acts as an array, and may be accessed
2068  * using the normal array indexing techniques. <code>mtch[0]</code>
2069  * is equivalent to the special variable <code>$&</code>, and returns
2070  * the entire matched string. <code>mtch[1]</code>,
2071  * <code>mtch[2]</code>, and so on return the values of the matched
2072  * backreferences (portions of the pattern between parentheses).
2073  *
2074  * m = /(.)(.)(\d+)(\d)/.match("THX1138.")
2075  * m #=> #<MatchData "HX1138" 1:"H" 2:"X" 3:"113" 4:"8">
2076  * m[0] #=> "HX1138"
2077  * m[1, 2] #=> ["H", "X"]
2078  * m[1..3] #=> ["H", "X", "113"]
2079  * m[-3, 2] #=> ["X", "113"]
2080  *
2081  * m = /(?<foo>a+)b/.match("ccaaab")
2082  * m #=> #<MatchData "aaab" foo:"aaa">
2083  * m["foo"] #=> "aaa"
2084  * m[:foo] #=> "aaa"
2085  */
2086 
2087 static VALUE
2088 match_aref(int argc, VALUE *argv, VALUE match)
2089 {
2090  VALUE idx, length;
2091 
2092  match_check(match);
2093  rb_scan_args(argc, argv, "11", &idx, &length);
2094 
2095  if (NIL_P(length)) {
2096  if (FIXNUM_P(idx)) {
2097  return rb_reg_nth_match(FIX2INT(idx), match);
2098  }
2099  else {
2100  int num = namev_to_backref_number(RMATCH_REGS(match), RMATCH(match)->regexp, idx);
2101  if (num >= 0) {
2102  return rb_reg_nth_match(num, match);
2103  }
2104  else {
2105  return match_ary_aref(match, idx, Qnil);
2106  }
2107  }
2108  }
2109  else {
2110  long beg = NUM2LONG(idx);
2111  long len = NUM2LONG(length);
2112  long num_regs = RMATCH_REGS(match)->num_regs;
2113  if (len < 0) {
2114  return Qnil;
2115  }
2116  if (beg < 0) {
2117  beg += num_regs;
2118  if (beg < 0) return Qnil;
2119  }
2120  else if (beg > num_regs) {
2121  return Qnil;
2122  }
2123  if (beg+len > num_regs) {
2124  len = num_regs - beg;
2125  }
2126  return match_ary_subseq(match, beg, len, Qnil);
2127  }
2128 }
2129 
2130 /*
2131  * call-seq:
2132  *
2133  * mtch.values_at(index, ...) -> array
2134  *
2135  * Uses each <i>index</i> to access the matching values, returning an array of
2136  * the corresponding matches.
2137  *
2138  * m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie")
2139  * m.to_a #=> ["HX1138", "H", "X", "113", "8"]
2140  * m.values_at(0, 2, -2) #=> ["HX1138", "X", "113"]
2141  * m.values_at(1..2, -1) #=> ["H", "X", "8"]
2142  *
2143  * m = /(?<a>\d+) *(?<op>[+\-*\/]) *(?<b>\d+)/.match("1 + 2")
2144  * m.to_a #=> ["1 + 2", "1", "+", "2"]
2145  * m.values_at(:a, :b, :op) #=> ["1", "2", "+"]
2146  */
2147 
2148 static VALUE
2149 match_values_at(int argc, VALUE *argv, VALUE match)
2150 {
2151  VALUE result;
2152  int i;
2153 
2154  match_check(match);
2155  result = rb_ary_new2(argc);
2156 
2157  for (i=0; i<argc; i++) {
2158  if (FIXNUM_P(argv[i])) {
2159  rb_ary_push(result, rb_reg_nth_match(FIX2INT(argv[i]), match));
2160  }
2161  else {
2162  int num = namev_to_backref_number(RMATCH_REGS(match), RMATCH(match)->regexp, argv[i]);
2163  if (num >= 0) {
2164  rb_ary_push(result, rb_reg_nth_match(num, match));
2165  }
2166  else {
2167  match_ary_aref(match, argv[i], result);
2168  }
2169  }
2170  }
2171  return result;
2172 }
2173 
2174 
2175 /*
2176  * call-seq:
2177  * mtch.to_s -> str
2178  *
2179  * Returns the entire matched string.
2180  *
2181  * m = /(.)(.)(\d+)(\d)/.match("THX1138.")
2182  * m.to_s #=> "HX1138"
2183  */
2184 
2185 static VALUE
2186 match_to_s(VALUE match)
2187 {
2188  VALUE str = rb_reg_last_match(match);
2189 
2190  match_check(match);
2191  if (NIL_P(str)) str = rb_str_new(0,0);
2192  return str;
2193 }
2194 
2195 static int
2196 match_named_captures_iter(const OnigUChar *name, const OnigUChar *name_end,
2197  int back_num, int *back_refs, OnigRegex regex, void *arg) {
2198  struct MEMO *memo = MEMO_CAST(arg);
2199  VALUE hash = memo->v1;
2200  VALUE match = memo->v2;
2201 
2202  VALUE key = rb_enc_str_new((const char *)name, name_end-name, regex->enc);
2203  VALUE value;
2204 
2205  int i;
2206  int found = 0;
2207 
2208  for (i = 0; i < back_num; i++) {
2209  value = rb_reg_nth_match(back_refs[i], match);
2210  if (RTEST(value)) {
2211  rb_hash_aset(hash, key, value);
2212  found = 1;
2213  }
2214  }
2215 
2216  if (found == 0) {
2217  rb_hash_aset(hash, key, Qnil);
2218  }
2219 
2220  return 0;
2221 }
2222 
2223 /*
2224  * call-seq:
2225  * mtch.named_captures -> hash
2226  *
2227  * Returns a Hash using named capture.
2228  *
2229  * A key of the hash is a name of the named captures.
2230  * A value of the hash is a string of last successful capture of corresponding
2231  * group.
2232  *
2233  * m = /(?<a>.)(?<b>.)/.match("01")
2234  * m.named_captures #=> {"a" => "0", "b" => "1"}
2235  *
2236  * m = /(?<a>.)(?<b>.)?/.match("0")
2237  * m.named_captures #=> {"a" => "0", "b" => nil}
2238  *
2239  * m = /(?<a>.)(?<a>.)/.match("01")
2240  * m.named_captures #=> {"a" => "1"}
2241  *
2242  * m = /(?<a>x)|(?<a>y)/.match("x")
2243  * m.named_captures #=> {"a" => "x"}
2244  */
2245 
2246 static VALUE
2247 match_named_captures(VALUE match)
2248 {
2249  VALUE hash;
2250  struct MEMO *memo;
2251 
2252  match_check(match);
2253  if (NIL_P(RMATCH(match)->regexp))
2254  return rb_hash_new();
2255 
2256  hash = rb_hash_new();
2257  memo = MEMO_NEW(hash, match, 0);
2258 
2259  onig_foreach_name(RREGEXP(RMATCH(match)->regexp)->ptr, match_named_captures_iter, (void*)memo);
2260 
2261  return hash;
2262 }
2263 
2264 /*
2265  * call-seq:
2266  * mtch.string -> str
2267  *
2268  * Returns a frozen copy of the string passed in to <code>match</code>.
2269  *
2270  * m = /(.)(.)(\d+)(\d)/.match("THX1138.")
2271  * m.string #=> "THX1138."
2272  */
2273 
2274 static VALUE
2275 match_string(VALUE match)
2276 {
2277  match_check(match);
2278  return RMATCH(match)->str; /* str is frozen */
2279 }
2280 
2282  const UChar *name;
2283  long len;
2284 };
2285 
2286 static int
2287 match_inspect_name_iter(const OnigUChar *name, const OnigUChar *name_end,
2288  int back_num, int *back_refs, OnigRegex regex, void *arg0)
2289 {
2290  struct backref_name_tag *arg = (struct backref_name_tag *)arg0;
2291  int i;
2292 
2293  for (i = 0; i < back_num; i++) {
2294  arg[back_refs[i]].name = name;
2295  arg[back_refs[i]].len = name_end - name;
2296  }
2297  return 0;
2298 }
2299 
2300 /*
2301  * call-seq:
2302  * mtch.inspect -> str
2303  *
2304  * Returns a printable version of <i>mtch</i>.
2305  *
2306  * puts /.$/.match("foo").inspect
2307  * #=> #<MatchData "o">
2308  *
2309  * puts /(.)(.)(.)/.match("foo").inspect
2310  * #=> #<MatchData "foo" 1:"f" 2:"o" 3:"o">
2311  *
2312  * puts /(.)(.)?(.)/.match("fo").inspect
2313  * #=> #<MatchData "fo" 1:"f" 2:nil 3:"o">
2314  *
2315  * puts /(?<foo>.)(?<bar>.)(?<baz>.)/.match("hoge").inspect
2316  * #=> #<MatchData "hog" foo:"h" bar:"o" baz:"g">
2317  *
2318  */
2319 
2320 static VALUE
2321 match_inspect(VALUE match)
2322 {
2323  VALUE cname = rb_class_path(rb_obj_class(match));
2324  VALUE str;
2325  int i;
2326  struct re_registers *regs = RMATCH_REGS(match);
2327  int num_regs = regs->num_regs;
2328  struct backref_name_tag *names;
2329  VALUE regexp = RMATCH(match)->regexp;
2330 
2331  if (regexp == 0) {
2332  return rb_sprintf("#<%"PRIsVALUE":%p>", cname, (void*)match);
2333  }
2334  else if (NIL_P(regexp)) {
2335  return rb_sprintf("#<%"PRIsVALUE": %"PRIsVALUE">",
2336  cname, rb_reg_nth_match(0, match));
2337  }
2338 
2339  names = ALLOCA_N(struct backref_name_tag, num_regs);
2340  MEMZERO(names, struct backref_name_tag, num_regs);
2341 
2342  onig_foreach_name(RREGEXP_PTR(regexp),
2343  match_inspect_name_iter, names);
2344 
2345  str = rb_str_buf_new2("#<");
2346  rb_str_append(str, cname);
2347 
2348  for (i = 0; i < num_regs; i++) {
2349  VALUE v;
2350  rb_str_buf_cat2(str, " ");
2351  if (0 < i) {
2352  if (names[i].name)
2353  rb_str_buf_cat(str, (const char *)names[i].name, names[i].len);
2354  else {
2355  rb_str_catf(str, "%d", i);
2356  }
2357  rb_str_buf_cat2(str, ":");
2358  }
2359  v = rb_reg_nth_match(i, match);
2360  if (NIL_P(v))
2361  rb_str_buf_cat2(str, "nil");
2362  else
2364  }
2365  rb_str_buf_cat2(str, ">");
2366 
2367  return str;
2368 }
2369 
2371 
/*
 * Reads one backslash escape starting at *pp and returns the byte value it
 * denotes (0..0xff), advancing *pp past the escape.
 *
 * Handles the single-letter escapes listed below, octal (\OOO), hex (\xHH),
 * and the meta/control prefixes (\M-, \C-, \c), each of which may appear at
 * most once and may wrap a further escape.
 *
 * On failure an explanatory message is copied into +err+, *pp is left
 * untouched and -1 is returned.
 */
static int
read_escaped_byte(const char **pp, const char *end, onig_errmsg_buffer err)
{
    const char *cur = *pp;
    int value;
    int seen_meta = 0, seen_ctrl = 0;
    size_t ndigits;

    if (cur == end || *cur++ != '\\') {
        errcpy(err, "too short escaped multibyte character");
        return -1;
    }

    /* Each pass consumes one escape letter; a prefix followed by another
     * backslash loops back for the wrapped escape. */
    for (;;) {
        if (cur == end) {
            errcpy(err, "too short escape sequence");
            return -1;
        }
        switch (*cur++) {
          case '\\': value = '\\'; break;
          case 'n': value = '\n'; break;
          case 't': value = '\t'; break;
          case 'r': value = '\r'; break;
          case 'f': value = '\f'; break;
          case 'v': value = '\013'; break;
          case 'a': value = '\007'; break;
          case 'e': value = '\033'; break;

          /* \OOO */
          case '0': case '1': case '2': case '3':
          case '4': case '5': case '6': case '7':
            cur--;
            value = scan_oct(cur, end < cur+3 ? end-cur : 3, &ndigits);
            cur += ndigits;
            break;

          case 'x': /* \xHH */
            value = scan_hex(cur, end < cur+2 ? end-cur : 2, &ndigits);
            if (ndigits < 1) {
                errcpy(err, "invalid hex escape");
                return -1;
            }
            cur += ndigits;
            break;

          case 'M': /* \M-X, \M-\C-X, \M-\cX */
            if (seen_meta) {
                errcpy(err, "duplicate meta escape");
                return -1;
            }
            seen_meta = 1;
            if (cur+1 < end && *cur++ == '-' && (*cur & 0x80) == 0) {
                if (*cur == '\\') {
                    cur++;
                    continue;
                }
                value = *cur++;
                break;
            }
            errcpy(err, "too short meta escape");
            return -1;

          case 'C': /* \C-X, \C-\M-X */
            if (cur == end || *cur++ != '-') {
                errcpy(err, "too short control escape");
                return -1;
            }
            /* fall through */
          case 'c': /* \cX, \c\M-X */
            if (seen_ctrl) {
                errcpy(err, "duplicate control escape");
                return -1;
            }
            seen_ctrl = 1;
            if (cur < end && (*cur & 0x80) == 0) {
                if (*cur == '\\') {
                    cur++;
                    continue;
                }
                value = *cur++;
                break;
            }
            errcpy(err, "too short control escape");
            return -1;

          default:
            errcpy(err, "unexpected escape sequence");
            return -1;
        }
        break; /* the switch produced a value: leave the loop */
    }

    if (value < 0 || 0xff < value) {
        errcpy(err, "invalid escape code");
        return -1;
    }

    /* Apply the prefixes: control keeps the low 5 bits, meta sets bit 7. */
    if (seen_ctrl) {
        value &= 0x1f;
    }
    if (seen_meta) {
        value |= 0x80;
    }

    *pp = cur;
    return value;
}
2477 
2478 static int
2479 unescape_escaped_nonascii(const char **pp, const char *end, rb_encoding *enc,
2480  VALUE buf, rb_encoding **encp, onig_errmsg_buffer err)
2481 {
2482  const char *p = *pp;
2483  int chmaxlen = rb_enc_mbmaxlen(enc);
2484  unsigned char *area = ALLOCA_N(unsigned char, chmaxlen);
2485  char *chbuf = (char *)area;
2486  int chlen = 0;
2487  int byte;
2488  int l;
2489 
2490  memset(chbuf, 0, chmaxlen);
2491 
2492  byte = read_escaped_byte(&p, end, err);
2493  if (byte == -1) {
2494  return -1;
2495  }
2496 
2497  area[chlen++] = byte;
2498  while (chlen < chmaxlen &&
2499  MBCLEN_NEEDMORE_P(rb_enc_precise_mbclen(chbuf, chbuf+chlen, enc))) {
2500  byte = read_escaped_byte(&p, end, err);
2501  if (byte == -1) {
2502  return -1;
2503  }
2504  area[chlen++] = byte;
2505  }
2506 
2507  l = rb_enc_precise_mbclen(chbuf, chbuf+chlen, enc);
2508  if (MBCLEN_INVALID_P(l)) {
2509  errcpy(err, "invalid multibyte escape");
2510  return -1;
2511  }
2512  if (1 < chlen || (area[0] & 0x80)) {
2513  rb_str_buf_cat(buf, chbuf, chlen);
2514 
2515  if (*encp == 0)
2516  *encp = enc;
2517  else if (*encp != enc) {
2518  errcpy(err, "escaped non ASCII character in UTF-8 regexp");
2519  return -1;
2520  }
2521  }
2522  else {
2523  char escbuf[5];
2524  snprintf(escbuf, sizeof(escbuf), "\\x%02X", area[0]&0xff);
2525  rb_str_buf_cat(buf, escbuf, 4);
2526  }
2527  *pp = p;
2528  return 0;
2529 }
2530 
2531 static int
2532 check_unicode_range(unsigned long code, onig_errmsg_buffer err)
2533 {
2534  if ((0xd800 <= code && code <= 0xdfff) || /* Surrogates */
2535  0x10ffff < code) {
2536  errcpy(err, "invalid Unicode range");
2537  return -1;
2538  }
2539  return 0;
2540 }
2541 
2542 static int
2543 append_utf8(unsigned long uv,
2544  VALUE buf, rb_encoding **encp, onig_errmsg_buffer err)
2545 {
2546  if (check_unicode_range(uv, err) != 0)
2547  return -1;
2548  if (uv < 0x80) {
2549  char escbuf[5];
2550  snprintf(escbuf, sizeof(escbuf), "\\x%02X", (int)uv);
2551  rb_str_buf_cat(buf, escbuf, 4);
2552  }
2553  else {
2554  int len;
2555  char utf8buf[6];
2556  len = rb_uv_to_utf8(utf8buf, uv);
2557  rb_str_buf_cat(buf, utf8buf, len);
2558 
2559  if (*encp == 0)
2560  *encp = rb_utf8_encoding();
2561  else if (*encp != rb_utf8_encoding()) {
2562  errcpy(err, "UTF-8 character in non UTF-8 regexp");
2563  return -1;
2564  }
2565  }
2566  return 0;
2567 }
2568 
2569 static int
2570 unescape_unicode_list(const char **pp, const char *end,
2571  VALUE buf, rb_encoding **encp, onig_errmsg_buffer err)
2572 {
2573  const char *p = *pp;
2574  int has_unicode = 0;
2575  unsigned long code;
2576  size_t len;
2577 
2578  while (p < end && ISSPACE(*p)) p++;
2579 
2580  while (1) {
2581  code = ruby_scan_hex(p, end-p, &len);
2582  if (len == 0)
2583  break;
2584  if (6 < len) { /* max 10FFFF */
2585  errcpy(err, "invalid Unicode range");
2586  return -1;
2587  }
2588  p += len;
2589  if (append_utf8(code, buf, encp, err) != 0)
2590  return -1;
2591  has_unicode = 1;
2592 
2593  while (p < end && ISSPACE(*p)) p++;
2594  }
2595 
2596  if (has_unicode == 0) {
2597  errcpy(err, "invalid Unicode list");
2598  return -1;
2599  }
2600 
2601  *pp = p;
2602 
2603  return 0;
2604 }
2605 
2606 static int
2607 unescape_unicode_bmp(const char **pp, const char *end,
2608  VALUE buf, rb_encoding **encp, onig_errmsg_buffer err)
2609 {
2610  const char *p = *pp;
2611  size_t len;
2612  unsigned long code;
2613 
2614  if (end < p+4) {
2615  errcpy(err, "invalid Unicode escape");
2616  return -1;
2617  }
2618  code = ruby_scan_hex(p, 4, &len);
2619  if (len != 4) {
2620  errcpy(err, "invalid Unicode escape");
2621  return -1;
2622  }
2623  if (append_utf8(code, buf, encp, err) != 0)
2624  return -1;
2625  *pp = p + 4;
2626  return 0;
2627 }
2628 
2629 static int
2630 unescape_nonascii(const char *p, const char *end, rb_encoding *enc,
2631  VALUE buf, rb_encoding **encp, int *has_property,
2632  onig_errmsg_buffer err)
2633 {
2634  unsigned char c;
2635  char smallbuf[2];
2636 
2637  while (p < end) {
2638  int chlen = rb_enc_precise_mbclen(p, end, enc);
2639  if (!MBCLEN_CHARFOUND_P(chlen)) {
2640  invalid_multibyte:
2641  errcpy(err, "invalid multibyte character");
2642  return -1;
2643  }
2644  chlen = MBCLEN_CHARFOUND_LEN(chlen);
2645  if (1 < chlen || (*p & 0x80)) {
2646  multibyte:
2647  rb_str_buf_cat(buf, p, chlen);
2648  p += chlen;
2649  if (*encp == 0)
2650  *encp = enc;
2651  else if (*encp != enc) {
2652  errcpy(err, "non ASCII character in UTF-8 regexp");
2653  return -1;
2654  }
2655  continue;
2656  }
2657 
2658  switch (c = *p++) {
2659  case '\\':
2660  if (p == end) {
2661  errcpy(err, "too short escape sequence");
2662  return -1;
2663  }
2664  chlen = rb_enc_precise_mbclen(p, end, enc);
2665  if (!MBCLEN_CHARFOUND_P(chlen)) {
2666  goto invalid_multibyte;
2667  }
2668  if ((chlen = MBCLEN_CHARFOUND_LEN(chlen)) > 1) {
2669  /* include the previous backslash */
2670  --p;
2671  ++chlen;
2672  goto multibyte;
2673  }
2674  switch (c = *p++) {
2675  case '1': case '2': case '3':
2676  case '4': case '5': case '6': case '7': /* \O, \OO, \OOO or backref */
2677  {
2678  size_t len = end-(p-1), octlen;
2679  if (ruby_scan_oct(p-1, len < 3 ? len : 3, &octlen) <= 0177) {
2680  /* backref or 7bit octal.
2681  no need to unescape anyway.
2682  re-escaping may break backref */
2683  goto escape_asis;
2684  }
2685  }
2686  /* xxx: How about more than 199 subexpressions? */
2687 
2688  case '0': /* \0, \0O, \0OO */
2689 
2690  case 'x': /* \xHH */
2691  case 'c': /* \cX, \c\M-X */
2692  case 'C': /* \C-X, \C-\M-X */
2693  case 'M': /* \M-X, \M-\C-X, \M-\cX */
2694  p = p-2;
2695  if (enc == rb_usascii_encoding()) {
2696  const char *pbeg = p;
2697  int byte = read_escaped_byte(&p, end, err);
2698  if (byte == -1) return -1;
2699  c = byte;
2700  rb_str_buf_cat(buf, pbeg, p-pbeg);
2701  }
2702  else {
2703  if (unescape_escaped_nonascii(&p, end, enc, buf, encp, err) != 0)
2704  return -1;
2705  }
2706  break;
2707 
2708  case 'u':
2709  if (p == end) {
2710  errcpy(err, "too short escape sequence");
2711  return -1;
2712  }
2713  if (*p == '{') {
2714  /* \u{H HH HHH HHHH HHHHH HHHHHH ...} */
2715  p++;
2716  if (unescape_unicode_list(&p, end, buf, encp, err) != 0)
2717  return -1;
2718  if (p == end || *p++ != '}') {
2719  errcpy(err, "invalid Unicode list");
2720  return -1;
2721  }
2722  break;
2723  }
2724  else {
2725  /* \uHHHH */
2726  if (unescape_unicode_bmp(&p, end, buf, encp, err) != 0)
2727  return -1;
2728  break;
2729  }
2730 
2731  case 'p': /* \p{Hiragana} */
2732  case 'P':
2733  if (!*encp) {
2734  *has_property = 1;
2735  }
2736  goto escape_asis;
2737 
2738  default: /* \n, \\, \d, \9, etc. */
2739 escape_asis:
2740  smallbuf[0] = '\\';
2741  smallbuf[1] = c;
2742  rb_str_buf_cat(buf, smallbuf, 2);
2743  break;
2744  }
2745  break;
2746 
2747  default:
2748  rb_str_buf_cat(buf, (char *)&c, 1);
2749  break;
2750  }
2751  }
2752 
2753  return 0;
2754 }
2755 
2756 static VALUE
2757 rb_reg_preprocess(const char *p, const char *end, rb_encoding *enc,
2758  rb_encoding **fixed_enc, onig_errmsg_buffer err)
2759 {
2760  VALUE buf;
2761  int has_property = 0;
2762 
2763  buf = rb_str_buf_new(0);
2764 
2765  if (rb_enc_asciicompat(enc))
2766  *fixed_enc = 0;
2767  else {
2768  *fixed_enc = enc;
2769  rb_enc_associate(buf, enc);
2770  }
2771 
2772  if (unescape_nonascii(p, end, enc, buf, fixed_enc, &has_property, err) != 0)
2773  return Qnil;
2774 
2775  if (has_property && !*fixed_enc) {
2776  *fixed_enc = enc;
2777  }
2778 
2779  if (*fixed_enc) {
2780  rb_enc_associate(buf, *fixed_enc);
2781  }
2782 
2783  return buf;
2784 }
2785 
2786 VALUE
2787 rb_reg_check_preprocess(VALUE str)
2788 {
2789  rb_encoding *fixed_enc = 0;
2790  onig_errmsg_buffer err = "";
2791  VALUE buf;
2792  char *p, *end;
2793  rb_encoding *enc;
2794 
2795  StringValue(str);
2796  p = RSTRING_PTR(str);
2797  end = p + RSTRING_LEN(str);
2798  enc = rb_enc_get(str);
2799 
2800  buf = rb_reg_preprocess(p, end, enc, &fixed_enc, err);
2801  RB_GC_GUARD(str);
2802 
2803  if (NIL_P(buf)) {
2804  return rb_reg_error_desc(str, 0, err);
2805  }
2806  return Qnil;
2807 }
2808 
2809 static VALUE
2810 rb_reg_preprocess_dregexp(VALUE ary, int options)
2811 {
2812  rb_encoding *fixed_enc = 0;
2813  rb_encoding *regexp_enc = 0;
2814  onig_errmsg_buffer err = "";
2815  int i;
2816  VALUE result = 0;
2817  rb_encoding *ascii8bit = rb_ascii8bit_encoding();
2818 
2819  if (RARRAY_LEN(ary) == 0) {
2820  rb_raise(rb_eArgError, "no arguments given");
2821  }
2822 
2823  for (i = 0; i < RARRAY_LEN(ary); i++) {
2824  VALUE str = RARRAY_AREF(ary, i);
2825  VALUE buf;
2826  char *p, *end;
2827  rb_encoding *src_enc;
2828 
2829  src_enc = rb_enc_get(str);
2830  if (options & ARG_ENCODING_NONE &&
2831  src_enc != ascii8bit) {
2832  if (str_coderange(str) != ENC_CODERANGE_7BIT)
2833  rb_raise(rb_eRegexpError, "/.../n has a non escaped non ASCII character in non ASCII-8BIT script");
2834  else
2835  src_enc = ascii8bit;
2836  }
2837 
2838  StringValue(str);
2839  p = RSTRING_PTR(str);
2840  end = p + RSTRING_LEN(str);
2841 
2842  buf = rb_reg_preprocess(p, end, src_enc, &fixed_enc, err);
2843 
2844  if (NIL_P(buf))
2845  rb_raise(rb_eArgError, "%s", err);
2846 
2847  if (fixed_enc != 0) {
2848  if (regexp_enc != 0 && regexp_enc != fixed_enc) {
2849  rb_raise(rb_eRegexpError, "encoding mismatch in dynamic regexp : %s and %s",
2850  rb_enc_name(regexp_enc), rb_enc_name(fixed_enc));
2851  }
2852  regexp_enc = fixed_enc;
2853  }
2854 
2855  if (!result)
2856  result = rb_str_new3(str);
2857  else
2858  rb_str_buf_append(result, str);
2859  }
2860  if (regexp_enc) {
2861  rb_enc_associate(result, regexp_enc);
2862  }
2863 
2864  return result;
2865 }
2866 
2867 static int
2868 rb_reg_initialize(VALUE obj, const char *s, long len, rb_encoding *enc,
2869  int options, onig_errmsg_buffer err,
2870  const char *sourcefile, int sourceline)
2871 {
2872  struct RRegexp *re = RREGEXP(obj);
2873  VALUE unescaped;
2874  rb_encoding *fixed_enc = 0;
2876 
2877  rb_check_frozen(obj);
2878  if (FL_TEST(obj, REG_LITERAL))
2879  rb_raise(rb_eSecurityError, "can't modify literal regexp");
2880  if (re->ptr)
2881  rb_raise(rb_eTypeError, "already initialized regexp");
2882  re->ptr = 0;
2883 
2884  if (rb_enc_dummy_p(enc)) {
2885  errcpy(err, "can't make regexp with dummy encoding");
2886  return -1;
2887  }
2888 
2889  unescaped = rb_reg_preprocess(s, s+len, enc, &fixed_enc, err);
2890  if (NIL_P(unescaped))
2891  return -1;
2892 
2893  if (fixed_enc) {
2894  if ((fixed_enc != enc && (options & ARG_ENCODING_FIXED)) ||
2895  (fixed_enc != a_enc && (options & ARG_ENCODING_NONE))) {
2896  errcpy(err, "incompatible character encoding");
2897  return -1;
2898  }
2899  if (fixed_enc != a_enc) {
2900  options |= ARG_ENCODING_FIXED;
2901  enc = fixed_enc;
2902  }
2903  }
2904  else if (!(options & ARG_ENCODING_FIXED)) {
2905  enc = rb_usascii_encoding();
2906  }
2907 
2908  rb_enc_associate((VALUE)re, enc);
2909  if ((options & ARG_ENCODING_FIXED) || fixed_enc) {
2910  re->basic.flags |= KCODE_FIXED;
2911  }
2912  if (options & ARG_ENCODING_NONE) {
2913  re->basic.flags |= REG_ENCODING_NONE;
2914  }
2915 
2916  re->ptr = make_regexp(RSTRING_PTR(unescaped), RSTRING_LEN(unescaped), enc,
2917  options & ARG_REG_OPTION_MASK, err,
2918  sourcefile, sourceline);
2919  if (!re->ptr) return -1;
2920  RB_GC_GUARD(unescaped);
2921  return 0;
2922 }
2923 
2924 static void
2925 reg_set_source(VALUE reg, VALUE str, rb_encoding *enc)
2926 {
2927  rb_encoding *regenc = rb_enc_get(reg);
2928  if (regenc != enc) {
2929  str = rb_enc_associate(rb_str_dup(str), enc = regenc);
2930  }
2931  RB_OBJ_WRITE(reg, &RREGEXP(reg)->src, rb_fstring(str));
2932 }
2933 
2934 static int
2935 rb_reg_initialize_str(VALUE obj, VALUE str, int options, onig_errmsg_buffer err,
2936  const char *sourcefile, int sourceline)
2937 {
2938  int ret;
2939  rb_encoding *str_enc = rb_enc_get(str), *enc = str_enc;
2940  if (options & ARG_ENCODING_NONE) {
2941  rb_encoding *ascii8bit = rb_ascii8bit_encoding();
2942  if (enc != ascii8bit) {
2943  if (str_coderange(str) != ENC_CODERANGE_7BIT) {
2944  errcpy(err, "/.../n has a non escaped non ASCII character in non ASCII-8BIT script");
2945  return -1;
2946  }
2947  enc = ascii8bit;
2948  }
2949  }
2950  ret = rb_reg_initialize(obj, RSTRING_PTR(str), RSTRING_LEN(str), enc,
2951  options, err, sourcefile, sourceline);
2952  if (ret == 0) reg_set_source(obj, str, str_enc);
2953  return ret;
2954 }
2955 
2956 static VALUE
2957 rb_reg_s_alloc(VALUE klass)
2958 {
2960 
2961  re->ptr = 0;
2962  RB_OBJ_WRITE(re, &re->src, 0);
2963  re->usecnt = 0;
2964 
2965  return (VALUE)re;
2966 }
2967 
2968 VALUE
2969 rb_reg_alloc(void)
2970 {
2971  return rb_reg_s_alloc(rb_cRegexp);
2972 }
2973 
2974 VALUE
2975 rb_reg_new_str(VALUE s, int options)
2976 {
2977  return rb_reg_init_str(rb_reg_alloc(), s, options);
2978 }
2979 
2980 VALUE
2981 rb_reg_init_str(VALUE re, VALUE s, int options)
2982 {
2983  onig_errmsg_buffer err = "";
2984 
2985  if (rb_reg_initialize_str(re, s, options, err, NULL, 0) != 0) {
2986  rb_reg_raise_str(s, options, err);
2987  }
2988 
2989  return re;
2990 }
2991 
2992 static VALUE
2993 rb_reg_init_str_enc(VALUE re, VALUE s, rb_encoding *enc, int options)
2994 {
2995  onig_errmsg_buffer err = "";
2996 
2997  if (rb_reg_initialize(re, RSTRING_PTR(s), RSTRING_LEN(s),
2998  enc, options, err, NULL, 0) != 0) {
2999  rb_reg_raise_str(s, options, err);
3000  }
3001  reg_set_source(re, s, enc);
3002 
3003  return re;
3004 }
3005 
3006 MJIT_FUNC_EXPORTED VALUE
3007 rb_reg_new_ary(VALUE ary, int opt)
3008 {
3009  VALUE re = rb_reg_new_str(rb_reg_preprocess_dregexp(ary, opt), opt);
3010  rb_obj_freeze(re);
3011  return re;
3012 }
3013 
3014 VALUE
3015 rb_enc_reg_new(const char *s, long len, rb_encoding *enc, int options)
3016 {
3017  VALUE re = rb_reg_alloc();
3018  onig_errmsg_buffer err = "";
3019 
3020  if (rb_reg_initialize(re, s, len, enc, options, err, NULL, 0) != 0) {
3021  rb_enc_reg_raise(s, len, enc, options, err);
3022  }
3023  RB_OBJ_WRITE(re, &RREGEXP(re)->src, rb_fstring(rb_enc_str_new(s, len, enc)));
3024 
3025  return re;
3026 }
3027 
3028 VALUE
3029 rb_reg_new(const char *s, long len, int options)
3030 {
3031  return rb_enc_reg_new(s, len, rb_ascii8bit_encoding(), options);
3032 }
3033 
3034 VALUE
3035 rb_reg_compile(VALUE str, int options, const char *sourcefile, int sourceline)
3036 {
3037  VALUE re = rb_reg_alloc();
3038  onig_errmsg_buffer err = "";
3039 
3040  if (!str) str = rb_str_new(0,0);
3041  if (rb_reg_initialize_str(re, str, options, err, sourcefile, sourceline) != 0) {
3042  rb_set_errinfo(rb_reg_error_desc(str, options, err));
3043  return Qnil;
3044  }
3045  FL_SET(re, REG_LITERAL);
3046  rb_obj_freeze(re);
3047  return re;
3048 }
3049 
3050 static VALUE reg_cache;
3051 
3052 VALUE
3054 {
3055  if (reg_cache && RREGEXP_SRC_LEN(reg_cache) == RSTRING_LEN(str)
3056  && ENCODING_GET(reg_cache) == ENCODING_GET(str)
3057  && memcmp(RREGEXP_SRC_PTR(reg_cache), RSTRING_PTR(str), RSTRING_LEN(str)) == 0)
3058  return reg_cache;
3059 
3060  return reg_cache = rb_reg_new_str(str, 0);
3061 }
3062 
3063 static st_index_t reg_hash(VALUE re);
3064 /*
3065  * call-seq:
3066  * rxp.hash -> integer
3067  *
3068  * Produce a hash based on the text and options of this regular expression.
3069  *
3070  * See also Object#hash.
3071  */
3072 
3073 VALUE
3074 rb_reg_hash(VALUE re)
3075 {
3076  st_index_t hashval = reg_hash(re);
3077  return ST2FIX(hashval);
3078 }
3079 
3080 static st_index_t
3081 reg_hash(VALUE re)
3082 {
3083  st_index_t hashval;
3084 
3085  rb_reg_check(re);
3086  hashval = RREGEXP_PTR(re)->options;
3087  hashval = rb_hash_uint(hashval, rb_memhash(RREGEXP_SRC_PTR(re), RREGEXP_SRC_LEN(re)));
3088  return rb_hash_end(hashval);
3089 }
3090 
3091 
3092 /*
3093  * call-seq:
3094  * rxp == other_rxp -> true or false
3095  * rxp.eql?(other_rxp) -> true or false
3096  *
3097  * Equality---Two regexps are equal if their patterns are identical, they have
3098  * the same character set code, and their <code>casefold?</code> values are the
3099  * same.
3100  *
3101  * /abc/ == /abc/x #=> false
3102  * /abc/ == /abc/i #=> false
3103  * /abc/ == /abc/u #=> false
3104  * /abc/u == /abc/n #=> false
3105  */
3106 
3107 VALUE
3108 rb_reg_equal(VALUE re1, VALUE re2)
3109 {
3110  if (re1 == re2) return Qtrue;
3111  if (!RB_TYPE_P(re2, T_REGEXP)) return Qfalse;
3112  rb_reg_check(re1); rb_reg_check(re2);
3113  if (FL_TEST(re1, KCODE_FIXED) != FL_TEST(re2, KCODE_FIXED)) return Qfalse;
3114  if (RREGEXP_PTR(re1)->options != RREGEXP_PTR(re2)->options) return Qfalse;
3115  if (RREGEXP_SRC_LEN(re1) != RREGEXP_SRC_LEN(re2)) return Qfalse;
3116  if (ENCODING_GET(re1) != ENCODING_GET(re2)) return Qfalse;
3117  return RBOOL(memcmp(RREGEXP_SRC_PTR(re1), RREGEXP_SRC_PTR(re2), RREGEXP_SRC_LEN(re1)) == 0);
3118 }
3119 
3120 /*
3121  * call-seq:
3122  * mtch.hash -> integer
3123  *
3124  * Produce a hash based on the target string, regexp and matched
3125  * positions of this matchdata.
3126  *
3127  * See also Object#hash.
3128  */
3129 
3130 static VALUE
3131 match_hash(VALUE match)
3132 {
3133  const struct re_registers *regs;
3134  st_index_t hashval;
3135 
3136  match_check(match);
3137  hashval = rb_hash_start(rb_str_hash(RMATCH(match)->str));
3138  hashval = rb_hash_uint(hashval, reg_hash(match_regexp(match)));
3139  regs = RMATCH_REGS(match);
3140  hashval = rb_hash_uint(hashval, regs->num_regs);
3141  hashval = rb_hash_uint(hashval, rb_memhash(regs->beg, regs->num_regs * sizeof(*regs->beg)));
3142  hashval = rb_hash_uint(hashval, rb_memhash(regs->end, regs->num_regs * sizeof(*regs->end)));
3143  hashval = rb_hash_end(hashval);
3144  return ST2FIX(hashval);
3145 }
3146 
3147 /*
3148  * call-seq:
3149  * mtch == mtch2 -> true or false
3150  * mtch.eql?(mtch2) -> true or false
3151  *
3152  * Equality---Two matchdata are equal if their target strings,
3153  * patterns, and matched positions are identical.
3154  */
3155 
3156 static VALUE
3157 match_equal(VALUE match1, VALUE match2)
3158 {
3159  const struct re_registers *regs1, *regs2;
3160 
3161  if (match1 == match2) return Qtrue;
3162  if (!RB_TYPE_P(match2, T_MATCH)) return Qfalse;
3163  if (!RMATCH(match1)->regexp || !RMATCH(match2)->regexp) return Qfalse;
3164  if (!rb_str_equal(RMATCH(match1)->str, RMATCH(match2)->str)) return Qfalse;
3165  if (!rb_reg_equal(match_regexp(match1), match_regexp(match2))) return Qfalse;
3166  regs1 = RMATCH_REGS(match1);
3167  regs2 = RMATCH_REGS(match2);
3168  if (regs1->num_regs != regs2->num_regs) return Qfalse;
3169  if (memcmp(regs1->beg, regs2->beg, regs1->num_regs * sizeof(*regs1->beg))) return Qfalse;
3170  if (memcmp(regs1->end, regs2->end, regs1->num_regs * sizeof(*regs1->end))) return Qfalse;
3171  return Qtrue;
3172 }
3173 
3174 static VALUE
3175 reg_operand(VALUE s, int check)
3176 {
3177  if (SYMBOL_P(s)) {
3178  return rb_sym2str(s);
3179  }
3180  else if (RB_TYPE_P(s, T_STRING)) {
3181  return s;
3182  }
3183  else {
3184  return check ? rb_str_to_str(s) : rb_check_string_type(s);
3185  }
3186 }
3187 
3188 static long
3189 reg_match_pos(VALUE re, VALUE *strp, long pos, VALUE* set_match)
3190 {
3191  VALUE str = *strp;
3192 
3193  if (NIL_P(str)) {
3195  return -1;
3196  }
3197  *strp = str = reg_operand(str, TRUE);
3198  if (pos != 0) {
3199  if (pos < 0) {
3200  VALUE l = rb_str_length(str);
3201  pos += NUM2INT(l);
3202  if (pos < 0) {
3203  return pos;
3204  }
3205  }
3206  pos = rb_str_offset(str, pos);
3207  }
3208  return rb_reg_search_set_match(re, str, pos, 0, 1, set_match);
3209 }
3210 
3211 /*
3212  * call-seq:
3213  * rxp =~ str -> integer or nil
3214  *
3215  * Match---Matches <i>rxp</i> against <i>str</i>.
3216  *
3217  * /at/ =~ "input data" #=> 7
3218  * /ax/ =~ "input data" #=> nil
3219  *
3220  * If <code>=~</code> is used with a regexp literal with named captures,
3221  * captured strings (or nil) are assigned to local variables named by
3222  * the capture names.
3223  *
3224  * /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/ =~ " x = y "
3225  * p lhs #=> "x"
3226  * p rhs #=> "y"
3227  *
3228  * If it is not matched, nil is assigned for the variables.
3229  *
3230  * /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/ =~ " x = "
3231  * p lhs #=> nil
3232  * p rhs #=> nil
3233  *
3234  * This assignment is implemented in the Ruby parser.
3235  * The parser detects 'regexp-literal =~ expression' for the assignment.
3236  * The regexp must be a literal without interpolation and placed at left hand side.
3237  *
3238  * The assignment does not occur if the regexp is not a literal.
3239  *
3240  * re = /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/
3241  * re =~ " x = y "
3242  * p lhs # undefined local variable
3243  * p rhs # undefined local variable
3244  *
3245  * A regexp interpolation, <code>#{}</code>, also disables
3246  * the assignment.
3247  *
3248  * rhs_pat = /(?<rhs>\w+)/
3249  * /(?<lhs>\w+)\s*=\s*#{rhs_pat}/ =~ "x = y"
3250  * p lhs # undefined local variable
3251  *
3252  * The assignment does not occur if the regexp is placed at the right hand side.
3253  *
3254  * " x = y " =~ /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/
3255  * p lhs, rhs # undefined local variable
3256  *
3257  */
3258 
3259 VALUE
3261 {
3262  long pos = reg_match_pos(re, &str, 0, NULL);
3263  if (pos < 0) return Qnil;
3264  pos = rb_str_sublen(str, pos);
3265  return LONG2FIX(pos);
3266 }
3267 
3268 /*
3269  * call-seq:
3270  * rxp === str -> true or false
3271  *
3272  * Case Equality---Used in case statements.
3273  *
3274  * a = "HELLO"
3275  * case a
3276  * when /\A[a-z]*\z/; print "Lower case\n"
3277  * when /\A[A-Z]*\z/; print "Upper case\n"
3278  * else; print "Mixed case\n"
3279  * end
3280  * #=> "Upper case"
3281  *
3282  * Following a regular expression literal with the #=== operator allows you to
3283  * compare against a String.
3284  *
3285  * /^[a-z]*$/ === "HELLO" #=> false
3286  * /^[A-Z]*$/ === "HELLO" #=> true
3287  */
3288 
3289 static VALUE
3290 rb_reg_eqq(VALUE re, VALUE str)
3291 {
3292  long start;
3293 
3294  str = reg_operand(str, FALSE);
3295  if (NIL_P(str)) {
3297  return Qfalse;
3298  }
3299  start = rb_reg_search(re, str, 0, 0);
3300  if (start < 0) {
3301  return Qfalse;
3302  }
3303  return Qtrue;
3304 }
3305 
3306 
3307 /*
3308  * call-seq:
3309  * ~ rxp -> integer or nil
3310  *
3311  * Match---Matches <i>rxp</i> against the contents of <code>$_</code>.
3312  * Equivalent to <code><i>rxp</i> =~ $_</code>.
3313  *
3314  * $_ = "input data"
3315  * ~ /at/ #=> 7
3316  */
3317 
3318 VALUE
3320 {
3321  long start;
3322  VALUE line = rb_lastline_get();
3323 
3324  if (!RB_TYPE_P(line, T_STRING)) {
3326  return Qnil;
3327  }
3328 
3329  start = rb_reg_search(re, line, 0, 0);
3330  if (start < 0) {
3331  return Qnil;
3332  }
3333  start = rb_str_sublen(line, start);
3334  return LONG2FIX(start);
3335 }
3336 
3337 
3338 /*
3339  * call-seq:
3340  * rxp.match(str, pos=0) -> matchdata or nil
3341  * rxp.match(str, pos=0) {|match| block } -> obj
3342  *
3343  * Returns a MatchData object describing the match, or
3344  * <code>nil</code> if there was no match. This is equivalent to
3345  * retrieving the value of the special variable <code>$~</code>
3346  * following a normal match. If the second parameter is present, it
3347  * specifies the position in the string to begin the search.
3348  *
3349  * /(.)(.)(.)/.match("abc")[2] #=> "b"
3350  * /(.)(.)/.match("abc", 1)[2] #=> "c"
3351  *
3352  * If a block is given, invoke the block with MatchData if the match succeeds, so
3353  * that you can write
3354  *
3355  * /M(.*)/.match("Matz") do |m|
3356  * puts m[0]
3357  * puts m[1]
3358  * end
3359  *
3360  * instead of
3361  *
3362  * if m = /M(.*)/.match("Matz")
3363  * puts m[0]
3364  * puts m[1]
3365  * end
3366  *
3367  * The return value is a value from block execution in this case.
3368  */
3369 
3370 static VALUE
3371 rb_reg_match_m(int argc, VALUE *argv, VALUE re)
3372 {
3373  VALUE result = Qnil, str, initpos;
3374  long pos;
3375 
3376  if (rb_scan_args(argc, argv, "11", &str, &initpos) == 2) {
3377  pos = NUM2LONG(initpos);
3378  }
3379  else {
3380  pos = 0;
3381  }
3382 
3383  pos = reg_match_pos(re, &str, pos, &result);
3384  if (pos < 0) {
3386  return Qnil;
3387  }
3388  rb_match_busy(result);
3389  if (!NIL_P(result) && rb_block_given_p()) {
3390  return rb_yield(result);
3391  }
3392  return result;
3393 }
3394 
3395 /*
3396  * call-seq:
3397  * rxp.match?(str) -> true or false
3398  * rxp.match?(str, pos=0) -> true or false
3399  *
3400  * Returns <code>true</code> or <code>false</code> to indicate whether the
3401  * regexp is matched or not without updating $~ and other related variables.
3402  * If the second parameter is present, it specifies the position in the string
3403  * to begin the search.
3404  *
3405  * /R.../.match?("Ruby") #=> true
3406  * /R.../.match?("Ruby", 1) #=> false
3407  * /P.../.match?("Ruby") #=> false
3408  * $& #=> nil
3409  */
3410 
3411 static VALUE
3412 rb_reg_match_m_p(int argc, VALUE *argv, VALUE re)
3413 {
3414  long pos = rb_check_arity(argc, 1, 2) > 1 ? NUM2LONG(argv[1]) : 0;
3415  return rb_reg_match_p(re, argv[0], pos);
3416 }
3417 
3418 VALUE
3419 rb_reg_match_p(VALUE re, VALUE str, long pos)
3420 {
3421  regex_t *reg;
3422  onig_errmsg_buffer err = "";
3423  OnigPosition result;
3424  const UChar *start, *end;
3425  int tmpreg;
3426 
3427  if (NIL_P(str)) return Qfalse;
3428  str = SYMBOL_P(str) ? rb_sym2str(str) : StringValue(str);
3429  if (pos) {
3430  if (pos < 0) {
3431  pos += NUM2LONG(rb_str_length(str));
3432  if (pos < 0) return Qfalse;
3433  }
3434  if (pos > 0) {
3435  long len = 1;
3436  const char *beg = rb_str_subpos(str, pos, &len);
3437  if (!beg) return Qfalse;
3438  pos = beg - RSTRING_PTR(str);
3439  }
3440  }
3441  reg = rb_reg_prepare_re0(re, str, err);
3442  tmpreg = reg != RREGEXP_PTR(re);
3443  if (!tmpreg) RREGEXP(re)->usecnt++;
3444  start = ((UChar*)RSTRING_PTR(str));
3445  end = start + RSTRING_LEN(str);
3446  result = onig_search(reg, start, end, start + pos, end,
3447  NULL, ONIG_OPTION_NONE);
3448  if (!tmpreg) RREGEXP(re)->usecnt--;
3449  if (tmpreg) {
3450  if (RREGEXP(re)->usecnt) {
3451  onig_free(reg);
3452  }
3453  else {
3454  onig_free(RREGEXP_PTR(re));
3455  RREGEXP_PTR(re) = reg;
3456  }
3457  }
3458  if (result < 0) {
3459  if (result == ONIG_MISMATCH) {
3460  return Qfalse;
3461  }
3462  else {
3463  onig_error_code_to_str((UChar*)err, (int)result);
3464  rb_reg_raise(RREGEXP_SRC_PTR(re), RREGEXP_SRC_LEN(re), err, re);
3465  }
3466  }
3467  return Qtrue;
3468 }
3469 
3470 /*
3471  * Document-method: compile
3472  *
3473  * Alias for Regexp.new
3474  */
3475 
3476 /*
3477  * call-seq:
3478  * Regexp.new(string, [options]) -> regexp
3479  * Regexp.new(regexp) -> regexp
3480  * Regexp.compile(string, [options]) -> regexp
3481  * Regexp.compile(regexp) -> regexp
3482  *
3483  * Constructs a new regular expression from +pattern+, which can be either a
3484  * String or a Regexp (in which case that regexp's options are propagated),
3485  * and new options may not be specified (a change as of Ruby 1.8).
3486  *
3487  * If +options+ is an Integer, it should be one or more of the constants
3488  * Regexp::EXTENDED, Regexp::IGNORECASE, and Regexp::MULTILINE,
3489  * <em>or</em>-ed together. Otherwise, if +options+ is not
3490  * +nil+ or +false+, the regexp will be case insensitive.
3491  *
3492  * r1 = Regexp.new('^a-z+:\\s+\w+') #=> /^a-z+:\s+\w+/
3493  * r2 = Regexp.new('cat', true) #=> /cat/i
3494  * r3 = Regexp.new(r2) #=> /cat/i
3495  * r4 = Regexp.new('dog', Regexp::EXTENDED | Regexp::IGNORECASE) #=> /dog/ix
3496  */
3497 
3498 static VALUE
3499 rb_reg_initialize_m(int argc, VALUE *argv, VALUE self)
3500 {
3501  int flags = 0;
3502  VALUE str;
3503  rb_encoding *enc = 0;
3504 
3505  rb_check_arity(argc, 1, 3);
3506  if (RB_TYPE_P(argv[0], T_REGEXP)) {
3507  VALUE re = argv[0];
3508 
3509  if (argc > 1) {
3510  rb_warn("flags ignored");
3511  }
3512  rb_reg_check(re);
3513  flags = rb_reg_options(re);
3514  str = RREGEXP_SRC(re);
3515  }
3516  else {
3517  if (argc >= 2) {
3518  if (FIXNUM_P(argv[1])) flags = FIX2INT(argv[1]);
3519  else if (RTEST(argv[1])) flags = ONIG_OPTION_IGNORECASE;
3520  }
3521  if (argc == 3 && !NIL_P(argv[2])) {
3522  char *kcode = StringValuePtr(argv[2]);
3523  if (kcode[0] == 'n' || kcode[0] == 'N') {
3524  enc = rb_ascii8bit_encoding();
3525  flags |= ARG_ENCODING_NONE;
3526  }
3527  else {
3528  rb_category_warn(RB_WARN_CATEGORY_DEPRECATED, "encoding option is ignored - %s", kcode);
3529  }
3530  }
3531  str = StringValue(argv[0]);
3532  }
3533  if (enc && rb_enc_get(str) != enc)
3534  rb_reg_init_str_enc(self, str, enc, flags);
3535  else
3536  rb_reg_init_str(self, str, flags);
3537  return self;
3538 }
3539 
3540 VALUE
3542 {
3543  rb_encoding *enc = rb_enc_get(str);
3544  char *s, *send, *t;
3545  VALUE tmp;
3546  int c, clen;
3547  int ascii_only = rb_enc_str_asciionly_p(str);
3548 
3549  s = RSTRING_PTR(str);
3550  send = s + RSTRING_LEN(str);
3551  while (s < send) {
3552  c = rb_enc_ascget(s, send, &clen, enc);
3553  if (c == -1) {
3554  s += mbclen(s, send, enc);
3555  continue;
3556  }
3557  switch (c) {
3558  case '[': case ']': case '{': case '}':
3559  case '(': case ')': case '|': case '-':
3560  case '*': case '.': case '\\':
3561  case '?': case '+': case '^': case '$':
3562  case ' ': case '#':
3563  case '\t': case '\f': case '\v': case '\n': case '\r':
3564  goto meta_found;
3565  }
3566  s += clen;
3567  }
3568  tmp = rb_str_new3(str);
3569  if (ascii_only) {
3571  }
3572  return tmp;
3573 
3574  meta_found:
3575  tmp = rb_str_new(0, RSTRING_LEN(str)*2);
3576  if (ascii_only) {
3578  }
3579  else {
3580  rb_enc_copy(tmp, str);
3581  }
3582  t = RSTRING_PTR(tmp);
3583  /* copy upto metacharacter */
3584  const char *p = RSTRING_PTR(str);
3585  memcpy(t, p, s - p);
3586  t += s - p;
3587 
3588  while (s < send) {
3589  c = rb_enc_ascget(s, send, &clen, enc);
3590  if (c == -1) {
3591  int n = mbclen(s, send, enc);
3592 
3593  while (n--)
3594  *t++ = *s++;
3595  continue;
3596  }
3597  s += clen;
3598  switch (c) {
3599  case '[': case ']': case '{': case '}':
3600  case '(': case ')': case '|': case '-':
3601  case '*': case '.': case '\\':
3602  case '?': case '+': case '^': case '$':
3603  case '#':
3604  t += rb_enc_mbcput('\\', t, enc);
3605  break;
3606  case ' ':
3607  t += rb_enc_mbcput('\\', t, enc);
3608  t += rb_enc_mbcput(' ', t, enc);
3609  continue;
3610  case '\t':
3611  t += rb_enc_mbcput('\\', t, enc);
3612  t += rb_enc_mbcput('t', t, enc);
3613  continue;
3614  case '\n':
3615  t += rb_enc_mbcput('\\', t, enc);
3616  t += rb_enc_mbcput('n', t, enc);
3617  continue;
3618  case '\r':
3619  t += rb_enc_mbcput('\\', t, enc);
3620  t += rb_enc_mbcput('r', t, enc);
3621  continue;
3622  case '\f':
3623  t += rb_enc_mbcput('\\', t, enc);
3624  t += rb_enc_mbcput('f', t, enc);
3625  continue;
3626  case '\v':
3627  t += rb_enc_mbcput('\\', t, enc);
3628  t += rb_enc_mbcput('v', t, enc);
3629  continue;
3630  }
3631  t += rb_enc_mbcput(c, t, enc);
3632  }
3633  rb_str_resize(tmp, t - RSTRING_PTR(tmp));
3634  return tmp;
3635 }
3636 
3637 
3638 /*
3639  * call-seq:
3640  * Regexp.escape(str) -> string
3641  * Regexp.quote(str) -> string
3642  *
3643  * Escapes any characters that would have special meaning in a regular
3644  * expression. Returns a new escaped string with the same or compatible
3645  * encoding. For any string,
3646  * <code>Regexp.new(Regexp.escape(<i>str</i>))=~<i>str</i></code> will be true.
3647  *
3648  * Regexp.escape('\*?{}.') #=> \\\*\?\{\}\.
3649  *
3650  */
3651 
3652 static VALUE
3653 rb_reg_s_quote(VALUE c, VALUE str)
3654 {
3655  return rb_reg_quote(reg_operand(str, TRUE));
3656 }
3657 
3658 int
3660 {
3661  int options;
3662 
3663  rb_reg_check(re);
3664  options = RREGEXP_PTR(re)->options & ARG_REG_OPTION_MASK;
3665  if (RBASIC(re)->flags & KCODE_FIXED) options |= ARG_ENCODING_FIXED;
3666  if (RBASIC(re)->flags & REG_ENCODING_NONE) options |= ARG_ENCODING_NONE;
3667  return options;
3668 }
3669 
3670 static VALUE
3671 rb_check_regexp_type(VALUE re)
3672 {
3673  return rb_check_convert_type(re, T_REGEXP, "Regexp", "to_regexp");
3674 }
3675 
3676 /*
3677  * call-seq:
3678  * Regexp.try_convert(obj) -> re or nil
3679  *
3680  * Try to convert <i>obj</i> into a Regexp, using to_regexp method.
3681  * Returns converted regexp or nil if <i>obj</i> cannot be converted
3682  * for any reason.
3683  *
3684  * Regexp.try_convert(/re/) #=> /re/
3685  * Regexp.try_convert("re") #=> nil
3686  *
3687  * o = Object.new
3688  * Regexp.try_convert(o) #=> nil
3689  * def o.to_regexp() /foo/ end
3690  * Regexp.try_convert(o) #=> /foo/
3691  *
3692  */
3693 static VALUE
3694 rb_reg_s_try_convert(VALUE dummy, VALUE re)
3695 {
3696  return rb_check_regexp_type(re);
3697 }
3698 
3699 static VALUE
3700 rb_reg_s_union(VALUE self, VALUE args0)
3701 {
3702  long argc = RARRAY_LEN(args0);
3703 
3704  if (argc == 0) {
3705  VALUE args[1];
3706  args[0] = rb_str_new2("(?!)");
3707  return rb_class_new_instance(1, args, rb_cRegexp);
3708  }
3709  else if (argc == 1) {
3710  VALUE arg = rb_ary_entry(args0, 0);
3711  VALUE re = rb_check_regexp_type(arg);
3712  if (!NIL_P(re))
3713  return re;
3714  else {
3715  VALUE quoted;
3716  quoted = rb_reg_s_quote(Qnil, arg);
3717  return rb_reg_new_str(quoted, 0);
3718  }
3719  }
3720  else {
3721  int i;
3722  VALUE source = rb_str_buf_new(0);
3723  rb_encoding *result_enc;
3724 
3725  int has_asciionly = 0;
3726  rb_encoding *has_ascii_compat_fixed = 0;
3727  rb_encoding *has_ascii_incompat = 0;
3728 
3729  for (i = 0; i < argc; i++) {
3730  volatile VALUE v;
3731  VALUE e = rb_ary_entry(args0, i);
3732 
3733  if (0 < i)
3734  rb_str_buf_cat_ascii(source, "|");
3735 
3736  v = rb_check_regexp_type(e);
3737  if (!NIL_P(v)) {
3738  rb_encoding *enc = rb_enc_get(v);
3739  if (!rb_enc_asciicompat(enc)) {
3740  if (!has_ascii_incompat)
3741  has_ascii_incompat = enc;
3742  else if (has_ascii_incompat != enc)
3743  rb_raise(rb_eArgError, "incompatible encodings: %s and %s",
3744  rb_enc_name(has_ascii_incompat), rb_enc_name(enc));
3745  }
3746  else if (rb_reg_fixed_encoding_p(v)) {
3747  if (!has_ascii_compat_fixed)
3748  has_ascii_compat_fixed = enc;
3749  else if (has_ascii_compat_fixed != enc)
3750  rb_raise(rb_eArgError, "incompatible encodings: %s and %s",
3751  rb_enc_name(has_ascii_compat_fixed), rb_enc_name(enc));
3752  }
3753  else {
3754  has_asciionly = 1;
3755  }
3756  v = rb_reg_str_with_term(v, -1);
3757  }
3758  else {
3759  rb_encoding *enc;
3760  StringValue(e);
3761  enc = rb_enc_get(e);
3762  if (!rb_enc_asciicompat(enc)) {
3763  if (!has_ascii_incompat)
3764  has_ascii_incompat = enc;
3765  else if (has_ascii_incompat != enc)
3766  rb_raise(rb_eArgError, "incompatible encodings: %s and %s",
3767  rb_enc_name(has_ascii_incompat), rb_enc_name(enc));
3768  }
3769  else if (rb_enc_str_asciionly_p(e)) {
3770  has_asciionly = 1;
3771  }
3772  else {
3773  if (!has_ascii_compat_fixed)
3774  has_ascii_compat_fixed = enc;
3775  else if (has_ascii_compat_fixed != enc)
3776  rb_raise(rb_eArgError, "incompatible encodings: %s and %s",
3777  rb_enc_name(has_ascii_compat_fixed), rb_enc_name(enc));
3778  }
3779  v = rb_reg_s_quote(Qnil, e);
3780  }
3781  if (has_ascii_incompat) {
3782  if (has_asciionly) {
3783  rb_raise(rb_eArgError, "ASCII incompatible encoding: %s",
3784  rb_enc_name(has_ascii_incompat));
3785  }
3786  if (has_ascii_compat_fixed) {
3787  rb_raise(rb_eArgError, "incompatible encodings: %s and %s",
3788  rb_enc_name(has_ascii_incompat), rb_enc_name(has_ascii_compat_fixed));
3789  }
3790  }
3791 
3792  if (i == 0) {
3793  rb_enc_copy(source, v);
3794  }
3795  rb_str_append(source, v);
3796  }
3797 
3798  if (has_ascii_incompat) {
3799  result_enc = has_ascii_incompat;
3800  }
3801  else if (has_ascii_compat_fixed) {
3802  result_enc = has_ascii_compat_fixed;
3803  }
3804  else {
3805  result_enc = rb_ascii8bit_encoding();
3806  }
3807 
3808  rb_enc_associate(source, result_enc);
3809  return rb_class_new_instance(1, &source, rb_cRegexp);
3810  }
3811 }
3812 
3813 /*
3814  * call-seq:
3815  * Regexp.union(pat1, pat2, ...) -> new_regexp
3816  * Regexp.union(pats_ary) -> new_regexp
3817  *
3818  * Return a Regexp object that is the union of the given
3819  * <em>pattern</em>s, i.e., will match any of its parts. The
3820  * <em>pattern</em>s can be Regexp objects, in which case their
3821  * options will be preserved, or Strings. If no patterns are given,
3822  * returns <code>/(?!)/</code>. The behavior is unspecified if any
3823  * given <em>pattern</em> contains capture.
3824  *
3825  * Regexp.union #=> /(?!)/
3826  * Regexp.union("penzance") #=> /penzance/
3827  * Regexp.union("a+b*c") #=> /a\+b\*c/
3828  * Regexp.union("skiing", "sledding") #=> /skiing|sledding/
3829  * Regexp.union(["skiing", "sledding"]) #=> /skiing|sledding/
3830  * Regexp.union(/dogs/, /cats/i) #=> /(?-mix:dogs)|(?i-mx:cats)/
3831  *
3832  * Note: the arguments for ::union will try to be converted into a regular
3833  * expression literal via #to_regexp.
3834  */
3835 static VALUE
3836 rb_reg_s_union_m(VALUE self, VALUE args)
3837 {
3838  VALUE v;
3839  if (RARRAY_LEN(args) == 1 &&
3840  !NIL_P(v = rb_check_array_type(rb_ary_entry(args, 0)))) {
3841  return rb_reg_s_union(self, v);
3842  }
3843  return rb_reg_s_union(self, args);
3844 }
3845 
3846 /* :nodoc: */
3847 static VALUE
3848 rb_reg_init_copy(VALUE copy, VALUE re)
3849 {
3850  if (!OBJ_INIT_COPY(copy, re)) return copy;
3851  rb_reg_check(re);
3852  return rb_reg_init_str(copy, RREGEXP_SRC(re), rb_reg_options(re));
3853 }
3854 
/*
 * Expands the backslash escapes found in the replacement string str.
 *
 * str    - replacement template that is scanned for escapes
 * src    - string whose bytes are copied in for each reference
 * regs   - match registers; BEG(n)/END(n) are used as byte offsets into src
 * regexp - regexp used to resolve named references and to decide whether
 *          numbered references are honoured; may be nil
 *
 * Returns str itself when no escape was processed, otherwise a newly built
 * string.  Raises RuntimeError for an unterminated \k<... reference.
 */
3855 VALUE
3856 rb_reg_regsub(VALUE str, VALUE src, struct re_registers *regs, VALUE regexp)
3857 {
3858  VALUE val = 0;
3859  char *p, *s, *e;
3860  int no, clen;
3861  rb_encoding *str_enc = rb_enc_get(str);
3862  rb_encoding *src_enc = rb_enc_get(src);
3863  int acompat = rb_enc_asciicompat(str_enc);
3864  long n;
 /* Reads one character at s: for an ASCII-compatible str encoding it looks
  * at a single byte (length 1, -1 if the byte is not ASCII); otherwise it
  * defers to rb_enc_ascget(). */
3865 #define ASCGET(s,e,cl) (acompat ? (*(cl)=1,ISASCII((s)[0])?(s)[0]:-1) : rb_enc_ascget((s), (e), (cl), str_enc))
3866 
3867  RSTRING_GETMEM(str, s, n);
 /* p marks the start of the text not yet copied into val. */
3868  p = s;
3869  e = s + n;
3870 
3871  while (s < e) {
3872  int c = ASCGET(s, e, &clen);
3873  char *ss;
3874 
3875  if (c == -1) {
3876  s += mbclen(s, e, str_enc);
3877  continue;
3878  }
 /* ss remembers where the (possible) backslash starts. */
3879  ss = s;
3880  s += clen;
3881 
 /* Not a backslash, or a backslash that is the very last character:
  * nothing to expand here. */
3882  if (c != '\\' || s == e) continue;
3883 
 /* First escape seen: allocate the result lazily. */
3884  if (!val) {
3885  val = rb_str_buf_new(ss-p);
3886  }
 /* Flush the literal text that precedes the backslash. */
3887  rb_enc_str_buf_cat(val, p, ss-p, str_enc);
3888 
3889  c = ASCGET(s, e, &clen);
3890  if (c == -1) {
 /* Backslash followed by a non-ASCII character: copy both verbatim. */
3891  s += mbclen(s, e, str_enc);
3892  rb_enc_str_buf_cat(val, ss, s-ss, str_enc);
3893  p = s;
3894  continue;
3895  }
3896  s += clen;
3897 
3898  p = s;
3899  switch (c) {
3900  case '1': case '2': case '3': case '4':
3901  case '5': case '6': case '7': case '8': case '9':
 /* Numbered reference: used only when a regexp is given and
  * onig_noname_group_capture_is_active() says so; otherwise the escape
  * is consumed and nothing is appended. */
3902  if (!NIL_P(regexp) && onig_noname_group_capture_is_active(RREGEXP_PTR(regexp))) {
3903  no = c - '0';
3904  }
3905  else {
3906  continue;
3907  }
3908  break;
3909 
3910  case 'k':
 /* Named reference \k<name>; a 'k' not followed by '<' is copied
  * verbatim below. */
3911  if (s < e && ASCGET(s, e, &clen) == '<') {
3912  char *name, *name_end;
3913 
3914  name_end = name = s + clen;
3915  while (name_end < e) {
3916  c = ASCGET(name_end, e, &clen);
3917  if (c == '>') break;
3918  name_end += c == -1 ? mbclen(name_end, e, str_enc) : clen;
3919  }
3920  if (name_end < e) {
 /* NOTE(review): this VALUE n shadows the outer long n. */
3921  VALUE n = rb_str_subseq(str, (long)(name - RSTRING_PTR(str)),
3922  (long)(name_end - name));
3923  if ((no = NAME_TO_NUMBER(regs, regexp, n, name, name_end)) < 1) {
3924  name_to_backref_error(n);
3925  }
 /* Resume right after the closing '>'. */
3926  p = s = name_end + clen;
3927  break;
3928  }
3929  else {
3930  rb_raise(rb_eRuntimeError, "invalid group name reference format");
3931  }
3932  }
3933 
3934  rb_enc_str_buf_cat(val, ss, s-ss, str_enc);
3935  continue;
3936 
3937  case '0':
3938  case '&':
 /* Whole match. */
3939  no = 0;
3940  break;
3941 
3942  case '`':
 /* Part of src before the match (the first BEG(0) bytes). */
3943  rb_enc_str_buf_cat(val, RSTRING_PTR(src), BEG(0), src_enc);
3944  continue;
3945 
3946  case '\'':
 /* Part of src after the match (from END(0) to the end). */
3947  rb_enc_str_buf_cat(val, RSTRING_PTR(src)+END(0), RSTRING_LEN(src)-END(0), src_enc);
3948  continue;
3949 
3950  case '+':
 /* Highest-numbered register whose BEG is not -1; if only register 0
  * is left, nothing is appended. */
3951  no = regs->num_regs-1;
3952  while (BEG(no) == -1 && no > 0) no--;
3953  if (no == 0) continue;
3954  break;
3955 
3956  case '\\':
 /* Escaped backslash: append just the second character. */
3957  rb_enc_str_buf_cat(val, s-clen, clen, str_enc);
3958  continue;
3959 
3960  default:
 /* Unknown escape: keep the backslash and the character as written. */
3961  rb_enc_str_buf_cat(val, ss, s-ss, str_enc);
3962  continue;
3963  }
3964 
 /* Append the referenced register, skipping ones that are out of range
  * or have BEG == -1. */
3965  if (no >= 0) {
3966  if (no >= regs->num_regs) continue;
3967  if (BEG(no) == -1) continue;
3968  rb_enc_str_buf_cat(val, RSTRING_PTR(src)+BEG(no), END(no)-BEG(no), src_enc);
3969  }
3970  }
3971 
 /* val was never allocated: no escape was processed, str is returned as is. */
3972  if (!val) return str;
 /* Copy the literal text remaining after the last escape. */
3973  if (p < e) {
3974  rb_enc_str_buf_cat(val, p, e-p, str_enc);
3975  }
3976 
3977  return val;
3978 }
3979 
3980 static VALUE
3981 ignorecase_getter(ID _x, VALUE *_y)
3982 {
3983  rb_category_warn(RB_WARN_CATEGORY_DEPRECATED, "variable $= is no longer effective");
3984  return Qfalse;
3985 }
3986 
3987 static void
3988 ignorecase_setter(VALUE val, ID id, VALUE *_)
3989 {
3990  rb_category_warn(RB_WARN_CATEGORY_DEPRECATED, "variable $= is no longer effective; ignored");
3991 }
3992 
3993 static VALUE
3994 match_getter(void)
3995 {
3996  VALUE match = rb_backref_get();
3997 
3998  if (NIL_P(match)) return Qnil;
3999  rb_match_busy(match);
4000  return match;
4001 }
4002 
4003 static VALUE
4004 get_LAST_MATCH_INFO(ID _x, VALUE *_y)
4005 {
4006  return match_getter();
4007 }
4008 
4009 static void
4010 match_setter(VALUE val, ID _x, VALUE *_y)
4011 {
4012  if (!NIL_P(val)) {
4013  Check_Type(val, T_MATCH);
4014  }
4015  rb_backref_set(val);
4016 }
4017 
4018 /*
4019  * call-seq:
4020  * Regexp.last_match -> matchdata
4021  * Regexp.last_match(n) -> str
4022  *
4023  * The first form returns the MatchData object generated by the
4024  * last successful pattern match. Equivalent to reading the special global
4025  * variable <code>$~</code> (see Special global variables in Regexp for
4026  * details).
4027  *
4028  * The second form returns the <i>n</i>th field in this MatchData object.
4029  * _n_ can be a string or symbol to reference a named capture.
4030  *
4031  * Note that the last_match is local to the thread and method scope of the
4032  * method that did the pattern match.
4033  *
4034  * /c(.)t/ =~ 'cat' #=> 0
4035  * Regexp.last_match #=> #<MatchData "cat" 1:"a">
4036  * Regexp.last_match(0) #=> "cat"
4037  * Regexp.last_match(1) #=> "a"
4038  * Regexp.last_match(2) #=> nil
4039  *
4040  * /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/ =~ "var = val"
4041  * Regexp.last_match #=> #<MatchData "var = val" lhs:"var" rhs:"val">
4042  * Regexp.last_match(:lhs) #=> "var"
4043  * Regexp.last_match(:rhs) #=> "val"
4044  */
4045 
4046 static VALUE
4047 rb_reg_s_last_match(int argc, VALUE *argv, VALUE _)
4048 {
4049  if (rb_check_arity(argc, 0, 1) == 1) {
4050  VALUE match = rb_backref_get();
4051  int n;
4052  if (NIL_P(match)) return Qnil;
4053  n = match_backref_number(match, argv[0]);
4054  return rb_reg_nth_match(n, match);
4055  }
4056  return match_getter();
4057 }
4058 
/* Warning callback handed to the regex engine in Init_Regexp(). */
static void
re_warn(const char *s)
{
    /* s is passed as an argument to "%s", never as the format itself. */
    rb_warn("%s", s);
    return;
}
4064 
4065 /*
4066  * Document-class: RegexpError
4067  *
4068  * Raised when given an invalid regexp expression.
4069  *
4070  * Regexp.new("?")
4071  *
4072  * <em>raises the exception:</em>
4073  *
4074  * RegexpError: target of repeat operator is not specified: /?/
4075  */
4076 
4077 /*
4078  * Document-class: Regexp
4079  *
4080  * A Regexp holds a regular expression, used to match a pattern
4081  * against strings. Regexps are created using the <code>/.../</code>
4082  * and <code>%r{...}</code> literals, and by the Regexp::new
4083  * constructor.
4084  *
4085  * You can create a \Regexp object explicitly with:
4086  *
4087  * - A {regexp literal}[doc/syntax/literals_rdoc.html#label-Regexp+Literals].
4088  *
4089  * :include: doc/regexp.rdoc
4090  */
4091 
/*
 * Sets up everything this file exposes: the regex-engine defaults, the
 * match-related special variables, and the Regexp and MatchData classes
 * with their methods and constants.
 *
 * NOTE(review): the listing numbers embedded in this block skip 4095, 4117,
 * 4129, 4131 and 4159, so some statements appear to be missing from this
 * copy -- confirm against the upstream file before relying on it.
 */
4092 void
4093 Init_Regexp(void)
4094 {
4096 
 /* Regex-engine defaults: ASCII as default encoding, re_warn() as the
  * sink for both warning callbacks. */
4097  onigenc_set_default_encoding(ONIG_ENCODING_ASCII);
4098  onig_set_warn_func(re_warn);
4099  onig_set_verb_warn_func(re_warn);
4100 
 /* Special variables; only $~ gets a setter, the others pass 0. */
4101  rb_define_virtual_variable("$~", get_LAST_MATCH_INFO, match_setter);
4102  rb_define_virtual_variable("$&", last_match_getter, 0);
4103  rb_define_virtual_variable("$`", prematch_getter, 0);
4104  rb_define_virtual_variable("$'", postmatch_getter, 0);
4105  rb_define_virtual_variable("$+", last_paren_match_getter, 0);
4106 
 /* The same five variables are passed to rb_gvar_ractor_local(). */
4107  rb_gvar_ractor_local("$~");
4108  rb_gvar_ractor_local("$&");
4109  rb_gvar_ractor_local("$`");
4110  rb_gvar_ractor_local("$'");
4111  rb_gvar_ractor_local("$+");
4112 
 /* $= is kept only so that reading or writing it emits a deprecation
  * warning (see ignorecase_getter / ignorecase_setter). */
4113  rb_define_virtual_variable("$=", ignorecase_getter, ignorecase_setter);
4114 
 /* Regexp class and its singleton methods.  "quote" and "escape" share
  * one implementation.  "union" is registered with -2 and "last_match"
  * with -1, matching the (self, args) and (argc, argv, self) signatures
  * of their functions. */
4115  rb_cRegexp = rb_define_class("Regexp", rb_cObject);
4116  rb_define_alloc_func(rb_cRegexp, rb_reg_s_alloc);
4118  rb_define_singleton_method(rb_cRegexp, "quote", rb_reg_s_quote, 1);
4119  rb_define_singleton_method(rb_cRegexp, "escape", rb_reg_s_quote, 1);
4120  rb_define_singleton_method(rb_cRegexp, "union", rb_reg_s_union_m, -2);
4121  rb_define_singleton_method(rb_cRegexp, "last_match", rb_reg_s_last_match, -1);
4122  rb_define_singleton_method(rb_cRegexp, "try_convert", rb_reg_s_try_convert, 1);
4123 
 /* Regexp instance methods; "eql?" and "==" share rb_reg_equal. */
4124  rb_define_method(rb_cRegexp, "initialize", rb_reg_initialize_m, -1);
4125  rb_define_method(rb_cRegexp, "initialize_copy", rb_reg_init_copy, 1);
4126  rb_define_method(rb_cRegexp, "hash", rb_reg_hash, 0);
4127  rb_define_method(rb_cRegexp, "eql?", rb_reg_equal, 1);
4128  rb_define_method(rb_cRegexp, "==", rb_reg_equal, 1);
4130  rb_define_method(rb_cRegexp, "===", rb_reg_eqq, 1);
4132  rb_define_method(rb_cRegexp, "match", rb_reg_match_m, -1);
4133  rb_define_method(rb_cRegexp, "match?", rb_reg_match_m_p, -1);
4134  rb_define_method(rb_cRegexp, "to_s", rb_reg_to_s, 0);
4135  rb_define_method(rb_cRegexp, "inspect", rb_reg_inspect, 0);
4136  rb_define_method(rb_cRegexp, "source", rb_reg_source, 0);
4137  rb_define_method(rb_cRegexp, "casefold?", rb_reg_casefold_p, 0);
4138  rb_define_method(rb_cRegexp, "options", rb_reg_options_m, 0);
4139  rb_define_method(rb_cRegexp, "encoding", rb_obj_encoding, 0); /* in encoding.c */
4140  rb_define_method(rb_cRegexp, "fixed_encoding?", rb_reg_fixed_encoding_p, 0);
4141  rb_define_method(rb_cRegexp, "names", rb_reg_names, 0);
4142  rb_define_method(rb_cRegexp, "named_captures", rb_reg_named_captures, 0);
4143 
4144  /* see Regexp.options and Regexp.new */
4145  rb_define_const(rb_cRegexp, "IGNORECASE", INT2FIX(ONIG_OPTION_IGNORECASE));
4146  /* see Regexp.options and Regexp.new */
4147  rb_define_const(rb_cRegexp, "EXTENDED", INT2FIX(ONIG_OPTION_EXTEND));
4148  /* see Regexp.options and Regexp.new */
4149  rb_define_const(rb_cRegexp, "MULTILINE", INT2FIX(ONIG_OPTION_MULTILINE));
4150  /* see Regexp.options and Regexp.new */
4151  rb_define_const(rb_cRegexp, "FIXEDENCODING", INT2FIX(ARG_ENCODING_FIXED));
4152  /* see Regexp.options and Regexp.new */
4153  rb_define_const(rb_cRegexp, "NOENCODING", INT2FIX(ARG_ENCODING_NONE));
4154 
 /* Register the address of reg_cache via rb_global_variable(). */
4155  rb_global_variable(&reg_cache);
4156 
 /* MatchData class: "allocate" is undefined on its singleton class. */
4157  rb_cMatch = rb_define_class("MatchData", rb_cObject);
4158  rb_define_alloc_func(rb_cMatch, match_alloc);
4160  rb_undef_method(CLASS_OF(rb_cMatch), "allocate");
4161 
 /* MatchData instance methods; "size"/"length" share match_size and
  * "eql?"/"==" share match_equal. */
4162  rb_define_method(rb_cMatch, "initialize_copy", match_init_copy, 1);
4163  rb_define_method(rb_cMatch, "regexp", match_regexp, 0);
4164  rb_define_method(rb_cMatch, "names", match_names, 0);
4165  rb_define_method(rb_cMatch, "size", match_size, 0);
4166  rb_define_method(rb_cMatch, "length", match_size, 0);
4167  rb_define_method(rb_cMatch, "offset", match_offset, 1);
4168  rb_define_method(rb_cMatch, "begin", match_begin, 1);
4169  rb_define_method(rb_cMatch, "end", match_end, 1);
4170  rb_define_method(rb_cMatch, "match", match_nth, 1);
4171  rb_define_method(rb_cMatch, "match_length", match_nth_length, 1);
4172  rb_define_method(rb_cMatch, "to_a", match_to_a, 0);
4173  rb_define_method(rb_cMatch, "[]", match_aref, -1);
4174  rb_define_method(rb_cMatch, "captures", match_captures, 0);
4175  rb_define_method(rb_cMatch, "named_captures", match_named_captures, 0);
4176  rb_define_method(rb_cMatch, "values_at", match_values_at, -1);
4177  rb_define_method(rb_cMatch, "pre_match", rb_reg_match_pre, 0);
4178  rb_define_method(rb_cMatch, "post_match", rb_reg_match_post, 0);
4179  rb_define_method(rb_cMatch, "to_s", match_to_s, 0);
4180  rb_define_method(rb_cMatch, "inspect", match_inspect, 0);
4181  rb_define_method(rb_cMatch, "string", match_string, 0);
4182  rb_define_method(rb_cMatch, "hash", match_hash, 0);
4183  rb_define_method(rb_cMatch, "eql?", match_equal, 1);
4184  rb_define_method(rb_cMatch, "==", match_equal, 1);
4185 }
#define rb_define_singleton_method(klass, mid, func, arity)
Defines klass.mid.
Definition: cxxanyargs.hpp:685
static bool rb_enc_isprint(OnigCodePoint c, rb_encoding *enc)
Identical to rb_isprint(), except it additionally takes an encoding.
Definition: ctype.h:166
static bool rb_enc_isspace(OnigCodePoint c, rb_encoding *enc)
Identical to rb_isspace(), except it additionally takes an encoding.
Definition: ctype.h:180
VALUE rb_define_class(const char *name, VALUE super)
Defines a top-level class.
Definition: class.c:837
void rb_undef_method(VALUE klass, const char *name)
Defines an undef of a method.
Definition: class.c:1938
int rb_scan_args(int argc, const VALUE *argv, const char *fmt,...)
Retrieves argument from argc and argv to given VALUE references according to the format string.
Definition: class.c:2406
void rb_define_method(VALUE klass, const char *name, VALUE(*func)(ANYARGS), int argc)
Defines a method.
Definition: class.c:1914
int rb_block_given_p(void)
Determines if the current method is given a block.
Definition: eval.c:854
#define rb_str_new2
Old name of rb_str_new_cstr.
Definition: string.h:1738
#define NEWOBJ_OF
Old name of RB_NEWOBJ_OF.
Definition: newobj.h:61
#define ENC_CODERANGE_7BIT
Old name of RUBY_ENC_CODERANGE_7BIT.
Definition: coderange.h:180
#define REALLOC_N
Old name of RB_REALLOC_N.
Definition: memory.h:397
#define OBJ_INIT_COPY(obj, orig)
Old name of RB_OBJ_INIT_COPY.
Definition: object.h:41
#define ISSPACE
Old name of rb_isspace.
Definition: ctype.h:88
#define T_STRING
Old name of RUBY_T_STRING.
Definition: value_type.h:78
#define ENC_CODERANGE_CLEAN_P(cr)
Old name of RB_ENC_CODERANGE_CLEAN_P.
Definition: coderange.h:183
#define INT2FIX
Old name of RB_INT2FIX.
Definition: long.h:48
#define rb_str_buf_new2
Old name of rb_str_buf_new_cstr.
Definition: string.h:1743
#define ENC_CODERANGE(obj)
Old name of RB_ENC_CODERANGE.
Definition: coderange.h:184
#define ZALLOC
Old name of RB_ZALLOC.
Definition: memory.h:396
#define CLASS_OF
Old name of rb_class_of.
Definition: globals.h:203
#define ENC_CODERANGE_UNKNOWN
Old name of RUBY_ENC_CODERANGE_UNKNOWN.
Definition: coderange.h:179
#define ENCODING_GET(obj)
Old name of RB_ENCODING_GET.
Definition: encoding.h:108
#define LONG2FIX
Old name of RB_INT2FIX.
Definition: long.h:49
#define FIX2INT
Old name of RB_FIX2INT.
Definition: int.h:41
#define rb_str_new3
Old name of rb_str_new_shared.
Definition: string.h:1739
#define MBCLEN_CHARFOUND_LEN(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_LEN.
Definition: encoding.h:533
#define FL_SET
Old name of RB_FL_SET.
Definition: fl_type.h:137
#define LONG2NUM
Old name of RB_LONG2NUM.
Definition: long.h:50
#define rb_exc_new3
Old name of rb_exc_new_str.
Definition: error.h:38
#define MBCLEN_INVALID_P(ret)
Old name of ONIGENC_MBCLEN_INVALID_P.
Definition: encoding.h:534
#define Qtrue
Old name of RUBY_Qtrue.
#define ST2FIX
Old name of RB_ST2FIX.
Definition: st_data_t.h:33
#define MBCLEN_NEEDMORE_P(ret)
Old name of ONIGENC_MBCLEN_NEEDMORE_P.
Definition: encoding.h:535
#define NUM2INT
Old name of RB_NUM2INT.
Definition: int.h:44
#define INT2NUM
Old name of RB_INT2NUM.
Definition: int.h:43
#define Qnil
Old name of RUBY_Qnil.
#define Qfalse
Old name of RUBY_Qfalse.
#define ENC_CODERANGE_BROKEN
Old name of RUBY_ENC_CODERANGE_BROKEN.
Definition: coderange.h:182
#define scan_hex(s, l, e)
Old name of ruby_scan_hex.
Definition: util.h:97
#define NIL_P
Old name of RB_NIL_P.
#define MBCLEN_CHARFOUND_P(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_P.
Definition: encoding.h:532
#define FL_WB_PROTECTED
Old name of RUBY_FL_WB_PROTECTED.
Definition: fl_type.h:59
#define T_MATCH
Old name of RUBY_T_MATCH.
Definition: value_type.h:69
#define FL_TEST
Old name of RB_FL_TEST.
Definition: fl_type.h:139
#define NUM2LONG
Old name of RB_NUM2LONG.
Definition: long.h:51
#define FL_UNSET
Old name of RB_FL_UNSET.
Definition: fl_type.h:141
#define FIXNUM_P
Old name of RB_FIXNUM_P.
#define scan_oct(s, l, e)
Old name of ruby_scan_oct.
Definition: util.h:74
#define rb_ary_new2
Old name of rb_ary_new_capa.
Definition: array.h:651
#define rb_str_new4
Old name of rb_str_new_frozen.
Definition: string.h:1740
#define SYMBOL_P
Old name of RB_SYMBOL_P.
Definition: value_type.h:88
#define T_REGEXP
Old name of RUBY_T_REGEXP.
Definition: value_type.h:77
void rb_category_warn(rb_warning_category_t category, const char *fmt,...)
Identical to rb_category_warning(), except it reports always regardless of runtime -W flag.
Definition: error.c:428
void rb_raise(VALUE exc, const char *fmt,...)
Exception entry point.
Definition: error.c:3025
void rb_exc_raise(VALUE mesg)
Raises an exception in the current thread.
Definition: eval.c:675
void rb_bug(const char *fmt,...)
Interpreter panic switch.
Definition: error.c:802
VALUE rb_eStandardError
StandardError exception.
Definition: error.c:1096
void rb_set_errinfo(VALUE err)
Sets the current exception ($!) to the given value.
Definition: eval.c:1764
VALUE rb_eRegexpError
RegexpError exception.
Definition: re.c:28
#define ruby_verbose
This variable controls whether the interpreter is in debug mode.
Definition: error.h:459
VALUE rb_eTypeError
TypeError exception.
Definition: error.c:1099
VALUE rb_eEncCompatError
Encoding::CompatibilityError exception.
Definition: error.c:1106
VALUE rb_eRuntimeError
RuntimeError exception.
Definition: error.c:1097
void rb_warn(const char *fmt,...)
Identical to rb_warning(), except it reports always regardless of runtime -W flag.
Definition: error.c:418
VALUE rb_eArgError
ArgumentError exception.
Definition: error.c:1100
VALUE rb_eIndexError
IndexError exception.
Definition: error.c:1101
VALUE rb_eSecurityError
SecurityError exception.
Definition: error.c:1108
@ RB_WARN_CATEGORY_DEPRECATED
Warning is for deprecated features.
Definition: error.h:48
VALUE rb_check_convert_type(VALUE val, int type, const char *name, const char *mid)
Identical to rb_convert_type(), except it returns RUBY_Qnil instead of raising exceptions,...
Definition: object.c:2933
VALUE rb_any_to_s(VALUE obj)
Generates a textual representation of the given object.
Definition: object.c:553
VALUE rb_class_new_instance(int argc, const VALUE *argv, VALUE klass)
Allocates, then initialises an instance of the given class.
Definition: object.c:1950
VALUE rb_cMatch
MatchData class.
Definition: re.c:939
VALUE rb_cRegexp
Regexp class.
Definition: re.c:2370
VALUE rb_obj_class(VALUE obj)
Queries the class of an object.
Definition: object.c:188
VALUE rb_obj_freeze(VALUE obj)
Just calls rb_obj_freeze_inline() inside.
Definition: object.c:1161
#define RB_OBJ_WRITE(old, slot, young)
Declaration of a "back" pointer.
Definition: rgengc.h:220
Encoding-related APIs.
rb_encoding * rb_default_external_encoding(void)
Queries the "default external" encoding.
Definition: encoding.c:1637
int rb_enc_dummy_p(rb_encoding *enc)
Queries if the passed encoding is dummy.
Definition: encoding.c:203
int rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc)
Queries the number of bytes of the character at the passed pointer.
Definition: encoding.c:1234
VALUE rb_enc_associate(VALUE obj, rb_encoding *enc)
Identical to rb_enc_associate_index(), except it takes an encoding itself instead of its index.
Definition: encoding.c:1066
rb_encoding * rb_usascii_encoding(void)
Queries the encoding that represents US-ASCII.
Definition: encoding.c:1539
static char * rb_enc_left_char_head(const char *s, const char *p, const char *e, rb_encoding *enc)
Queries the left boundary of a character.
Definition: encoding.h:697
void rb_enc_copy(VALUE dst, VALUE src)
Destructively copies the encoding of the latter object to that of former one.
Definition: encoding.c:1192
int rb_utf8_encindex(void)
Identical to rb_utf8_encoding(), except it returns the encoding's index instead of the encoding itself.
Definition: encoding.c:1533
int rb_ascii8bit_encindex(void)
Identical to rb_ascii8bit_encoding(), except it returns the encoding's index instead of the encoding itself.
Definition: encoding.c:1521
int rb_enc_unicode_p(rb_encoding *enc)
Queries if the passed encoding is either one of UTF-8/16/32.
Definition: encoding.c:689
rb_encoding * rb_default_internal_encoding(void)
Queries the "default internal" encoding.
Definition: encoding.c:1724
rb_encoding * rb_ascii8bit_encoding(void)
Queries the encoding that represents ASCII-8BIT a.k.a.
Definition: encoding.c:1515
VALUE rb_enc_from_encoding(rb_encoding *enc)
Queries the Ruby-level counterpart instance of rb_cEncoding that corresponds to the passed encoding.
Definition: encoding.c:188
static bool rb_enc_asciicompat(rb_encoding *enc)
Queries if the passed encoding is in some sense compatible with ASCII.
Definition: encoding.h:782
rb_encoding * rb_utf8_encoding(void)
Queries the encoding that represents UTF-8.
Definition: encoding.c:1527
static int rb_enc_mbcput(unsigned int c, void *buf, rb_encoding *enc)
Identical to rb_enc_uint_chr(), except it writes back to the passed buffer instead of allocating one.
Definition: encoding.h:657
int rb_char_to_option_kcode(int c, int *option, int *kcode)
Converts a character option to its encoding.
Definition: re.c:329
static int rb_enc_mbmaxlen(rb_encoding *enc)
Queries the maximum number of bytes that the passed encoding needs to represent a character.
Definition: encoding.h:463
rb_encoding * rb_enc_get(VALUE obj)
Identical to rb_enc_get_index(), except the return type.
Definition: encoding.c:1072
static OnigCodePoint rb_enc_mbc_to_codepoint(const char *p, const char *e, rb_encoding *enc)
Identical to rb_enc_codepoint(), except it assumes the passed character is not broken.
Definition: encoding.h:607
static const char * rb_enc_name(rb_encoding *enc)
Queries the (canonical) name of the passed encoding.
Definition: encoding.h:433
static int rb_enc_mbminlen(rb_encoding *enc)
Queries the minimum number of bytes that the passed encoding needs to represent a character.
Definition: encoding.h:448
int rb_enc_ascget(const char *p, const char *e, int *len, rb_encoding *enc)
Queries the code point of character pointed by the passed pointer.
Definition: encoding.c:1246
VALUE rb_enc_reg_new(const char *ptr, long len, rb_encoding *enc, int opts)
Identical to rb_reg_new(), except it additionally takes an encoding.
Definition: re.c:3015
int rb_enc_str_coderange(VALUE str)
Scans the passed string to collect its code range.
Definition: string.c:776
long rb_memsearch(const void *x, long m, const void *y, long n, rb_encoding *enc)
Looks for the passed string in the passed buffer.
Definition: re.c:247
long rb_enc_strlen(const char *head, const char *tail, rb_encoding *enc)
Counts the number of characters of the passed string, according to the passed encoding.
Definition: string.c:2071
VALUE rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *enc)
Identical to rb_str_cat(), except it additionally takes an encoding.
Definition: string.c:3271
VALUE rb_enc_str_new(const char *ptr, long len, rb_encoding *enc)
Identical to rb_str_new(), except it additionally takes an encoding.
Definition: string.c:940
int rb_enc_str_asciionly_p(VALUE str)
Queries if the passed string is "ASCII only".
Definition: string.c:790
VALUE rb_obj_encoding(VALUE obj)
Identical to rb_enc_get_index(), except the return type.
Definition: encoding.c:1206
long rb_str_coderange_scan_restartable(const char *str, const char *end, rb_encoding *enc, int *cr)
Scans the passed string until it finds something odd.
Definition: string.c:668
VALUE rb_str_encode(VALUE str, VALUE to, int ecflags, VALUE ecopts)
Converts the contents of the passed string from its encoding to the passed one.
Definition: transcode.c:2929
void rb_global_variable(VALUE *)
An alias for rb_gc_register_address().
Definition: gc.c:8743
VALUE rb_check_array_type(VALUE obj)
Try converting an object to its array representation using its to_ary method, if any.
Definition: array.c:989
VALUE rb_ary_new_capa(long capa)
Identical to rb_ary_new(), except it additionally specifies how many rooms of objects it should allocate.
Definition: array.c:744
VALUE rb_ary_resize(VALUE ary, long len)
Expands or shrinks the passed array to the passed length.
Definition: array.c:2234
VALUE rb_ary_push(VALUE ary, VALUE elem)
Special case of rb_ary_cat() that it adds only one element.
Definition: array.c:1308
VALUE rb_ary_entry(VALUE ary, long off)
Queries an element of an array.
Definition: array.c:1679
VALUE rb_assoc_new(VALUE car, VALUE cdr)
Identical to rb_ary_new_from_values(), except it expects exactly two parameters.
Definition: array.c:976
void rb_ary_store(VALUE ary, long key, VALUE val)
Destructively stores the passed value to the passed array's passed index.
Definition: array.c:1148
int rb_uv_to_utf8(char buf[6], unsigned long uv)
Encodes a Unicode codepoint into its UTF-8 representation.
Definition: pack.c:1638
#define rb_check_frozen
Just another name of rb_check_frozen.
Definition: error.h:278
static int rb_check_arity(int argc, int min, int max)
Ensures that the passed integer is in the passed range.
Definition: error.h:294
void rb_memerror(void)
Triggers out-of-memory error.
Definition: gc.c:11117
void rb_gc(void)
Triggers a GC process.
Definition: gc.c:10293
VALUE rb_hash_aset(VALUE hash, VALUE key, VALUE val)
Inserts or replaces ("upsert"s) the objects into the given hash table.
Definition: hash.c:2903
VALUE rb_hash_new(void)
Creates a new, empty hash object.
Definition: hash.c:1529
VALUE rb_backref_get(void)
Queries the last match, or Regexp.last_match, or the $~.
Definition: vm.c:1580
VALUE rb_lastline_get(void)
Queries the last line, or the $_.
Definition: vm.c:1592
void rb_backref_set(VALUE md)
Updates $~.
Definition: vm.c:1586
VALUE rb_range_beg_len(VALUE range, long *begp, long *lenp, long len, int err)
Deconstructs a numerical range.
Definition: range.c:1578
int rb_reg_backref_number(VALUE match, VALUE backref)
Queries the index of the given named capture.
Definition: re.c:1197
int rb_reg_options(VALUE re)
Queries the options of the passed regular expression.
Definition: re.c:3659
VALUE rb_reg_last_match(VALUE md)
This just returns the argument, stringified.
Definition: re.c:1818
VALUE rb_reg_match(VALUE re, VALUE str)
This is the match operator.
Definition: re.c:3260
void rb_match_busy(VALUE md)
Asserts that the given MatchData is "occupied".
Definition: re.c:1377
VALUE rb_reg_nth_match(int n, VALUE md)
Queries the nth captured substring.
Definition: re.c:1793
VALUE rb_reg_match_post(VALUE md)
The portion of the original string after the given match.
Definition: re.c:1862
VALUE rb_reg_nth_defined(int n, VALUE md)
Identical to rb_reg_nth_match(), except it just returns Boolean.
Definition: re.c:1776
VALUE rb_reg_match_pre(VALUE md)
The portion of the original string before the given match.
Definition: re.c:1836
VALUE rb_reg_new_str(VALUE src, int opts)
Identical to rb_reg_new(), except it takes the expression in Ruby's string instead of C's.
Definition: re.c:2975
VALUE rb_reg_match_last(VALUE md)
The portion of the original string that was captured at the very last.
Definition: re.c:1879
VALUE rb_reg_match2(VALUE re)
Identical to rb_reg_match(), except it matches against rb_lastline_get() (or, the $_).
Definition: re.c:3319
VALUE rb_reg_new(const char *src, long len, int opts)
Creates a new Regular expression.
Definition: re.c:3029
int rb_memcicmp(const void *s1, const void *s2, long n)
Identical to st_locale_insensitive_strcasecmp(), except it is timing safe and returns something diffe...
Definition: re.c:88
#define rb_hash_uint(h, i)
Just another name of st_hash_uint.
Definition: string.h:973
#define rb_hash_end(h)
Just another name of st_hash_end.
Definition: string.h:976
VALUE rb_str_append(VALUE dst, VALUE src)
Identical to rb_str_buf_append(), except it converts the right hand side before concatenating.
Definition: string.c:3317
long rb_str_offset(VALUE str, long pos)
"Inverse" of rb_str_sublen().
Definition: string.c:2763
VALUE rb_str_subseq(VALUE str, long beg, long len)
Identical to rb_str_substr(), except the numbers are interpreted as byte offsets instead of character...
Definition: string.c:2821
st_index_t rb_memhash(const void *ptr, long len)
This is a universal hash function.
Definition: random.c:1720
VALUE rb_str_buf_cat(VALUE, const char *, long)
Just another name of rb_str_cat.
VALUE rb_str_dup(VALUE str)
Duplicates a string.
Definition: string.c:1808
st_index_t rb_str_hash(VALUE str)
Calculates a hash value of a string.
Definition: string.c:3516
VALUE rb_str_buf_cat2(VALUE, const char *)
Just another name of rb_str_cat_cstr.
char * rb_str_subpos(VALUE str, long beg, long *len)
Identical to rb_str_substr(), except it returns a C string instead of a Ruby string.
Definition: string.c:2844
VALUE rb_str_buf_append(VALUE dst, VALUE src)
Identical to rb_str_cat_cstr(), except it takes Ruby's string instead of C's.
Definition: string.c:3302
long rb_str_sublen(VALUE str, long pos)
Byte offset to character offset conversion.
Definition: string.c:2810
VALUE rb_str_equal(VALUE str1, VALUE str2)
Equality of two strings.
Definition: string.c:3628
st_index_t rb_hash_start(st_index_t i)
Starts a series of hashing.
Definition: random.c:1714
VALUE rb_str_inspect(VALUE str)
Generates a "readable" version of the receiver.
Definition: string.c:6456
VALUE rb_str_buf_cat_ascii(VALUE dst, const char *src)
Identical to rb_str_cat_cstr(), except it additionally assumes the source string be a NUL terminated ...
Definition: string.c:3278
VALUE rb_str_new(const char *ptr, long len)
Allocates an instance of rb_cString.
Definition: string.c:918
VALUE rb_check_string_type(VALUE obj)
Try converting an object to its stringised representation using its to_str method,...
Definition: string.c:2659
VALUE rb_str_resize(VALUE str, long len)
Overwrites the length of the string.
Definition: string.c:3056
VALUE rb_str_buf_new(long capa)
Allocates a "string buffer".
Definition: string.c:1506
VALUE rb_str_length(VALUE)
Identical to rb_str_strlen(), except it returns the value in rb_cInteger.
Definition: string.c:2180
VALUE rb_class_path(VALUE mod)
Identical to rb_mod_name(), except it returns #<Class: ...> style inspection for anonymous modules.
Definition: variable.c:172
void rb_define_alloc_func(VALUE klass, rb_alloc_func_t func)
Sets the allocator function of a class.
VALUE rb_sym2str(VALUE id)
Identical to rb_id2str(), except it takes an instance of rb_cSymbol rather than an ID.
Definition: symbol.c:924
void rb_define_virtual_variable(const char *name, rb_gvar_getter_t *getter, rb_gvar_setter_t *setter)
Defines a global variable that is purely function-backed.
Definition: variable.c:594
void rb_define_const(VALUE klass, const char *name, VALUE val)
Defines a Ruby level constant under a namespace.
Definition: variable.c:3253
regex_t * rb_reg_prepare_re(VALUE re, VALUE str)
Exercises various checks and preprocesses so that the given regular expression can be applied to the ...
Definition: re.c:1580
long rb_reg_search(VALUE re, VALUE str, long pos, int dir)
Runs the passed regular expression over the passed string.
Definition: re.c:1697
long rb_reg_adjust_startpos(VALUE re, VALUE str, long pos, int dir)
Tell us if this is a wrong idea, but it seems this function has no usage at all.
Definition: re.c:1587
VALUE rb_reg_regcomp(VALUE str)
Creates a new instance of rb_cRegexp.
Definition: re.c:3053
VALUE rb_reg_quote(VALUE str)
Escapes any characters that would have special meaning in a regular expression.
Definition: re.c:3541
VALUE rb_reg_regsub(VALUE repl, VALUE src, struct re_registers *regs, VALUE rexp)
Substitution.
Definition: re.c:3856
int rb_reg_region_copy(struct re_registers *dst, const struct re_registers *src)
Duplicates a match data.
Definition: re.c:955
unsigned long ruby_scan_hex(const char *str, size_t len, size_t *ret)
Interprets the passed string as a hexadecimal unsigned integer.
Definition: util.c:56
unsigned long ruby_scan_oct(const char *str, size_t len, size_t *consumed)
Interprets the passed string as an octal unsigned integer.
Definition: util.c:38
VALUE rb_sprintf(const char *fmt,...)
Ruby's extended sprintf(3).
Definition: sprintf.c:1201
VALUE rb_str_catf(VALUE dst, const char *fmt,...)
Identical to rb_sprintf(), except it renders the output to the specified object rather than creating ...
Definition: sprintf.c:1241
VALUE rb_yield(VALUE val)
Yields the block.
Definition: vm_eval.c:1357
#define MEMCPY(p1, p2, type, n)
Handy macro to call memcpy.
Definition: memory.h:366
#define ALLOCA_N(type, n)
Definition: memory.h:286
#define MEMZERO(p, type, n)
Handy macro to erase a region of memory.
Definition: memory.h:354
#define RB_GC_GUARD(v)
Prevents premature destruction of local objects.
Definition: memory.h:161
#define RARRAY_LEN
Just another name of rb_array_len.
Definition: rarray.h:68
#define RARRAY_AREF(a, i)
Definition: rarray.h:588
#define RBASIC(obj)
Convenient casting macro.
Definition: rbasic.h:40
#define RGENGC_WB_PROTECTED_REGEXP
This is a compile-time flag to enable/disable write barrier for struct RRegexp.
Definition: rgengc.h:129
#define RMATCH(obj)
Convenient casting macro.
Definition: rmatch.h:37
static struct re_registers * RMATCH_REGS(VALUE match)
Queries the raw re_registers.
Definition: rmatch.h:139
#define RREGEXP(obj)
Convenient casting macro.
Definition: rregexp.h:37
static VALUE RREGEXP_SRC(VALUE rexp)
Convenient getter function.
Definition: rregexp.h:103
static char * RREGEXP_SRC_PTR(VALUE rexp)
Convenient getter function.
Definition: rregexp.h:125
#define RREGEXP_PTR(obj)
Convenient accessor macro.
Definition: rregexp.h:45
static long RREGEXP_SRC_LEN(VALUE rexp)
Convenient getter function.
Definition: rregexp.h:144
#define StringValue(v)
Ensures that the parameter object is a String.
Definition: rstring.h:72
#define StringValuePtr(v)
Identical to StringValue, except it returns a char*.
Definition: rstring.h:82
static char * RSTRING_END(VALUE str)
Queries the end of the contents pointer of the string.
Definition: rstring.h:527
static char * RSTRING_PTR(VALUE str)
Queries the contents pointer of the string.
Definition: rstring.h:497
#define RSTRING_GETMEM(str, ptrvar, lenvar)
Convenient macro to obtain the contents and length at once.
Definition: rstring.h:573
static long RSTRING_LEN(VALUE str)
Queries the length of the string.
Definition: rstring.h:483
VALUE rb_str_to_str(VALUE obj)
Identical to rb_check_string_type(), except it raises exceptions in case of conversion failures.
Definition: string.c:1584
#define StringValueCStr(v)
Identical to StringValuePtr, except it additionally checks for the contents for viability as a C stri...
Definition: rstring.h:95
#define RTEST
This is an old name of RB_TEST.
#define _(args)
This was a transition path from K&R to ANSI.
Definition: stdarg.h:35
MEMO.
Definition: imemo.h:104
VALUE flags
Per-object flags.
Definition: rbasic.h:77
Regular expression execution context.
Definition: rmatch.h:94
VALUE regexp
The expression of this match.
Definition: rmatch.h:112
struct rmatch * rmatch
The result of this match.
Definition: rmatch.h:107
VALUE str
The target string that the match was made against.
Definition: rmatch.h:102
Ruby's regular expression.
Definition: rregexp.h:60
struct RBasic basic
Basic part, including flags and class.
Definition: rregexp.h:63
const VALUE src
Source code of this expression.
Definition: rregexp.h:74
unsigned long usecnt
Reference count.
Definition: rregexp.h:90
struct re_pattern_buffer * ptr
The pattern buffer.
Definition: rregexp.h:71
Definition: re.c:965
Represents the region of a capture group.
Definition: rmatch.h:65
long beg
Beginning of a group.
Definition: rmatch.h:66
long end
End of a group.
Definition: rmatch.h:67
Represents a match.
Definition: rmatch.h:71
int char_offset_num_allocated
Number of rmatch_offset that rmatch::char_offset holds.
Definition: rmatch.h:82
struct rmatch_offset * char_offset
Capture group offsets, in C array.
Definition: rmatch.h:79
struct re_registers regs
"Registers" of a match.
Definition: rmatch.h:76
uintptr_t ID
Type that represents a Ruby identifier such as a variable name.
Definition: value.h:52
#define SIZEOF_VALUE
Identical to sizeof(VALUE), except it is a macro that can also be used inside of preprocessor directi...
Definition: value.h:69
uintptr_t VALUE
Type that represents a Ruby object.
Definition: value.h:40
static void Check_Type(VALUE v, enum ruby_value_type t)
Identical to RB_TYPE_P(), except it raises exceptions on predication failure.
Definition: value_type.h:432
static bool RB_TYPE_P(VALUE obj, enum ruby_value_type t)
Queries if the given object is of given type.
Definition: value_type.h:375