12 #include "ruby/internal/config.h"
18 #include "internal/hash.h"
19 #include "internal/imemo.h"
20 #include "internal/re.h"
21 #include "internal/string.h"
22 #include "internal/variable.h"
/* Fixed-size char buffer, ONIG_MAX_ERROR_MESSAGE_LEN bytes long, that the
 * functions in this file pass around as `err` to carry an error message. */
30 typedef char onig_errmsg_buffer[ONIG_MAX_ERROR_MESSAGE_LEN];
/* Copy the C string `msg` into the buffer `err` via strlcpy(), using
 * ONIG_MAX_ERROR_MESSAGE_LEN as the destination size.  `err` is therefore
 * expected to be at least that large (e.g. an onig_errmsg_buffer). */
31 #define errcpy(err, msg) strlcpy((err), (msg), ONIG_MAX_ERROR_MESSAGE_LEN)
/* Shorthand for entry `no` of the beg[] and end[] arrays of `regs`.
 * Both macros expand to an access through a variable literally named
 * `regs`, so one must be in scope wherever they are used.
 * Code in this file stores these values as byte positions and compares
 * them against -1 (presumably "this group did not take part in the
 * match" -- confirm against the regex library's documentation). */
33 #define BEG(no) (regs->beg[(no)])
34 #define END(no) (regs->end[(no)])
37 static const char casetable[] = {
38 '\000',
'\001',
'\002',
'\003',
'\004',
'\005',
'\006',
'\007',
39 '\010',
'\011',
'\012',
'\013',
'\014',
'\015',
'\016',
'\017',
40 '\020',
'\021',
'\022',
'\023',
'\024',
'\025',
'\026',
'\027',
41 '\030',
'\031',
'\032',
'\033',
'\034',
'\035',
'\036',
'\037',
43 '\040',
'\041',
'\042',
'\043',
'\044',
'\045',
'\046',
'\047',
45 '\050',
'\051',
'\052',
'\053',
'\054',
'\055',
'\056',
'\057',
47 '\060',
'\061',
'\062',
'\063',
'\064',
'\065',
'\066',
'\067',
49 '\070',
'\071',
'\072',
'\073',
'\074',
'\075',
'\076',
'\077',
51 '\100',
'\141',
'\142',
'\143',
'\144',
'\145',
'\146',
'\147',
53 '\150',
'\151',
'\152',
'\153',
'\154',
'\155',
'\156',
'\157',
55 '\160',
'\161',
'\162',
'\163',
'\164',
'\165',
'\166',
'\167',
57 '\170',
'\171',
'\172',
'\133',
'\134',
'\135',
'\136',
'\137',
59 '\140',
'\141',
'\142',
'\143',
'\144',
'\145',
'\146',
'\147',
61 '\150',
'\151',
'\152',
'\153',
'\154',
'\155',
'\156',
'\157',
63 '\160',
'\161',
'\162',
'\163',
'\164',
'\165',
'\166',
'\167',
65 '\170',
'\171',
'\172',
'\173',
'\174',
'\175',
'\176',
'\177',
66 '\200',
'\201',
'\202',
'\203',
'\204',
'\205',
'\206',
'\207',
67 '\210',
'\211',
'\212',
'\213',
'\214',
'\215',
'\216',
'\217',
68 '\220',
'\221',
'\222',
'\223',
'\224',
'\225',
'\226',
'\227',
69 '\230',
'\231',
'\232',
'\233',
'\234',
'\235',
'\236',
'\237',
70 '\240',
'\241',
'\242',
'\243',
'\244',
'\245',
'\246',
'\247',
71 '\250',
'\251',
'\252',
'\253',
'\254',
'\255',
'\256',
'\257',
72 '\260',
'\261',
'\262',
'\263',
'\264',
'\265',
'\266',
'\267',
73 '\270',
'\271',
'\272',
'\273',
'\274',
'\275',
'\276',
'\277',
74 '\300',
'\301',
'\302',
'\303',
'\304',
'\305',
'\306',
'\307',
75 '\310',
'\311',
'\312',
'\313',
'\314',
'\315',
'\316',
'\317',
76 '\320',
'\321',
'\322',
'\323',
'\324',
'\325',
'\326',
'\327',
77 '\330',
'\331',
'\332',
'\333',
'\334',
'\335',
'\336',
'\337',
78 '\340',
'\341',
'\342',
'\343',
'\344',
'\345',
'\346',
'\347',
79 '\350',
'\351',
'\352',
'\353',
'\354',
'\355',
'\356',
'\357',
80 '\360',
'\361',
'\362',
'\363',
'\364',
'\365',
'\366',
'\367',
81 '\370',
'\371',
'\372',
'\373',
'\374',
'\375',
'\376',
'\377',
84 # error >>> "You lose. You will need a translation table for your character set." <<<
90 const unsigned char *p1 = x, *p2 = y;
94 if ((tmp = casetable[(
unsigned)*p1++] - casetable[(
unsigned)*p2++]))
102 rb_memsearch_ss(
const unsigned char *xs,
long m,
const unsigned char *ys,
long n)
104 const unsigned char *y;
106 if ((y = memmem(ys, n, xs, m)) != NULL)
113 rb_memsearch_ss(
const unsigned char *xs,
long m,
const unsigned char *ys,
long n)
115 const unsigned char *x = xs, *xe = xs + m;
116 const unsigned char *y = ys, *ye = ys + n;
117 #define VALUE_MAX ((VALUE)~(VALUE)0)
121 rb_bug(
"!!too long pattern string!!");
123 if (!(y = memchr(y, *x, n - m + 1)))
127 for (hx = *x++, hy = *y++; x < xe; ++x, ++y) {
147 rb_memsearch_qs(
const unsigned char *xs,
long m,
const unsigned char *ys,
long n)
149 const unsigned char *x = xs, *xe = xs + m;
150 const unsigned char *y = ys;
151 VALUE i, qstable[256];
154 for (i = 0; i < 256; ++i)
157 qstable[*x] = xe - x;
159 for (; y + m <= ys + n; y += *(qstable + y[m])) {
160 if (*xs == *y && memcmp(xs, y, m) == 0)
166 static inline unsigned int
167 rb_memsearch_qs_utf8_hash(
const unsigned char *x)
169 register const unsigned int mix = 8353;
170 register unsigned int h = *x;
195 return (
unsigned char)h;
199 rb_memsearch_qs_utf8(
const unsigned char *xs,
long m,
const unsigned char *ys,
long n)
201 const unsigned char *x = xs, *xe = xs + m;
202 const unsigned char *y = ys;
203 VALUE i, qstable[512];
206 for (i = 0; i < 512; ++i) {
209 for (; x < xe; ++x) {
210 qstable[rb_memsearch_qs_utf8_hash(x)] = xe - x;
213 for (; y + m <= ys + n; y += qstable[rb_memsearch_qs_utf8_hash(y+m)]) {
214 if (*xs == *y && memcmp(xs, y, m) == 0)
221 rb_memsearch_wchar(
const unsigned char *xs,
long m,
const unsigned char *ys,
long n)
223 const unsigned char *x = xs, x0 = *xs, *y = ys;
224 enum {char_size = 2};
226 for (n -= m; n >= 0; n -= char_size, y += char_size) {
227 if (x0 == *y && memcmp(x+1, y+1, m-1) == 0)
234 rb_memsearch_qchar(
const unsigned char *xs,
long m,
const unsigned char *ys,
long n)
236 const unsigned char *x = xs, x0 = *xs, *y = ys;
237 enum {char_size = 4};
239 for (n -= m; n >= 0; n -= char_size, y += char_size) {
240 if (x0 == *y && memcmp(x+1, y+1, m-1) == 0)
249 const unsigned char *x = x0, *y = y0;
251 if (m > n)
return -1;
253 return memcmp(x0, y0, m) == 0 ? 0 : -1;
259 const unsigned char *ys = memchr(y, *x, n);
268 return rb_memsearch_ss(x0, m, y0, n);
271 return rb_memsearch_qs_utf8(x0, m, y0, n);
275 return rb_memsearch_wchar(x0, m, y0, n);
278 return rb_memsearch_qchar(x0, m, y0, n);
280 return rb_memsearch_qs(x0, m, y0, n);
/* Flag bits kept in a regexp object's flags word.  In this file they are
 * read with FL_TEST(obj, REG_LITERAL) and with
 * `RBASIC(re)->flags & REG_ENCODING_NONE`. */
283 #define REG_LITERAL FL_USER5
284 #define REG_ENCODING_NONE FL_USER6
/* Flag bit tested by rb_reg_fixed_encoding_p() via FL_TEST(re, KCODE_FIXED). */
286 #define KCODE_FIXED FL_USER4
/* Mask selecting the three regex-engine option bits (ignore-case,
 * multiline, extended) out of an `options` word. */
288 #define ARG_REG_OPTION_MASK \
289 (ONIG_OPTION_IGNORECASE|ONIG_OPTION_MULTILINE|ONIG_OPTION_EXTEND)
/* Extra bits carried in the same integer `options` argument, next to the
 * bits covered by ARG_REG_OPTION_MASK.  Elsewhere in this file they are
 * derived from the KCODE_FIXED and REG_ENCODING_NONE object flags. */
290 #define ARG_ENCODING_FIXED 16
291 #define ARG_ENCODING_NONE 32
294 char_to_option(
int c)
300 val = ONIG_OPTION_IGNORECASE;
303 val = ONIG_OPTION_EXTEND;
306 val = ONIG_OPTION_MULTILINE;
/* Size of the character buffer handed to option_to_str().  That function
 * emits at most the three letters 'm', 'i' and 'x'; the fourth byte is
 * presumably for a terminating NUL (the terminating write is not visible
 * here -- confirm).  Callers that need a leading extra character declare
 * their buffer as OPTBUF_SIZE + 1 and pass `buf + 1`. */
315 enum { OPTBUF_SIZE = 4 };
318 option_to_str(
char str[OPTBUF_SIZE],
int options)
321 if (options & ONIG_OPTION_MULTILINE) *p++ =
'm';
322 if (options & ONIG_OPTION_IGNORECASE) *p++ =
'i';
323 if (options & ONIG_OPTION_EXTEND) *p++ =
'x';
336 return (*option = ARG_ENCODING_NONE);
338 *kcode = ENCINDEX_EUC_JP;
341 *kcode = ENCINDEX_Windows_31J;
348 return (*option = char_to_option(c));
350 *option = ARG_ENCODING_FIXED;
355 rb_reg_check(
VALUE re)
363 rb_reg_expr_str(
VALUE str,
const char *s,
long len,
366 const char *p, *pend;
371 p = s; pend = p + len;
378 p += mbclen(p, pend, enc);
406 if (c ==
'\\' && p+clen < pend) {
407 int n = clen + mbclen(p+clen, pend, enc);
415 c = (
unsigned char)*p;
421 rb_str_buf_cat_escaped_char(str, c, unicode_p);
428 else if (c == term) {
440 snprintf(b,
sizeof(b),
"\\x%02X", c);
452 rb_reg_desc(
const char *s,
long len,
VALUE re)
465 rb_reg_expr_str(str, s, len, enc, resenc,
'/');
468 char opts[OPTBUF_SIZE];
470 if (*option_to_str(opts,
RREGEXP_PTR(re)->options))
472 if (
RBASIC(re)->flags & REG_ENCODING_NONE)
494 rb_reg_source(
VALUE re)
516 rb_reg_inspect(
VALUE re)
/* Forward declaration: rb_reg_to_s() calls this with term '/' before the
 * definition appears further down; another caller in this file passes -1
 * as `term`. */
524 static VALUE rb_reg_str_with_term(
VALUE re,
int term);
547 rb_reg_to_s(
VALUE re)
549 return rb_reg_str_with_term(re,
'/');
553 rb_reg_str_with_term(
VALUE re,
int term)
556 const int embeddable = ONIG_OPTION_MULTILINE|ONIG_OPTION_IGNORECASE|ONIG_OPTION_EXTEND;
560 char optbuf[OPTBUF_SIZE + 1];
570 if (len >= 4 && ptr[0] ==
'(' && ptr[1] ==
'?') {
573 if ((len -= 2) > 0) {
575 opt = char_to_option((
int )*ptr);
585 if (len > 1 && *ptr ==
'-') {
589 opt = char_to_option((
int )*ptr);
604 if (*ptr ==
':' && ptr[len-1] ==
')') {
611 err = onig_new(&rp, ptr, ptr + len, options,
612 enc, OnigDefaultSyntax, NULL);
625 if ((options & embeddable) != embeddable) {
627 option_to_str(optbuf + 1, ~options);
633 rb_reg_expr_str(str, (
char*)ptr, len, enc, NULL, term);
653 rb_reg_expr_str(str, (
char*)ptr, len, enc, NULL, term);
/* Prototype for rb_reg_raise(), defined immediately below.  It is declared
 * NORETURN, so callers may assume control does not come back from it.
 * `s`/`len` give the pattern text and `err` the error message; the
 * definition builds a description from them with rb_reg_desc(). */
661 NORETURN(
static void rb_reg_raise(
const char *s,
long len,
const char *err,
VALUE re));
664 rb_reg_raise(
const char *s,
long len,
const char *err,
VALUE re)
666 VALUE desc = rb_reg_desc(s, len, re);
672 rb_enc_reg_error_desc(
const char *s,
long len,
rb_encoding *enc,
int options,
const char *err)
674 char opts[OPTBUF_SIZE + 1];
681 rb_reg_expr_str(desc, s, len, enc, resenc,
'/');
683 option_to_str(opts + 1, options);
/* Prototype for rb_enc_reg_raise(), defined immediately below and declared
 * NORETURN.  The definition raises the exception object produced by
 * rb_enc_reg_error_desc(s, len, enc, options, err) through rb_exc_raise(). */
688 NORETURN(
static void rb_enc_reg_raise(
const char *s,
long len,
rb_encoding *enc,
int options,
const char *err));
691 rb_enc_reg_raise(
const char *s,
long len,
rb_encoding *enc,
int options,
const char *err)
693 rb_exc_raise(rb_enc_reg_error_desc(s, len, enc, options, err));
697 rb_reg_error_desc(
VALUE str,
int options,
const char *err)
/* Prototype for rb_reg_raise_str(), defined immediately below and declared
 * NORETURN.  In this file it is called with the source string, the
 * options and the filled-in error buffer after regexp initialization
 * reports failure. */
703 NORETURN(
static void rb_reg_raise_str(
VALUE str,
int options,
const char *err));
706 rb_reg_raise_str(
VALUE str,
int options,
const char *err)
724 rb_reg_casefold_p(
VALUE re)
727 return RBOOL(
RREGEXP_PTR(re)->options & ONIG_OPTION_IGNORECASE);
755 rb_reg_options_m(
VALUE re)
762 reg_names_iter(
const OnigUChar *name,
const OnigUChar *name_end,
763 int back_num,
int *back_refs,
OnigRegex regex,
void *arg)
787 rb_reg_names(
VALUE re)
792 onig_foreach_name(
RREGEXP_PTR(re), reg_names_iter, (
void*)ary);
797 reg_named_captures_iter(
const OnigUChar *name,
const OnigUChar *name_end,
798 int back_num,
int *back_refs,
OnigRegex regex,
void *arg)
804 for (i = 0; i < back_num; i++)
835 rb_reg_named_captures(
VALUE re)
838 VALUE hash = rb_hash_new_with_size(onig_number_of_names(reg));
839 onig_foreach_name(reg, reg_named_captures_iter, (
void*)hash);
844 onig_new_with_source(
regex_t** reg,
const UChar* pattern,
const UChar* pattern_end,
846 OnigErrorInfo* einfo,
const char *sourcefile,
int sourceline)
851 if (IS_NULL(*reg))
return ONIGERR_MEMORY;
853 r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
856 r = onig_compile_ruby(*reg, pattern, pattern_end, einfo, sourcefile, sourceline);
866 make_regexp(
const char *s,
long len,
rb_encoding *enc,
int flags, onig_errmsg_buffer err,
867 const char *sourcefile,
int sourceline)
880 r = onig_new_with_source(&rp, (UChar*)s, (UChar*)(s + len), flags,
881 enc, OnigDefaultSyntax, &einfo, sourcefile, sourceline);
883 onig_error_code_to_str((UChar*)err, r, &einfo);
942 match_alloc(
VALUE klass)
958 if (to->allocated)
return 0;
961 if (to->allocated)
return 0;
962 return ONIGERR_MEMORY;
971 pair_byte_cmp(
const void *pair1,
const void *pair2)
973 long diff = ((
pair_t*)pair1)->byte_pos - ((
pair_t*)pair2)->byte_pos;
974 #if SIZEOF_LONG > SIZEOF_INT
975 return diff ? diff > 0 ? 1 : -1 : 0;
982 update_char_offset(
VALUE match)
986 int i, num_regs, num_pos;
996 num_regs = rm->
regs.num_regs;
1005 for (i = 0; i < num_regs; i++) {
1014 for (i = 0; i < num_regs; i++) {
1017 pairs[num_pos++].byte_pos = BEG(i);
1018 pairs[num_pos++].byte_pos = END(i);
1020 qsort(pairs, num_pos,
sizeof(
pair_t), pair_byte_cmp);
1024 for (i = 0; i < num_pos; i++) {
1025 q = s + pairs[i].byte_pos;
1027 pairs[i].char_pos = c;
1031 for (i = 0; i < num_regs; i++) {
1039 key.byte_pos = BEG(i);
1040 found = bsearch(&key, pairs, num_pos,
sizeof(
pair_t), pair_byte_cmp);
1043 key.byte_pos = END(i);
1044 found = bsearch(&key, pairs, num_pos,
sizeof(
pair_t), pair_byte_cmp);
1050 match_check(
VALUE match)
1052 if (!
RMATCH(match)->regexp) {
1068 rm =
RMATCH(obj)->rmatch;
1097 match_regexp(
VALUE match)
1101 regexp =
RMATCH(match)->regexp;
1102 if (
NIL_P(regexp)) {
1105 RMATCH(match)->regexp = regexp;
1125 match_names(
VALUE match)
1130 return rb_reg_names(
RMATCH(match)->regexp);
1146 match_size(
VALUE match)
/* Forward declaration; the definition appears later in this file with the
 * parameters (regs, regexp, name, name_end).  That definition returns -1
 * when `regexp` is nil and otherwise forwards to
 * onig_name_to_backref_number(). */
1152 static int name_to_backref_number(
struct re_registers *,
VALUE,
const char*,
const char*);
/* Prototype for name_to_backref_error(), defined immediately below and
 * declared NORETURN.  Callers in this file invoke it with the offending
 * group name when a name-to-number lookup fails. */
1153 NORETURN(
static void name_to_backref_error(
VALUE name));
1156 name_to_backref_error(
VALUE name)
1165 if (i < 0 || regs->num_regs <= i)
1170 match_backref_number(
VALUE match,
VALUE backref)
1187 num = name_to_backref_number(regs, regexp, name, name +
RSTRING_LEN(backref));
1190 name_to_backref_error(backref);
1199 return match_backref_number(match, backref);
1223 int i = match_backref_number(match, n);
1227 backref_number_check(regs, i);
1232 update_char_offset(match);
1258 int i = match_backref_number(match, n);
1262 backref_number_check(regs, i);
1267 update_char_offset(match);
1292 int i = match_backref_number(match, n);
1296 backref_number_check(regs, i);
1301 update_char_offset(match);
1326 int i = match_backref_number(match, n);
1329 backref_number_check(regs, i);
1331 long start = BEG(i), end = END(i);
1359 int i = match_backref_number(match, n);
1363 backref_number_check(regs, i);
1368 update_char_offset(match);
1370 &
RMATCH(match)->rmatch->char_offset[i];
/* Flag bit on a match object.  It is set with FL_SET(match, MATCH_BUSY)
 * and checked with FL_TEST(match, MATCH_BUSY) on a non-nil existing match
 * before a new match is performed.
 * NOTE(review): presumably marks a match object that must not be
 * recycled -- confirm against the full definitions. */
1374 #define MATCH_BUSY FL_USER2
1379 FL_SET(match, MATCH_BUSY);
1383 rb_match_unbusy(
VALUE match)
1389 rb_match_count(
VALUE match)
1392 if (
NIL_P(match))
return -1;
1394 if (!regs)
return -1;
1395 return regs->num_regs;
1399 rb_match_nth_defined(
int nth,
VALUE match)
1402 if (
NIL_P(match))
return FALSE;
1404 if (!regs)
return FALSE;
1405 if (nth >= regs->num_regs) {
1409 nth += regs->num_regs;
1410 if (nth <= 0)
return FALSE;
1412 return (BEG(nth) != -1);
1416 match_set_string(
VALUE m,
VALUE string,
long pos,
long len)
1421 match->
str = string;
1423 int err = onig_region_resize(&
rmatch->
regs, 1);
1430 rb_backref_set_string(
VALUE string,
long pos,
long len)
1436 match_set_string(match,
string, pos, len);
1470 rb_reg_fixed_encoding_p(
VALUE re)
1472 return RBOOL(
FL_TEST(re, KCODE_FIXED));
1476 rb_reg_preprocess(
const char *p,
const char *end,
rb_encoding *enc,
/* Prototype for reg_enc_error(), declared NORETURN.  rb_reg_prepare_enc()
 * calls it with the regexp and the string when their encodings cannot be
 * used together.
 * NOTE(review): the "incompatible encoding regexp match" message that
 * follows appears to belong to its definition -- confirm. */
1479 NORETURN(
static void reg_enc_error(
VALUE re,
VALUE str));
1485 "incompatible encoding regexp match (%s regexp with %s string)",
1491 str_coderange(
VALUE str)
1501 rb_reg_prepare_enc(
VALUE re,
VALUE str,
int warn)
1504 int cr = str_coderange(str);
1508 "invalid byte sequence in %s",
1521 reg_enc_error(re, str);
1523 else if (rb_reg_fixed_encoding_p(re)) {
1526 reg_enc_error(re, str);
1530 else if (warn && (
RBASIC(re)->flags & REG_ENCODING_NONE) &&
1533 rb_warn(
"historical binary regexp match /.../n against %s string",
1540 rb_reg_prepare_re0(
VALUE re,
VALUE str, onig_errmsg_buffer err)
1545 const char *pattern;
1548 rb_encoding *enc = rb_reg_prepare_enc(re, str, 1);
1550 if (reg->enc == enc)
return reg;
1556 unescaped = rb_reg_preprocess(
1560 if (
NIL_P(unescaped)) {
1567 r = onig_new(®, (UChar *)ptr, (UChar *)(ptr + len),
1569 OnigDefaultSyntax, &einfo);
1571 onig_error_code_to_str((UChar*)err, r, &einfo);
1582 onig_errmsg_buffer err =
"";
1583 return rb_reg_prepare_re0(re, str, err);
1593 enc = rb_reg_prepare_enc(re, str, 0);
1602 if (pos > 0 && ONIGENC_MBC_MAXLEN(enc) != 1 && pos <
RSTRING_LEN(str)) {
1606 p = onigenc_get_right_adjust_char_head(enc,
string,
string + pos,
string +
RSTRING_LEN(str));
1609 p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,
string,
string + pos,
string +
RSTRING_LEN(str));
1619 rb_reg_search_set_match(
VALUE re,
VALUE str,
long pos,
int reverse,
int set_backref_str,
VALUE *set_match)
1624 char *start, *range;
1628 onig_errmsg_buffer err =
"";
1632 if (pos > len || pos < 0) {
1637 reg = rb_reg_prepare_re0(re, str, err);
1639 if (!tmpreg)
RREGEXP(re)->usecnt++;
1645 result = onig_search(reg,
1647 ((UChar*)(start + len)),
1648 ((UChar*)(start + pos)),
1650 regs, ONIG_OPTION_NONE);
1651 if (!tmpreg)
RREGEXP(re)->usecnt--;
1663 onig_region_free(regs, 0);
1664 if (result == ONIG_MISMATCH) {
1669 onig_error_code_to_str((UChar*)err, (
int)result);
1676 onig_region_free(regs, 0);
1679 if (set_backref_str) {
1683 RMATCH(match)->regexp = re;
1685 if (set_match) *set_match = match;
1691 rb_reg_search0(
VALUE re,
VALUE str,
long pos,
int reverse,
int set_backref_str)
1693 return rb_reg_search_set_match(re, str, pos, reverse, set_backref_str, NULL);
1699 return rb_reg_search0(re, str, pos, reverse, 1);
1710 onig_errmsg_buffer err =
"";
1712 reg = rb_reg_prepare_re0(re, str, err);
1714 if (!tmpreg)
RREGEXP(re)->usecnt++;
1717 if (!
NIL_P(match)) {
1718 if (
FL_TEST(match, MATCH_BUSY)) {
1731 result = onig_match(reg,
1733 ((UChar*)(ptr + len)),
1735 regs, ONIG_OPTION_NONE);
1736 if (!tmpreg)
RREGEXP(re)->usecnt--;
1748 onig_region_free(regs, 0);
1749 if (result == ONIG_MISMATCH) {
1754 onig_error_code_to_str((UChar*)err, (
int)result);
1763 onig_region_free(regs, 0);
1769 RMATCH(match)->regexp = re;
1782 if (nth >= regs->num_regs) {
1786 nth += regs->num_regs;
1787 if (nth <= 0)
return Qnil;
1789 return RBOOL(BEG(nth) != -1);
1796 long start, end, len;
1802 if (nth >= regs->num_regs) {
1806 nth += regs->num_regs;
1807 if (nth <= 0)
return Qnil;
1810 if (start == -1)
return Qnil;
1844 if (BEG(0) == -1)
return Qnil;
1871 if (BEG(0) == -1)
return Qnil;
1872 str =
RMATCH(match)->str;
1887 if (BEG(0) == -1)
return Qnil;
1889 for (i=regs->num_regs-1; BEG(i) == -1 && i > 0; i--)
1891 if (i == 0)
return Qnil;
1896 last_match_getter(
ID _x,
VALUE *_y)
1902 prematch_getter(
ID _x,
VALUE *_y)
1908 postmatch_getter(
ID _x,
VALUE *_y)
1914 last_paren_match_getter(
ID _x,
VALUE *_y)
1920 match_array(
VALUE match,
int start)
1930 target =
RMATCH(match)->str;
1932 for (i=start; i<regs->num_regs; i++) {
1933 if (regs->beg[i] == -1) {
1968 match_to_a(
VALUE match)
1970 return match_array(match, 0);
1987 match_captures(
VALUE match)
1989 return match_array(match, 1);
1993 name_to_backref_number(
struct re_registers *regs,
VALUE regexp,
const char* name,
const char* name_end)
1995 if (
NIL_P(regexp))
return -1;
1996 return onig_name_to_backref_number(
RREGEXP_PTR(regexp),
1997 (
const unsigned char *)name, (
const unsigned char *)name_end, regs);
2000 #define NAME_TO_NUMBER(regs, re, name, name_ptr, name_end) \
2002 !rb_enc_compatible(RREGEXP_SRC(re), (name)) ? 0 : \
2003 name_to_backref_number((regs), (re), (name_ptr), (name_end)))
2016 num = NAME_TO_NUMBER(regs, re, name,
2019 name_to_backref_error(name);
2025 match_ary_subseq(
VALUE match,
long beg,
long len,
VALUE result)
2028 long j, end = olen < beg+len ? olen : beg+len;
2030 if (len == 0)
return result;
2032 for (j = beg; j < end; j++) {
2035 if (beg + len > j) {
2056 return match_ary_subseq(match, beg, len, result);
2088 match_aref(
int argc,
VALUE *argv,
VALUE match)
2095 if (
NIL_P(length)) {
2100 int num = namev_to_backref_number(
RMATCH_REGS(match),
RMATCH(match)->regexp, idx);
2105 return match_ary_aref(match, idx,
Qnil);
2118 if (beg < 0)
return Qnil;
2120 else if (beg > num_regs) {
2123 if (beg+len > num_regs) {
2124 len = num_regs - beg;
2126 return match_ary_subseq(match, beg, len,
Qnil);
2149 match_values_at(
int argc,
VALUE *argv,
VALUE match)
2157 for (i=0; i<argc; i++) {
2162 int num = namev_to_backref_number(
RMATCH_REGS(match),
RMATCH(match)->regexp, argv[i]);
2167 match_ary_aref(match, argv[i], result);
2186 match_to_s(
VALUE match)
2196 match_named_captures_iter(
const OnigUChar *name,
const OnigUChar *name_end,
2197 int back_num,
int *back_refs,
OnigRegex regex,
void *arg) {
2198 struct MEMO *memo = MEMO_CAST(arg);
2199 VALUE hash = memo->v1;
2200 VALUE match = memo->v2;
2208 for (i = 0; i < back_num; i++) {
2247 match_named_captures(
VALUE match)
2257 memo = MEMO_NEW(hash, match, 0);
2259 onig_foreach_name(
RREGEXP(
RMATCH(match)->regexp)->ptr, match_named_captures_iter, (
void*)memo);
2275 match_string(
VALUE match)
2278 return RMATCH(match)->str;
2287 match_inspect_name_iter(
const OnigUChar *name,
const OnigUChar *name_end,
2288 int back_num,
int *back_refs,
OnigRegex regex,
void *arg0)
2293 for (i = 0; i < back_num; i++) {
2294 arg[back_refs[i]].name = name;
2295 arg[back_refs[i]].len = name_end - name;
2321 match_inspect(
VALUE match)
2327 int num_regs = regs->num_regs;
2332 return rb_sprintf(
"#<%"PRIsVALUE
":%p>", cname, (
void*)match);
2334 else if (
NIL_P(regexp)) {
2335 return rb_sprintf(
"#<%"PRIsVALUE
": %"PRIsVALUE
">",
2343 match_inspect_name_iter, names);
2348 for (i = 0; i < num_regs; i++) {
2373 read_escaped_byte(
const char **pp,
const char *end, onig_errmsg_buffer err)
2375 const char *p = *pp;
2377 int meta_prefix = 0, ctrl_prefix = 0;
2380 if (p == end || *p++ !=
'\\') {
2381 errcpy(err,
"too short escaped multibyte character");
2387 errcpy(err,
"too short escape sequence");
2391 case '\\': code =
'\\';
break;
2392 case 'n': code =
'\n';
break;
2393 case 't': code =
'\t';
break;
2394 case 'r': code =
'\r';
break;
2395 case 'f': code =
'\f';
break;
2396 case 'v': code =
'\013';
break;
2397 case 'a': code =
'\007';
break;
2398 case 'e': code =
'\033';
break;
2401 case '0':
case '1':
case '2':
case '3':
2402 case '4':
case '5':
case '6':
case '7':
2404 code =
scan_oct(p, end < p+3 ? end-p : 3, &len);
2409 code =
scan_hex(p, end < p+2 ? end-p : 2, &len);
2411 errcpy(err,
"invalid hex escape");
2419 errcpy(err,
"duplicate meta escape");
2423 if (p+1 < end && *p++ ==
'-' && (*p & 0x80) == 0) {
2433 errcpy(err,
"too short meta escape");
2437 if (p == end || *p++ !=
'-') {
2438 errcpy(err,
"too short control escape");
2443 errcpy(err,
"duplicate control escape");
2447 if (p < end && (*p & 0x80) == 0) {
2457 errcpy(err,
"too short control escape");
2461 errcpy(err,
"unexpected escape sequence");
2464 if (code < 0 || 0xff < code) {
2465 errcpy(err,
"invalid escape code");
2479 unescape_escaped_nonascii(
const char **pp,
const char *end,
rb_encoding *enc,
2482 const char *p = *pp;
2484 unsigned char *area =
ALLOCA_N(
unsigned char, chmaxlen);
2485 char *chbuf = (
char *)area;
2490 memset(chbuf, 0, chmaxlen);
2492 byte = read_escaped_byte(&p, end, err);
2497 area[chlen++] = byte;
2498 while (chlen < chmaxlen &&
2500 byte = read_escaped_byte(&p, end, err);
2504 area[chlen++] = byte;
2509 errcpy(err,
"invalid multibyte escape");
2512 if (1 < chlen || (area[0] & 0x80)) {
2517 else if (*encp != enc) {
2518 errcpy(err,
"escaped non ASCII character in UTF-8 regexp");
2524 snprintf(escbuf,
sizeof(escbuf),
"\\x%02X", area[0]&0xff);
2532 check_unicode_range(
unsigned long code, onig_errmsg_buffer err)
2534 if ((0xd800 <= code && code <= 0xdfff) ||
2536 errcpy(err,
"invalid Unicode range");
2543 append_utf8(
unsigned long uv,
2546 if (check_unicode_range(uv, err) != 0)
2550 snprintf(escbuf,
sizeof(escbuf),
"\\x%02X", (
int)uv);
2562 errcpy(err,
"UTF-8 character in non UTF-8 regexp");
2570 unescape_unicode_list(
const char **pp,
const char *end,
2573 const char *p = *pp;
2574 int has_unicode = 0;
2578 while (p < end &&
ISSPACE(*p)) p++;
2585 errcpy(err,
"invalid Unicode range");
2589 if (append_utf8(code, buf, encp, err) != 0)
2593 while (p < end &&
ISSPACE(*p)) p++;
2596 if (has_unicode == 0) {
2597 errcpy(err,
"invalid Unicode list");
2607 unescape_unicode_bmp(
const char **pp,
const char *end,
2610 const char *p = *pp;
2615 errcpy(err,
"invalid Unicode escape");
2620 errcpy(err,
"invalid Unicode escape");
2623 if (append_utf8(code, buf, encp, err) != 0)
2630 unescape_nonascii(
const char *p,
const char *end,
rb_encoding *enc,
2632 onig_errmsg_buffer err)
2641 errcpy(err,
"invalid multibyte character");
2645 if (1 < chlen || (*p & 0x80)) {
2651 else if (*encp != enc) {
2652 errcpy(err,
"non ASCII character in UTF-8 regexp");
2661 errcpy(err,
"too short escape sequence");
2666 goto invalid_multibyte;
2675 case '1':
case '2':
case '3':
2676 case '4':
case '5':
case '6':
case '7':
2678 size_t len = end-(p-1), octlen;
2679 if (
ruby_scan_oct(p-1, len < 3 ? len : 3, &octlen) <= 0177) {
2696 const char *pbeg = p;
2697 int byte = read_escaped_byte(&p, end, err);
2698 if (
byte == -1)
return -1;
2703 if (unescape_escaped_nonascii(&p, end, enc, buf, encp, err) != 0)
2710 errcpy(err,
"too short escape sequence");
2716 if (unescape_unicode_list(&p, end, buf, encp, err) != 0)
2718 if (p == end || *p++ !=
'}') {
2719 errcpy(err,
"invalid Unicode list");
2726 if (unescape_unicode_bmp(&p, end, buf, encp, err) != 0)
2757 rb_reg_preprocess(
const char *p,
const char *end,
rb_encoding *enc,
2761 int has_property = 0;
2772 if (unescape_nonascii(p, end, enc, buf, fixed_enc, &has_property, err) != 0)
2775 if (has_property && !*fixed_enc) {
2787 rb_reg_check_preprocess(
VALUE str)
2790 onig_errmsg_buffer err =
"";
2800 buf = rb_reg_preprocess(p, end, enc, &fixed_enc, err);
2804 return rb_reg_error_desc(str, 0, err);
2810 rb_reg_preprocess_dregexp(
VALUE ary,
int options)
2814 onig_errmsg_buffer err =
"";
2830 if (options & ARG_ENCODING_NONE &&
2831 src_enc != ascii8bit) {
2835 src_enc = ascii8bit;
2842 buf = rb_reg_preprocess(p, end, src_enc, &fixed_enc, err);
2847 if (fixed_enc != 0) {
2848 if (regexp_enc != 0 && regexp_enc != fixed_enc) {
2852 regexp_enc = fixed_enc;
2869 int options, onig_errmsg_buffer err,
2870 const char *sourcefile,
int sourceline)
2878 if (
FL_TEST(obj, REG_LITERAL))
2885 errcpy(err,
"can't make regexp with dummy encoding");
2889 unescaped = rb_reg_preprocess(s, s+len, enc, &fixed_enc, err);
2890 if (
NIL_P(unescaped))
2894 if ((fixed_enc != enc && (options & ARG_ENCODING_FIXED)) ||
2895 (fixed_enc != a_enc && (options & ARG_ENCODING_NONE))) {
2896 errcpy(err,
"incompatible character encoding");
2899 if (fixed_enc != a_enc) {
2900 options |= ARG_ENCODING_FIXED;
2904 else if (!(options & ARG_ENCODING_FIXED)) {
2909 if ((options & ARG_ENCODING_FIXED) || fixed_enc) {
2912 if (options & ARG_ENCODING_NONE) {
2917 options & ARG_REG_OPTION_MASK, err,
2918 sourcefile, sourceline);
2919 if (!re->
ptr)
return -1;
2928 if (regenc != enc) {
2935 rb_reg_initialize_str(
VALUE obj,
VALUE str,
int options, onig_errmsg_buffer err,
2936 const char *sourcefile,
int sourceline)
2940 if (options & ARG_ENCODING_NONE) {
2942 if (enc != ascii8bit) {
2944 errcpy(err,
"/.../n has a non escaped non ASCII character in non ASCII-8BIT script");
2951 options, err, sourcefile, sourceline);
2952 if (ret == 0) reg_set_source(obj, str, str_enc);
2957 rb_reg_s_alloc(
VALUE klass)
2977 return rb_reg_init_str(rb_reg_alloc(), s, options);
2981 rb_reg_init_str(
VALUE re,
VALUE s,
int options)
2983 onig_errmsg_buffer err =
"";
2985 if (rb_reg_initialize_str(re, s, options, err, NULL, 0) != 0) {
2986 rb_reg_raise_str(s, options, err);
2995 onig_errmsg_buffer err =
"";
2998 enc, options, err, NULL, 0) != 0) {
2999 rb_reg_raise_str(s, options, err);
3001 reg_set_source(re, s, enc);
3006 MJIT_FUNC_EXPORTED
VALUE
3007 rb_reg_new_ary(
VALUE ary,
int opt)
3017 VALUE re = rb_reg_alloc();
3018 onig_errmsg_buffer err =
"";
3020 if (rb_reg_initialize(re, s, len, enc, options, err, NULL, 0) != 0) {
3021 rb_enc_reg_raise(s, len, enc, options, err);
3035 rb_reg_compile(
VALUE str,
int options,
const char *sourcefile,
int sourceline)
3037 VALUE re = rb_reg_alloc();
3038 onig_errmsg_buffer err =
"";
3041 if (rb_reg_initialize_str(re, str, options, err, sourcefile, sourceline) != 0) {
3050 static VALUE reg_cache;
/* Forward declaration of the regexp hashing helper.  In this file it is
 * called by rb_reg_hash() and by match_hash() (on the match's regexp). */
3063 static st_index_t reg_hash(
VALUE re);
3074 rb_reg_hash(
VALUE re)
3076 st_index_t hashval = reg_hash(re);
3110 if (re1 == re2)
return Qtrue;
3112 rb_reg_check(re1); rb_reg_check(re2);
3131 match_hash(
VALUE match)
3138 hashval =
rb_hash_uint(hashval, reg_hash(match_regexp(match)));
3161 if (match1 == match2)
return Qtrue;
3165 if (!rb_reg_equal(match_regexp(match1), match_regexp(match2)))
return Qfalse;
3168 if (regs1->num_regs != regs2->num_regs)
return Qfalse;
3169 if (memcmp(regs1->beg, regs2->beg, regs1->num_regs *
sizeof(*regs1->beg)))
return Qfalse;
3170 if (memcmp(regs1->end, regs2->end, regs1->num_regs *
sizeof(*regs1->end)))
return Qfalse;
3175 reg_operand(
VALUE s,
int check)
3197 *strp = str = reg_operand(str, TRUE);
3208 return rb_reg_search_set_match(re, str, pos, 0, 1, set_match);
3262 long pos = reg_match_pos(re, &str, 0, NULL);
3263 if (pos < 0)
return Qnil;
3294 str = reg_operand(str, FALSE);
3371 rb_reg_match_m(
int argc,
VALUE *argv,
VALUE re)
3376 if (
rb_scan_args(argc, argv,
"11", &str, &initpos) == 2) {
3383 pos = reg_match_pos(re, &str, pos, &result);
3412 rb_reg_match_m_p(
int argc,
VALUE *argv,
VALUE re)
3415 return rb_reg_match_p(re, argv[0], pos);
3419 rb_reg_match_p(
VALUE re,
VALUE str,
long pos)
3422 onig_errmsg_buffer err =
"";
3423 OnigPosition result;
3424 const UChar *start, *end;
3432 if (pos < 0)
return Qfalse;
3441 reg = rb_reg_prepare_re0(re, str, err);
3443 if (!tmpreg)
RREGEXP(re)->usecnt++;
3446 result = onig_search(reg, start, end, start + pos, end,
3447 NULL, ONIG_OPTION_NONE);
3448 if (!tmpreg)
RREGEXP(re)->usecnt--;
3459 if (result == ONIG_MISMATCH) {
3463 onig_error_code_to_str((UChar*)err, (
int)result);
3499 rb_reg_initialize_m(
int argc,
VALUE *argv,
VALUE self)
3519 else if (
RTEST(argv[1])) flags = ONIG_OPTION_IGNORECASE;
3521 if (argc == 3 && !
NIL_P(argv[2])) {
3523 if (kcode[0] ==
'n' || kcode[0] ==
'N') {
3525 flags |= ARG_ENCODING_NONE;
3534 rb_reg_init_str_enc(
self, str, enc, flags);
3536 rb_reg_init_str(
self, str, flags);
3554 s += mbclen(s, send, enc);
3558 case '[':
case ']':
case '{':
case '}':
3559 case '(':
case ')':
case '|':
case '-':
3560 case '*':
case '.':
case '\\':
3561 case '?':
case '+':
case '^':
case '$':
3563 case '\t':
case '\f':
case '\v':
case '\n':
case '\r':
3585 memcpy(t, p, s - p);
3591 int n = mbclen(s, send, enc);
3599 case '[':
case ']':
case '{':
case '}':
3600 case '(':
case ')':
case '|':
case '-':
3601 case '*':
case '.':
case '\\':
3602 case '?':
case '+':
case '^':
case '$':
3664 options =
RREGEXP_PTR(re)->options & ARG_REG_OPTION_MASK;
3665 if (
RBASIC(re)->flags & KCODE_FIXED) options |= ARG_ENCODING_FIXED;
3666 if (
RBASIC(re)->flags & REG_ENCODING_NONE) options |= ARG_ENCODING_NONE;
3671 rb_check_regexp_type(
VALUE re)
3696 return rb_check_regexp_type(re);
3709 else if (argc == 1) {
3711 VALUE re = rb_check_regexp_type(arg);
3716 quoted = rb_reg_s_quote(
Qnil, arg);
3725 int has_asciionly = 0;
3729 for (i = 0; i < argc; i++) {
3736 v = rb_check_regexp_type(e);
3740 if (!has_ascii_incompat)
3741 has_ascii_incompat = enc;
3742 else if (has_ascii_incompat != enc)
3746 else if (rb_reg_fixed_encoding_p(v)) {
3747 if (!has_ascii_compat_fixed)
3748 has_ascii_compat_fixed = enc;
3749 else if (has_ascii_compat_fixed != enc)
3756 v = rb_reg_str_with_term(v, -1);
3763 if (!has_ascii_incompat)
3764 has_ascii_incompat = enc;
3765 else if (has_ascii_incompat != enc)
3773 if (!has_ascii_compat_fixed)
3774 has_ascii_compat_fixed = enc;
3775 else if (has_ascii_compat_fixed != enc)
3779 v = rb_reg_s_quote(
Qnil, e);
3781 if (has_ascii_incompat) {
3782 if (has_asciionly) {
3786 if (has_ascii_compat_fixed) {
3798 if (has_ascii_incompat) {
3799 result_enc = has_ascii_incompat;
3801 else if (has_ascii_compat_fixed) {
3802 result_enc = has_ascii_compat_fixed;
3841 return rb_reg_s_union(
self, v);
3843 return rb_reg_s_union(
self, args);
/* Fetch one ASCII character at `s` (bounded by `e`), storing its byte
 * length through `cl`.
 *   - When `acompat` is true: sets *cl to 1 and yields the first byte if
 *     ISASCII() accepts it, otherwise -1.
 *   - Otherwise: delegates to rb_enc_ascget(s, e, cl, str_enc).
 * The expansion refers to `acompat` and `str_enc`, which must be
 * variables in the enclosing scope.  `s` is evaluated more than once, so
 * the argument must be free of side effects. */
3865 #define ASCGET(s,e,cl) (acompat ? (*(cl)=1,ISASCII((s)[0])?(s)[0]:-1) : rb_enc_ascget((s), (e), (cl), str_enc))
3872 int c = ASCGET(s, e, &clen);
3876 s += mbclen(s, e, str_enc);
3882 if (c !=
'\\' || s == e)
continue;
3889 c = ASCGET(s, e, &clen);
3891 s += mbclen(s, e, str_enc);
3900 case '1':
case '2':
case '3':
case '4':
3901 case '5':
case '6':
case '7':
case '8':
case '9':
3902 if (!
NIL_P(regexp) && onig_noname_group_capture_is_active(
RREGEXP_PTR(regexp))) {
3911 if (s < e && ASCGET(s, e, &clen) ==
'<') {
3912 char *name, *name_end;
3914 name_end = name = s + clen;
3915 while (name_end < e) {
3916 c = ASCGET(name_end, e, &clen);
3917 if (c ==
'>')
break;
3918 name_end += c == -1 ? mbclen(name_end, e, str_enc) : clen;
3922 (
long)(name_end - name));
3923 if ((no = NAME_TO_NUMBER(regs, regexp, n, name, name_end)) < 1) {
3924 name_to_backref_error(n);
3926 p = s = name_end + clen;
3951 no = regs->num_regs-1;
3952 while (BEG(no) == -1 && no > 0) no--;
3953 if (no == 0)
continue;
3966 if (no >= regs->num_regs)
continue;
3967 if (BEG(no) == -1)
continue;
3972 if (!val)
return str;
3981 ignorecase_getter(
ID _x,
VALUE *_y)
4004 get_LAST_MATCH_INFO(
ID _x,
VALUE *_y)
4006 return match_getter();
4047 rb_reg_s_last_match(
int argc,
VALUE *argv,
VALUE _)
4053 n = match_backref_number(match, argv[0]);
4056 return match_getter();
4060 re_warn(
const char *s)
4097 onigenc_set_default_encoding(ONIG_ENCODING_ASCII);
4098 onig_set_warn_func(re_warn);
4099 onig_set_verb_warn_func(re_warn);
4107 rb_gvar_ractor_local(
"$~");
4108 rb_gvar_ractor_local(
"$&");
4109 rb_gvar_ractor_local(
"$`");
4110 rb_gvar_ractor_local(
"$'");
4111 rb_gvar_ractor_local(
"$+");
#define rb_define_singleton_method(klass, mid, func, arity)
Defines klass.mid.
static bool rb_enc_isprint(OnigCodePoint c, rb_encoding *enc)
Identical to rb_isprint(), except it additionally takes an encoding.
static bool rb_enc_isspace(OnigCodePoint c, rb_encoding *enc)
Identical to rb_isspace(), except it additionally takes an encoding.
VALUE rb_define_class(const char *name, VALUE super)
Defines a top-level class.
void rb_undef_method(VALUE klass, const char *name)
Defines an undef of a method.
int rb_scan_args(int argc, const VALUE *argv, const char *fmt,...)
Retrieves argument from argc and argv to given VALUE references according to the format string.
void rb_define_method(VALUE klass, const char *name, VALUE(*func)(ANYARGS), int argc)
Defines a method.
int rb_block_given_p(void)
Determines if the current method is given a block.
#define rb_str_new2
Old name of rb_str_new_cstr.
#define NEWOBJ_OF
Old name of RB_NEWOBJ_OF.
#define ENC_CODERANGE_7BIT
Old name of RUBY_ENC_CODERANGE_7BIT.
#define REALLOC_N
Old name of RB_REALLOC_N.
#define OBJ_INIT_COPY(obj, orig)
Old name of RB_OBJ_INIT_COPY.
#define ISSPACE
Old name of rb_isspace.
#define T_STRING
Old name of RUBY_T_STRING.
#define ENC_CODERANGE_CLEAN_P(cr)
Old name of RB_ENC_CODERANGE_CLEAN_P.
#define INT2FIX
Old name of RB_INT2FIX.
#define rb_str_buf_new2
Old name of rb_str_buf_new_cstr.
#define ENC_CODERANGE(obj)
Old name of RB_ENC_CODERANGE.
#define ZALLOC
Old name of RB_ZALLOC.
#define CLASS_OF
Old name of rb_class_of.
#define ENC_CODERANGE_UNKNOWN
Old name of RUBY_ENC_CODERANGE_UNKNOWN.
#define ENCODING_GET(obj)
Old name of RB_ENCODING_GET.
#define LONG2FIX
Old name of RB_INT2FIX.
#define FIX2INT
Old name of RB_FIX2INT.
#define rb_str_new3
Old name of rb_str_new_shared.
#define MBCLEN_CHARFOUND_LEN(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_LEN.
#define FL_SET
Old name of RB_FL_SET.
#define LONG2NUM
Old name of RB_LONG2NUM.
#define rb_exc_new3
Old name of rb_exc_new_str.
#define MBCLEN_INVALID_P(ret)
Old name of ONIGENC_MBCLEN_INVALID_P.
#define Qtrue
Old name of RUBY_Qtrue.
#define ST2FIX
Old name of RB_ST2FIX.
#define MBCLEN_NEEDMORE_P(ret)
Old name of ONIGENC_MBCLEN_NEEDMORE_P.
#define NUM2INT
Old name of RB_NUM2INT.
#define INT2NUM
Old name of RB_INT2NUM.
#define Qnil
Old name of RUBY_Qnil.
#define Qfalse
Old name of RUBY_Qfalse.
#define ENC_CODERANGE_BROKEN
Old name of RUBY_ENC_CODERANGE_BROKEN.
#define scan_hex(s, l, e)
Old name of ruby_scan_hex.
#define NIL_P
Old name of RB_NIL_P.
#define MBCLEN_CHARFOUND_P(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_P.
#define FL_WB_PROTECTED
Old name of RUBY_FL_WB_PROTECTED.
#define T_MATCH
Old name of RUBY_T_MATCH.
#define FL_TEST
Old name of RB_FL_TEST.
#define NUM2LONG
Old name of RB_NUM2LONG.
#define FL_UNSET
Old name of RB_FL_UNSET.
#define FIXNUM_P
Old name of RB_FIXNUM_P.
#define scan_oct(s, l, e)
Old name of ruby_scan_oct.
#define rb_ary_new2
Old name of rb_ary_new_capa.
#define rb_str_new4
Old name of rb_str_new_frozen.
#define SYMBOL_P
Old name of RB_SYMBOL_P.
#define T_REGEXP
Old name of RUBY_T_REGEXP.
void rb_category_warn(rb_warning_category_t category, const char *fmt,...)
Identical to rb_category_warning(), except it reports always regardless of runtime -W flag.
void rb_raise(VALUE exc, const char *fmt,...)
Exception entry point.
void rb_exc_raise(VALUE mesg)
Raises an exception in the current thread.
void rb_bug(const char *fmt,...)
Interpreter panic switch.
VALUE rb_eStandardError
StandardError exception.
void rb_set_errinfo(VALUE err)
Sets the current exception ($!) to the given value.
VALUE rb_eRegexpError
RegexpError exception.
#define ruby_verbose
This variable controls whether the interpreter is in debug mode.
VALUE rb_eTypeError
TypeError exception.
VALUE rb_eEncCompatError
Encoding::CompatibilityError exception.
VALUE rb_eRuntimeError
RuntimeError exception.
void rb_warn(const char *fmt,...)
Identical to rb_warning(), except it reports always regardless of runtime -W flag.
VALUE rb_eArgError
ArgumentError exception.
VALUE rb_eIndexError
IndexError exception.
VALUE rb_eSecurityError
SecurityError exception.
@ RB_WARN_CATEGORY_DEPRECATED
Warning is for deprecated features.
VALUE rb_check_convert_type(VALUE val, int type, const char *name, const char *mid)
Identical to rb_convert_type(), except it returns RUBY_Qnil instead of raising exceptions,...
VALUE rb_any_to_s(VALUE obj)
Generates a textual representation of the given object.
VALUE rb_class_new_instance(int argc, const VALUE *argv, VALUE klass)
Allocates, then initialises an instance of the given class.
VALUE rb_cMatch
MatchData class.
VALUE rb_cRegexp
Regexp class.
VALUE rb_obj_class(VALUE obj)
Queries the class of an object.
VALUE rb_obj_freeze(VALUE obj)
Just calls rb_obj_freeze_inline() inside.
#define RB_OBJ_WRITE(old, slot, young)
Declaration of a "back" pointer.
rb_encoding * rb_default_external_encoding(void)
Queries the "default external" encoding.
int rb_enc_dummy_p(rb_encoding *enc)
Queries if the passed encoding is dummy.
int rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc)
Queries the number of bytes of the character at the passed pointer.
VALUE rb_enc_associate(VALUE obj, rb_encoding *enc)
Identical to rb_enc_associate_index(), except it takes an encoding itself instead of its index.
rb_encoding * rb_usascii_encoding(void)
Queries the encoding that represents US-ASCII.
static char * rb_enc_left_char_head(const char *s, const char *p, const char *e, rb_encoding *enc)
Queries the left boundary of a character.
void rb_enc_copy(VALUE dst, VALUE src)
Destructively copies the encoding of the latter object to that of former one.
int rb_utf8_encindex(void)
Identical to rb_utf8_encoding(), except it returns the encoding's index instead of the encoding itself.
int rb_ascii8bit_encindex(void)
Identical to rb_ascii8bit_encoding(), except it returns the encoding's index instead of the encoding itself.
int rb_enc_unicode_p(rb_encoding *enc)
Queries if the passed encoding is either one of UTF-8/16/32.
rb_encoding * rb_default_internal_encoding(void)
Queries the "default internal" encoding.
rb_encoding * rb_ascii8bit_encoding(void)
Queries the encoding that represents ASCII-8BIT a.k.a. binary.
VALUE rb_enc_from_encoding(rb_encoding *enc)
Queries the Ruby-level counterpart instance of rb_cEncoding that corresponds to the passed encoding.
static bool rb_enc_asciicompat(rb_encoding *enc)
Queries if the passed encoding is in some sense compatible with ASCII.
rb_encoding * rb_utf8_encoding(void)
Queries the encoding that represents UTF-8.
static int rb_enc_mbcput(unsigned int c, void *buf, rb_encoding *enc)
Identical to rb_enc_uint_chr(), except it writes back to the passed buffer instead of allocating one.
int rb_char_to_option_kcode(int c, int *option, int *kcode)
Converts a character option to its encoding.
static int rb_enc_mbmaxlen(rb_encoding *enc)
Queries the maximum number of bytes that the passed encoding needs to represent a character.
rb_encoding * rb_enc_get(VALUE obj)
Identical to rb_enc_get_index(), except the return type.
static OnigCodePoint rb_enc_mbc_to_codepoint(const char *p, const char *e, rb_encoding *enc)
Identical to rb_enc_codepoint(), except it assumes the passed character is not broken.
static const char * rb_enc_name(rb_encoding *enc)
Queries the (canonical) name of the passed encoding.
static int rb_enc_mbminlen(rb_encoding *enc)
Queries the minimum number of bytes that the passed encoding needs to represent a character.
int rb_enc_ascget(const char *p, const char *e, int *len, rb_encoding *enc)
Queries the code point of character pointed by the passed pointer.
VALUE rb_enc_reg_new(const char *ptr, long len, rb_encoding *enc, int opts)
Identical to rb_reg_new(), except it additionally takes an encoding.
int rb_enc_str_coderange(VALUE str)
Scans the passed string to collect its code range.
long rb_memsearch(const void *x, long m, const void *y, long n, rb_encoding *enc)
Looks for the passed string in the passed buffer.
long rb_enc_strlen(const char *head, const char *tail, rb_encoding *enc)
Counts the number of characters of the passed string, according to the passed encoding.
VALUE rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *enc)
Identical to rb_str_cat(), except it additionally takes an encoding.
VALUE rb_enc_str_new(const char *ptr, long len, rb_encoding *enc)
Identical to rb_str_new(), except it additionally takes an encoding.
int rb_enc_str_asciionly_p(VALUE str)
Queries if the passed string is "ASCII only".
VALUE rb_obj_encoding(VALUE obj)
Identical to rb_enc_get_index(), except the return type.
long rb_str_coderange_scan_restartable(const char *str, const char *end, rb_encoding *enc, int *cr)
Scans the passed string until it finds something odd.
VALUE rb_str_encode(VALUE str, VALUE to, int ecflags, VALUE ecopts)
Converts the contents of the passed string from its encoding to the passed one.
void rb_global_variable(VALUE *)
An alias for rb_gc_register_address().
VALUE rb_check_array_type(VALUE obj)
Try converting an object to its array representation using its to_ary method, if any.
VALUE rb_ary_new_capa(long capa)
Identical to rb_ary_new(), except it additionally specifies how many rooms of objects it should allocate.
VALUE rb_ary_resize(VALUE ary, long len)
Expands or shrinks the passed array to the passed length.
VALUE rb_ary_push(VALUE ary, VALUE elem)
Special case of rb_ary_cat() that it adds only one element.
VALUE rb_ary_entry(VALUE ary, long off)
Queries an element of an array.
VALUE rb_assoc_new(VALUE car, VALUE cdr)
Identical to rb_ary_new_from_values(), except it expects exactly two parameters.
void rb_ary_store(VALUE ary, long key, VALUE val)
Destructively stores the passed value to the passed array's passed index.
int rb_uv_to_utf8(char buf[6], unsigned long uv)
Encodes a Unicode codepoint into its UTF-8 representation.
#define rb_check_frozen
Just another name of rb_check_frozen.
static int rb_check_arity(int argc, int min, int max)
Ensures that the passed integer is in the passed range.
void rb_memerror(void)
Triggers out-of-memory error.
void rb_gc(void)
Triggers a GC process.
VALUE rb_hash_aset(VALUE hash, VALUE key, VALUE val)
Inserts or replaces ("upsert"s) the objects into the given hash table.
VALUE rb_hash_new(void)
Creates a new, empty hash object.
VALUE rb_backref_get(void)
Queries the last match, or Regexp.last_match, or the $~.
VALUE rb_lastline_get(void)
Queries the last line, or the $_.
void rb_backref_set(VALUE md)
Updates $~.
VALUE rb_range_beg_len(VALUE range, long *begp, long *lenp, long len, int err)
Deconstructs a numerical range.
int rb_reg_backref_number(VALUE match, VALUE backref)
Queries the index of the given named capture.
int rb_reg_options(VALUE re)
Queries the options of the passed regular expression.
VALUE rb_reg_last_match(VALUE md)
This just returns the argument, stringified.
VALUE rb_reg_match(VALUE re, VALUE str)
This is the match operator.
void rb_match_busy(VALUE md)
Asserts that the given MatchData is "occupied".
VALUE rb_reg_nth_match(int n, VALUE md)
Queries the nth captured substring.
VALUE rb_reg_match_post(VALUE md)
The portion of the original string after the given match.
VALUE rb_reg_nth_defined(int n, VALUE md)
Identical to rb_reg_nth_match(), except it just returns Boolean.
VALUE rb_reg_match_pre(VALUE md)
The portion of the original string before the given match.
VALUE rb_reg_new_str(VALUE src, int opts)
Identical to rb_reg_new(), except it takes the expression in Ruby's string instead of C's.
VALUE rb_reg_match_last(VALUE md)
The portion of the original string that captured at the very last.
VALUE rb_reg_match2(VALUE re)
Identical to rb_reg_match(), except it matches against rb_lastline_get() (or, the $_).
VALUE rb_reg_new(const char *src, long len, int opts)
Creates a new Regular expression.
int rb_memcicmp(const void *s1, const void *s2, long n)
Identical to st_locale_insensitive_strcasecmp(), except it is timing safe and returns something different.
#define rb_hash_uint(h, i)
Just another name of st_hash_uint.
#define rb_hash_end(h)
Just another name of st_hash_end.
VALUE rb_str_append(VALUE dst, VALUE src)
Identical to rb_str_buf_append(), except it converts the right hand side before concatenating.
long rb_str_offset(VALUE str, long pos)
"Inverse" of rb_str_sublen().
VALUE rb_str_subseq(VALUE str, long beg, long len)
Identical to rb_str_substr(), except the numbers are interpreted as byte offsets instead of character offsets.
st_index_t rb_memhash(const void *ptr, long len)
This is a universal hash function.
VALUE rb_str_buf_cat(VALUE, const char *, long)
Just another name of rb_str_cat.
VALUE rb_str_dup(VALUE str)
Duplicates a string.
st_index_t rb_str_hash(VALUE str)
Calculates a hash value of a string.
VALUE rb_str_buf_cat2(VALUE, const char *)
Just another name of rb_str_cat_cstr.
char * rb_str_subpos(VALUE str, long beg, long *len)
Identical to rb_str_substr(), except it returns a C's string instead of Ruby's.
VALUE rb_str_buf_append(VALUE dst, VALUE src)
Identical to rb_str_cat_cstr(), except it takes Ruby's string instead of C's.
long rb_str_sublen(VALUE str, long pos)
Byte offset to character offset conversion.
VALUE rb_str_equal(VALUE str1, VALUE str2)
Equality of two strings.
st_index_t rb_hash_start(st_index_t i)
Starts a series of hashing.
VALUE rb_str_inspect(VALUE str)
Generates a "readable" version of the receiver.
VALUE rb_str_buf_cat_ascii(VALUE dst, const char *src)
Identical to rb_str_cat_cstr(), except it additionally assumes the source string be a NUL terminated ...
VALUE rb_str_new(const char *ptr, long len)
Allocates an instance of rb_cString.
VALUE rb_check_string_type(VALUE obj)
Try converting an object to its stringised representation using its to_str method,...
VALUE rb_str_resize(VALUE str, long len)
Overwrites the length of the string.
VALUE rb_str_buf_new(long capa)
Allocates a "string buffer".
VALUE rb_str_length(VALUE)
Identical to rb_str_strlen(), except it returns the value in rb_cInteger.
VALUE rb_class_path(VALUE mod)
Identical to rb_mod_name(), except it returns #<Class: ...> style inspection for anonymous modules.
void rb_define_alloc_func(VALUE klass, rb_alloc_func_t func)
Sets the allocator function of a class.
VALUE rb_sym2str(VALUE id)
Identical to rb_id2str(), except it takes an instance of rb_cSymbol rather than an ID.
void rb_define_virtual_variable(const char *name, rb_gvar_getter_t *getter, rb_gvar_setter_t *setter)
Defines a global variable that is purely function-backended.
void rb_define_const(VALUE klass, const char *name, VALUE val)
Defines a Ruby level constant under a namespace.
regex_t * rb_reg_prepare_re(VALUE re, VALUE str)
Exercises various checks and preprocesses so that the given regular expression can be applied to the ...
long rb_reg_search(VALUE re, VALUE str, long pos, int dir)
Runs the passed regular expression over the passed string.
long rb_reg_adjust_startpos(VALUE re, VALUE str, long pos, int dir)
Tell us if this is a wrong idea, but it seems this function has no usage at all.
VALUE rb_reg_regcomp(VALUE str)
Creates a new instance of rb_cRegexp.
VALUE rb_reg_quote(VALUE str)
Escapes any characters that would have special meaning in a regular expression.
VALUE rb_reg_regsub(VALUE repl, VALUE src, struct re_registers *regs, VALUE rexp)
Substitution.
int rb_reg_region_copy(struct re_registers *dst, const struct re_registers *src)
Duplicates a match data.
unsigned long ruby_scan_hex(const char *str, size_t len, size_t *ret)
Interprets the passed string a hexadecimal unsigned integer.
unsigned long ruby_scan_oct(const char *str, size_t len, size_t *consumed)
Interprets the passed string as an octal unsigned integer.
VALUE rb_sprintf(const char *fmt,...)
Ruby's extended sprintf(3).
VALUE rb_str_catf(VALUE dst, const char *fmt,...)
Identical to rb_sprintf(), except it renders the output to the specified object rather than creating a new one.
VALUE rb_yield(VALUE val)
Yields the block.
#define MEMCPY(p1, p2, type, n)
Handy macro to call memcpy.
#define ALLOCA_N(type, n)
#define MEMZERO(p, type, n)
Handy macro to erase a region of memory.
#define RB_GC_GUARD(v)
Prevents premature destruction of local objects.
#define RARRAY_LEN
Just another name of rb_array_len.
#define RARRAY_AREF(a, i)
#define RBASIC(obj)
Convenient casting macro.
#define RGENGC_WB_PROTECTED_REGEXP
This is a compile-time flag to enable/disable write barrier for struct RRegexp.
#define RMATCH(obj)
Convenient casting macro.
static struct re_registers * RMATCH_REGS(VALUE match)
Queries the raw re_registers.
#define RREGEXP(obj)
Convenient casting macro.
static VALUE RREGEXP_SRC(VALUE rexp)
Convenient getter function.
static char * RREGEXP_SRC_PTR(VALUE rexp)
Convenient getter function.
#define RREGEXP_PTR(obj)
Convenient accessor macro.
static long RREGEXP_SRC_LEN(VALUE rexp)
Convenient getter function.
#define StringValue(v)
Ensures that the parameter object is a String.
#define StringValuePtr(v)
Identical to StringValue, except it returns a char*.
static char * RSTRING_END(VALUE str)
Queries the end of the contents pointer of the string.
static char * RSTRING_PTR(VALUE str)
Queries the contents pointer of the string.
#define RSTRING_GETMEM(str, ptrvar, lenvar)
Convenient macro to obtain the contents and length at once.
static long RSTRING_LEN(VALUE str)
Queries the length of the string.
VALUE rb_str_to_str(VALUE obj)
Identical to rb_check_string_type(), except it raises exceptions in case of conversion failures.
#define StringValueCStr(v)
Identical to StringValuePtr, except it additionally checks for the contents for viability as a C string.
#define RTEST
This is an old name of RB_TEST.
#define _(args)
This was a transition path from K&R to ANSI.
VALUE flags
Per-object flags.
Regular expression execution context.
VALUE regexp
The expression of this match.
struct rmatch * rmatch
The result of this match.
VALUE str
The target string that the match was made against.
Ruby's regular expression.
struct RBasic basic
Basic part, including flags and class.
const VALUE src
Source code of this expression.
unsigned long usecnt
Reference count.
struct re_pattern_buffer * ptr
The pattern buffer.
Represents the region of a capture group.
long beg
Beginning of a group.
int char_offset_num_allocated
Number of rmatch_offset that rmatch::char_offset holds.
struct rmatch_offset * char_offset
Capture group offsets, in C array.
struct re_registers regs
"Registers" of a match.
uintptr_t ID
Type that represents a Ruby identifier such as a variable name.
#define SIZEOF_VALUE
Identical to sizeof(VALUE), except it is a macro that can also be used inside of preprocessor directives.
uintptr_t VALUE
Type that represents a Ruby object.
static void Check_Type(VALUE v, enum ruby_value_type t)
Identical to RB_TYPE_P(), except it raises exceptions on predication failure.
static bool RB_TYPE_P(VALUE obj, enum ruby_value_type t)
Queries if the given object is of given type.